You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by pa...@apache.org on 2013/11/07 14:53:45 UTC

[30/53] [abbrv] git commit: Port Facet.Codecs

Port Facet.Codecs


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/9e1b6df7
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/9e1b6df7
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/9e1b6df7

Branch: refs/heads/branch_4x
Commit: 9e1b6df70f570cfe529c9bcd9c9097b780aa3360
Parents: 2332015
Author: Paul Irwin <pa...@gmail.com>
Authored: Sun Nov 3 19:15:58 2013 -0500
Committer: Paul Irwin <pa...@gmail.com>
Committed: Sun Nov 3 19:15:58 2013 -0500

----------------------------------------------------------------------
 .../Codecs/Facet42/Facet42BinaryDocValues.cs    |  33 ++++++
 .../Facet/Codecs/Facet42/Facet42Codec.cs        |  48 ++++++++
 .../Codecs/Facet42/Facet42DocValuesConsumer.cs  | 113 +++++++++++++++++++
 .../Codecs/Facet42/Facet42DocValuesFormat.cs    |  32 ++++++
 .../Codecs/Facet42/Facet42DocValuesProducer.cs  |  71 ++++++++++++
 src/contrib/Facet/Contrib.Facet.csproj          |   5 +
 src/core/Codecs/Lucene42/Lucene42Codec.cs       |   4 +-
 7 files changed, 304 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9e1b6df7/src/contrib/Facet/Codecs/Facet42/Facet42BinaryDocValues.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Facet/Codecs/Facet42/Facet42BinaryDocValues.cs b/src/contrib/Facet/Codecs/Facet42/Facet42BinaryDocValues.cs
new file mode 100644
index 0000000..fdaffd5
--- /dev/null
+++ b/src/contrib/Facet/Codecs/Facet42/Facet42BinaryDocValues.cs
@@ -0,0 +1,57 @@
+using Lucene.Net.Index;
+using Lucene.Net.Store;
+using Lucene.Net.Util;
+using Lucene.Net.Util.Packed;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+namespace Lucene.Net.Facet.Codecs.Facet42
+{
+    /// <summary>
+    /// <see cref="BinaryDocValues"/> backed by one in-memory byte buffer plus a
+    /// packed table of addresses into it, both read from a
+    /// <see cref="DataInput"/> at construction time.
+    /// </summary>
+    internal class Facet42BinaryDocValues : BinaryDocValues
+    {
+        // Single buffer that Get() slices; it is never copied.
+        private readonly sbyte[] bytes;
+
+        // addresses.Get(d) is where document d starts inside "bytes"; the entry at
+        // d + 1 is read as the end of that document's value.
+        private readonly PackedInts.IReader addresses;
+
+        /// <summary>
+        /// Reads the buffer length (VInt), then that many bytes, then the packed
+        /// address table, in that order, from <paramref name="in_renamed"/>.
+        /// </summary>
+        /// <param name="in_renamed">Input positioned at the start of one field's data.</param>
+        internal Facet42BinaryDocValues(DataInput in_renamed)
+        {
+            int payloadLength = in_renamed.ReadVInt();
+            sbyte[] payload = new sbyte[payloadLength];
+            in_renamed.ReadBytes(payload, 0, payloadLength);
+
+            bytes = payload;
+            addresses = PackedInts.GetReader(in_renamed);
+        }
+
+        /// <summary>
+        /// Points <paramref name="ret"/> at the slice of the shared buffer that belongs
+        /// to <paramref name="docID"/>. No bytes are copied.
+        /// </summary>
+        /// <param name="docID">Document whose value is requested.</param>
+        /// <param name="ret">Receives the buffer reference, offset and length.</param>
+        public override void Get(int docID, BytesRef ret)
+        {
+            int start = (int)addresses.Get(docID);
+            ret.bytes = bytes;
+            ret.offset = start;
+
+            long end = addresses.Get(docID + 1);
+            ret.length = (int)(end - start);
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9e1b6df7/src/contrib/Facet/Codecs/Facet42/Facet42Codec.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Facet/Codecs/Facet42/Facet42Codec.cs b/src/contrib/Facet/Codecs/Facet42/Facet42Codec.cs
new file mode 100644
index 0000000..58fa223
--- /dev/null
+++ b/src/contrib/Facet/Codecs/Facet42/Facet42Codec.cs
@@ -0,0 +1,67 @@
+using Lucene.Net.Codecs;
+using Lucene.Net.Codecs.Lucene42;
+using Lucene.Net.Facet.Params;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+namespace Lucene.Net.Facet.Codecs.Facet42
+{
+    /// <summary>
+    /// A <see cref="Lucene42Codec"/> that routes the doc values of facet fields to the
+    /// "Facet42" format and those of every other field to the "Lucene42" format.
+    /// </summary>
+    public class Facet42Codec : Lucene42Codec
+    {
+        private readonly ISet<String> facetFields;
+        private readonly DocValuesFormat facetsDVFormat = DocValuesFormat.ForName(@"Facet42");
+        private readonly DocValuesFormat lucene42DVFormat = DocValuesFormat.ForName(@"Lucene42");
+
+        /// <summary>
+        /// Creates the codec from <see cref="FacetIndexingParams.DEFAULT"/>.
+        /// </summary>
+        public Facet42Codec()
+            : this(FacetIndexingParams.DEFAULT)
+        {
+        }
+
+        /// <summary>
+        /// Creates the codec, treating the field of every category list in
+        /// <paramref name="fip"/> as a facet field.
+        /// </summary>
+        /// <param name="fip">Indexing parameters that name the facet fields.</param>
+        /// <exception cref="ArgumentException">
+        /// <paramref name="fip"/> has a partition size other than <c>int.MaxValue</c>.
+        /// </exception>
+        public Facet42Codec(FacetIndexingParams fip)
+        {
+            bool usesPartitions = fip.PartitionSize != int.MaxValue;
+            if (usesPartitions)
+            {
+                throw new ArgumentException("this Codec does not support partitions");
+            }
+
+            ISet<String> fieldNames = new HashSet<String>();
+            foreach (CategoryListParams listParams in fip.AllCategoryListParams)
+            {
+                fieldNames.Add(listParams.field);
+            }
+
+            this.facetFields = fieldNames;
+        }
+
+        /// <summary>
+        /// Chooses the doc values format for <paramref name="field"/>.
+        /// </summary>
+        /// <param name="field">Name of the field to choose a format for.</param>
+        /// <returns>
+        /// The "Facet42" format when <paramref name="field"/> is one of the facet fields
+        /// collected by the constructor; the "Lucene42" format otherwise.
+        /// </returns>
+        public override DocValuesFormat GetDocValuesFormatForField(string field)
+        {
+            return facetFields.Contains(field) ? facetsDVFormat : lucene42DVFormat;
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9e1b6df7/src/contrib/Facet/Codecs/Facet42/Facet42DocValuesConsumer.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Facet/Codecs/Facet42/Facet42DocValuesConsumer.cs b/src/contrib/Facet/Codecs/Facet42/Facet42DocValuesConsumer.cs
new file mode 100644
index 0000000..bf0b4d4
--- /dev/null
+++ b/src/contrib/Facet/Codecs/Facet42/Facet42DocValuesConsumer.cs
@@ -0,0 +1,165 @@
+using Lucene.Net.Codecs;
+using Lucene.Net.Index;
+using Lucene.Net.Store;
+using Lucene.Net.Util;
+using Lucene.Net.Util.Packed;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+namespace Lucene.Net.Facet.Codecs.Facet42
+{
+    /// <summary>
+    /// Writes binary doc values into one file per segment. Numeric, sorted and
+    /// sorted-set fields are rejected with <see cref="NotSupportedException"/>.
+    /// </summary>
+    /// <remarks>
+    /// Layout written by this class: codec header; then, per field, the field number
+    /// (VInt), the total value length (VInt), the bytes of all values, and a packed
+    /// table of start addresses plus one end address; finally the VInt <c>-1</c>.
+    /// </remarks>
+    public class Facet42DocValuesConsumer : DocValuesConsumer
+    {
+        readonly IndexOutput output;
+        readonly int maxDoc;
+        readonly float acceptableOverheadRatio;
+
+        /// <summary>
+        /// Creates a consumer that passes <c>PackedInts.DEFAULT</c> as the acceptable
+        /// overhead ratio.
+        /// </summary>
+        /// <param name="state">Write state describing the target segment.</param>
+        public Facet42DocValuesConsumer(SegmentWriteState state)
+            : this(state, PackedInts.DEFAULT)
+        {
+        }
+
+        /// <summary>
+        /// Creates the segment's output file and writes the codec header to it.
+        /// </summary>
+        /// <param name="state">Supplies directory, segment name and suffix, IO context and document count.</param>
+        /// <param name="acceptableOverheadRatio">Forwarded to <c>PackedInts.GetWriter</c> for the address table.</param>
+        public Facet42DocValuesConsumer(SegmentWriteState state, float acceptableOverheadRatio)
+        {
+            this.acceptableOverheadRatio = acceptableOverheadRatio;
+            bool success = false;
+            try
+            {
+                string fileName = IndexFileNames.SegmentFileName(state.segmentInfo.name, state.segmentSuffix, Facet42DocValuesFormat.EXTENSION);
+                output = state.directory.CreateOutput(fileName, state.context);
+                CodecUtil.WriteHeader(output, Facet42DocValuesFormat.CODEC, Facet42DocValuesFormat.VERSION_CURRENT);
+                maxDoc = state.segmentInfo.DocCount;
+                success = true;
+            }
+            finally
+            {
+                if (!success)
+                {
+                    // "output" is still null here if CreateOutput itself failed;
+                    // Dispose(bool) tolerates that.
+                    IOUtils.CloseWhileHandlingException((IDisposable)this);
+                }
+            }
+        }
+
+        /// <summary>Always throws: only binary fields are supported.</summary>
+        /// <exception cref="NotSupportedException">Always.</exception>
+        public override void AddNumericField(FieldInfo field, IEnumerable<long> values)
+        {
+            throw new NotSupportedException(@"FacetsDocValues can only handle binary fields");
+        }
+
+        /// <summary>
+        /// Writes one binary field: its number, the total byte count, the bytes of
+        /// every value, then each value's start address followed by the end address.
+        /// </summary>
+        /// <param name="field">Field whose number tags the data in the file.</param>
+        /// <param name="values">
+        /// The field's values. Enumerated three times, so every pass must yield the
+        /// same sequence.
+        /// </param>
+        /// <exception cref="InvalidOperationException">
+        /// The values total more than <c>int.MaxValue</c> bytes.
+        /// </exception>
+        public override void AddBinaryField(FieldInfo field, IEnumerable<BytesRef> values)
+        {
+            output.WriteVInt(field.number);
+            long totBytes = 0;
+            foreach (BytesRef v in values)
+            {
+                totBytes += v.length;
+            }
+
+            if (totBytes > int.MaxValue)
+            {
+                throw new InvalidOperationException(@"too many facets in one segment: Facet42DocValues cannot handle more than 2 GB facet data per segment");
+            }
+
+            output.WriteVInt((int)totBytes);
+            foreach (BytesRef v in values)
+            {
+                output.WriteBytes(v.bytes, v.offset, v.length);
+            }
+
+            PackedInts.Writer w = PackedInts.GetWriter(output, maxDoc + 1, PackedInts.BitsRequired(totBytes + 1), acceptableOverheadRatio);
+            int address = 0;
+            foreach (BytesRef v in values)
+            {
+                w.Add(address);
+                address += v.length;
+            }
+
+            // Trailing entry: end of the last value, so a reader can take lengths as differences.
+            w.Add(address);
+            w.Finish();
+        }
+
+        /// <summary>Always throws: only binary fields are supported.</summary>
+        /// <exception cref="NotSupportedException">Always.</exception>
+        public override void AddSortedField(FieldInfo field, IEnumerable<BytesRef> values, IEnumerable<int> docToOrd)
+        {
+            throw new NotSupportedException(@"FacetsDocValues can only handle binary fields");
+        }
+
+        /// <summary>Always throws: only binary fields are supported.</summary>
+        /// <exception cref="NotSupportedException">Always.</exception>
+        public override void AddSortedSetField(FieldInfo field, IEnumerable<BytesRef> values, IEnumerable<int> docToOrdCount, IEnumerable<long> ords)
+        {
+            throw new NotSupportedException(@"FacetsDocValues can only handle binary fields");
+        }
+
+        /// <summary>
+        /// Writes the end marker (VInt <c>-1</c>) and closes the output.
+        /// </summary>
+        /// <param name="disposing">Not consulted by this implementation.</param>
+        protected override void Dispose(bool disposing)
+        {
+            // The constructor passes this instance to CloseWhileHandlingException when
+            // it fails, possibly before "output" was assigned. Nothing was opened in
+            // that case, so there is nothing to finish or close.
+            if (output == null)
+            {
+                return;
+            }
+
+            bool success = false;
+            try
+            {
+                output.WriteVInt(-1);
+                success = true;
+            }
+            finally
+            {
+                if (success)
+                {
+                    IOUtils.Close(output);
+                }
+                else
+                {
+                    IOUtils.CloseWhileHandlingException((IDisposable)output);
+                }
+            }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9e1b6df7/src/contrib/Facet/Codecs/Facet42/Facet42DocValuesFormat.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Facet/Codecs/Facet42/Facet42DocValuesFormat.cs b/src/contrib/Facet/Codecs/Facet42/Facet42DocValuesFormat.cs
new file mode 100644
index 0000000..4de3192
--- /dev/null
+++ b/src/contrib/Facet/Codecs/Facet42/Facet42DocValuesFormat.cs
@@ -0,0 +1,49 @@
+using Lucene.Net.Codecs;
+using Lucene.Net.Index;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+namespace Lucene.Net.Facet.Codecs.Facet42
+{
+    /// <summary>
+    /// Doc values format named "Facet42". It hands out a
+    /// <see cref="Facet42DocValuesConsumer"/> for writing and a
+    /// <see cref="Facet42DocValuesProducer"/> for reading.
+    /// </summary>
+    public sealed class Facet42DocValuesFormat : DocValuesFormat
+    {
+        /// <summary>Codec name written to, and checked against, the file header.</summary>
+        public const string CODEC = @"FacetsDocValues";
+
+        /// <summary>Extension of the per-segment data file.</summary>
+        public const string EXTENSION = @"fdv";
+
+        /// <summary>First version of the file format.</summary>
+        public const int VERSION_START = 0;
+
+        /// <summary>Version written by the consumer; currently the first one.</summary>
+        public const int VERSION_CURRENT = VERSION_START;
+
+        /// <summary>Creates the format, passing the name "Facet42" to the base class.</summary>
+        public Facet42DocValuesFormat()
+            : base(@"Facet42")
+        {
+        }
+
+        /// <summary>Creates the writer for the segment described by <paramref name="state"/>.</summary>
+        public override DocValuesConsumer FieldsConsumer(SegmentWriteState state)
+        {
+            DocValuesConsumer consumer = new Facet42DocValuesConsumer(state);
+            return consumer;
+        }
+
+        /// <summary>Creates the reader for the segment described by <paramref name="state"/>.</summary>
+        public override DocValuesProducer FieldsProducer(SegmentReadState state)
+        {
+            DocValuesProducer producer = new Facet42DocValuesProducer(state);
+            return producer;
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9e1b6df7/src/contrib/Facet/Codecs/Facet42/Facet42DocValuesProducer.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Facet/Codecs/Facet42/Facet42DocValuesProducer.cs b/src/contrib/Facet/Codecs/Facet42/Facet42DocValuesProducer.cs
new file mode 100644
index 0000000..6ec8239
--- /dev/null
+++ b/src/contrib/Facet/Codecs/Facet42/Facet42DocValuesProducer.cs
@@ -0,0 +1,95 @@
+using Lucene.Net.Codecs;
+using Lucene.Net.Index;
+using Lucene.Net.Store;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+namespace Lucene.Net.Facet.Codecs.Facet42
+{
+    /// <summary>
+    /// Loads every binary field of a segment's facet doc values file into memory at
+    /// construction time and serves them by field number. Only binary doc values are
+    /// implemented.
+    /// </summary>
+    internal class Facet42DocValuesProducer : DocValuesProducer
+    {
+        // Field number -> values read for that field.
+        private readonly IDictionary<int, Facet42BinaryDocValues> fields = new HashMap<int, Facet42BinaryDocValues>();
+
+        /// <summary>
+        /// Opens the segment's data file, checks its header, reads fields until the
+        /// <c>-1</c> end marker, and closes the file again before returning.
+        /// </summary>
+        /// <param name="state">Supplies directory, segment name and suffix, and IO context.</param>
+        internal Facet42DocValuesProducer(SegmentReadState state)
+        {
+            string fileName = IndexFileNames.SegmentFileName(state.segmentInfo.name, state.segmentSuffix, Facet42DocValuesFormat.EXTENSION);
+            IndexInput input = state.directory.OpenInput(fileName, state.context);
+            bool success = false;
+            try
+            {
+                CodecUtil.CheckHeader(input, Facet42DocValuesFormat.CODEC, Facet42DocValuesFormat.VERSION_START, Facet42DocValuesFormat.VERSION_START);
+
+                // Each entry is a field number followed by that field's data.
+                for (int fieldNumber = input.ReadVInt(); fieldNumber != -1; fieldNumber = input.ReadVInt())
+                {
+                    fields[fieldNumber] = new Facet42BinaryDocValues(input);
+                }
+
+                success = true;
+            }
+            finally
+            {
+                if (!success)
+                {
+                    IOUtils.CloseWhileHandlingException((IDisposable)input);
+                }
+                else
+                {
+                    IOUtils.Close(input);
+                }
+            }
+        }
+
+        /// <summary>Always throws: only binary doc values are implemented.</summary>
+        /// <exception cref="NotSupportedException">Always.</exception>
+        public override NumericDocValues GetNumeric(FieldInfo field)
+        {
+            throw new NotSupportedException(@"FacetsDocValues only implements binary");
+        }
+
+        /// <summary>
+        /// Looks up the values loaded for <paramref name="field"/> by its number.
+        /// </summary>
+        public override BinaryDocValues GetBinary(FieldInfo field)
+        {
+            int fieldNumber = field.number;
+            return fields[fieldNumber];
+        }
+
+        /// <summary>Always throws: only binary doc values are implemented.</summary>
+        /// <exception cref="NotSupportedException">Always.</exception>
+        public override SortedDocValues GetSorted(FieldInfo field)
+        {
+            throw new NotSupportedException(@"FacetsDocValues only implements binary");
+        }
+
+        /// <summary>Always throws: only binary doc values are implemented.</summary>
+        /// <exception cref="NotSupportedException">Always.</exception>
+        public override SortedSetDocValues GetSortedSet(FieldInfo field)
+        {
+            throw new NotSupportedException(@"FacetsDocValues only implements binary");
+        }
+
+        /// <summary>
+        /// Does nothing: the constructor has already closed the input file.
+        /// </summary>
+        protected override void Dispose(bool disposing)
+        {
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9e1b6df7/src/contrib/Facet/Contrib.Facet.csproj
----------------------------------------------------------------------
diff --git a/src/contrib/Facet/Contrib.Facet.csproj b/src/contrib/Facet/Contrib.Facet.csproj
index ff48e89..750ab35 100644
--- a/src/contrib/Facet/Contrib.Facet.csproj
+++ b/src/contrib/Facet/Contrib.Facet.csproj
@@ -39,6 +39,11 @@
     <Reference Include="System.Xml" />
   </ItemGroup>
   <ItemGroup>
+    <Compile Include="Codecs\Facet42\Facet42BinaryDocValues.cs" />
+    <Compile Include="Codecs\Facet42\Facet42Codec.cs" />
+    <Compile Include="Codecs\Facet42\Facet42DocValuesConsumer.cs" />
+    <Compile Include="Codecs\Facet42\Facet42DocValuesFormat.cs" />
+    <Compile Include="Codecs\Facet42\Facet42DocValuesProducer.cs" />
     <Compile Include="Collections\LRUHashMap.cs" />
     <Compile Include="Encoding\DGapVInt8IntDecoder.cs" />
     <Compile Include="Encoding\DGapVInt8IntEncoder.cs" />

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9e1b6df7/src/core/Codecs/Lucene42/Lucene42Codec.cs
----------------------------------------------------------------------
diff --git a/src/core/Codecs/Lucene42/Lucene42Codec.cs b/src/core/Codecs/Lucene42/Lucene42Codec.cs
index 2484e34..c8c766e 100644
--- a/src/core/Codecs/Lucene42/Lucene42Codec.cs
+++ b/src/core/Codecs/Lucene42/Lucene42Codec.cs
@@ -88,12 +88,12 @@ namespace Lucene.Net.Codecs.Lucene42
             get { return liveDocsFormat; }
         }
 
-        public PostingsFormat GetPostingsFormatForField(String field)
+        public virtual PostingsFormat GetPostingsFormatForField(String field)
         {
             return defaultFormat;
         }
 
-        public DocValuesFormat GetDocValuesFormatForField(String field)
+        public virtual DocValuesFormat GetDocValuesFormatForField(String field)
         {
             return defaultDVFormat;
         }