You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by ni...@apache.org on 2017/02/26 23:37:47 UTC
[59/72] [abbrv] lucenenet git commit: Lucene.Net.TestFramework:
Renamed Codecs\lucene40\ to Codecs\Lucene40\
Lucene.Net.TestFramework: Renamed Codecs\lucene40\ to Codecs\Lucene40\
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/c0e9469c
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/c0e9469c
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/c0e9469c
Branch: refs/heads/api-work
Commit: c0e9469cc2dabf993d19e1ef342956778dfe686e
Parents: 8304ca8
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Sun Feb 26 03:12:28 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Mon Feb 27 06:17:57 2017 +0700
----------------------------------------------------------------------
.../Codecs/Lucene40/Lucene40DocValuesWriter.cs | 624 +++++++++++++++++++
.../Codecs/Lucene40/Lucene40FieldInfosWriter.cs | 134 ++++
.../Codecs/Lucene40/Lucene40PostingsWriter.cs | 381 +++++++++++
.../Codecs/Lucene40/Lucene40RWCodec.cs | 100 +++
.../Lucene40/Lucene40RWDocValuesFormat.cs | 66 ++
.../Codecs/Lucene40/Lucene40RWNormsFormat.cs | 66 ++
.../Codecs/Lucene40/Lucene40RWPostingsFormat.cs | 84 +++
.../Codecs/Lucene40/Lucene40SkipListWriter.cs | 168 +++++
.../Codecs/lucene40/Lucene40DocValuesWriter.cs | 624 -------------------
.../Codecs/lucene40/Lucene40FieldInfosWriter.cs | 134 ----
.../Codecs/lucene40/Lucene40PostingsWriter.cs | 381 -----------
.../Codecs/lucene40/Lucene40RWCodec.cs | 100 ---
.../lucene40/Lucene40RWDocValuesFormat.cs | 66 --
.../Codecs/lucene40/Lucene40RWNormsFormat.cs | 66 --
.../Codecs/lucene40/Lucene40RWPostingsFormat.cs | 84 ---
.../Codecs/lucene40/Lucene40SkipListWriter.cs | 168 -----
.../Lucene.Net.TestFramework.csproj | 16 +-
17 files changed, 1631 insertions(+), 1631 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c0e9469c/src/Lucene.Net.TestFramework/Codecs/Lucene40/Lucene40DocValuesWriter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.TestFramework/Codecs/Lucene40/Lucene40DocValuesWriter.cs b/src/Lucene.Net.TestFramework/Codecs/Lucene40/Lucene40DocValuesWriter.cs
new file mode 100644
index 0000000..42856fc
--- /dev/null
+++ b/src/Lucene.Net.TestFramework/Codecs/Lucene40/Lucene40DocValuesWriter.cs
@@ -0,0 +1,624 @@
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Linq;
+
+namespace Lucene.Net.Codecs.Lucene40
+{
+ using BytesRef = Lucene.Net.Util.BytesRef;
+ using CompoundFileDirectory = Lucene.Net.Store.CompoundFileDirectory;
+ using Directory = Lucene.Net.Store.Directory;
+ using FieldInfo = Lucene.Net.Index.FieldInfo;
+ using IndexFileNames = Lucene.Net.Index.IndexFileNames;
+ using IndexOutput = Lucene.Net.Store.IndexOutput;
+ using IOUtils = Lucene.Net.Util.IOUtils;
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ using LegacyDocValuesType = Lucene.Net.Codecs.Lucene40.Lucene40FieldInfosReader.LegacyDocValuesType;
+ using PackedInt32s = Lucene.Net.Util.Packed.PackedInt32s;
+ using SegmentWriteState = Lucene.Net.Index.SegmentWriteState;
+
+#pragma warning disable 612, 618
+ internal class Lucene40DocValuesWriter : DocValuesConsumer
+ {
+ private readonly Directory Dir;
+ private readonly SegmentWriteState State;
+ private readonly string LegacyKey;
+ private const string SegmentSuffix = "dv";
+
+ // note: intentionally ignores seg suffix
+ /// <summary>
+ /// Creates a writer whose per-field files live inside a
+ /// <see cref="CompoundFileDirectory"/> named <paramref name="filename"/>.
+ /// </summary>
+ /// <param name="state">Segment write state; supplies the parent directory and IO context.</param>
+ /// <param name="filename">Name handed to the compound file directory.</param>
+ /// <param name="legacyKey">Attribute key under which each field's legacy type name is recorded.</param>
+ internal Lucene40DocValuesWriter(SegmentWriteState state, string filename, string legacyKey)
+ {
+     State = state;
+     LegacyKey = legacyKey;
+     Dir = new CompoundFileDirectory(state.Directory, filename, state.Context, true);
+ }
+
+ /// <summary>
+ /// Writes a numeric doc values field, choosing between the byte, short, int and
+ /// variable-width legacy encodings from the observed value range.
+ /// </summary>
+ /// <param name="field">Field being written; its number becomes part of the file name.</param>
+ /// <param name="values">Per-document values; <c>null</c> entries count as 0.</param>
+ public override void AddNumericField(FieldInfo field, IEnumerable<long?> values)
+ {
+     // first pass over the values: find the range
+     long lowest = long.MaxValue;
+     long highest = long.MinValue;
+     foreach (long? value in values)
+     {
+         long current = value ?? 0L;
+         if (current < lowest)
+         {
+             lowest = current;
+         }
+         if (current > highest)
+         {
+             highest = current;
+         }
+     }
+
+     string segmentAndField = State.SegmentInfo.Name + "_" + Convert.ToString(field.Number);
+     IndexOutput data = Dir.CreateOutput(IndexFileNames.SegmentFileName(segmentAndField, SegmentSuffix, "dat"), State.Context);
+     bool success = false;
+     try
+     {
+         bool fitsByte = lowest >= sbyte.MinValue && highest <= sbyte.MaxValue;
+         bool fitsShort = lowest >= short.MinValue && highest <= short.MaxValue;
+         bool fitsInt = lowest >= int.MinValue && highest <= int.MaxValue;
+
+         // BitsRequired is only evaluated once the matching range check has passed
+         if (fitsByte && PackedInt32s.BitsRequired(highest - lowest) > 4)
+         {
+             // fits in a byte[], would be more than 4bpv, just write byte[]
+             AddBytesField(field, data, values);
+         }
+         else if (fitsShort && PackedInt32s.BitsRequired(highest - lowest) > 8)
+         {
+             // fits in a short[], would be more than 8bpv, just write short[]
+             AddShortsField(field, data, values);
+         }
+         else if (fitsInt && PackedInt32s.BitsRequired(highest - lowest) > 16)
+         {
+             // fits in a int[], would be more than 16bpv, just write int[]
+             AddIntsField(field, data, values);
+         }
+         else
+         {
+             AddVarIntsField(field, data, values, lowest, highest);
+         }
+         success = true;
+     }
+     finally
+     {
+         if (!success)
+         {
+             // an exception is already propagating; do not let Close mask it
+             IOUtils.CloseWhileHandlingException(data);
+         }
+         else
+         {
+             IOUtils.Close(data);
+         }
+     }
+ }
+
+ /// <summary>
+ /// Writes every value as a single byte and tags the field as <c>FIXED_INTS_8</c>.
+ /// </summary>
+ /// <param name="field">Field whose legacy type attribute is set.</param>
+ /// <param name="output">Destination; opened and closed by the caller.</param>
+ /// <param name="values">Per-document values; <c>null</c> is written as 0.</param>
+ private void AddBytesField(FieldInfo field, IndexOutput output, IEnumerable<long?> values)
+ {
+     field.PutAttribute(LegacyKey, LegacyDocValuesType.FIXED_INTS_8.Name);
+     CodecUtil.WriteHeader(output, Lucene40DocValuesFormat.INTS_CODEC_NAME, Lucene40DocValuesFormat.INTS_VERSION_CURRENT);
+     output.WriteInt32(1); // size
+     foreach (long? value in values)
+     {
+         long raw = value ?? 0L;
+         output.WriteByte((byte)raw);
+     }
+ }
+
+ /// <summary>
+ /// Writes every value as a 16-bit integer and tags the field as <c>FIXED_INTS_16</c>.
+ /// </summary>
+ /// <param name="field">Field whose legacy type attribute is set.</param>
+ /// <param name="output">Destination; opened and closed by the caller.</param>
+ /// <param name="values">Per-document values; <c>null</c> is written as 0.</param>
+ private void AddShortsField(FieldInfo field, IndexOutput output, IEnumerable<long?> values)
+ {
+     field.PutAttribute(LegacyKey, LegacyDocValuesType.FIXED_INTS_16.Name);
+     CodecUtil.WriteHeader(output, Lucene40DocValuesFormat.INTS_CODEC_NAME, Lucene40DocValuesFormat.INTS_VERSION_CURRENT);
+     output.WriteInt32(2); // size
+     foreach (long? value in values)
+     {
+         long raw = value ?? 0L;
+         output.WriteInt16((short)raw);
+     }
+ }
+
+ /// <summary>
+ /// Writes every value as a 32-bit integer and tags the field as <c>FIXED_INTS_32</c>.
+ /// </summary>
+ /// <param name="field">Field whose legacy type attribute is set.</param>
+ /// <param name="output">Destination; opened and closed by the caller.</param>
+ /// <param name="values">Per-document values; <c>null</c> is written as 0.</param>
+ private void AddIntsField(FieldInfo field, IndexOutput output, IEnumerable<long?> values)
+ {
+     field.PutAttribute(LegacyKey, LegacyDocValuesType.FIXED_INTS_32.Name);
+     CodecUtil.WriteHeader(output, Lucene40DocValuesFormat.INTS_CODEC_NAME, Lucene40DocValuesFormat.INTS_VERSION_CURRENT);
+     output.WriteInt32(4); // size
+     foreach (long? value in values)
+     {
+         long raw = value ?? 0L;
+         output.WriteInt32((int)raw);
+     }
+ }
+
+ /// <summary>
+ /// Writes the values in the <c>VAR_INTS</c> layout: raw 64-bit values when
+ /// <c>maxValue - minValue</c> is negative, otherwise packed offsets from
+ /// <paramref name="minValue"/>.
+ /// </summary>
+ /// <param name="field">Field whose legacy type attribute is set.</param>
+ /// <param name="output">Destination; opened and closed by the caller.</param>
+ /// <param name="values">Per-document values; <c>null</c> counts as 0.</param>
+ /// <param name="minValue">Smallest value the caller observed.</param>
+ /// <param name="maxValue">Largest value the caller observed.</param>
+ private void AddVarIntsField(FieldInfo field, IndexOutput output, IEnumerable<long?> values, long minValue, long maxValue)
+ {
+     field.PutAttribute(LegacyKey, LegacyDocValuesType.VAR_INTS.Name);
+     CodecUtil.WriteHeader(output, Lucene40DocValuesFormat.VAR_INTS_CODEC_NAME, Lucene40DocValuesFormat.VAR_INTS_VERSION_CURRENT);
+
+     long range = maxValue - minValue;
+     if (range < 0)
+     {
+         // writes longs (the subtraction wrapped around, so offsets cannot be packed)
+         output.WriteByte((byte)Lucene40DocValuesFormat.VAR_INTS_FIXED_64);
+         foreach (long? value in values)
+         {
+             output.WriteInt64(value ?? 0L);
+         }
+         return;
+     }
+
+     // writes packed ints
+     output.WriteByte((byte)Lucene40DocValuesFormat.VAR_INTS_PACKED);
+     output.WriteInt64(minValue);
+     output.WriteInt64(0 - minValue); // default value (representation of 0)
+     PackedInt32s.Writer packed = PackedInt32s.GetWriter(output, State.SegmentInfo.DocCount, PackedInt32s.BitsRequired(range), PackedInt32s.DEFAULT);
+     foreach (long? value in values)
+     {
+         packed.Add((value ?? 0L) - minValue);
+     }
+     packed.Finish();
+ }
+
+ /// <summary>
+ /// Writes a binary doc values field, choosing between the legacy straight and
+ /// deref layouts (each fixed- or variable-length) from the observed values.
+ /// </summary>
+ /// <param name="field">Field being written; its number becomes part of the file names.</param>
+ /// <param name="values">Per-document values; <c>null</c> is treated as an empty value.</param>
+ /// <exception cref="ArgumentException">
+ /// A value is longer than <c>Lucene40DocValuesFormat.MAX_BINARY_FIELD_LENGTH</c>.
+ /// </exception>
+ public override void AddBinaryField(FieldInfo field, IEnumerable<BytesRef> values)
+ {
+     // examine the values to determine best type to use
+     HashSet<BytesRef> uniqueValues = new HashSet<BytesRef>();
+     int minLength = int.MaxValue;
+     int maxLength = int.MinValue;
+
+     // Stream the values rather than snapshotting them into an array: the snapshot was
+     // never read after this loop, cost one reference per document, and would only hold
+     // aliases if the source hands out a reused BytesRef instance (the DeepCopyOf below
+     // suggests that is possible).
+     foreach (BytesRef value in values)
+     {
+         BytesRef b = value ?? new BytesRef(); // 4.0 doesnt distinguish
+         if (b.Length > Lucene40DocValuesFormat.MAX_BINARY_FIELD_LENGTH)
+         {
+             throw new System.ArgumentException("DocValuesField \"" + field.Name + "\" is too large, must be <= " + Lucene40DocValuesFormat.MAX_BINARY_FIELD_LENGTH);
+         }
+         minLength = Math.Min(minLength, b.Length);
+         maxLength = Math.Max(maxLength, b.Length);
+         if (uniqueValues != null)
+         {
+             // stop tracking once there are too many distinct values for dedup to pay off
+             if (uniqueValues.Add(BytesRef.DeepCopyOf(b)) && uniqueValues.Count > 256)
+             {
+                 uniqueValues = null;
+             }
+         }
+     }
+
+     int maxDoc = State.SegmentInfo.DocCount;
+     bool @fixed = minLength == maxLength;
+     bool dedup = uniqueValues != null && uniqueValues.Count * 2 < maxDoc;
+
+     string segmentAndField = State.SegmentInfo.Name + "_" + Convert.ToString(field.Number);
+     string dataName = IndexFileNames.SegmentFileName(segmentAndField, SegmentSuffix, "dat");
+
+     if (!dedup && @fixed)
+     {
+         // we dont deduplicate, just write values straight: fixed byte[] needs only a .dat file
+         IndexOutput straight = Dir.CreateOutput(dataName, State.Context);
+         bool written = false;
+         try
+         {
+             AddFixedStraightBytesField(field, straight, values, minLength);
+             written = true;
+         }
+         finally
+         {
+             if (written)
+             {
+                 IOUtils.Close(straight);
+             }
+             else
+             {
+                 IOUtils.CloseWhileHandlingException(straight);
+             }
+         }
+         return;
+     }
+
+     // every remaining layout writes a .dat/.idx pair
+     string indexName = IndexFileNames.SegmentFileName(segmentAndField, SegmentSuffix, "idx");
+     IndexOutput data = null;
+     IndexOutput index = null;
+     bool success = false;
+     try
+     {
+         data = Dir.CreateOutput(dataName, State.Context);
+         index = Dir.CreateOutput(indexName, State.Context);
+         if (!dedup)
+         {
+             // variable byte[], written straight
+             AddVarStraightBytesField(field, data, index, values);
+         }
+         else if (@fixed)
+         {
+             // we will deduplicate and deref values
+             AddFixedDerefBytesField(field, data, index, values, minLength);
+         }
+         else
+         {
+             AddVarDerefBytesField(field, data, index, values);
+         }
+         success = true;
+     }
+     finally
+     {
+         if (success)
+         {
+             IOUtils.Close(data, index);
+         }
+         else
+         {
+             IOUtils.CloseWhileHandlingException(data, index);
+         }
+     }
+ }
+
+ /// <summary>
+ /// Writes the <c>BYTES_FIXED_STRAIGHT</c> layout: the shared length followed by
+ /// the bytes of each non-null value in document order.
+ /// </summary>
+ /// <param name="field">Field whose legacy type attribute is set.</param>
+ /// <param name="output">Destination; opened and closed by the caller.</param>
+ /// <param name="values">Per-document values; <c>null</c> entries write nothing.</param>
+ /// <param name="length">Length written to the header.</param>
+ private void AddFixedStraightBytesField(FieldInfo field, IndexOutput output, IEnumerable<BytesRef> values, int length)
+ {
+     field.PutAttribute(LegacyKey, LegacyDocValuesType.BYTES_FIXED_STRAIGHT.Name);
+     CodecUtil.WriteHeader(output, Lucene40DocValuesFormat.BYTES_FIXED_STRAIGHT_CODEC_NAME, Lucene40DocValuesFormat.BYTES_FIXED_STRAIGHT_VERSION_CURRENT);
+
+     output.WriteInt32(length);
+     foreach (BytesRef value in values)
+     {
+         if (value == null)
+         {
+             continue;
+         }
+         output.WriteBytes(value.Bytes, value.Offset, value.Length);
+     }
+ }
+
+ // NOTE: 4.0 file format docs are crazy/wrong here...
+ /// <summary>
+ /// Writes the <c>BYTES_VAR_STRAIGHT</c> layout: the data file receives the bytes of
+ /// every non-null value back to back; the index file receives the total byte count
+ /// followed by the packed start offset of each document plus a closing sentinel.
+ /// </summary>
+ /// <param name="field">Field whose legacy type attribute is set.</param>
+ /// <param name="data">Data output; opened and closed by the caller.</param>
+ /// <param name="index">Index output; opened and closed by the caller.</param>
+ /// <param name="values">Per-document values; enumerated twice, <c>null</c> counts as zero bytes.</param>
+ private void AddVarStraightBytesField(FieldInfo field, IndexOutput data, IndexOutput index, IEnumerable<BytesRef> values)
+ {
+     field.PutAttribute(LegacyKey, LegacyDocValuesType.BYTES_VAR_STRAIGHT.Name);
+     CodecUtil.WriteHeader(data, Lucene40DocValuesFormat.BYTES_VAR_STRAIGHT_CODEC_NAME_DAT, Lucene40DocValuesFormat.BYTES_VAR_STRAIGHT_VERSION_CURRENT);
+     CodecUtil.WriteHeader(index, Lucene40DocValuesFormat.BYTES_VAR_STRAIGHT_CODEC_NAME_IDX, Lucene40DocValuesFormat.BYTES_VAR_STRAIGHT_VERSION_CURRENT);
+
+     /* values */
+     long dataStart = data.FilePointer;
+     foreach (BytesRef value in values)
+     {
+         if (value == null)
+         {
+             continue;
+         }
+         data.WriteBytes(value.Bytes, value.Offset, value.Length);
+     }
+
+     /* addresses */
+     long totalBytes = data.FilePointer - dataStart;
+     index.WriteVInt64(totalBytes);
+
+     int docCount = State.SegmentInfo.DocCount;
+     Debug.Assert(docCount != int.MaxValue); // unsupported by the 4.0 impl
+
+     PackedInt32s.Writer addresses = PackedInt32s.GetWriter(index, docCount + 1, PackedInt32s.BitsRequired(totalBytes), PackedInt32s.DEFAULT);
+     long offset = 0;
+     foreach (BytesRef value in values)
+     {
+         addresses.Add(offset);
+         offset += value == null ? 0 : value.Length;
+     }
+     // write sentinel
+     Debug.Assert(offset == totalBytes);
+     addresses.Add(offset);
+     addresses.Finish();
+ }
+
+ /// <summary>
+ /// Writes the <c>BYTES_FIXED_DEREF</c> layout: the data file receives the shared
+ /// length and each distinct value once, in sorted order; the index file receives
+ /// the number of distinct values and one packed ordinal per document.
+ /// </summary>
+ /// <param name="field">Field whose legacy type attribute is set.</param>
+ /// <param name="data">Data output; opened and closed by the caller.</param>
+ /// <param name="index">Index output; opened and closed by the caller.</param>
+ /// <param name="values">Per-document values; enumerated twice, <c>null</c> is treated as an empty value.</param>
+ /// <param name="length">Length written to the data header.</param>
+ private void AddFixedDerefBytesField(FieldInfo field, IndexOutput data, IndexOutput index, IEnumerable<BytesRef> values, int length)
+ {
+     field.PutAttribute(LegacyKey, LegacyDocValuesType.BYTES_FIXED_DEREF.Name);
+
+     CodecUtil.WriteHeader(data, Lucene40DocValuesFormat.BYTES_FIXED_DEREF_CODEC_NAME_DAT, Lucene40DocValuesFormat.BYTES_FIXED_DEREF_VERSION_CURRENT);
+
+     CodecUtil.WriteHeader(index, Lucene40DocValuesFormat.BYTES_FIXED_DEREF_CODEC_NAME_IDX, Lucene40DocValuesFormat.BYTES_FIXED_DEREF_VERSION_CURRENT);
+
+     // deduplicate
+     SortedSet<BytesRef> dictionary = new SortedSet<BytesRef>();
+     foreach (BytesRef v in values)
+     {
+         dictionary.Add(v == null ? new BytesRef() : BytesRef.DeepCopyOf(v));
+     }
+
+     /* values */
+     data.WriteInt32(length);
+     // A value's ordinal is its position in sorted order (the number of smaller
+     // entries), so record it while writing instead of re-scanning the whole
+     // dictionary for every document.
+     Dictionary<BytesRef, int> valueToOrd = new Dictionary<BytesRef, int>(dictionary.Count);
+     int nextOrd = 0;
+     foreach (BytesRef v in dictionary)
+     {
+         valueToOrd[v] = nextOrd++;
+         data.WriteBytes(v.Bytes, v.Offset, v.Length);
+     }
+
+     /* ordinals */
+     int valueCount = dictionary.Count;
+     Debug.Assert(valueCount > 0);
+     index.WriteInt32(valueCount);
+     int maxDoc = State.SegmentInfo.DocCount;
+     PackedInt32s.Writer w = PackedInt32s.GetWriter(index, maxDoc, PackedInt32s.BitsRequired(valueCount - 1), PackedInt32s.DEFAULT);
+
+     BytesRef empty = new BytesRef(); // lookup key for null documents
+     foreach (BytesRef v in values)
+     {
+         w.Add(valueToOrd[v ?? empty]);
+     }
+     w.Finish();
+ }
+
+ /// <summary>
+ /// Writes the <c>BYTES_VAR_DEREF</c> layout: the data file receives each distinct
+ /// value once, in sorted order, prefixed by its length in the
+ /// <see cref="WriteVShort"/> encoding; the index file receives the total data size
+ /// and, per document, the packed address of its value.
+ /// </summary>
+ /// <param name="field">Field whose legacy type attribute is set.</param>
+ /// <param name="data">Data output; opened and closed by the caller.</param>
+ /// <param name="index">Index output; opened and closed by the caller.</param>
+ /// <param name="values">Per-document values; enumerated twice, <c>null</c> is treated as an empty value.</param>
+ private void AddVarDerefBytesField(FieldInfo field, IndexOutput data, IndexOutput index, IEnumerable<BytesRef> values)
+ {
+     field.PutAttribute(LegacyKey, LegacyDocValuesType.BYTES_VAR_DEREF.Name);
+     CodecUtil.WriteHeader(data, Lucene40DocValuesFormat.BYTES_VAR_DEREF_CODEC_NAME_DAT, Lucene40DocValuesFormat.BYTES_VAR_DEREF_VERSION_CURRENT);
+     CodecUtil.WriteHeader(index, Lucene40DocValuesFormat.BYTES_VAR_DEREF_CODEC_NAME_IDX, Lucene40DocValuesFormat.BYTES_VAR_DEREF_VERSION_CURRENT);
+
+     // deduplicate
+     SortedSet<BytesRef> distinct = new SortedSet<BytesRef>();
+     foreach (BytesRef value in values)
+     {
+         BytesRef copy = value == null ? new BytesRef() : BytesRef.DeepCopyOf(value);
+         distinct.Add(copy);
+     }
+
+     /* values */
+     long dataStart = data.FilePointer;
+     long lastAddress = 0;
+     Dictionary<BytesRef, long> addressOf = new Dictionary<BytesRef, long>();
+     foreach (BytesRef entry in distinct)
+     {
+         lastAddress = data.FilePointer - dataStart;
+         addressOf[entry] = lastAddress;
+         WriteVShort(data, entry.Length);
+         data.WriteBytes(entry.Bytes, entry.Offset, entry.Length);
+     }
+
+     /* ordinals */
+     index.WriteInt64(data.FilePointer - dataStart);
+     // the packed width is sized by the largest address, i.e. that of the last entry written
+     PackedInt32s.Writer addresses = PackedInt32s.GetWriter(index, State.SegmentInfo.DocCount, PackedInt32s.BitsRequired(lastAddress), PackedInt32s.DEFAULT);
+
+     foreach (BytesRef value in values)
+     {
+         BytesRef key = value ?? new BytesRef();
+         addresses.Add(addressOf[key]);
+     }
+     addresses.Finish();
+ }
+
+ // the little vint encoding used for var-deref
+ /// <summary>
+ /// Writes <paramref name="i"/> as one byte when it is below 128, otherwise as two
+ /// bytes: the high byte with bit 0x80 set, then the low byte.
+ /// </summary>
+ /// <param name="o">Destination output.</param>
+ /// <param name="i">Value to encode; asserted to lie in [0, <see cref="short.MaxValue"/>].</param>
+ private static void WriteVShort(IndexOutput o, int i)
+ {
+     Debug.Assert(i >= 0 && i <= short.MaxValue);
+     if (i >= 128)
+     {
+         int high = 0x80 | (i >> 8);
+         int low = i & 0xff;
+         o.WriteByte(unchecked((byte)high));
+         o.WriteByte(unchecked((byte)low));
+         return;
+     }
+     o.WriteByte(unchecked((byte)i));
+ }
+
+ /// <summary>
+ /// Writes a sorted doc values field, using the fixed-length layout when all values
+ /// share one length and no document is missing a value, and the variable-length
+ /// layout (with missing ordinals remapped) otherwise.
+ /// </summary>
+ /// <param name="field">Field being written; its number becomes part of the file names.</param>
+ /// <param name="values">The distinct values.</param>
+ /// <param name="docToOrd">Per-document ordinals; -1 marks a document without a value.</param>
+ public override void AddSortedField(FieldInfo field, IEnumerable<BytesRef> values, IEnumerable<long?> docToOrd)
+ {
+     // examine the values to determine best type to use
+     int shortest = int.MaxValue;
+     int longest = int.MinValue;
+     foreach (BytesRef value in values)
+     {
+         shortest = Math.Min(shortest, value.Length);
+         longest = Math.Max(longest, value.Length);
+     }
+
+     // but dont use fixed if there are missing values (we are simulating how lucene40 wrote dv...)
+     bool anyMissing = false;
+     foreach (long? ord in docToOrd)
+     {
+         if (ord.Value == -1)
+         {
+             anyMissing = true;
+             break;
+         }
+     }
+
+     string segmentAndField = State.SegmentInfo.Name + "_" + Convert.ToString(field.Number);
+     string dataName = IndexFileNames.SegmentFileName(segmentAndField, SegmentSuffix, "dat");
+     string indexName = IndexFileNames.SegmentFileName(segmentAndField, SegmentSuffix, "idx");
+
+     IndexOutput data = null;
+     IndexOutput index = null;
+     bool success = false;
+     try
+     {
+         data = Dir.CreateOutput(dataName, State.Context);
+         index = Dir.CreateOutput(indexName, State.Context);
+
+         // var byte[] has three cases for simulating the old writer:
+         // 1. no missing
+         // 2. missing (and empty string in use): remap ord=-1 -> ord=0
+         // 3. missing (and empty string not in use): remap all ords +1, insert empty string into values
+         if (!anyMissing && shortest == longest)
+         {
+             // fixed byte[]
+             AddFixedSortedBytesField(field, data, index, values, docToOrd, shortest);
+         }
+         else if (!anyMissing)
+         {
+             AddVarSortedBytesField(field, data, index, values, docToOrd);
+         }
+         else if (shortest == 0)
+         {
+             AddVarSortedBytesField(field, data, index, values, MissingOrdRemapper.MapMissingToOrd0(docToOrd));
+         }
+         else
+         {
+             AddVarSortedBytesField(field, data, index, MissingOrdRemapper.InsertEmptyValue(values), MissingOrdRemapper.MapAllOrds(docToOrd));
+         }
+         success = true;
+     }
+     finally
+     {
+         if (!success)
+         {
+             IOUtils.CloseWhileHandlingException(data, index);
+         }
+         else
+         {
+             IOUtils.Close(data, index);
+         }
+     }
+ }
+
+ /// <summary>
+ /// Writes the <c>BYTES_FIXED_SORTED</c> layout: the data file receives the shared
+ /// length and every value in the given order; the index file receives the value
+ /// count and one packed ordinal per document.
+ /// </summary>
+ /// <param name="field">Field whose legacy type attribute is set.</param>
+ /// <param name="data">Data output; opened and closed by the caller.</param>
+ /// <param name="index">Index output; opened and closed by the caller.</param>
+ /// <param name="values">The values to write.</param>
+ /// <param name="docToOrd">Per-document ordinals; every entry must be non-null.</param>
+ /// <param name="length">Length written to the data header.</param>
+ private void AddFixedSortedBytesField(FieldInfo field, IndexOutput data, IndexOutput index, IEnumerable<BytesRef> values, IEnumerable<long?> docToOrd, int length)
+ {
+     field.PutAttribute(LegacyKey, LegacyDocValuesType.BYTES_FIXED_SORTED.Name);
+     CodecUtil.WriteHeader(data, Lucene40DocValuesFormat.BYTES_FIXED_SORTED_CODEC_NAME_DAT, Lucene40DocValuesFormat.BYTES_FIXED_SORTED_VERSION_CURRENT);
+     CodecUtil.WriteHeader(index, Lucene40DocValuesFormat.BYTES_FIXED_SORTED_CODEC_NAME_IDX, Lucene40DocValuesFormat.BYTES_FIXED_SORTED_VERSION_CURRENT);
+
+     /* values */
+     data.WriteInt32(length);
+     int written = 0;
+     foreach (BytesRef value in values)
+     {
+         data.WriteBytes(value.Bytes, value.Offset, value.Length);
+         written++;
+     }
+
+     /* ordinals */
+     index.WriteInt32(written);
+     int docCount = State.SegmentInfo.DocCount;
+     Debug.Assert(written > 0);
+     PackedInt32s.Writer ordWriter = PackedInt32s.GetWriter(index, docCount, PackedInt32s.BitsRequired(written - 1), PackedInt32s.DEFAULT);
+     foreach (long? ord in docToOrd)
+     {
+         ordWriter.Add(ord.Value);
+     }
+     ordWriter.Finish();
+ }
+
+ /// <summary>
+ /// Writes the <c>BYTES_VAR_SORTED</c> layout: the data file receives every value
+ /// back to back; the index file receives the total byte count, the packed start
+ /// offset of each value plus a closing sentinel, and one packed ordinal per document.
+ /// </summary>
+ /// <param name="field">Field whose legacy type attribute is set.</param>
+ /// <param name="data">Data output; opened and closed by the caller.</param>
+ /// <param name="index">Index output; opened and closed by the caller.</param>
+ /// <param name="values">The values to write; enumerated twice.</param>
+ /// <param name="docToOrd">Per-document ordinals; every entry must be non-null.</param>
+ private void AddVarSortedBytesField(FieldInfo field, IndexOutput data, IndexOutput index, IEnumerable<BytesRef> values, IEnumerable<long?> docToOrd)
+ {
+     field.PutAttribute(LegacyKey, LegacyDocValuesType.BYTES_VAR_SORTED.Name);
+     CodecUtil.WriteHeader(data, Lucene40DocValuesFormat.BYTES_VAR_SORTED_CODEC_NAME_DAT, Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_CURRENT);
+     CodecUtil.WriteHeader(index, Lucene40DocValuesFormat.BYTES_VAR_SORTED_CODEC_NAME_IDX, Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_CURRENT);
+
+     /* values */
+     long dataStart = data.FilePointer;
+     int written = 0;
+     foreach (BytesRef value in values)
+     {
+         data.WriteBytes(value.Bytes, value.Offset, value.Length);
+         written++;
+     }
+
+     /* addresses */
+     long totalBytes = data.FilePointer - dataStart;
+     index.WriteInt64(totalBytes);
+
+     Debug.Assert(written != int.MaxValue); // unsupported by the 4.0 impl
+
+     PackedInt32s.Writer addresses = PackedInt32s.GetWriter(index, written + 1, PackedInt32s.BitsRequired(totalBytes), PackedInt32s.DEFAULT);
+     long offset = 0;
+     foreach (BytesRef value in values)
+     {
+         addresses.Add(offset);
+         offset += value.Length;
+     }
+     // write sentinel
+     Debug.Assert(offset == totalBytes);
+     addresses.Add(offset);
+     addresses.Finish();
+
+     /* ordinals */
+     int docCount = State.SegmentInfo.DocCount;
+     Debug.Assert(written > 0);
+     PackedInt32s.Writer ordWriter = PackedInt32s.GetWriter(index, docCount, PackedInt32s.BitsRequired(written - 1), PackedInt32s.DEFAULT);
+     foreach (long? ord in docToOrd)
+     {
+         ordWriter.Add(ord.Value);
+     }
+     ordWriter.Finish();
+ }
+
+ /// <summary>
+ /// Always throws: this writer has no SortedSet encoding.
+ /// </summary>
+ /// <exception cref="NotSupportedException">Always.</exception>
+ public override void AddSortedSetField(FieldInfo field, IEnumerable<BytesRef> values, IEnumerable<long?> docToOrdCount, IEnumerable<long?> ords)
+ {
+     const string message = "Lucene 4.0 does not support SortedSet docvalues";
+     throw new NotSupportedException(message);
+ }
+
+ /// <summary>
+ /// Releases the compound file directory created by the constructor.
+ /// </summary>
+ /// <param name="disposing">
+ /// <c>true</c> when managed resources should be released. Following the standard
+ /// .NET dispose pattern, the directory (a managed object) is left untouched
+ /// when this is <c>false</c>.
+ /// </param>
+ protected override void Dispose(bool disposing)
+ {
+     if (disposing)
+     {
+         Dir.Dispose();
+     }
+ }
+ }
+#pragma warning restore 612, 618
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c0e9469c/src/Lucene.Net.TestFramework/Codecs/Lucene40/Lucene40FieldInfosWriter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.TestFramework/Codecs/Lucene40/Lucene40FieldInfosWriter.cs b/src/Lucene.Net.TestFramework/Codecs/Lucene40/Lucene40FieldInfosWriter.cs
new file mode 100644
index 0000000..688e365
--- /dev/null
+++ b/src/Lucene.Net.TestFramework/Codecs/Lucene40/Lucene40FieldInfosWriter.cs
@@ -0,0 +1,134 @@
+using System;
+using System.Diagnostics;
+
+namespace Lucene.Net.Codecs.Lucene40
+{
+ using Directory = Lucene.Net.Store.Directory;
+ using DocValuesType = Lucene.Net.Index.DocValuesType;
+ using FieldInfo = Lucene.Net.Index.FieldInfo;
+ using FieldInfos = Lucene.Net.Index.FieldInfos;
+ using IndexFileNames = Lucene.Net.Index.IndexFileNames;
+ using IndexOutput = Lucene.Net.Store.IndexOutput;
+ using IOContext = Lucene.Net.Store.IOContext;
+ using IOUtils = Lucene.Net.Util.IOUtils;
+ using IndexOptions = Lucene.Net.Index.IndexOptions;
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ using LegacyDocValuesType = Lucene.Net.Codecs.Lucene40.Lucene40FieldInfosReader.LegacyDocValuesType;
+
+ /// <summary>
+ /// Lucene 4.0 FieldInfos writer.
+ /// </summary>
+ /// <remarks>@lucene.experimental</remarks>
+ /// <seealso cref="Lucene40FieldInfosFormat"/>
+ [Obsolete]
+ public class Lucene40FieldInfosWriter : FieldInfosWriter
+ {
+ /// <summary>
+ /// Sole constructor. Takes no arguments and performs no work. </summary>
+ public Lucene40FieldInfosWriter()
+ {
+ }
+
+ /// <summary>
+ /// Writes <paramref name="infos"/> to the segment's field infos file: a codec
+ /// header, the field count, then for each field its name, number, a flags byte,
+ /// a byte packing the doc values and norm types, and its attribute map.
+ /// </summary>
+ /// <param name="directory">Directory in which the file is created.</param>
+ /// <param name="segmentName">Segment name used to build the file name.</param>
+ /// <param name="segmentSuffix">Not used; the file name is always built with an empty suffix.</param>
+ /// <param name="infos">Field infos to write.</param>
+ /// <param name="context">IO context passed to <c>CreateOutput</c>.</param>
+ public override void Write(Directory directory, string segmentName, string segmentSuffix, FieldInfos infos, IOContext context)
+ {
+     IndexOutput output = directory.CreateOutput(IndexFileNames.SegmentFileName(segmentName, "", Lucene40FieldInfosFormat.FIELD_INFOS_EXTENSION), context);
+     bool success = false;
+     try
+     {
+         CodecUtil.WriteHeader(output, Lucene40FieldInfosFormat.CODEC_NAME, Lucene40FieldInfosFormat.FORMAT_CURRENT);
+         output.WriteVInt32(infos.Count);
+         foreach (FieldInfo info in infos)
+         {
+             IndexOptions? options = info.IndexOptions;
+             sbyte flags = 0x0;
+             if (info.HasVectors)
+             {
+                 flags |= Lucene40FieldInfosFormat.STORE_TERMVECTOR;
+             }
+             if (info.OmitsNorms)
+             {
+                 flags |= Lucene40FieldInfosFormat.OMIT_NORMS;
+             }
+             if (info.HasPayloads)
+             {
+                 flags |= Lucene40FieldInfosFormat.STORE_PAYLOADS;
+             }
+             if (info.IsIndexed)
+             {
+                 flags |= Lucene40FieldInfosFormat.IS_INDEXED;
+                 Debug.Assert(options >= IndexOptions.DOCS_AND_FREQS_AND_POSITIONS || !info.HasPayloads);
+                 switch (options)
+                 {
+                     case IndexOptions.DOCS_ONLY:
+                         flags |= Lucene40FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS;
+                         break;
+                     case IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS:
+                         flags |= Lucene40FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS;
+                         break;
+                     case IndexOptions.DOCS_AND_FREQS:
+                         flags |= Lucene40FieldInfosFormat.OMIT_POSITIONS;
+                         break;
+                 }
+             }
+             output.WriteString(info.Name);
+             output.WriteVInt32(info.Number);
+             output.WriteByte((byte)flags);
+
+             // pack the DV types in one byte: norm type in the high nibble, doc values type in the low
+             sbyte dv = DocValuesByte(info.DocValuesType, info.GetAttribute(Lucene40FieldInfosReader.LEGACY_DV_TYPE_KEY));
+             sbyte nrm = DocValuesByte(info.NormType, info.GetAttribute(Lucene40FieldInfosReader.LEGACY_NORM_TYPE_KEY));
+             Debug.Assert((dv & (~0xF)) == 0 && (nrm & (~0x0F)) == 0);
+             byte packedTypes = unchecked((byte)((nrm << 4) | dv));
+             output.WriteByte(packedTypes);
+             output.WriteStringStringMap(info.Attributes);
+         }
+         success = true;
+     }
+     finally
+     {
+         if (!success)
+         {
+             // an exception is already propagating; do not let closing mask it
+             IOUtils.CloseWhileHandlingException(output);
+         }
+         else
+         {
+             output.Dispose();
+         }
+     }
+ }
+
+ /// <summary>
+ /// 4.0-style docvalues byte: 0 when <paramref name="type"/> is <c>null</c>, otherwise
+ /// the ordinal looked up for <paramref name="legacyTypeAtt"/>. </summary>
+ /// <param name="type">Doc values (or norm) type of the field, or <c>null</c> if it has none.</param>
+ /// <param name="legacyTypeAtt">Legacy type name stored in the field's attributes; asserted to be
+ /// <c>null</c> exactly when <paramref name="type"/> is <c>null</c>.</param>
+ /// <returns>The byte to store for this type.</returns>
+ public virtual sbyte DocValuesByte(DocValuesType? type, string legacyTypeAtt)
+ {
+     if (type != null)
+     {
+         Debug.Assert(legacyTypeAtt != null);
+         return (sbyte)LegacyDocValuesType.ordinalLookup[legacyTypeAtt];
+     }
+
+     Debug.Assert(legacyTypeAtt == null);
+     return 0;
+ }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c0e9469c/src/Lucene.Net.TestFramework/Codecs/Lucene40/Lucene40PostingsWriter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.TestFramework/Codecs/Lucene40/Lucene40PostingsWriter.cs b/src/Lucene.Net.TestFramework/Codecs/Lucene40/Lucene40PostingsWriter.cs
new file mode 100644
index 0000000..11e2dc0
--- /dev/null
+++ b/src/Lucene.Net.TestFramework/Codecs/Lucene40/Lucene40PostingsWriter.cs
@@ -0,0 +1,381 @@
+using System.Diagnostics;
+
+namespace Lucene.Net.Codecs.Lucene40
+{
+ using BytesRef = Lucene.Net.Util.BytesRef;
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Consumes doc &amp; freq, writing them using the current
+ /// index file format
+ /// </summary>
+
+ using CorruptIndexException = Lucene.Net.Index.CorruptIndexException;
+ using DataOutput = Lucene.Net.Store.DataOutput;
+ using FieldInfo = Lucene.Net.Index.FieldInfo;
+ using IndexFileNames = Lucene.Net.Index.IndexFileNames;
+ using IndexOptions = Lucene.Net.Index.IndexOptions;
+ using IndexOutput = Lucene.Net.Store.IndexOutput;
+ using IOUtils = Lucene.Net.Util.IOUtils;
+ using SegmentWriteState = Lucene.Net.Index.SegmentWriteState;
+
+ /// <summary>
+ /// Concrete class that writes the 4.0 frq/prx postings format.
+ /// </summary>
+ /// <seealso cref="Lucene40PostingsFormat"/>
+ /// <para/>@lucene.experimental
+#pragma warning disable 612, 618
+ public sealed class Lucene40PostingsWriter : PostingsWriterBase
+ {
+ internal readonly IndexOutput FreqOut;
+ internal readonly IndexOutput ProxOut;
+ internal readonly Lucene40SkipListWriter SkipListWriter;
+
+ /// <summary>
+ /// Expert: The fraction of TermDocs entries stored in skip tables,
+ /// used to accelerate <c>DocsEnum.Advance(int)</c>. Larger values result in
+ /// smaller indexes, greater acceleration, but fewer accelerable cases, while
+ /// smaller values result in bigger indexes, less acceleration and more
+ /// accelerable cases. More detailed experiments would be useful here.
+ /// </summary>
+ internal const int DEFAULT_SKIP_INTERVAL = 16;
+
+ internal readonly int SkipInterval;
+
+ /// <summary>
+ /// Expert: minimum docFreq to write any skip data at all
+ /// </summary>
+ internal readonly int SkipMinimum;
+
+ /// <summary>
+ /// Expert: The maximum number of skip levels. Smaller values result in
+ /// slightly smaller indexes, but slower skipping in big posting lists.
+ /// </summary>
+ internal readonly int MaxSkipLevels = 10;
+
+ internal readonly int TotalNumDocs;
+
+ internal IndexOptions? IndexOptions;
+ internal bool StorePayloads;
+ internal bool StoreOffsets;
+
+ // Starts a new term
+ internal long FreqStart;
+
+ internal long ProxStart;
+ internal FieldInfo FieldInfo;
+ internal int LastPayloadLength;
+ internal int LastOffsetLength;
+ internal int LastPosition;
+ internal int LastOffset;
+
+ internal static readonly StandardTermState EmptyState = new StandardTermState();
+ internal StandardTermState LastState;
+
+ // private String segment;
+
+ /// <summary>
+ /// Creates a <see cref="Lucene40PostingsWriter"/> that uses
+ /// <see cref="DEFAULT_SKIP_INTERVAL"/> as its skip interval.
+ /// </summary>
+ public Lucene40PostingsWriter(SegmentWriteState state)
+ : this(state, DEFAULT_SKIP_INTERVAL)
+ {
+ }
+
+ /// <summary>
+ /// Creates a <see cref="Lucene40PostingsWriter"/>, with the
+ /// specified <paramref name="skipInterval"/> (also used as the skip minimum). Opens the freq output and, only when <c>state.FieldInfos.HasProx</c> is true, the prox output.
+ /// </summary>
+ public Lucene40PostingsWriter(SegmentWriteState state, int skipInterval)
+ : base()
+ {
+ this.SkipInterval = skipInterval;
+ this.SkipMinimum = skipInterval; // set to the same for now
+ // this.segment = state.segmentName;
+ string fileName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, Lucene40PostingsFormat.FREQ_EXTENSION);
+ FreqOut = state.Directory.CreateOutput(fileName, state.Context);
+ bool success = false;
+ IndexOutput proxOut = null;
+ try
+ {
+ CodecUtil.WriteHeader(FreqOut, Lucene40PostingsReader.FRQ_CODEC, Lucene40PostingsReader.VERSION_CURRENT);
+ // TODO: this is a best effort, if one of these fields has no postings
+ // then we make an empty prx file, same as if we are wrapped in
+ // per-field postingsformat. maybe... we shouldn't
+ // bother w/ this opto? just create empty prx file...?
+ if (state.FieldInfos.HasProx)
+ {
+ // At least one field does not omit TF, so create the
+ // prox file
+ fileName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, Lucene40PostingsFormat.PROX_EXTENSION);
+ proxOut = state.Directory.CreateOutput(fileName, state.Context);
+ CodecUtil.WriteHeader(proxOut, Lucene40PostingsReader.PRX_CODEC, Lucene40PostingsReader.VERSION_CURRENT);
+ }
+ else
+ {
+ // Every field omits TF so we will write no prox file
+ proxOut = null;
+ }
+ this.ProxOut = proxOut;
+ success = true;
+ }
+ finally
+ {
+ if (!success)
+ {
+ IOUtils.CloseWhileHandlingException(FreqOut, proxOut); // setup failed part-way: hand both outputs (proxOut may still be null) to the cleanup helper
+ }
+ }
+
+ TotalNumDocs = state.SegmentInfo.DocCount;
+
+ SkipListWriter = new Lucene40SkipListWriter(skipInterval, MaxSkipLevels, TotalNumDocs, FreqOut, proxOut); // proxOut is null when no prox file was created
+ }
+
+ public override void Init(IndexOutput termsOut) // Writes the terms-codec header followed by the three skip settings of this writer.
+ {
+ CodecUtil.WriteHeader(termsOut, Lucene40PostingsReader.TERMS_CODEC, Lucene40PostingsReader.VERSION_CURRENT);
+ termsOut.WriteInt32(SkipInterval); // write skipInterval
+ termsOut.WriteInt32(MaxSkipLevels); // write maxSkipLevels
+ termsOut.WriteInt32(SkipMinimum); // write skipMinimum
+ }
+
+ public override BlockTermState NewTermState() // Returns a fresh StandardTermState, the state type that FinishTerm and EncodeTerm cast to.
+ {
+ return new StandardTermState();
+ }
+
+ public override void StartTerm() // Remembers where this term starts in the freq (and, if present, prox) output and resets per-term state.
+ {
+ FreqStart = FreqOut.FilePointer;
+ //if (DEBUG) System.out.println("SPW: startTerm freqOut.fp=" + freqStart);
+ if (ProxOut != null)
+ {
+ ProxStart = ProxOut.FilePointer;
+ }
+ // force first payload to write its length
+ LastPayloadLength = -1;
+ // force first offset to write its length
+ LastOffsetLength = -1;
+ SkipListWriter.ResetSkip();
+ }
+
+ // Currently, this instance is re-used across fields, so
+ // our parent calls SetField whenever the field changes
+ public override int SetField(FieldInfo fieldInfo)
+ {
+ //System.out.println("SPW: setField");
+ /*
+ if (BlockTreeTermsWriter.DEBUG && fieldInfo.Name.equals("id")) {
+ DEBUG = true;
+ } else {
+ DEBUG = false;
+ }
+ */
+ this.FieldInfo = fieldInfo;
+ IndexOptions = fieldInfo.IndexOptions;
+
+ StoreOffsets = IndexOptions >= Index.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS; // IndexOptions is nullable; a null value makes this lifted comparison false
+ StorePayloads = fieldInfo.HasPayloads;
+ LastState = EmptyState; // the first EncodeTerm delta for this field is taken against the empty state
+ //System.out.println(" set init blockFreqStart=" + freqStart);
+ //System.out.println(" set init blockProxStart=" + proxStart);
+ return 0; // NOTE(review): presumably the number of long metadata values per term (EncodeTerm's long[] parameter is unused here) - confirm against PostingsWriterBase
+ }
+
+ internal int LastDocID;
+ internal int Df;
+
+ public override void StartDoc(int docID, int termDocFreq) // Writes the doc delta (and the term frequency unless the field is DOCS_ONLY) for one document of the current term.
+ {
+ // if (DEBUG) System.out.println("SPW: startDoc seg=" + segment + " docID=" + docID + " tf=" + termDocFreq + " freqOut.fp=" + freqOut.getFilePointer());
+
+ int delta = docID - LastDocID;
+
+ if (docID < 0 || (Df > 0 && delta <= 0)) // the ordering check is skipped for the term's first doc (Df == 0)
+ {
+ throw new CorruptIndexException("docs out of order (" + docID + " <= " + LastDocID + " ) (freqOut: " + FreqOut + ")");
+ }
+
+ if ((++Df % SkipInterval) == 0) // every SkipInterval-th doc of the term buffers a skip entry
+ {
+ SkipListWriter.SetSkipData(LastDocID, StorePayloads, LastPayloadLength, StoreOffsets, LastOffsetLength); // LastDocID is still the previous doc here
+ SkipListWriter.BufferSkip(Df);
+ }
+
+ Debug.Assert(docID < TotalNumDocs, "docID=" + docID + " totalNumDocs=" + TotalNumDocs);
+
+ LastDocID = docID;
+ if (IndexOptions == Index.IndexOptions.DOCS_ONLY)
+ {
+ FreqOut.WriteVInt32(delta);
+ }
+ else if (1 == termDocFreq)
+ {
+ FreqOut.WriteVInt32((delta << 1) | 1); // low bit set: frequency is 1 and is not written separately
+ }
+ else
+ {
+ FreqOut.WriteVInt32(delta << 1); // low bit clear: the frequency follows as its own VInt
+ FreqOut.WriteVInt32(termDocFreq);
+ }
+
+ LastPosition = 0; // AddPosition delta-codes positions and offsets against these, so restart from 0 for each doc
+ LastOffset = 0;
+ }
+
+ /// <summary>
+ /// Adds a new position &amp; payload for the current document, plus its start/end offsets when the field stores offsets. </summary>
+ public override void AddPosition(int position, BytesRef payload, int startOffset, int endOffset)
+ {
+ //if (DEBUG) System.out.println("SPW: addPos pos=" + position + " payload=" + (payload == null ? "null" : (payload.Length + " bytes")) + " proxFP=" + proxOut.getFilePointer());
+ Debug.Assert(IndexOptions >= Index.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, "invalid indexOptions: " + IndexOptions);
+ Debug.Assert(ProxOut != null);
+
+ int delta = position - LastPosition;
+
+ Debug.Assert(delta >= 0, "position=" + position + " lastPosition=" + LastPosition); // not quite right (if pos=0 is repeated twice we don't catch it)
+
+ LastPosition = position;
+
+ int payloadLength = 0;
+
+ if (StorePayloads)
+ {
+ payloadLength = payload == null ? 0 : payload.Length;
+
+ if (payloadLength != LastPayloadLength)
+ {
+ LastPayloadLength = payloadLength;
+ ProxOut.WriteVInt32((delta << 1) | 1); // low bit set: the payload length changed and follows as its own VInt
+ ProxOut.WriteVInt32(payloadLength);
+ }
+ else
+ {
+ ProxOut.WriteVInt32(delta << 1); // low bit clear: same payload length as the previous position
+ }
+ }
+ else
+ {
+ ProxOut.WriteVInt32(delta);
+ }
+
+ if (StoreOffsets)
+ {
+ // don't use startOffset - lastEndOffset, because this creates lots of negative vints for synonyms,
+ // and the numbers aren't that much smaller anyways.
+ int offsetDelta = startOffset - LastOffset;
+ int offsetLength = endOffset - startOffset;
+ Debug.Assert(offsetDelta >= 0 && offsetLength >= 0, "startOffset=" + startOffset + ",lastOffset=" + LastOffset + ",endOffset=" + endOffset);
+ if (offsetLength != LastOffsetLength)
+ {
+ ProxOut.WriteVInt32(offsetDelta << 1 | 1); // low bit set: the offset length changed and follows as its own VInt
+ ProxOut.WriteVInt32(offsetLength);
+ }
+ else
+ {
+ ProxOut.WriteVInt32(offsetDelta << 1);
+ }
+ LastOffset = startOffset;
+ LastOffsetLength = offsetLength;
+ }
+
+ if (payloadLength > 0) // payload bytes are written last, after any offset data
+ {
+ ProxOut.WriteBytes(payload.Bytes, payload.Offset, payloadLength);
+ }
+ }
+
+ public override void FinishDoc() // no-op in this writer
+ {
+ }
+
+ internal class StandardTermState : BlockTermState // per-term pointers filled in by FinishTerm and consumed by EncodeTerm
+ {
+ public long FreqStart; // copied from the writer's FreqStart in FinishTerm
+ public long ProxStart; // copied from the writer's ProxStart in FinishTerm
+ public long SkipOffset; // WriteSkip's return value minus FreqStart, or -1 when Df < SkipMinimum
+ }
+
+ /// <summary>
+ /// Called when we are done adding docs to this term: stores the term's start pointers and skip offset in <paramref name="_state"/>, then resets the per-term counters. </summary>
+ public override void FinishTerm(BlockTermState _state)
+ {
+ StandardTermState state = (StandardTermState)_state;
+ // if (DEBUG) System.out.println("SPW: finishTerm seg=" + segment + " freqStart=" + freqStart);
+ Debug.Assert(state.DocFreq > 0);
+
+ // TODO: wasteful we are counting this (counting # docs
+ // for this term) in two places?
+ Debug.Assert(state.DocFreq == Df);
+ state.FreqStart = FreqStart;
+ state.ProxStart = ProxStart;
+ if (Df >= SkipMinimum) // skip data is only written for terms with at least SkipMinimum docs
+ {
+ state.SkipOffset = SkipListWriter.WriteSkip(FreqOut) - FreqStart;
+ }
+ else
+ {
+ state.SkipOffset = -1; // -1 marks "no skip data"; EncodeTerm checks for it
+ }
+ LastDocID = 0; // reset the per-term counters for the next term
+ Df = 0;
+ }
+
+ public override void EncodeTerm(long[] empty, DataOutput @out, FieldInfo fieldInfo, BlockTermState _state, bool absolute) // Writes the term's file pointers as deltas against the previously encoded term.
+ {
+ StandardTermState state = (StandardTermState)_state;
+ if (absolute)
+ {
+ LastState = EmptyState; // EmptyState is a default-constructed state, so the deltas below are taken against its field values
+ }
+ @out.WriteVInt64(state.FreqStart - LastState.FreqStart);
+ if (state.SkipOffset != -1) // the skip offset is only written when FinishTerm recorded one
+ {
+ Debug.Assert(state.SkipOffset > 0);
+ @out.WriteVInt64(state.SkipOffset);
+ }
+ if (IndexOptions >= Index.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) // uses the writer's IndexOptions field (set in SetField), not the fieldInfo parameter
+ {
+ @out.WriteVInt64(state.ProxStart - LastState.ProxStart);
+ }
+ LastState = state;
+ }
+
+ protected override void Dispose(bool disposing) // Disposes FreqOut and then ProxOut (when one was created); does nothing when disposing is false.
+ {
+ if (disposing)
+ {
+ try
+ {
+ FreqOut.Dispose();
+ }
+ finally // runs even if FreqOut.Dispose() throws, so ProxOut is still disposed
+ {
+ if (ProxOut != null)
+ {
+ ProxOut.Dispose();
+ }
+ }
+ }
+ }
+ }
+#pragma warning restore 612, 618
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c0e9469c/src/Lucene.Net.TestFramework/Codecs/Lucene40/Lucene40RWCodec.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.TestFramework/Codecs/Lucene40/Lucene40RWCodec.cs b/src/Lucene.Net.TestFramework/Codecs/Lucene40/Lucene40RWCodec.cs
new file mode 100644
index 0000000..79fbb42
--- /dev/null
+++ b/src/Lucene.Net.TestFramework/Codecs/Lucene40/Lucene40RWCodec.cs
@@ -0,0 +1,100 @@
+namespace Lucene.Net.Codecs.Lucene40
+{
+ using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Read-write version of Lucene40Codec for testing </summary>
+#pragma warning disable 612, 618
+ public sealed class Lucene40RWCodec : Lucene40Codec
+ {
+ private readonly FieldInfosFormat fieldInfos;
+
+ /// <summary>
+ /// LUCENENET specific
+ /// Creates the codec with OldFormatImpersonationIsActive = true.
+ /// </summary>
+ /// <remarks>
+ /// Added so that SPIClassIterator can locate this Codec. The iterator
+ /// only recognises classes that have empty constructors.
+ /// </remarks>
+ public Lucene40RWCodec()
+ : this(true)
+ { }
+
+ /// <param name="oldFormatImpersonationIsActive">
+ /// LUCENENET specific
+ /// Added to remove dependency on then-static <see cref="LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE"/>. The value is passed unchanged to the field infos, doc values and norms formats created here.
+ /// </param>
+ public Lucene40RWCodec(bool oldFormatImpersonationIsActive) : base()
+ {
+ fieldInfos = new Lucene40FieldInfosFormatAnonymousInnerClassHelper(oldFormatImpersonationIsActive);
+ DocValues = new Lucene40RWDocValuesFormat(oldFormatImpersonationIsActive);
+ Norms = new Lucene40RWNormsFormat(oldFormatImpersonationIsActive);
+ }
+
+ private class Lucene40FieldInfosFormatAnonymousInnerClassHelper : Lucene40FieldInfosFormat
+ {
+ private readonly bool _oldFormatImpersonationIsActive;
+
+ /// <param name="oldFormatImpersonationIsActive">
+ /// LUCENENET specific
+ /// Added to remove dependency on then-static <see cref="LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE"/>
+ /// </param>
+ public Lucene40FieldInfosFormatAnonymousInnerClassHelper(bool oldFormatImpersonationIsActive) : base()
+ {
+ _oldFormatImpersonationIsActive = oldFormatImpersonationIsActive;
+ }
+
+ public override FieldInfosWriter FieldInfosWriter
+ {
+ get
+ {
+ if (!_oldFormatImpersonationIsActive)
+ {
+ return base.FieldInfosWriter; // impersonation off: defer to the base format's writer
+ }
+ else
+ {
+ return new Lucene40FieldInfosWriter(); // impersonation on: a new Lucene40FieldInfosWriter on every access
+ }
+ }
+ }
+ }
+
+ private readonly DocValuesFormat DocValues; // assigned in the constructor; returned by DocValuesFormat
+ private readonly NormsFormat Norms; // assigned in the constructor; returned by NormsFormat
+
+ public override FieldInfosFormat FieldInfosFormat
+ {
+ get { return fieldInfos; }
+ }
+
+ public override DocValuesFormat DocValuesFormat
+ {
+ get { return DocValues; }
+ }
+
+ public override NormsFormat NormsFormat
+ {
+ get { return Norms; }
+ }
+ }
+#pragma warning restore 612, 618
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c0e9469c/src/Lucene.Net.TestFramework/Codecs/Lucene40/Lucene40RWDocValuesFormat.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.TestFramework/Codecs/Lucene40/Lucene40RWDocValuesFormat.cs b/src/Lucene.Net.TestFramework/Codecs/Lucene40/Lucene40RWDocValuesFormat.cs
new file mode 100644
index 0000000..2281475
--- /dev/null
+++ b/src/Lucene.Net.TestFramework/Codecs/Lucene40/Lucene40RWDocValuesFormat.cs
@@ -0,0 +1,66 @@
+namespace Lucene.Net.Codecs.Lucene40
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ using IndexFileNames = Lucene.Net.Index.IndexFileNames;
+ using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;
+ using SegmentWriteState = Lucene.Net.Index.SegmentWriteState;
+
+ /// <summary>
+ /// Read-write version of <seealso cref="Lucene40DocValuesFormat"/> for testing </summary>
+#pragma warning disable 612, 618
+ public class Lucene40RWDocValuesFormat : Lucene40DocValuesFormat
+ {
+ private readonly bool _oldFormatImpersonationIsActive;
+
+ /// <summary>
+ /// LUCENENET specific
+ /// Creates the format with OldFormatImpersonationIsActive = true.
+ /// </summary>
+ /// <remarks>
+ /// Added so that SPIClassIterator can locate this format. The iterator
+ /// only recognises classes that have empty constructors.
+ /// </remarks>
+ public Lucene40RWDocValuesFormat()
+ : this(true)
+ { }
+
+ /// <param name="oldFormatImpersonationIsActive">
+ /// LUCENENET specific
+ /// Added to remove dependency on then-static <see cref="LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE"/>. When false, <see cref="FieldsConsumer(SegmentWriteState)"/> defers to the base class.
+ /// </param>
+ public Lucene40RWDocValuesFormat(bool oldFormatImpersonationIsActive) : base()
+ {
+ _oldFormatImpersonationIsActive = oldFormatImpersonationIsActive;
+ }
+
+ public override DocValuesConsumer FieldsConsumer(SegmentWriteState state)
+ {
+ if (!_oldFormatImpersonationIsActive)
+ {
+ return base.FieldsConsumer(state);
+ }
+ else
+ {
+ string filename = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, "dv", IndexFileNames.COMPOUND_FILE_EXTENSION); // built from the segment name, the "dv" suffix and the compound-file extension
+ return new Lucene40DocValuesWriter(state, filename, Lucene40FieldInfosReader.LEGACY_DV_TYPE_KEY);
+ }
+ }
+ }
+#pragma warning restore 612, 618
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c0e9469c/src/Lucene.Net.TestFramework/Codecs/Lucene40/Lucene40RWNormsFormat.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.TestFramework/Codecs/Lucene40/Lucene40RWNormsFormat.cs b/src/Lucene.Net.TestFramework/Codecs/Lucene40/Lucene40RWNormsFormat.cs
new file mode 100644
index 0000000..0830c86
--- /dev/null
+++ b/src/Lucene.Net.TestFramework/Codecs/Lucene40/Lucene40RWNormsFormat.cs
@@ -0,0 +1,66 @@
+namespace Lucene.Net.Codecs.Lucene40
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ using IndexFileNames = Lucene.Net.Index.IndexFileNames;
+ using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;
+ using SegmentWriteState = Lucene.Net.Index.SegmentWriteState;
+
+ /// <summary>
+ /// Read-write version of <seealso cref="Lucene40NormsFormat"/> for testing </summary>
+#pragma warning disable 612, 618
+ public class Lucene40RWNormsFormat : Lucene40NormsFormat
+ {
+ private readonly bool _oldFormatImpersonationIsActive;
+
+ /// <summary>
+ /// LUCENENET specific
+ /// Creates the format with OldFormatImpersonationIsActive = true.
+ /// </summary>
+ /// <remarks>
+ /// Added so that SPIClassIterator can locate this format. The iterator
+ /// only recognises classes that have empty constructors.
+ /// </remarks>
+ public Lucene40RWNormsFormat()
+ : this(true)
+ { }
+
+ /// <param name="oldFormatImpersonationIsActive">
+ /// LUCENENET specific
+ /// Added to remove dependency on then-static <see cref="LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE"/>. When false, <see cref="NormsConsumer(SegmentWriteState)"/> defers to the base class.
+ /// </param>
+ public Lucene40RWNormsFormat(bool oldFormatImpersonationIsActive) : base()
+ {
+ _oldFormatImpersonationIsActive = oldFormatImpersonationIsActive;
+ }
+
+ public override DocValuesConsumer NormsConsumer(SegmentWriteState state)
+ {
+ if (!_oldFormatImpersonationIsActive)
+ {
+ return base.NormsConsumer(state);
+ }
+ else
+ {
+ string filename = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, "nrm", IndexFileNames.COMPOUND_FILE_EXTENSION); // built from the segment name, the "nrm" suffix and the compound-file extension
+ return new Lucene40DocValuesWriter(state, filename, Lucene40FieldInfosReader.LEGACY_NORM_TYPE_KEY); // same writer class as doc values, keyed by the legacy norm type attribute
+ }
+ }
+ }
+#pragma warning restore 612, 618
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c0e9469c/src/Lucene.Net.TestFramework/Codecs/Lucene40/Lucene40RWPostingsFormat.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.TestFramework/Codecs/Lucene40/Lucene40RWPostingsFormat.cs b/src/Lucene.Net.TestFramework/Codecs/Lucene40/Lucene40RWPostingsFormat.cs
new file mode 100644
index 0000000..7a2c9cf
--- /dev/null
+++ b/src/Lucene.Net.TestFramework/Codecs/Lucene40/Lucene40RWPostingsFormat.cs
@@ -0,0 +1,84 @@
+namespace Lucene.Net.Codecs.Lucene40
+{
+ using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;
+ using SegmentWriteState = Lucene.Net.Index.SegmentWriteState;
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Read-write version of <seealso cref="Lucene40PostingsFormat"/> for testing.
+ /// </summary>
+#pragma warning disable 612, 618
+ public class Lucene40RWPostingsFormat : Lucene40PostingsFormat
+ {
+ private readonly bool _oldFormatImpersonationIsActive;
+
+ /// <summary>
+ /// LUCENENET specific
+ /// Creates the format with OldFormatImpersonationIsActive = true.
+ /// </summary>
+ /// <remarks>
+ /// Added so that SPIClassIterator can locate this format. The iterator
+ /// only recognises classes that have empty constructors.
+ /// </remarks>
+ public Lucene40RWPostingsFormat()
+ : this(true)
+ { }
+
+ /// <param name="oldFormatImpersonationIsActive">
+ /// LUCENENET specific
+ /// Added to remove dependency on then-static <see cref="LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE"/>. When false, <see cref="FieldsConsumer(SegmentWriteState)"/> defers to the base class.
+ /// </param>
+ public Lucene40RWPostingsFormat(bool oldFormatImpersonationIsActive) : base()
+ {
+ _oldFormatImpersonationIsActive = oldFormatImpersonationIsActive;
+ }
+
+ public override FieldsConsumer FieldsConsumer(SegmentWriteState state)
+ {
+ if (!_oldFormatImpersonationIsActive)
+ {
+ return base.FieldsConsumer(state);
+ }
+ else
+ {
+ PostingsWriterBase docs = new Lucene40PostingsWriter(state);
+
+ // TODO: should we make the terms index more easily
+ // pluggable? Ie so that this codec would record which
+ // index impl was used, and switch on loading?
+ // Or... you must make a new Codec for this?
+ bool success = false;
+ try
+ {
+ FieldsConsumer ret = new BlockTreeTermsWriter(state, docs, m_minBlockSize, m_maxBlockSize);
+ success = true;
+ return ret;
+ }
+ finally
+ {
+ if (!success)
+ {
+ docs.Dispose(); // the terms writer was not created, so dispose the postings writer here
+ }
+ }
+ }
+ }
+ }
+#pragma warning restore 612, 618
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c0e9469c/src/Lucene.Net.TestFramework/Codecs/Lucene40/Lucene40SkipListWriter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.TestFramework/Codecs/Lucene40/Lucene40SkipListWriter.cs b/src/Lucene.Net.TestFramework/Codecs/Lucene40/Lucene40SkipListWriter.cs
new file mode 100644
index 0000000..aa8e52e
--- /dev/null
+++ b/src/Lucene.Net.TestFramework/Codecs/Lucene40/Lucene40SkipListWriter.cs
@@ -0,0 +1,168 @@
+using System;
+using System.Diagnostics;
+
+namespace Lucene.Net.Codecs.Lucene40
+{
+ using Lucene.Net.Support;
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ using IndexOutput = Lucene.Net.Store.IndexOutput;
+
+ /// <summary>
+ /// Implements the skip list writer for the 4.0 posting list format
+ /// that stores positions and payloads.
+ /// </summary>
+ /// <seealso cref="Lucene40PostingsFormat"/>
+ /// <para/>Deprecated: only for reading old 4.0 segments.
+ [Obsolete("Only for reading old 4.0 segments")]
+ public class Lucene40SkipListWriter : MultiLevelSkipListWriter
+ {
+ private int[] LastSkipDoc;
+ private int[] LastSkipPayloadLength;
+ private int[] LastSkipOffsetLength;
+ private long[] LastSkipFreqPointer;
+ private long[] LastSkipProxPointer;
+
+ private IndexOutput FreqOutput;
+ private IndexOutput ProxOutput;
+
+ private int CurDoc;
+ private bool CurStorePayloads;
+ private bool CurStoreOffsets;
+ private int CurPayloadLength;
+ private int CurOffsetLength;
+ private long CurFreqPointer;
+ private long CurProxPointer;
+
+ /// <summary>
+ /// Sole constructor: stores the two outputs and allocates one slot per skip level in each per-level tracking array. </summary>
+ public Lucene40SkipListWriter(int skipInterval, int numberOfSkipLevels, int docCount, IndexOutput freqOutput, IndexOutput proxOutput)
+ : base(skipInterval, numberOfSkipLevels, docCount)
+ {
+ this.FreqOutput = freqOutput;
+ this.ProxOutput = proxOutput; // may be null; SetSkipData and ResetSkip check for that
+
+ LastSkipDoc = new int[numberOfSkipLevels];
+ LastSkipPayloadLength = new int[numberOfSkipLevels];
+ LastSkipOffsetLength = new int[numberOfSkipLevels];
+ LastSkipFreqPointer = new long[numberOfSkipLevels];
+ LastSkipProxPointer = new long[numberOfSkipLevels];
+ }
+
+ /// <summary>
+ /// Sets the values for the current skip data; the freq (and, if a prox output exists, prox) file pointers are read from the outputs at call time.
+ /// </summary>
+ public virtual void SetSkipData(int doc, bool storePayloads, int payloadLength, bool storeOffsets, int offsetLength)
+ {
+ Debug.Assert(storePayloads || payloadLength == -1); // a length of -1 is expected when payloads are not stored
+ Debug.Assert(storeOffsets || offsetLength == -1); // likewise for offsets
+ this.CurDoc = doc;
+ this.CurStorePayloads = storePayloads;
+ this.CurPayloadLength = payloadLength;
+ this.CurStoreOffsets = storeOffsets;
+ this.CurOffsetLength = offsetLength;
+ this.CurFreqPointer = FreqOutput.FilePointer;
+ if (ProxOutput != null)
+ {
+ this.CurProxPointer = ProxOutput.FilePointer;
+ }
+ }
+
+ public override void ResetSkip() // Resets the per-level last-skip state: doc 0, lengths -1, pointers at the outputs' current positions.
+ {
+ base.ResetSkip();
+ Arrays.Fill(LastSkipDoc, 0);
+ Arrays.Fill(LastSkipPayloadLength, -1); // we don't have to write the first length in the skip list
+ Arrays.Fill(LastSkipOffsetLength, -1); // we don't have to write the first length in the skip list
+ Arrays.Fill(LastSkipFreqPointer, FreqOutput.FilePointer);
+ if (ProxOutput != null) // without a prox output the prox pointers are left untouched
+ {
+ Arrays.Fill(LastSkipProxPointer, ProxOutput.FilePointer);
+ }
+ }
+
+ protected override void WriteSkipData(int level, IndexOutput skipBuffer) // Writes one skip entry for the given level from the values captured by SetSkipData.
+ {
+ // To efficiently store payloads/offsets in the posting lists we do not store the length of
+ // every payload/offset. Instead we omit the length if the previous lengths were the same
+ //
+ // However, in order to support skipping, the length at every skip point must be known.
+ // So we use the same length encoding that we use for the posting lists for the skip data as well:
+ // Case 1: current field does not store payloads/offsets
+ // SkipDatum --> DocSkip, FreqSkip, ProxSkip
+ // DocSkip,FreqSkip,ProxSkip --> VInt
+ // DocSkip records the document number before every SkipInterval-th document in TermFreqs.
+ // Document numbers are represented as differences from the previous value in the sequence.
+ // Case 2: current field stores payloads/offsets
+ // SkipDatum --> DocSkip, PayloadLength?,OffsetLength?,FreqSkip,ProxSkip
+ // DocSkip,FreqSkip,ProxSkip --> VInt
+ // PayloadLength,OffsetLength--> VInt
+ // In this case DocSkip/2 is the difference between
+ // the current and the previous value. If DocSkip
+ // is odd, then the PayloadLength and/or OffsetLength (whichever the field stores) follow as VInts;
+ // if DocSkip is even, then it is assumed that the
+ // current payload/offset lengths equal the lengths at the previous
+ // skip point
+ int delta = CurDoc - LastSkipDoc[level];
+
+ if (CurStorePayloads || CurStoreOffsets)
+ {
+ Debug.Assert(CurStorePayloads || CurPayloadLength == LastSkipPayloadLength[level]);
+ Debug.Assert(CurStoreOffsets || CurOffsetLength == LastSkipOffsetLength[level]);
+
+ if (CurPayloadLength == LastSkipPayloadLength[level] && CurOffsetLength == LastSkipOffsetLength[level])
+ {
+ // the current payload/offset lengths equal the lengths at the previous skip point,
+ // so we don't store the lengths again
+ skipBuffer.WriteVInt32(delta << 1);
+ }
+ else
+ {
+ // the payload and/or offset length is different from the previous one. We shift the DocSkip,
+ // set the lowest bit and store the current payload and/or offset lengths as VInts.
+ skipBuffer.WriteVInt32(delta << 1 | 1);
+
+ if (CurStorePayloads)
+ {
+ skipBuffer.WriteVInt32(CurPayloadLength);
+ LastSkipPayloadLength[level] = CurPayloadLength;
+ }
+ if (CurStoreOffsets)
+ {
+ skipBuffer.WriteVInt32(CurOffsetLength);
+ LastSkipOffsetLength[level] = CurOffsetLength;
+ }
+ }
+ }
+ else
+ {
+ // current field does not store payloads or offsets
+ skipBuffer.WriteVInt32(delta);
+ }
+
+ skipBuffer.WriteVInt32((int)(CurFreqPointer - LastSkipFreqPointer[level])); // pointer deltas are narrowed from long to int before being written
+ skipBuffer.WriteVInt32((int)(CurProxPointer - LastSkipProxPointer[level]));
+
+ LastSkipDoc[level] = CurDoc;
+
+ LastSkipFreqPointer[level] = CurFreqPointer;
+ LastSkipProxPointer[level] = CurProxPointer;
+ }
+ }
+}
\ No newline at end of file