You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by pn...@apache.org on 2014/09/15 00:06:11 UTC
[3/5] Lucene.Net.Codecs BlockTerms converted from java to C#
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b0601f29/src/Lucene.Net.Codecs/BlockTerms/BlockTermsWriter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Codecs/BlockTerms/BlockTermsWriter.cs b/src/Lucene.Net.Codecs/BlockTerms/BlockTermsWriter.cs
index eaf7afa..ca9c2a7 100644
--- a/src/Lucene.Net.Codecs/BlockTerms/BlockTermsWriter.cs
+++ b/src/Lucene.Net.Codecs/BlockTerms/BlockTermsWriter.cs
@@ -20,9 +20,9 @@ namespace Lucene.Net.Codecs.BlockTerms
using System;
using System.Collections.Generic;
using System.Diagnostics;
- using Lucene.Net.Index;
- using Lucene.Net.Store;
- using Lucene.Net.Util;
+ using Index;
+ using Store;
+ using Util;
/// <summary>
/// Writes terms dict, block-encoding (column stride) each term's metadata
@@ -49,37 +49,37 @@ namespace Lucene.Net.Codecs.BlockTerms
/** Extension of terms file */
public const String TERMS_EXTENSION = "tib";
- protected IndexOutput output;
- protected readonly PostingsWriterBase postingsWriter;
- protected readonly FieldInfos fieldInfos;
- protected FieldInfo currentField;
- private readonly TermsIndexWriterBase termsIndexWriter;
- private readonly List<FieldMetaData> fields = new List<FieldMetaData>();
+ private IndexOutput _output;
+ protected readonly PostingsWriterBase PostingsWriter;
+ protected readonly FieldInfos FieldInfos;
+ protected FieldInfo CurrentField;
+ private readonly TermsIndexWriterBase _termsIndexWriter;
+ private readonly List<FieldMetaData> _fields = new List<FieldMetaData>();
public BlockTermsWriter(TermsIndexWriterBase termsIndexWriter,
SegmentWriteState state, PostingsWriterBase postingsWriter)
{
- String termsFileName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix,
+ var termsFileName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix,
TERMS_EXTENSION);
- this.termsIndexWriter = termsIndexWriter;
- output = state.Directory.CreateOutput(termsFileName, state.Context);
- bool success = false;
+ _termsIndexWriter = termsIndexWriter;
+ _output = state.Directory.CreateOutput(termsFileName, state.Context);
+ var success = false;
try
{
- fieldInfos = state.FieldInfos;
- WriteHeader(output);
- currentField = null;
- this.postingsWriter = postingsWriter;
+ FieldInfos = state.FieldInfos;
+ WriteHeader(_output);
+ CurrentField = null;
+ PostingsWriter = postingsWriter;
- postingsWriter.Init(output); // have consumer write its format/header
+ postingsWriter.Init(_output); // have consumer write its format/header
success = true;
}
finally
{
if (!success)
{
- IOUtils.CloseWhileHandlingException(output);
+ IOUtils.CloseWhileHandlingException(_output);
}
}
}
@@ -91,57 +91,55 @@ namespace Lucene.Net.Codecs.BlockTerms
public override TermsConsumer AddField(FieldInfo field)
{
- Debug.Assert(currentField == null || currentField.Name.CompareTo(field.Name) < 0);
+ Debug.Assert(CurrentField == null || CurrentField.Name.CompareTo(field.Name) < 0);
- currentField = field;
- var fiw = termsIndexWriter.AddField(field, output.FilePointer);
- return new TermsWriter(fiw, field, postingsWriter);
+ CurrentField = field;
+ var fiw = _termsIndexWriter.AddField(field, _output.FilePointer);
+ return new TermsWriter(fiw, field, PostingsWriter, this);
}
public override void Dispose()
{
- if (output != null)
+ if (_output == null) return;
+
+ try
{
- try
- {
- long dirStart = output.FilePointer;
+ var dirStart = _output.FilePointer;
- output.WriteVInt(fields.Size);
+ _output.WriteVInt(_fields.Count);
- foreach (var field in fields)
+ foreach (var field in _fields)
+ {
+ _output.WriteVInt(field.FieldInfo.Number);
+ _output.WriteVLong(field.NumTerms);
+ _output.WriteVLong(field.TermsStartPointer);
+ if (field.FieldInfo.FieldIndexOptions != FieldInfo.IndexOptions.DOCS_ONLY)
{
- output.WriteVInt(field.FieldInfo.Number);
- output.WriteVLong(field.NumTerms);
- output.WriteVLong(field.TermsStartPointer);
- if (field.FieldInfo.FieldIndexOptions != FieldInfo.IndexOptions.DOCS_ONLY)
- {
- output.WriteVLong(field.SumTotalTermFreq);
- }
- output.WriteVLong(field.SumDocFreq);
- output.WriteVInt(field.DocCount);
- if (VERSION_CURRENT >= VERSION_META_ARRAY)
- {
- output.WriteVInt(field.LongsSize);
- }
-
+ _output.WriteVLong(field.SumTotalTermFreq);
}
- WriteTrailer(dirStart);
- CodecUtil.WriteFooter(output);
- }
- finally
- {
- IOUtils.Close(output, postingsWriter, termsIndexWriter);
- output = null;
+ _output.WriteVLong(field.SumDocFreq);
+ _output.WriteVInt(field.DocCount);
+ if (VERSION_CURRENT >= VERSION_META_ARRAY)
+ {
+ _output.WriteVInt(field.LongsSize);
+ }
+
}
+ WriteTrailer(dirStart);
+ CodecUtil.WriteFooter(_output);
+ }
+ finally
+ {
+ IOUtils.Close(_output, PostingsWriter, _termsIndexWriter);
+ _output = null;
}
}
private void WriteTrailer(long dirStart)
{
- output.WriteLong(dirStart);
+ _output.WriteLong(dirStart);
}
-
protected class FieldMetaData
{
public FieldInfo FieldInfo { get; private set; }
@@ -175,49 +173,53 @@ namespace Lucene.Net.Codecs.BlockTerms
public class TermsWriter : TermsConsumer
{
- private readonly FieldInfo fieldInfo;
- private readonly PostingsWriterBase postingsWriter;
- private readonly long termsStartPointer;
-
- private readonly BytesRef lastPrevTerm = new BytesRef();
- private readonly TermsIndexWriterBase.FieldWriter fieldIndexWriter;
-
- private long numTerms;
- private long sumTotalTermFreq;
- private long sumDocFreq;
- private int docCount;
- private int longsSize;
-
- private TermEntry[] pendingTerms;
-
- private int pendingCount;
-
- private TermsWriter(
+ private readonly RAMOutputStream _bytesWriter = new RAMOutputStream();
+ private readonly RAMOutputStream _bufferWriter = new RAMOutputStream();
+ private readonly BytesRef _lastPrevTerm = new BytesRef();
+
+ private readonly FieldInfo _fieldInfo;
+ private readonly PostingsWriterBase _postingsWriter;
+ private readonly long _termsStartPointer;
+ private readonly TermsIndexWriterBase.FieldWriter _fieldIndexWriter;
+ private readonly BlockTermsWriter _btw;
+
+ private TermEntry[] _pendingTerms;
+ private int _pendingCount;
+
+ private long _numTerms;
+ private long _sumTotalTermFreq;
+ private long _sumDocFreq;
+ private int _docCount;
+ private readonly int _longsSize;
+
+ public TermsWriter(
TermsIndexWriterBase.FieldWriter fieldIndexWriter,
FieldInfo fieldInfo,
- PostingsWriterBase postingsWriter)
+ PostingsWriterBase postingsWriter, BlockTermsWriter btw)
{
- this.fieldInfo = fieldInfo;
- this.fieldIndexWriter = fieldIndexWriter;
- pendingTerms = new TermEntry[32];
- for (int i = 0; i < pendingTerms.Length; i++)
+ _fieldInfo = fieldInfo;
+ _fieldIndexWriter = fieldIndexWriter;
+ _btw = btw;
+
+ _pendingTerms = new TermEntry[32];
+ for (int i = 0; i < _pendingTerms.Length; i++)
{
- pendingTerms[i] = new TermEntry();
+ _pendingTerms[i] = new TermEntry();
}
- termsStartPointer = output.FilePointer;
- this.postingsWriter = postingsWriter;
- this.longsSize = postingsWriter.SetField(fieldInfo);
+ _termsStartPointer = _btw._output.FilePointer;
+ _postingsWriter = postingsWriter;
+ _longsSize = postingsWriter.SetField(fieldInfo);
}
- public override IComparer<BytesRef> Comparator()
+ public override IComparer<BytesRef> Comparator
{
- return BytesRef.UTF8SortedAsUnicodeComparer;
- }
-
+ get { return BytesRef.UTF8SortedAsUnicodeComparer; }
+ }
+
public override PostingsConsumer StartTerm(BytesRef text)
{
- postingsWriter.StartTerm();
- return postingsWriter;
+ _postingsWriter.StartTerm();
+ return _postingsWriter;
}
public override void FinishTerm(BytesRef text, TermStats stats)
@@ -225,77 +227,79 @@ namespace Lucene.Net.Codecs.BlockTerms
Debug.Assert(stats.DocFreq > 0);
- bool isIndexTerm = fieldIndexWriter.CheckIndexTerm(text, stats);
+ var isIndexTerm = _fieldIndexWriter.CheckIndexTerm(text, stats);
if (isIndexTerm)
{
- if (pendingCount > 0)
+ if (_pendingCount > 0)
{
// Instead of writing each term, live, we gather terms
// in RAM in a pending buffer, and then write the
// entire block in between index terms:
FlushBlock();
}
- fieldIndexWriter.Add(text, stats, output.FilePointer);
+ _fieldIndexWriter.Add(text, stats, _btw._output.FilePointer);
}
- if (pendingTerms.Length == pendingCount)
+ if (_pendingTerms.Length == _pendingCount)
{
- TermEntry[] newArray =
- new TermEntry[ArrayUtil.Oversize(pendingCount + 1, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
- System.Arraycopy(pendingTerms, 0, newArray, 0, pendingCount);
- for (int i = pendingCount; i < newArray.Length; i++)
+ var newArray =
+ new TermEntry[ArrayUtil.Oversize(_pendingCount + 1, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
+ Array.Copy(_pendingTerms, 0, newArray, 0, _pendingCount);
+ for (var i = _pendingCount; i < newArray.Length; i++)
{
newArray[i] = new TermEntry();
}
- pendingTerms = newArray;
+ _pendingTerms = newArray;
}
- TermEntry te = pendingTerms[pendingCount];
+ var te = _pendingTerms[_pendingCount];
te.Term.CopyBytes(text);
- te.State = postingsWriter.NewTermState();
+ te.State = _postingsWriter.NewTermState();
te.State.DocFreq = stats.DocFreq;
te.State.TotalTermFreq = stats.TotalTermFreq;
- postingsWriter.FinishTerm(te.State);
+ _postingsWriter.FinishTerm(te.State);
- pendingCount++;
- numTerms++;
+ _pendingCount++;
+ _numTerms++;
}
// Finishes all terms in this field
public override void Finish(long sumTotalTermFreq, long sumDocFreq, int docCount)
{
- if (pendingCount > 0)
+ if (_pendingCount > 0)
{
FlushBlock();
}
// EOF marker:
- output.WriteVInt(0);
+ _btw._output.WriteVInt(0);
- this.sumTotalTermFreq = sumTotalTermFreq;
- this.sumDocFreq = sumDocFreq;
- this.docCount = docCount;
- fieldIndexWriter.Finish(output.FilePointer);
+ _sumTotalTermFreq = sumTotalTermFreq;
+ _sumDocFreq = sumDocFreq;
+ _docCount = docCount;
+ _fieldIndexWriter.Finish(_btw._output.FilePointer);
- if (numTerms > 0)
+ if (_numTerms > 0)
{
- fields.Add(new FieldMetaData(fieldInfo,
- numTerms,
- termsStartPointer,
+ _btw._fields.Add(new FieldMetaData(_fieldInfo,
+ _numTerms,
+ _termsStartPointer,
sumTotalTermFreq,
sumDocFreq,
docCount,
- longsSize));
+ _longsSize));
}
}
- private int SharedPrefix(BytesRef term1, BytesRef term2)
+ private static int SharedPrefix(BytesRef term1, BytesRef term2)
{
Debug.Assert(term1.Offset == 0);
Debug.Assert(term2.Offset == 0);
- int pos1 = 0;
- int pos1End = pos1 + Math.Min(term1.Length, term2.Length);
- int pos2 = 0;
+
+ var pos1 = 0;
+ var pos1End = pos1 + Math.Min(term1.Length, term2.Length);
+ var pos2 = 0;
+
while (pos1 < pos1End)
{
if (term1.Bytes[pos1] != term2.Bytes[pos2])
@@ -305,82 +309,80 @@ namespace Lucene.Net.Codecs.BlockTerms
pos1++;
pos2++;
}
+
return pos1;
}
- private readonly RAMOutputStream bytesWriter = new RAMOutputStream();
- private readonly RAMOutputStream bufferWriter = new RAMOutputStream();
-
private void FlushBlock()
{
// First pass: compute common prefix for all terms
// in the block, against term before first term in
// this block:
- int commonPrefix = SharedPrefix(lastPrevTerm, pendingTerms[0].Term);
- for (int termCount = 1; termCount < pendingCount; termCount++)
+ int commonPrefix = SharedPrefix(_lastPrevTerm, _pendingTerms[0].Term);
+ for (int termCount = 1; termCount < _pendingCount; termCount++)
{
commonPrefix = Math.Min(commonPrefix,
- SharedPrefix(lastPrevTerm,
- pendingTerms[termCount].Term));
+ SharedPrefix(_lastPrevTerm,
+ _pendingTerms[termCount].Term));
}
- output.WriteVInt(pendingCount);
- output.WriteVInt(commonPrefix);
+ _btw._output.WriteVInt(_pendingCount);
+ _btw._output.WriteVInt(commonPrefix);
// 2nd pass: write suffixes, as separate byte[] blob
- for (int termCount = 0; termCount < pendingCount; termCount++)
+ for (var termCount = 0; termCount < _pendingCount; termCount++)
{
- int suffix = pendingTerms[termCount].Term.Length - commonPrefix;
+ var suffix = _pendingTerms[termCount].Term.Length - commonPrefix;
// TODO: cutover to better intblock codec, instead
// of interleaving here:
- bytesWriter.WriteVInt(suffix);
- bytesWriter.WriteBytes(pendingTerms[termCount].Term.Bytes, commonPrefix, suffix);
+ _bytesWriter.WriteVInt(suffix);
+ _bytesWriter.WriteBytes(_pendingTerms[termCount].Term.Bytes, commonPrefix, suffix);
}
- output.WriteVInt((int) bytesWriter.FilePointer);
- bytesWriter.WriteTo(output);
- bytesWriter.Reset();
+ _btw._output.WriteVInt((int)_bytesWriter.FilePointer);
+ _bytesWriter.WriteTo(_btw._output);
+ _bytesWriter.Reset();
// 3rd pass: write the freqs as byte[] blob
// TODO: cutover to better intblock codec. simple64?
// write prefix, suffix first:
- for (int termCount = 0; termCount < pendingCount; termCount++)
+ for (int termCount = 0; termCount < _pendingCount; termCount++)
{
- BlockTermState state = pendingTerms[termCount].State;
+ BlockTermState state = _pendingTerms[termCount].State;
Debug.Assert(state != null);
- bytesWriter.WriteVInt(state.DocFreq);
- if (fieldInfo.FieldIndexOptions != FieldInfo.IndexOptions.DOCS_ONLY)
+ _bytesWriter.WriteVInt(state.DocFreq);
+ if (_fieldInfo.FieldIndexOptions != FieldInfo.IndexOptions.DOCS_ONLY)
{
- bytesWriter.WriteVLong(state.TotalTermFreq - state.DocFreq);
+ _bytesWriter.WriteVLong(state.TotalTermFreq - state.DocFreq);
}
}
- output.WriteVInt((int) bytesWriter.FilePointer);
- bytesWriter.WriteTo(output);
- bytesWriter.Reset();
+ _btw._output.WriteVInt((int)_bytesWriter.FilePointer);
+ _bytesWriter.WriteTo(_btw._output);
+ _bytesWriter.Reset();
// 4th pass: write the metadata
- var longs = new long[longsSize];
+ var longs = new long[_longsSize];
bool absolute = true;
- for (int termCount = 0; termCount < pendingCount; termCount++)
+ for (int termCount = 0; termCount < _pendingCount; termCount++)
{
- BlockTermState state = pendingTerms[termCount].State;
- postingsWriter.EncodeTerm(longs, bufferWriter, fieldInfo, state, absolute);
- for (int i = 0; i < longsSize; i++)
+ BlockTermState state = _pendingTerms[termCount].State;
+ _postingsWriter.EncodeTerm(longs, _bufferWriter, _fieldInfo, state, absolute);
+ for (int i = 0; i < _longsSize; i++)
{
- bytesWriter.WriteVLong(longs[i]);
+ _bytesWriter.WriteVLong(longs[i]);
}
- bufferWriter.WriteTo(bytesWriter);
- bufferWriter.Reset();
+ _bufferWriter.WriteTo(_bytesWriter);
+ _bufferWriter.Reset();
absolute = false;
}
- output.WriteVInt((int) bytesWriter.FilePointer);
- bytesWriter.WriteTo(output);
- bytesWriter.Reset();
+ _btw._output.WriteVInt((int)_bytesWriter.FilePointer);
+ _bytesWriter.WriteTo(_btw._output);
+ _bytesWriter.Reset();
- lastPrevTerm.CopyBytes(pendingTerms[pendingCount - 1].Term);
- pendingCount = 0;
+ _lastPrevTerm.CopyBytes(_pendingTerms[_pendingCount - 1].Term);
+ _pendingCount = 0;
}
}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b0601f29/src/Lucene.Net.Codecs/BlockTerms/FixedGapTermsIndexReader.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Codecs/BlockTerms/FixedGapTermsIndexReader.cs b/src/Lucene.Net.Codecs/BlockTerms/FixedGapTermsIndexReader.cs
index d2aa242..ec45b30 100644
--- a/src/Lucene.Net.Codecs/BlockTerms/FixedGapTermsIndexReader.cs
+++ b/src/Lucene.Net.Codecs/BlockTerms/FixedGapTermsIndexReader.cs
@@ -43,118 +43,114 @@ namespace Lucene.Net.Codecs.BlockTerms
// will overflow int during those multiplies. So to avoid
// having to upgrade each multiple to long in multiple
// places (error prone), we use long here:
- private long totalIndexInterval;
-
- private int indexDivisor;
+ private readonly long _totalIndexInterval;
+ private readonly int _indexDivisor;
private readonly int indexInterval;
// Closed if indexLoaded is true:
- private IndexInput input;
- private volatile bool indexLoaded;
-
- private readonly IComparer<BytesRef> termComp;
+ private readonly IndexInput _input;
- private static readonly int PAGED_BYTES_BITS = 15;
+ private volatile bool _indexLoaded;
+ private readonly IComparer<BytesRef> _termComp;
+ private const int PAGED_BYTES_BITS = 15;
// all fields share this single logical byte[]
- private readonly PagedBytes termBytes = new PagedBytes(PAGED_BYTES_BITS);
- private PagedBytes.Reader termBytesReader;
-
- private readonly Dictionary<FieldInfo, FieldIndexData> fields = new Dictionary<FieldInfo, FieldIndexData>();
+ private readonly PagedBytes _termBytes = new PagedBytes(PAGED_BYTES_BITS);
+ private readonly PagedBytes.Reader _termBytesReader;
+ private readonly Dictionary<FieldInfo, FieldIndexData> _fields = new Dictionary<FieldInfo, FieldIndexData>();
// start of the field info data
- private long dirOffset;
-
- private readonly int version;
+ private long _dirOffset;
+ private readonly int _version;
public FixedGapTermsIndexReader(Directory dir, FieldInfos fieldInfos, String segment, int indexDivisor,
IComparer<BytesRef> termComp, String segmentSuffix, IOContext context)
{
- this.termComp = termComp;
+ _termComp = termComp;
- Debug.Assert(indexDivisor == -1 || indexDivisor > 0;
+ Debug.Assert(indexDivisor == -1 || indexDivisor > 0);
- input =
+ _input =
dir.OpenInput(
IndexFileNames.SegmentFileName(segment, segmentSuffix,
FixedGapTermsIndexWriter.TERMS_INDEX_EXTENSION),
context);
- bool success = false;
+ var success = false;
try
{
- version = ReadHeader(input);
+ _version = ReadHeader(_input);
- if (version >= FixedGapTermsIndexWriter.VERSION_CHECKSUM)
- CodecUtil.ChecksumEntireFile(input);
+ if (_version >= FixedGapTermsIndexWriter.VERSION_CHECKSUM)
+ CodecUtil.ChecksumEntireFile(_input);
- indexInterval = input.ReadInt();
+ indexInterval = _input.ReadInt();
if (indexInterval < 1)
{
throw new CorruptIndexException(String.Format("Invalid indexInterval: {0}, Resource: {1}",
- indexInterval, input));
+ indexInterval, _input));
}
- this.indexDivisor = indexDivisor;
+ _indexDivisor = indexDivisor;
if (indexDivisor < 0)
{
- totalIndexInterval = indexInterval;
+ _totalIndexInterval = indexInterval;
}
else
{
// In case terms index gets loaded, later, on demand
- totalIndexInterval = indexInterval*indexDivisor;
+ _totalIndexInterval = indexInterval*indexDivisor;
}
- Debug.Assert(totalIndexInterval > 0);
+ Debug.Assert(_totalIndexInterval > 0);
- SeekDir(input, dirOffset);
+ SeekDir(_input, _dirOffset);
// Read directory
- int numFields = input.ReadVInt();
+ int numFields = _input.ReadVInt();
if (numFields < 0)
throw new CorruptIndexException(String.Format("Invalid numFields: {0}, Resource: {1}", numFields,
- input));
+ _input));
for (int i = 0; i < numFields; i++)
{
- int field = input.ReadVInt();
- int numIndexTerms = input.ReadVInt();
+ int field = _input.ReadVInt();
+ int numIndexTerms = _input.ReadVInt();
if (numIndexTerms < 0)
throw new CorruptIndexException(String.Format("Invalid numIndexTerms: {0}, Resource: {1}",
numIndexTerms,
- input));
+ _input));
- long termsStart = input.ReadVLong();
- long indexStart = input.ReadVLong();
- long packedIndexStart = input.ReadVLong();
- long packedOffsetsStart = input.ReadVLong();
+ long termsStart = _input.ReadVLong();
+ long indexStart = _input.ReadVLong();
+ long packedIndexStart = _input.ReadVLong();
+ long packedOffsetsStart = _input.ReadVLong();
if (packedIndexStart < indexStart)
throw new CorruptIndexException(
String.Format(
"Invalid packedIndexStart: {0}, IndexStart: {1}, NumIndexTerms: {2}, Resource: {3}",
packedIndexStart,
- indexStart, numIndexTerms, input));
+ indexStart, numIndexTerms, _input));
FieldInfo fieldInfo = fieldInfos.FieldInfo(field);
try
{
- fields.Add(fieldInfo,
- new FieldIndexData(fieldInfo, numIndexTerms, indexStart, termsStart, packedIndexStart,
- packedOffsetsStart));
+ _fields.Add(fieldInfo,
+ new FieldIndexData(numIndexTerms, indexStart, termsStart, packedIndexStart,
+ packedOffsetsStart, this));
}
catch (ArgumentException)
{
throw new CorruptIndexException(String.Format("Duplicate field: {0}, Resource {1}",
fieldInfo.Name,
- input));
+ _input));
}
@@ -165,65 +161,117 @@ namespace Lucene.Net.Codecs.BlockTerms
{
if (!success)
{
- IOUtils.CloseWhileHandlingException(input);
+ IOUtils.CloseWhileHandlingException(_input);
}
if (indexDivisor > 0)
{
- input.Dispose();
- input = null;
+ _input.Dispose();
+ _input = null;
if (success)
- indexLoaded = true;
+ _indexLoaded = true;
- termBytesReader = termBytes.Freeze(true);
+ _termBytesReader = _termBytes.Freeze(true);
}
}
}
public override int Divisor
{
- get { return indexDivisor; }
+ get { return _indexDivisor; }
}
- private int ReadHeader(IndexInput input)
+ private int ReadHeader(DataInput input)
{
- int version = CodecUtil.CheckHeader(input, FixedGapTermsIndexWriter.CODEC_NAME,
+ var version = CodecUtil.CheckHeader(input, FixedGapTermsIndexWriter.CODEC_NAME,
FixedGapTermsIndexWriter.VERSION_START, FixedGapTermsIndexWriter.VERSION_CURRENT);
+
if (version < FixedGapTermsIndexWriter.VERSION_APPEND_ONLY)
- dirOffset = input.ReadLong();
+ _dirOffset = input.ReadLong();
return version;
}
+ public override bool SupportsOrd
+ {
+ get { return true; }
+ }
+
+ public override FieldIndexEnum GetFieldEnum(FieldInfo fieldInfo)
+ {
+ FieldIndexData fieldData = _fields[fieldInfo];
+ return fieldData.CoreIndex == null ? null : new IndexEnum(fieldData.CoreIndex, this);
+ }
+
+ public override void Dispose()
+ {
+ if (_input != null && !_indexLoaded)
+ _input.Dispose();
+ }
+
+ private void SeekDir(IndexInput input, long dirOffset)
+ {
+ if (_version >= FixedGapTermsIndexWriter.VERSION_CHECKSUM)
+ {
+ input.Seek(input.Length() - CodecUtil.FooterLength() - 8);
+ dirOffset = input.ReadLong();
+
+ }
+ else if (_version >= FixedGapTermsIndexWriter.VERSION_APPEND_ONLY)
+ {
+ input.Seek(input.Length() - 8);
+ dirOffset = input.ReadLong();
+ }
+
+ input.Seek(dirOffset);
+ }
+
+ public override long RamBytesUsed
+ {
+ get
+ {
+ var sizeInBytes = ((_termBytes != null) ? _termBytes.RamBytesUsed() : 0) +
+ ((_termBytesReader != null) ? _termBytesReader.RamBytesUsed() : 0);
+
+ return _fields.Values.Aggregate(sizeInBytes,
+ (current, entry) => (current + entry.CoreIndex.RamBytesUsed));
+ }
+ }
+
private class IndexEnum : FieldIndexEnum
{
- private readonly FieldIndexData.CoreFieldIndex fieldIndex;
- public override long Ord { get; set; }
+ private readonly FieldIndexData.CoreFieldIndex _fieldIndex;
+ private readonly FixedGapTermsIndexReader _fgtir;
- public IndexEnum(FieldIndexData.CoreFieldIndex fieldIndex)
+ public IndexEnum(FieldIndexData.CoreFieldIndex fieldIndex, FixedGapTermsIndexReader fgtir)
{
Term = new BytesRef();
- this.fieldIndex = fieldIndex;
+ _fieldIndex = fieldIndex;
+ _fgtir = fgtir;
}
- public override BytesRef Term { get; set; }
+ public override long Ord { get; set; }
+
+ public override sealed BytesRef Term { get; set; }
public override long Seek(BytesRef target)
{
- int lo = 0; // binary search
- int hi = fieldIndex.numIndexTerms - 1;
- Debug.Assert(totalIndexInterval > 0, "totalIndexInterval=" + totalIndexInterval);
+ var lo = 0; // binary search
+ var hi = _fieldIndex.NumIndexTerms - 1;
+
+ Debug.Assert(_fgtir._totalIndexInterval > 0,
+ String.Format("TotalIndexInterval: {0}", _fgtir._totalIndexInterval));
+ long offset;
+ int length;
while (hi >= lo)
{
- int mid = (lo + hi) >> > 1;
+ var mid = (int)((uint)(lo + hi) >> 1);
- readonly
- long offset = fieldIndex.termOffsets.get(mid);
- readonly
- int length = (int) (fieldIndex.termOffsets.Get(1 + mid) - offset);
- termBytesReader.FillSlice(Term, fieldIndex.termBytesStart + offset, length);
+ offset = _fieldIndex.TermOffsets.Get(mid);
+ length = (int) (_fieldIndex.TermOffsets.Get(1 + mid) - offset);
+ _fgtir._termBytesReader.FillSlice(Term, _fieldIndex.TermBytesStart + offset, length);
- int delta = termComp.compare(target, term);
+ int delta = _fgtir._termComp.Compare(target, Term);
if (delta < 0)
{
hi = mid - 1;
@@ -234,155 +282,161 @@ namespace Lucene.Net.Codecs.BlockTerms
}
else
{
- Debug.Assert(mid >= 0;
- ord = mid*totalIndexInterval;
- return fieldIndex.termsStart + fieldIndex.termsDictOffsets.get(mid);
+ Debug.Assert(mid >= 0);
+ Ord = mid * _fgtir._totalIndexInterval;
+ return _fieldIndex.TermsStart + _fieldIndex.TermsDictOffsets.Get(mid);
}
}
if (hi < 0)
{
- Debug.Assert(hi == -1;
+ Debug.Assert(hi == -1);
hi = 0;
}
-
- long offset = fieldIndex.termOffsets.Get(hi);
- int length = (int) (fieldIndex.termOffsets.Get(1 + hi) - offset);
- termBytesReader.FillSlice(term, fieldIndex.termBytesStart + offset, length);
+ offset = _fieldIndex.TermOffsets.Get(hi);
+ length = (int) (_fieldIndex.TermOffsets.Get(1 + hi) - offset);
+ _fgtir._termBytesReader.FillSlice(Term, _fieldIndex.TermBytesStart + offset, length);
- ord = hi*totalIndexInterval;
- return fieldIndex.termsStart + fieldIndex.termsDictOffsets.get(hi);
+ Ord = hi * _fgtir._totalIndexInterval;
+ return _fieldIndex.TermsStart + _fieldIndex.TermsDictOffsets.Get(hi);
}
public override long Next
{
get
{
- int idx = 1 + (int) (ord/totalIndexInterval);
- if (idx >= fieldIndex.numIndexTerms)
- {
+ var idx = 1 + (int)(Ord / _fgtir._totalIndexInterval);
+ if (idx >= _fieldIndex.NumIndexTerms)
return -1;
- }
- ord += totalIndexInterval;
- long offset = fieldIndex.termOffsets.Get(idx);
- int length = (int) (fieldIndex.termOffsets.Get(1 + idx) - offset);
- termBytesReader.FillSlice(term, fieldIndex.termBytesStart + offset, length);
- return fieldIndex.termsStart + fieldIndex.termsDictOffsets.Get(idx);
+ Ord += _fgtir._totalIndexInterval;
+
+ var offset = _fieldIndex.TermOffsets.Get(idx);
+ var length = (int)(_fieldIndex.TermOffsets.Get(1 + idx) - offset);
+
+ _fgtir._termBytesReader.FillSlice(Term, _fieldIndex.TermBytesStart + offset, length);
+
+ return _fieldIndex.TermsStart + _fieldIndex.TermsDictOffsets.Get(idx);
}
}
public override long Seek(long ord)
{
- int idx = (int) (ord/totalIndexInterval);
+ var idx = (int)(ord / _fgtir._totalIndexInterval);
+
// caller must ensure ord is in bounds
- Debug.Assert(idx < fieldIndex.NumIndexTerms);
+ Debug.Assert(idx < _fieldIndex.NumIndexTerms);
- long offset = fieldIndex.termOffsets.get(idx);
- int length = (int) (fieldIndex.termOffsets.get(1 + idx) - offset);
- termBytesReader.FillSlice(term, fieldIndex.termBytesStart + offset, length);
- Ord = idx*totalIndexInterval;
- return fieldIndex.termsStart + fieldIndex.termsDictOffsets.get(idx);
- }
- }
+ var offset = _fieldIndex.TermOffsets.Get(idx);
+ var length = (int)(_fieldIndex.TermOffsets.Get(1 + idx) - offset);
- public override bool SupportsOrd
- {
- get { return true; }
+ _fgtir._termBytesReader.FillSlice(Term, _fieldIndex.TermBytesStart + offset, length);
+ Ord = idx * _fgtir._totalIndexInterval;
+
+ return _fieldIndex.TermsStart + _fieldIndex.TermsDictOffsets.Get(idx);
+ }
}
protected class FieldIndexData
{
-
public volatile CoreFieldIndex CoreIndex;
- private readonly long indexStart;
- private readonly long termsStart;
- private readonly long packedIndexStart;
- private readonly long packedOffsetsStart;
-
- private readonly int numIndexTerms;
+ private readonly long _indexStart;
+ private readonly long _termsStart;
+ private readonly long _packedIndexStart;
+ private readonly long _packedOffsetsStart;
+ private readonly int _numIndexTerms;
+ private readonly FixedGapTermsIndexReader _fgtir;
- public FieldIndexData(FieldInfo fieldInfo, int numIndexTerms, long indexStart, long termsStart,
+ public FieldIndexData(int numIndexTerms, long indexStart, long termsStart,
long packedIndexStart,
- long packedOffsetsStart)
+ long packedOffsetsStart, FixedGapTermsIndexReader fgtir)
{
- this.termsStart = termsStart;
- this.indexStart = indexStart;
- this.packedIndexStart = packedIndexStart;
- this.packedOffsetsStart = packedOffsetsStart;
- this.numIndexTerms = numIndexTerms;
+ _termsStart = termsStart;
+ _indexStart = indexStart;
+ _packedIndexStart = packedIndexStart;
+ _packedOffsetsStart = packedOffsetsStart;
+ _numIndexTerms = numIndexTerms;
+ _fgtir = fgtir;
- if (indexDivisor > 0)
- {
- loadTermsIndex();
- }
+ if (_fgtir._indexDivisor > 0)
+ LoadTermsIndex();
}
- private void loadTermsIndex()
+ private void LoadTermsIndex()
{
- if (coreIndex == null)
- {
- coreIndex = new CoreFieldIndex(indexStart, termsStart, packedIndexStart, packedOffsetsStart,
- numIndexTerms);
- }
+ if (CoreIndex == null)
+ CoreIndex = new CoreFieldIndex(_indexStart, _termsStart, _packedIndexStart, _packedOffsetsStart,
+ _numIndexTerms, _fgtir);
}
- private class CoreFieldIndex
+ public class CoreFieldIndex
{
+ /// <summary>
+ /// Where this fields term begin in the packed byte[] data
+ /// </summary>
+ public long TermBytesStart { get; private set; }
+
+ /// <summary>
+ /// Offset into index TermBytes
+ /// </summary>
+ public PackedInts.Reader TermOffsets { get; private set; }
+
+ /// <summary>
+ /// Index pointers into main terms dict
+ /// </summary>
+ public PackedInts.Reader TermsDictOffsets { get; private set; }
+
+ /// <summary>Returns approximate RAM bytes Used</summary>
+ public long RamBytesUsed
+ {
+ get
+ {
+ return ((TermOffsets != null) ? TermOffsets.RamBytesUsed() : 0) +
+ ((TermsDictOffsets != null) ? TermsDictOffsets.RamBytesUsed() : 0);
+ }
+ }
- // where this field's terms begin in the packed byte[]
- // data
- private readonly long termBytesStart;
-
- // offset into index termBytes
- private readonly PackedInts.Reader termOffsets;
-
- // index pointers into main terms dict
- private readonly PackedInts.Reader termsDictOffsets;
-
- private readonly int numIndexTerms;
- private readonly long termsStart;
+ public int NumIndexTerms { get; private set; }
+ public long TermsStart { get; private set; }
public CoreFieldIndex(long indexStart, long termsStart, long packedIndexStart, long packedOffsetsStart,
- int numIndexTerms)
+ int numIndexTerms, FixedGapTermsIndexReader fgtir)
{
+ TermsStart = termsStart;
+ TermBytesStart = fgtir._termBytes.Pointer;
- this.termsStart = termsStart;
- termBytesStart = termBytes.Pointer;
-
- IndexInput clone = input.Clone();
+ var clone = (IndexInput)fgtir._input.Clone();
clone.Seek(indexStart);
// -1 is passed to mean "don't load term index", but
// if we are then later loaded it's overwritten with
// a real value
- Debug.Assert(indexDivisor > 0);
+ Debug.Assert(fgtir._indexDivisor > 0);
- this.numIndexTerms = 1 + (numIndexTerms - 1)/indexDivisor;
+ NumIndexTerms = 1 + (numIndexTerms - 1)/fgtir._indexDivisor;
- Debug.Assert(this.numIndexTerms > 0:
- "numIndexTerms=" + numIndexTerms + " indexDivisor=" + indexDivisor;
+ Debug.Assert(NumIndexTerms > 0,
+ String.Format("NumIndexTerms: {0}, IndexDivisor: {1}", NumIndexTerms, fgtir._indexDivisor));
- if (indexDivisor == 1)
+ if (fgtir._indexDivisor == 1)
{
// Default (load all index terms) is fast -- slurp in the images from disk:
try
{
- readonly
- long numTermBytes = packedIndexStart - indexStart;
- termBytes.copy(clone, numTermBytes);
+ var numTermBytes = packedIndexStart - indexStart;
+ fgtir._termBytes.Copy(clone, numTermBytes);
// records offsets into main terms dict file
- termsDictOffsets = PackedInts.getReader(clone);
- Debug.Assert(termsDictOffsets.size() == numIndexTerms;
+ TermsDictOffsets = PackedInts.GetReader(clone);
+ Debug.Assert(TermsDictOffsets.Size() == numIndexTerms);
// records offsets into byte[] term data
- termOffsets = PackedInts.GetReader(clone);
- Debug.Assert(termOffsets.Size() == 1 + numIndexTerms);
+ TermOffsets = PackedInts.GetReader(clone);
+ Debug.Assert(TermOffsets.Size() == 1 + numIndexTerms);
}
finally
{
@@ -392,8 +446,8 @@ namespace Lucene.Net.Codecs.BlockTerms
else
{
// Get packed iterators
- var clone1 = input.Clone();
- var clone2 = input.Clone();
+ var clone1 = (IndexInput)fgtir._input.Clone();
+ var clone2 = (IndexInput)fgtir._input.Clone();
try
{
@@ -413,28 +467,27 @@ namespace Lucene.Net.Codecs.BlockTerms
// we'd have to try @ fewer bits and then grow
// if we overflowed it.
- PackedInts.Mutable termsDictOffsetsM = PackedInts.GetMutable(this.numIndexTerms,
+ PackedInts.Mutable termsDictOffsetsM = PackedInts.GetMutable(NumIndexTerms,
termsDictOffsetsIter.BitsPerValue, PackedInts.DEFAULT);
- PackedInts.Mutable termOffsetsM = PackedInts.GetMutable(this.numIndexTerms + 1,
+ PackedInts.Mutable termOffsetsM = PackedInts.GetMutable(NumIndexTerms + 1,
termOffsetsIter.BitsPerValue, PackedInts.DEFAULT);
- termsDictOffsets = termsDictOffsetsM;
- termOffsets = termOffsetsM;
-
- int upto = 0;
+ TermsDictOffsets = termsDictOffsetsM;
+ TermOffsets = termOffsetsM;
+ var upto = 0;
long termOffsetUpto = 0;
- while (upto < this.numIndexTerms)
+ while (upto < NumIndexTerms)
{
// main file offset copies straight over
termsDictOffsetsM.Set(upto, termsDictOffsetsIter.Next());
termOffsetsM.Set(upto, termOffsetUpto);
- long termOffset = termOffsetsIter.Next();
- long nextTermOffset = termOffsetsIter.Next();
- int numTermBytes = (int) (nextTermOffset - termOffset);
+ var termOffset = termOffsetsIter.Next();
+ var nextTermOffset = termOffsetsIter.Next();
+ var numTermBytes = (int) (nextTermOffset - termOffset);
clone.Seek(indexStart + termOffset);
@@ -444,18 +497,16 @@ namespace Lucene.Net.Codecs.BlockTerms
Debug.Assert(indexStart + termOffset + numTermBytes < clone.Length());
- termBytes.Copy(clone, numTermBytes);
+ fgtir._termBytes.Copy(clone, numTermBytes);
termOffsetUpto += numTermBytes;
upto++;
- if (upto == this.numIndexTerms)
- {
+ if (upto == NumIndexTerms)
break;
- }
-
+
// skip terms:
termsDictOffsetsIter.Next();
- for (int i = 0; i < indexDivisor - 2; i++)
+ for (var i = 0; i < fgtir._indexDivisor - 2; i++)
{
termOffsetsIter.Next();
termsDictOffsetsIter.Next();
@@ -473,54 +524,6 @@ namespace Lucene.Net.Codecs.BlockTerms
}
}
- /** Returns approximate RAM bytes Used */
-
- public long RamBytesUsed()
- {
- return ((termOffsets != null) ? termOffsets.RamBytesUsed() : 0) +
- ((termsDictOffsets != null) ? termsDictOffsets.RamBytesUsed() : 0);
- }
- }
- }
-
- public override FieldIndexEnum GetFieldEnum(FieldInfo fieldInfo)
- {
- FieldIndexData fieldData = fields[fieldInfo];
- return fieldData.CoreIndex == null ? null : new IndexEnum(fieldData.CoreIndex);
- }
-
- public override void Dispose()
- {
- if (input != null && !indexLoaded)
- input.Dispose();
- }
-
- private void SeekDir(IndexInput input, long dirOffset)
- {
- if (version >= FixedGapTermsIndexWriter.VERSION_CHECKSUM)
- {
- input.Seek(input.Length() - CodecUtil.FooterLength() - 8);
- dirOffset = input.ReadLong();
-
- }
- else if (version >= FixedGapTermsIndexWriter.VERSION_APPEND_ONLY)
- {
- input.Seek(input.Length() - 8);
- dirOffset = input.ReadLong();
- }
-
- input.Seek(dirOffset);
- }
-
- public override long RamBytesUsed
- {
- get
- {
- var sizeInBytes = ((termBytes != null) ? termBytes.RamBytesUsed() : 0) +
- ((termBytesReader != null) ? termBytesReader.RamBytesUsed() : 0);
-
- return fields.Values.Aggregate(sizeInBytes,
- (current, entry) => (long) (current + entry.CoreIndex.RamBytesUsed));
}
}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b0601f29/src/Lucene.Net.Codecs/BlockTerms/FixedGapTermsIndexWriter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Codecs/BlockTerms/FixedGapTermsIndexWriter.cs b/src/Lucene.Net.Codecs/BlockTerms/FixedGapTermsIndexWriter.cs
index 39d9065..faf5d1c 100644
--- a/src/Lucene.Net.Codecs/BlockTerms/FixedGapTermsIndexWriter.cs
+++ b/src/Lucene.Net.Codecs/BlockTerms/FixedGapTermsIndexWriter.cs
@@ -40,8 +40,10 @@ namespace Lucene.Net.Codecs.BlockTerms
protected IndexOutput Output;
/** Extension of terms index file */
- private const String TERMS_INDEX_EXTENSION = "tii";
+ public const String TERMS_INDEX_EXTENSION = "tii";
public const String CODEC_NAME = "SIMPLE_STANDARD_TERMS_INDEX";
+ public const int VERSION_START = 0;
+ public const int VERSION_APPEND_ONLY = 1;
public const int VERSION_CHECKSUM = 1000;
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b0601f29/src/Lucene.Net.Codecs/BlockTerms/TermsIndexReaderBase.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Codecs/BlockTerms/TermsIndexReaderBase.cs b/src/Lucene.Net.Codecs/BlockTerms/TermsIndexReaderBase.cs
index 3fc2c18..a3b71cd 100644
--- a/src/Lucene.Net.Codecs/BlockTerms/TermsIndexReaderBase.cs
+++ b/src/Lucene.Net.Codecs/BlockTerms/TermsIndexReaderBase.cs
@@ -60,7 +60,7 @@ namespace Lucene.Net.Codecs.BlockTerms
/** Returns -1 at end */
public abstract long Next { get; }
- public abstract BytesRef Term { get; }
+ public abstract BytesRef Term { get; set; }
/// <summary></summary>
/// <remarks>Only implemented if {@link TermsIndexReaderBase.supportsOrd()}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b0601f29/src/Lucene.Net.Codecs/BlockTerms/VariableGapTermsIndexReader.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Codecs/BlockTerms/VariableGapTermsIndexReader.cs b/src/Lucene.Net.Codecs/BlockTerms/VariableGapTermsIndexReader.cs
index 7fcc1fa..b31ba0d 100644
--- a/src/Lucene.Net.Codecs/BlockTerms/VariableGapTermsIndexReader.cs
+++ b/src/Lucene.Net.Codecs/BlockTerms/VariableGapTermsIndexReader.cs
@@ -15,14 +15,13 @@
* limitations under the License.
*/
-using System.Linq;
-
namespace Lucene.Net.Codecs.BlockTerms
{
using System;
using System.Collections.Generic;
using System.Diagnostics;
+ using System.Linq;
using Index;
using Store;
using Util;
@@ -237,6 +236,7 @@ namespace Lucene.Net.Codecs.BlockTerms
public override BytesRef Term
{
get { return _current == null ? null : _current.Input; }
+ set { }
}
public override long Seek(BytesRef target)
@@ -260,6 +260,7 @@ namespace Lucene.Net.Codecs.BlockTerms
public override long Ord
{
get { throw new NotImplementedException(); }
+ set { }
}
public override long Seek(long ord)
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b0601f29/src/Lucene.Net.Codecs/Lucene.Net.Codecs.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Codecs/Lucene.Net.Codecs.csproj b/src/Lucene.Net.Codecs/Lucene.Net.Codecs.csproj
index 3f014ce..06e1908 100644
--- a/src/Lucene.Net.Codecs/Lucene.Net.Codecs.csproj
+++ b/src/Lucene.Net.Codecs/Lucene.Net.Codecs.csproj
@@ -43,7 +43,6 @@
<Compile Include="Appending\AppendingCodec.cs" />
<Compile Include="Appending\AppendingPostingsFormat.cs" />
<Compile Include="Appending\AppendingTermsReader.cs" />
- <Compile Include="BlockTerms\BlockTermsFieldAndTerm.cs" />
<Compile Include="BlockTerms\BlockTermsReader.cs" />
<Compile Include="BlockTerms\BlockTermsWriter.cs" />
<Compile Include="BlockTerms\FixedGapTermsIndexReader.cs" />
@@ -122,7 +121,7 @@
</ItemGroup>
<ItemGroup />
<ItemGroup>
- <ProjectReference Include="..\core\Lucene.Net.csproj">
+ <ProjectReference Include="..\Lucene.Net.Core\Lucene.Net.csproj">
<Project>{5d4ad9be-1ffb-41ab-9943-25737971bf57}</Project>
<Name>Lucene.Net</Name>
</ProjectReference>