You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by pn...@apache.org on 2014/11/08 23:55:02 UTC
[1/5] lucenenet git commit: Updating Memory Codec
Repository: lucenenet
Updated Branches:
refs/heads/master 997171765 -> bcff24dd1
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0cc0e7ec/src/Lucene.Net.Codecs/Memory/MemoryDocValuesProducer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Codecs/Memory/MemoryDocValuesProducer.cs b/src/Lucene.Net.Codecs/Memory/MemoryDocValuesProducer.cs
index 129aaf7..aa6cefb 100644
--- a/src/Lucene.Net.Codecs/Memory/MemoryDocValuesProducer.cs
+++ b/src/Lucene.Net.Codecs/Memory/MemoryDocValuesProducer.cs
@@ -20,976 +20,983 @@ using System.Collections.Generic;
using System.Diagnostics;
using Lucene.Net.Codecs.Sep;
using Lucene.Net.Index;
+using Lucene.Net.Store;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using Lucene.Net.Util.Fst;
namespace Lucene.Net.Codecs.Memory
{
/// <summary>
- /// Reader for <seealso cref="MemoryDocValuesFormat"/>
- /// </summary>
- internal class MemoryDocValuesProducer : DocValuesProducer
- {
- // metadata maps (just file pointers and minimal stuff)
- private readonly IDictionary<int?, NumericEntry> numerics;
- private readonly IDictionary<int?, BinaryEntry> binaries;
- private readonly IDictionary<int?, FSTEntry> fsts;
- private readonly IndexInput data;
-
- // ram instances we have already loaded
- private readonly IDictionary<int?, NumericDocValues> numericInstances = new Dictionary<int?, NumericDocValues>();
- private readonly IDictionary<int?, BinaryDocValues> binaryInstances = new Dictionary<int?, BinaryDocValues>();
- private readonly IDictionary<int?, FST<long?>> fstInstances = new Dictionary<int?, FST<long?>>();
- private readonly IDictionary<int?, Bits> docsWithFieldInstances = new Dictionary<int?, Bits>();
-
- private readonly int maxDoc;
- private readonly AtomicLong ramBytesUsed_Renamed;
- private readonly int version;
-
- internal const sbyte NUMBER = 0;
- internal const sbyte BYTES = 1;
- internal const sbyte org;
-
- internal const int BLOCK_SIZE = 4096;
-
- internal const sbyte DELTA_COMPRESSED = 0;
- internal const sbyte TABLE_COMPRESSED = 1;
- internal const sbyte UNCOMPRESSED = 2;
- internal const sbyte GCD_COMPRESSED = 3;
-
- internal const int VERSION_START = 0;
- internal const int VERSION_GCD_COMPRESSION = 1;
- internal const int VERSION_CHECKSUM = 2;
- internal const int VERSION_CURRENT = VERSION_CHECKSUM;
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: MemoryDocValuesProducer(index.SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws java.io.IOException
- internal MemoryDocValuesProducer(SegmentReadState state, string dataCodec, string dataExtension, string metaCodec, string metaExtension)
- {
- maxDoc = state.segmentInfo.DocCount;
- string metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension);
- // read in the entries from the metadata file.
- ChecksumIndexInput @in = state.directory.openChecksumInput(metaName, state.context);
- bool success = false;
- try
- {
- version = CodecUtil.checkHeader(@in, metaCodec, VERSION_START, VERSION_CURRENT);
- numerics = new Dictionary<>();
- binaries = new Dictionary<>();
- fsts = new Dictionary<>();
- readFields(@in, state.fieldInfos);
- if (version >= VERSION_CHECKSUM)
- {
- CodecUtil.checkFooter(@in);
- }
- else
- {
- CodecUtil.checkEOF(@in);
- }
- ramBytesUsed_Renamed = new AtomicLong(RamUsageEstimator.shallowSizeOfInstance(this.GetType()));
- success = true;
- }
- finally
- {
- if (success)
- {
- IOUtils.close(@in);
- }
- else
- {
- IOUtils.closeWhileHandlingException(@in);
- }
- }
-
- success = false;
- try
- {
- string dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension);
- data = state.directory.openInput(dataName, state.context);
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final int version2 = codecs.CodecUtil.checkHeader(data, dataCodec, VERSION_START, VERSION_CURRENT);
- int version2 = CodecUtil.checkHeader(data, dataCodec, VERSION_START, VERSION_CURRENT);
- if (version != version2)
- {
- throw new CorruptIndexException("Format versions mismatch");
- }
-
- success = true;
- }
- finally
- {
- if (!success)
- {
- IOUtils.closeWhileHandlingException(this.data);
- }
- }
- }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: private void readFields(store.IndexInput meta, index.FieldInfos infos) throws java.io.IOException
- private void readFields(IndexInput meta, FieldInfos infos)
- {
- int fieldNumber = meta.readVInt();
- while (fieldNumber != -1)
- {
- int fieldType = meta.readByte();
- if (fieldType == NUMBER)
- {
- NumericEntry entry = new NumericEntry();
- entry.offset = meta.readLong();
- entry.missingOffset = meta.readLong();
- if (entry.missingOffset != -1)
- {
- entry.missingBytes = meta.readLong();
- }
- else
- {
- entry.missingBytes = 0;
- }
- entry.format = meta.readByte();
- switch (entry.format)
- {
- case DELTA_COMPRESSED:
- case TABLE_COMPRESSED:
- case GCD_COMPRESSED:
- case UNCOMPRESSED:
- break;
- default:
- throw new CorruptIndexException("Unknown format: " + entry.format + ", input=" + meta);
- }
- if (entry.format != UNCOMPRESSED)
- {
- entry.packedIntsVersion = meta.readVInt();
- }
- numerics[fieldNumber] = entry;
- }
- else if (fieldType == BYTES)
- {
- BinaryEntry entry = new BinaryEntry();
- entry.offset = meta.readLong();
- entry.numBytes = meta.readLong();
- entry.missingOffset = meta.readLong();
- if (entry.missingOffset != -1)
- {
- entry.missingBytes = meta.readLong();
- }
- else
- {
- entry.missingBytes = 0;
- }
- entry.minLength = meta.readVInt();
- entry.maxLength = meta.readVInt();
- if (entry.minLength != entry.maxLength)
- {
- entry.packedIntsVersion = meta.readVInt();
- entry.blockSize = meta.readVInt();
- }
- binaries[fieldNumber] = entry;
- }
- else if (fieldType == FST)
- {
- FSTEntry entry = new FSTEntry();
- entry.offset = meta.readLong();
- entry.numOrds = meta.readVLong();
- fsts[fieldNumber] = entry;
- }
- else
- {
- throw new CorruptIndexException("invalid entry type: " + fieldType + ", input=" + meta);
- }
- fieldNumber = meta.readVInt();
- }
- }
+ /// Reader for <seealso cref="MemoryDocValuesFormat"/>
+ /// </summary>
+ internal class MemoryDocValuesProducer : DocValuesProducer
+ {
+ // metadata maps (just file pointers and minimal stuff)
+ private readonly IDictionary<int?, NumericEntry> numerics;
+ private readonly IDictionary<int?, BinaryEntry> binaries;
+ private readonly IDictionary<int?, FSTEntry> fsts;
+ private readonly IndexInput data;
+
+ // ram instances we have already loaded
+ private readonly IDictionary<int?, NumericDocValues> numericInstances = new Dictionary<int?, NumericDocValues>();
+ private readonly IDictionary<int?, BinaryDocValues> binaryInstances = new Dictionary<int?, BinaryDocValues>();
+ private readonly IDictionary<int?, FST<long?>> fstInstances = new Dictionary<int?, FST<long?>>();
+ private readonly IDictionary<int?, Bits> docsWithFieldInstances = new Dictionary<int?, Bits>();
+
+ private readonly int maxDoc;
+ private readonly AtomicLong ramBytesUsed_Renamed;
+ private readonly int version;
+
+ internal const sbyte NUMBER = 0;
+ internal const sbyte BYTES = 1;
+ internal const sbyte org;
+
+ internal const int BLOCK_SIZE = 4096;
+
+ internal const sbyte DELTA_COMPRESSED = 0;
+ internal const sbyte TABLE_COMPRESSED = 1;
+ internal const sbyte UNCOMPRESSED = 2;
+ internal const sbyte GCD_COMPRESSED = 3;
+
+ internal const int VERSION_START = 0;
+ internal const int VERSION_GCD_COMPRESSION = 1;
+ internal const int VERSION_CHECKSUM = 2;
+ internal const int VERSION_CURRENT = VERSION_CHECKSUM;
+
+
+ internal MemoryDocValuesProducer(SegmentReadState state, string dataCodec, string dataExtension,
+ string metaCodec, string metaExtension)
+ {
+ maxDoc = state.SegmentInfo.DocCount;
+ string metaName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, metaExtension);
+ // read in the entries from the metadata file.
+ ChecksumIndexInput @in = state.Directory.OpenChecksumInput(metaName, state.Context);
+ bool success = false;
+ try
+ {
+ version = CodecUtil.CheckHeader(@in, metaCodec, VERSION_START, VERSION_CURRENT);
+ numerics = new Dictionary<>();
+ binaries = new Dictionary<>();
+ fsts = new Dictionary<>();
+ ReadFields(@in, state.FieldInfos);
+ if (version >= VERSION_CHECKSUM)
+ {
+ CodecUtil.CheckFooter(@in);
+ }
+ else
+ {
+ CodecUtil.CheckEOF(@in);
+ }
+ ramBytesUsed_Renamed = new AtomicLong(RamUsageEstimator.ShallowSizeOfInstance(this.GetType()));
+ success = true;
+ }
+ finally
+ {
+ if (success)
+ {
+ IOUtils.Close(@in);
+ }
+ else
+ {
+ IOUtils.CloseWhileHandlingException(@in);
+ }
+ }
+
+ success = false;
+ try
+ {
+ string dataName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix,
+ dataExtension);
+ data = state.Directory.OpenInput(dataName, state.Context);
+
+ int version2 = CodecUtil.CheckHeader(data, dataCodec, VERSION_START, VERSION_CURRENT);
+ if (version != version2)
+ {
+ throw new CorruptIndexException("Format versions mismatch");
+ }
+
+ success = true;
+ }
+ finally
+ {
+ if (!success)
+ {
+ IOUtils.CloseWhileHandlingException(this.data);
+ }
+ }
+ }
+
+ private void ReadFields(IndexInput meta, FieldInfos infos)
+ {
+ int fieldNumber = meta.ReadVInt();
+ while (fieldNumber != -1)
+ {
+ int fieldType = meta.ReadByte();
+ if (fieldType == NUMBER)
+ {
+ NumericEntry entry = new NumericEntry();
+ entry.offset = meta.ReadLong();
+ entry.missingOffset = meta.ReadLong();
+ if (entry.missingOffset != -1)
+ {
+ entry.missingBytes = meta.ReadLong();
+ }
+ else
+ {
+ entry.missingBytes = 0;
+ }
+ entry.format = meta.ReadByte();
+ switch (entry.format)
+ {
+ case DELTA_COMPRESSED:
+ case TABLE_COMPRESSED:
+ case GCD_COMPRESSED:
+ case UNCOMPRESSED:
+ break;
+ default:
+ throw new CorruptIndexException("Unknown format: " + entry.format + ", input=" + meta);
+ }
+ if (entry.format != UNCOMPRESSED)
+ {
+ entry.packedIntsVersion = meta.ReadVInt();
+ }
+ numerics[fieldNumber] = entry;
+ }
+ else if (fieldType == BYTES)
+ {
+ BinaryEntry entry = new BinaryEntry();
+ entry.offset = meta.ReadLong();
+ entry.numBytes = meta.ReadLong();
+ entry.missingOffset = meta.ReadLong();
+ if (entry.missingOffset != -1)
+ {
+ entry.missingBytes = meta.ReadLong();
+ }
+ else
+ {
+ entry.missingBytes = 0;
+ }
+ entry.minLength = meta.ReadVInt();
+ entry.maxLength = meta.ReadVInt();
+ if (entry.minLength != entry.maxLength)
+ {
+ entry.packedIntsVersion = meta.ReadVInt();
+ entry.blockSize = meta.ReadVInt();
+ }
+ binaries[fieldNumber] = entry;
+ }
+ else if (fieldType == FST)
+ {
+ FSTEntry entry = new FSTEntry();
+ entry.offset = meta.ReadLong();
+ entry.numOrds = meta.ReadVLong();
+ fsts[fieldNumber] = entry;
+ }
+ else
+ {
+ throw new CorruptIndexException("invalid entry type: " + fieldType + ", input=" + meta);
+ }
+ fieldNumber = meta.ReadVInt();
+ }
+ }
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: @Override public synchronized index.NumericDocValues getNumeric(index.FieldInfo field) throws java.io.IOException
- public override NumericDocValues getNumeric(FieldInfo field)
- {
- lock (this)
- {
- NumericDocValues instance = numericInstances[field.number];
- if (instance == null)
- {
- instance = loadNumeric(field);
- numericInstances[field.number] = instance;
- }
- return instance;
- }
- }
-
- public override long ramBytesUsed()
- {
- return ramBytesUsed_Renamed.get();
- }
+ public override NumericDocValues getNumeric(FieldInfo field)
+ {
+ lock (this)
+ {
+ NumericDocValues instance = numericInstances[field.number];
+ if (instance == null)
+ {
+ instance = loadNumeric(field);
+ numericInstances[field.number] = instance;
+ }
+ return instance;
+ }
+ }
+
+ public override long ramBytesUsed()
+ {
+ return ramBytesUsed_Renamed.get();
+ }
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: @Override public void checkIntegrity() throws java.io.IOException
- public override void checkIntegrity()
- {
- if (version >= VERSION_CHECKSUM)
- {
- CodecUtil.checksumEntireFile(data);
- }
- }
+ public override void checkIntegrity()
+ {
+ if (version >= VERSION_CHECKSUM)
+ {
+ CodecUtil.checksumEntireFile(data);
+ }
+ }
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: private index.NumericDocValues loadNumeric(index.FieldInfo field) throws java.io.IOException
- private NumericDocValues loadNumeric(FieldInfo field)
- {
- NumericEntry entry = numerics[field.number];
- data.seek(entry.offset + entry.missingBytes);
- switch (entry.format)
- {
- case TABLE_COMPRESSED:
- int size = data.readVInt();
- if (size > 256)
- {
- throw new CorruptIndexException("TABLE_COMPRESSED cannot have more than 256 distinct values, input=" + data);
- }
+ private NumericDocValues loadNumeric(FieldInfo field)
+ {
+ NumericEntry entry = numerics[field.number];
+ data.seek(entry.offset + entry.missingBytes);
+ switch (entry.format)
+ {
+ case TABLE_COMPRESSED:
+ int size = data.ReadVInt();
+ if (size > 256)
+ {
+ throw new CorruptIndexException(
+ "TABLE_COMPRESSED cannot have more than 256 distinct values, input=" + data);
+ }
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final long decode[] = new long[size];
- long[] decode = new long[size];
- for (int i = 0; i < decode.Length; i++)
- {
- decode[i] = data.readLong();
- }
+ long[] decode = new long[size];
+ for (int i = 0; i < decode.Length; i++)
+ {
+ decode[i] = data.ReadLong();
+ }
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final int formatID = data.readVInt();
- int formatID = data.readVInt();
+//ORIGINAL LINE: final int formatID = data.ReadVInt();
+ int formatID = data.ReadVInt();
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final int bitsPerValue = data.readVInt();
- int bitsPerValue = data.readVInt();
+//ORIGINAL LINE: final int bitsPerValue = data.ReadVInt();
+ int bitsPerValue = data.ReadVInt();
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final util.packed.PackedInts.Reader ordsReader = util.packed.PackedInts.getReaderNoHeader(data, util.packed.PackedInts.Format.byId(formatID), entry.packedIntsVersion, maxDoc, bitsPerValue);
- PackedInts.Reader ordsReader = PackedInts.getReaderNoHeader(data, PackedInts.Format.byId(formatID), entry.packedIntsVersion, maxDoc, bitsPerValue);
- ramBytesUsed_Renamed.addAndGet(RamUsageEstimator.sizeOf(decode) + ordsReader.ramBytesUsed());
- return new NumericDocValuesAnonymousInnerClassHelper(this, decode, ordsReader);
- case DELTA_COMPRESSED:
+ PackedInts.Reader ordsReader = PackedInts.getReaderNoHeader(data, PackedInts.Format.byId(formatID),
+ entry.packedIntsVersion, maxDoc, bitsPerValue);
+ ramBytesUsed_Renamed.addAndGet(RamUsageEstimator.sizeOf(decode) + ordsReader.ramBytesUsed());
+ return new NumericDocValuesAnonymousInnerClassHelper(this, decode, ordsReader);
+ case DELTA_COMPRESSED:
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final int blockSize = data.readVInt();
- int blockSize = data.readVInt();
+//ORIGINAL LINE: final int blockSize = data.ReadVInt();
+ int blockSize = data.ReadVInt();
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final util.packed.BlockPackedReader reader = new util.packed.BlockPackedReader(data, entry.packedIntsVersion, blockSize, maxDoc, false);
- BlockPackedReader reader = new BlockPackedReader(data, entry.packedIntsVersion, blockSize, maxDoc, false);
- ramBytesUsed_Renamed.addAndGet(reader.ramBytesUsed());
- return reader;
- case UNCOMPRESSED:
+ BlockPackedReader reader = new BlockPackedReader(data, entry.packedIntsVersion, blockSize, maxDoc,
+ false);
+ ramBytesUsed_Renamed.addAndGet(reader.ramBytesUsed());
+ return reader;
+ case UNCOMPRESSED:
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final byte bytes[] = new byte[maxDoc];
- sbyte[] bytes = new sbyte[maxDoc];
- data.readBytes(bytes, 0, bytes.Length);
- ramBytesUsed_Renamed.addAndGet(RamUsageEstimator.sizeOf(bytes));
- return new NumericDocValuesAnonymousInnerClassHelper2(this, bytes);
- case GCD_COMPRESSED:
+ sbyte[] bytes = new sbyte[maxDoc];
+ data.ReadBytes(bytes, 0, bytes.Length);
+ ramBytesUsed_Renamed.addAndGet(RamUsageEstimator.sizeOf(bytes));
+ return new NumericDocValuesAnonymousInnerClassHelper2(this, bytes);
+ case GCD_COMPRESSED:
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final long min = data.readLong();
- long min = data.readLong();
+//ORIGINAL LINE: final long min = data.ReadLong();
+ long min = data.ReadLong();
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final long mult = data.readLong();
- long mult = data.readLong();
+//ORIGINAL LINE: final long mult = data.ReadLong();
+ long mult = data.ReadLong();
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final int quotientBlockSize = data.readVInt();
- int quotientBlockSize = data.readVInt();
+//ORIGINAL LINE: final int quotientBlockSize = data.ReadVInt();
+ int quotientBlockSize = data.ReadVInt();
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final util.packed.BlockPackedReader quotientReader = new util.packed.BlockPackedReader(data, entry.packedIntsVersion, quotientBlockSize, maxDoc, false);
- BlockPackedReader quotientReader = new BlockPackedReader(data, entry.packedIntsVersion, quotientBlockSize, maxDoc, false);
- ramBytesUsed_Renamed.addAndGet(quotientReader.ramBytesUsed());
- return new NumericDocValuesAnonymousInnerClassHelper3(this, min, mult, quotientReader);
- default:
- throw new AssertionError();
- }
- }
-
- private class NumericDocValuesAnonymousInnerClassHelper : NumericDocValues
- {
- private readonly MemoryDocValuesProducer outerInstance;
-
- private long[] decode;
- private IntIndexInput.Reader ordsReader;
-
- public NumericDocValuesAnonymousInnerClassHelper(MemoryDocValuesProducer outerInstance, long[] decode, IntIndexInput.Reader ordsReader)
- {
- this.outerInstance = outerInstance;
- this.decode = decode;
- this.ordsReader = ordsReader;
- }
-
- public override long get(int docID)
- {
- return decode[(int)ordsReader.get(docID)];
- }
- }
-
- private class NumericDocValuesAnonymousInnerClassHelper2 : NumericDocValues
- {
- private readonly MemoryDocValuesProducer outerInstance;
-
- private sbyte[] bytes;
-
- public NumericDocValuesAnonymousInnerClassHelper2(MemoryDocValuesProducer outerInstance, sbyte[] bytes)
- {
- this.outerInstance = outerInstance;
- this.bytes = bytes;
- }
-
- public override long get(int docID)
- {
- return bytes[docID];
- }
- }
-
- private class NumericDocValuesAnonymousInnerClassHelper3 : NumericDocValues
- {
- private readonly MemoryDocValuesProducer outerInstance;
-
- private long min;
- private long mult;
- private BlockPackedReader quotientReader;
-
- public NumericDocValuesAnonymousInnerClassHelper3(MemoryDocValuesProducer outerInstance, long min, long mult, BlockPackedReader quotientReader)
- {
- this.outerInstance = outerInstance;
- this.min = min;
- this.mult = mult;
- this.quotientReader = quotientReader;
- }
-
- public override long get(int docID)
- {
- return min + mult * quotientReader.get(docID);
- }
- }
+ BlockPackedReader quotientReader = new BlockPackedReader(data, entry.packedIntsVersion,
+ quotientBlockSize, maxDoc, false);
+ ramBytesUsed_Renamed.addAndGet(quotientReader.ramBytesUsed());
+ return new NumericDocValuesAnonymousInnerClassHelper3(this, min, mult, quotientReader);
+ default:
+ throw new AssertionError();
+ }
+ }
+
+ private class NumericDocValuesAnonymousInnerClassHelper : NumericDocValues
+ {
+ private readonly MemoryDocValuesProducer outerInstance;
+
+ private long[] decode;
+ private IntIndexInput.Reader ordsReader;
+
+ public NumericDocValuesAnonymousInnerClassHelper(MemoryDocValuesProducer outerInstance, long[] decode,
+ IntIndexInput.Reader ordsReader)
+ {
+ this.outerInstance = outerInstance;
+ this.decode = decode;
+ this.ordsReader = ordsReader;
+ }
+
+ public override long get(int docID)
+ {
+ return decode[(int) ordsReader.get(docID)];
+ }
+ }
+
+ private class NumericDocValuesAnonymousInnerClassHelper2 : NumericDocValues
+ {
+ private readonly MemoryDocValuesProducer outerInstance;
+
+ private sbyte[] bytes;
+
+ public NumericDocValuesAnonymousInnerClassHelper2(MemoryDocValuesProducer outerInstance, sbyte[] bytes)
+ {
+ this.outerInstance = outerInstance;
+ this.bytes = bytes;
+ }
+
+ public override long get(int docID)
+ {
+ return bytes[docID];
+ }
+ }
+
+ private class NumericDocValuesAnonymousInnerClassHelper3 : NumericDocValues
+ {
+ private readonly MemoryDocValuesProducer outerInstance;
+
+ private long min;
+ private long mult;
+ private BlockPackedReader quotientReader;
+
+ public NumericDocValuesAnonymousInnerClassHelper3(MemoryDocValuesProducer outerInstance, long min, long mult,
+ BlockPackedReader quotientReader)
+ {
+ this.outerInstance = outerInstance;
+ this.min = min;
+ this.mult = mult;
+ this.quotientReader = quotientReader;
+ }
+
+ public override long get(int docID)
+ {
+ return min + mult*quotientReader.get(docID);
+ }
+ }
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: @Override public synchronized index.BinaryDocValues getBinary(index.FieldInfo field) throws java.io.IOException
- public override BinaryDocValues getBinary(FieldInfo field)
- {
- lock (this)
- {
- BinaryDocValues instance = binaryInstances[field.number];
- if (instance == null)
- {
- instance = loadBinary(field);
- binaryInstances[field.number] = instance;
- }
- return instance;
- }
- }
+ public override BinaryDocValues getBinary(FieldInfo field)
+ {
+ lock (this)
+ {
+ BinaryDocValues instance = binaryInstances[field.number];
+ if (instance == null)
+ {
+ instance = loadBinary(field);
+ binaryInstances[field.number] = instance;
+ }
+ return instance;
+ }
+ }
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: private index.BinaryDocValues loadBinary(index.FieldInfo field) throws java.io.IOException
- private BinaryDocValues loadBinary(FieldInfo field)
- {
- BinaryEntry entry = binaries[field.number];
- data.seek(entry.offset);
- PagedBytes bytes = new PagedBytes(16);
- bytes.copy(data, entry.numBytes);
+ private BinaryDocValues loadBinary(FieldInfo field)
+ {
+ BinaryEntry entry = binaries[field.number];
+ data.seek(entry.offset);
+ PagedBytes bytes = new PagedBytes(16);
+ bytes.copy(data, entry.numBytes);
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final util.PagedBytes.Reader bytesReader = bytes.freeze(true);
- PagedBytes.Reader bytesReader = bytes.freeze(true);
- if (entry.minLength == entry.maxLength)
- {
+ PagedBytes.Reader bytesReader = bytes.freeze(true);
+ if (entry.minLength == entry.maxLength)
+ {
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final int fixedLength = entry.minLength;
- int fixedLength = entry.minLength;
- ramBytesUsed_Renamed.addAndGet(bytes.ramBytesUsed());
- return new BinaryDocValuesAnonymousInnerClassHelper(this, bytesReader, fixedLength);
- }
- else
- {
- data.seek(data.FilePointer + entry.missingBytes);
+ int fixedLength = entry.minLength;
+ ramBytesUsed_Renamed.addAndGet(bytes.ramBytesUsed());
+ return new BinaryDocValuesAnonymousInnerClassHelper(this, bytesReader, fixedLength);
+ }
+ else
+ {
+ data.seek(data.FilePointer + entry.missingBytes);
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final util.packed.MonotonicBlockPackedReader addresses = new util.packed.MonotonicBlockPackedReader(data, entry.packedIntsVersion, entry.blockSize, maxDoc, false);
- MonotonicBlockPackedReader addresses = new MonotonicBlockPackedReader(data, entry.packedIntsVersion, entry.blockSize, maxDoc, false);
- ramBytesUsed_Renamed.addAndGet(bytes.ramBytesUsed() + addresses.ramBytesUsed());
- return new BinaryDocValuesAnonymousInnerClassHelper2(this, bytesReader, addresses);
- }
- }
-
- private class BinaryDocValuesAnonymousInnerClassHelper : BinaryDocValues
- {
- private readonly MemoryDocValuesProducer outerInstance;
-
- private IntIndexInput.Reader bytesReader;
- private int fixedLength;
-
- public BinaryDocValuesAnonymousInnerClassHelper(MemoryDocValuesProducer outerInstance, IntIndexInput.Reader bytesReader, int fixedLength)
- {
- this.outerInstance = outerInstance;
- this.bytesReader = bytesReader;
- this.fixedLength = fixedLength;
- }
-
- public override void get(int docID, BytesRef result)
- {
- bytesReader.fillSlice(result, fixedLength * (long)docID, fixedLength);
- }
- }
-
- private class BinaryDocValuesAnonymousInnerClassHelper2 : BinaryDocValues
- {
- private readonly MemoryDocValuesProducer outerInstance;
-
- private IntIndexInput.Reader bytesReader;
- private MonotonicBlockPackedReader addresses;
-
- public BinaryDocValuesAnonymousInnerClassHelper2(MemoryDocValuesProducer outerInstance, IntIndexInput.Reader bytesReader, MonotonicBlockPackedReader addresses)
- {
- this.outerInstance = outerInstance;
- this.bytesReader = bytesReader;
- this.addresses = addresses;
- }
-
- public override void get(int docID, BytesRef result)
- {
- long startAddress = docID == 0 ? 0 : addresses.get(docID - 1);
- long endAddress = addresses.get(docID);
- bytesReader.fillSlice(result, startAddress, (int)(endAddress - startAddress));
- }
- }
+ MonotonicBlockPackedReader addresses = new MonotonicBlockPackedReader(data, entry.packedIntsVersion,
+ entry.blockSize, maxDoc, false);
+ ramBytesUsed_Renamed.addAndGet(bytes.ramBytesUsed() + addresses.ramBytesUsed());
+ return new BinaryDocValuesAnonymousInnerClassHelper2(this, bytesReader, addresses);
+ }
+ }
+
+ private class BinaryDocValuesAnonymousInnerClassHelper : BinaryDocValues
+ {
+ private readonly MemoryDocValuesProducer outerInstance;
+
+ private IntIndexInput.Reader bytesReader;
+ private int fixedLength;
+
+ public BinaryDocValuesAnonymousInnerClassHelper(MemoryDocValuesProducer outerInstance,
+ IntIndexInput.Reader bytesReader, int fixedLength)
+ {
+ this.outerInstance = outerInstance;
+ this.bytesReader = bytesReader;
+ this.fixedLength = fixedLength;
+ }
+
+ public override void get(int docID, BytesRef result)
+ {
+ bytesReader.fillSlice(result, fixedLength*(long) docID, fixedLength);
+ }
+ }
+
+ private class BinaryDocValuesAnonymousInnerClassHelper2 : BinaryDocValues
+ {
+ private readonly MemoryDocValuesProducer outerInstance;
+
+ private IntIndexInput.Reader bytesReader;
+ private MonotonicBlockPackedReader addresses;
+
+ public BinaryDocValuesAnonymousInnerClassHelper2(MemoryDocValuesProducer outerInstance,
+ IntIndexInput.Reader bytesReader, MonotonicBlockPackedReader addresses)
+ {
+ this.outerInstance = outerInstance;
+ this.bytesReader = bytesReader;
+ this.addresses = addresses;
+ }
+
+ public override void get(int docID, BytesRef result)
+ {
+ long startAddress = docID == 0 ? 0 : addresses.get(docID - 1);
+ long endAddress = addresses.get(docID);
+ bytesReader.fillSlice(result, startAddress, (int) (endAddress - startAddress));
+ }
+ }
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: @Override public index.SortedDocValues getSorted(index.FieldInfo field) throws java.io.IOException
- public override SortedDocValues getSorted(FieldInfo field)
- {
+ public override SortedDocValues getSorted(FieldInfo field)
+ {
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final FSTEntry entry = fsts.get(field.number);
- FSTEntry entry = fsts[field.number];
- if (entry.numOrds == 0)
- {
- return DocValues.EMPTY_SORTED;
- }
- FST<long?> instance;
- lock (this)
- {
- instance = fstInstances[field.number];
- if (instance == null)
- {
- data.seek(entry.offset);
- instance = new FST<>(data, PositiveIntOutputs.Singleton);
- ramBytesUsed_Renamed.addAndGet(instance.sizeInBytes());
- fstInstances[field.number] = instance;
- }
- }
+ FSTEntry entry = fsts[field.number];
+ if (entry.numOrds == 0)
+ {
+ return DocValues.EMPTY_SORTED;
+ }
+ FST<long?> instance;
+ lock (this)
+ {
+ instance = fstInstances[field.number];
+ if (instance == null)
+ {
+ data.seek(entry.offset);
+ instance = new FST<>(data, PositiveIntOutputs.Singleton);
+ ramBytesUsed_Renamed.addAndGet(instance.sizeInBytes());
+ fstInstances[field.number] = instance;
+ }
+ }
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final index.NumericDocValues docToOrd = getNumeric(field);
- NumericDocValues docToOrd = getNumeric(field);
+ NumericDocValues docToOrd = getNumeric(field);
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final util.fst.FST<Long> fst = instance;
- FST<long?> fst = instance;
+ FST<long?> fst = instance;
- // per-thread resources
+ // per-thread resources
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final util.fst.FST.BytesReader in = fst.getBytesReader();
- FST.BytesReader @in = fst.BytesReader;
+ FST.BytesReader @in = fst.BytesReader;
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final util.fst.FST.Arc<Long> firstArc = new util.fst.FST.Arc<>();
- FST.Arc<long?> firstArc = new FST.Arc<long?>();
+ FST.Arc<long?> firstArc = new FST.Arc<long?>();
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final util.fst.FST.Arc<Long> scratchArc = new util.fst.FST.Arc<>();
- FST.Arc<long?> scratchArc = new FST.Arc<long?>();
+ FST.Arc<long?> scratchArc = new FST.Arc<long?>();
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final util.IntsRef scratchInts = new util.IntsRef();
- IntsRef scratchInts = new IntsRef();
+ IntsRef scratchInts = new IntsRef();
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final util.fst.BytesRefFSTEnum<Long> fstEnum = new util.fst.BytesRefFSTEnum<>(fst);
- BytesRefFSTEnum<long?> fstEnum = new BytesRefFSTEnum<long?>(fst);
+ BytesRefFSTEnum<long?> fstEnum = new BytesRefFSTEnum<long?>(fst);
- return new SortedDocValuesAnonymousInnerClassHelper(this, entry, docToOrd, fst, @in, firstArc, scratchArc, scratchInts, fstEnum);
- }
+ return new SortedDocValuesAnonymousInnerClassHelper(this, entry, docToOrd, fst, @in, firstArc, scratchArc,
+ scratchInts, fstEnum);
+ }
- private class SortedDocValuesAnonymousInnerClassHelper : SortedDocValues
- {
- private readonly MemoryDocValuesProducer outerInstance;
+ private class SortedDocValuesAnonymousInnerClassHelper : SortedDocValues
+ {
+ private readonly MemoryDocValuesProducer outerInstance;
- private MemoryDocValuesProducer.FSTEntry entry;
- private NumericDocValues docToOrd;
+ private MemoryDocValuesProducer.FSTEntry entry;
+ private NumericDocValues docToOrd;
//JAVA TO C# CONVERTER TODO TASK: Java wildcard generics are not converted to .NET:
//ORIGINAL LINE: private util.fst.FST<long?> fst;
- private FST<long?> fst;
- private FST.BytesReader @in;
+ private FST<long?> fst;
+ private FST.BytesReader @in;
//JAVA TO C# CONVERTER TODO TASK: Java wildcard generics are not converted to .NET:
//ORIGINAL LINE: private util.fst.FST.Arc<long?> firstArc;
- private FST.Arc<long?> firstArc;
+ private FST.Arc<long?> firstArc;
//JAVA TO C# CONVERTER TODO TASK: Java wildcard generics are not converted to .NET:
//ORIGINAL LINE: private util.fst.FST.Arc<long?> scratchArc;
- private FST.Arc<long?> scratchArc;
- private IntsRef scratchInts;
+ private FST.Arc<long?> scratchArc;
+ private IntsRef scratchInts;
//JAVA TO C# CONVERTER TODO TASK: Java wildcard generics are not converted to .NET:
//ORIGINAL LINE: private util.fst.BytesRefFSTEnum<long?> fstEnum;
- private BytesRefFSTEnum<long?> fstEnum;
-
- public SortedDocValuesAnonymousInnerClassHelper<T1, T2, T3, T4>(MemoryDocValuesProducer outerInstance, org.MemoryDocValuesProducer.FSTEntry entry, NumericDocValues docToOrd, FST<T1> fst, FST.BytesReader @in, FST.Arc<T2> firstArc, FST.Arc<T3> scratchArc, IntsRef scratchInts, BytesRefFSTEnum<T4> fstEnum)
- {
- this.outerInstance = outerInstance;
- this.entry = entry;
- this.docToOrd = docToOrd;
- this.fst = fst;
- this.@in = @in;
- this.firstArc = firstArc;
- this.scratchArc = scratchArc;
- this.scratchInts = scratchInts;
- this.fstEnum = fstEnum;
- }
-
- public override int getOrd(int docID)
- {
- return (int) docToOrd.get(docID);
- }
-
- public override void lookupOrd(int ord, BytesRef result)
- {
- try
- {
- @in.Position = 0;
- fst.getFirstArc(firstArc);
- IntsRef output = Util.getByOutput(fst, ord, @in, firstArc, scratchArc, scratchInts);
- result.bytes = new sbyte[output.length];
- result.offset = 0;
- result.length = 0;
- Util.toBytesRef(output, result);
- }
- catch (IOException bogus)
- {
- throw new Exception(bogus);
- }
- }
-
- public override int lookupTerm(BytesRef key)
- {
- try
- {
- BytesRefFSTEnum.InputOutput<long?> o = fstEnum.seekCeil(key);
- if (o == null)
- {
- return -ValueCount - 1;
- }
- else if (o.input.Equals(key))
- {
- return (int)o.output;
- }
- else
- {
- return (int) -o.output - 1;
- }
- }
- catch (IOException bogus)
- {
- throw new Exception(bogus);
- }
- }
-
- public override int ValueCount
- {
- get
- {
- return (int)entry.numOrds;
- }
- }
-
- public override TermsEnum termsEnum()
- {
- return new FSTTermsEnum(fst);
- }
- }
+ private BytesRefFSTEnum<long?> fstEnum;
+
+ public SortedDocValuesAnonymousInnerClassHelper<T1, T2, T3, T4>
+ (
+ private MemoryDocValuesProducer outerInstance, org
+ .
+ private MemoryDocValuesProducer.FSTEntry entry, NumericDocValues
+ private docToOrd
+ ,
+ private FST<T1> fst, FST
+ .
+ private BytesReader @in, FST
+ .
+ private Arc<T2> firstArc, FST
+ .
+ private Arc<T3> scratchArc, IntsRef
+ private scratchInts
+ ,
+ private BytesRefFSTEnum<T4> fstEnum
+ )
+ {
+ this.outerInstance = outerInstance;
+ this.entry = entry;
+ this.docToOrd = docToOrd;
+ this.fst = fst;
+ this.@in = @in;
+ this.firstArc = firstArc;
+ this.scratchArc = scratchArc;
+ this.scratchInts = scratchInts;
+ this.fstEnum = fstEnum;
+ }
+
+ public override int getOrd(int docID)
+ {
+ return (int) docToOrd.get(docID);
+ }
+
+ public override void lookupOrd(int ord, BytesRef result)
+ {
+ try
+ {
+ @in.Position = 0;
+ fst.getFirstArc(firstArc);
+ IntsRef output = Util.getByOutput(fst, ord, @in, firstArc, scratchArc, scratchInts);
+ result.bytes = new sbyte[output.length];
+ result.offset = 0;
+ result.length = 0;
+ Util.toBytesRef(output, result);
+ }
+ catch (IOException bogus)
+ {
+ throw new Exception(bogus);
+ }
+ }
+
+ public override int lookupTerm(BytesRef key)
+ {
+ try
+ {
+ BytesRefFSTEnum.InputOutput<long?> o = fstEnum.seekCeil(key);
+ if (o == null)
+ {
+ return -ValueCount - 1;
+ }
+ else if (o.input.Equals(key))
+ {
+ return (int) o.output;
+ }
+ else
+ {
+ return (int) -o.output - 1;
+ }
+ }
+ catch (IOException bogus)
+ {
+ throw new Exception(bogus);
+ }
+ }
+
+ public override int ValueCount
+ {
+ get { return (int) entry.numOrds; }
+ }
+
+ public override TermsEnum termsEnum()
+ {
+ return new FSTTermsEnum(fst);
+ }
+ }
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: @Override public index.SortedSetDocValues getSortedSet(index.FieldInfo field) throws java.io.IOException
- public override SortedSetDocValues getSortedSet(FieldInfo field)
- {
+ public override SortedSetDocValues getSortedSet(FieldInfo field)
+ {
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final FSTEntry entry = fsts.get(field.number);
- FSTEntry entry = fsts[field.number];
- if (entry.numOrds == 0)
- {
- return DocValues.EMPTY_SORTED_SET; // empty FST!
- }
- FST<long?> instance;
- lock (this)
- {
- instance = fstInstances[field.number];
- if (instance == null)
- {
- data.seek(entry.offset);
- instance = new FST<>(data, PositiveIntOutputs.Singleton);
- ramBytesUsed_Renamed.addAndGet(instance.sizeInBytes());
- fstInstances[field.number] = instance;
- }
- }
+ FSTEntry entry = fsts[field.number];
+ if (entry.numOrds == 0)
+ {
+ return DocValues.EMPTY_SORTED_SET; // empty FST!
+ }
+ FST<long?> instance;
+ lock (this)
+ {
+ instance = fstInstances[field.number];
+ if (instance == null)
+ {
+ data.seek(entry.offset);
+ instance = new FST<>(data, PositiveIntOutputs.Singleton);
+ ramBytesUsed_Renamed.addAndGet(instance.sizeInBytes());
+ fstInstances[field.number] = instance;
+ }
+ }
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final index.BinaryDocValues docToOrds = getBinary(field);
- BinaryDocValues docToOrds = getBinary(field);
+ BinaryDocValues docToOrds = getBinary(field);
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final util.fst.FST<Long> fst = instance;
- FST<long?> fst = instance;
+ FST<long?> fst = instance;
- // per-thread resources
+ // per-thread resources
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final util.fst.FST.BytesReader in = fst.getBytesReader();
- FST.BytesReader @in = fst.BytesReader;
+ FST.BytesReader @in = fst.BytesReader;
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final util.fst.FST.Arc<Long> firstArc = new util.fst.FST.Arc<>();
- FST.Arc<long?> firstArc = new FST.Arc<long?>();
+ FST.Arc<long?> firstArc = new FST.Arc<long?>();
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final util.fst.FST.Arc<Long> scratchArc = new util.fst.FST.Arc<>();
- FST.Arc<long?> scratchArc = new FST.Arc<long?>();
+ FST.Arc<long?> scratchArc = new FST.Arc<long?>();
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final util.IntsRef scratchInts = new util.IntsRef();
- IntsRef scratchInts = new IntsRef();
+ IntsRef scratchInts = new IntsRef();
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final util.fst.BytesRefFSTEnum<Long> fstEnum = new util.fst.BytesRefFSTEnum<>(fst);
- BytesRefFSTEnum<long?> fstEnum = new BytesRefFSTEnum<long?>(fst);
+ BytesRefFSTEnum<long?> fstEnum = new BytesRefFSTEnum<long?>(fst);
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final util.BytesRef ref = new util.BytesRef();
- BytesRef @ref = new BytesRef();
+ BytesRef @ref = new BytesRef();
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final store.ByteArrayDataInput input = new store.ByteArrayDataInput();
- ByteArrayDataInput input = new ByteArrayDataInput();
- return new SortedSetDocValuesAnonymousInnerClassHelper(this, entry, docToOrds, fst, @in, firstArc, scratchArc, scratchInts, fstEnum, @ref, input);
- }
+ ByteArrayDataInput input = new ByteArrayDataInput();
+ return new SortedSetDocValuesAnonymousInnerClassHelper(this, entry, docToOrds, fst, @in, firstArc,
+ scratchArc, scratchInts, fstEnum, @ref, input);
+ }
- private class SortedSetDocValuesAnonymousInnerClassHelper : SortedSetDocValues
- {
- private readonly MemoryDocValuesProducer outerInstance;
+ private class SortedSetDocValuesAnonymousInnerClassHelper : SortedSetDocValues
+ {
+ private readonly MemoryDocValuesProducer outerInstance;
- private MemoryDocValuesProducer.FSTEntry entry;
- private BinaryDocValues docToOrds;
+ private MemoryDocValuesProducer.FSTEntry entry;
+ private BinaryDocValues docToOrds;
//JAVA TO C# CONVERTER TODO TASK: Java wildcard generics are not converted to .NET:
//ORIGINAL LINE: private util.fst.FST<long?> fst;
- private FST<long?> fst;
- private FST.BytesReader @in;
+ private FST<long?> fst;
+ private FST.BytesReader @in;
//JAVA TO C# CONVERTER TODO TASK: Java wildcard generics are not converted to .NET:
//ORIGINAL LINE: private util.fst.FST.Arc<long?> firstArc;
- private FST.Arc<long?> firstArc;
+ private FST.Arc<long?> firstArc;
//JAVA TO C# CONVERTER TODO TASK: Java wildcard generics are not converted to .NET:
//ORIGINAL LINE: private util.fst.FST.Arc<long?> scratchArc;
- private FST.Arc<long?> scratchArc;
- private IntsRef scratchInts;
+ private FST.Arc<long?> scratchArc;
+ private IntsRef scratchInts;
//JAVA TO C# CONVERTER TODO TASK: Java wildcard generics are not converted to .NET:
//ORIGINAL LINE: private util.fst.BytesRefFSTEnum<long?> fstEnum;
- private BytesRefFSTEnum<long?> fstEnum;
- private BytesRef @ref;
- private ByteArrayDataInput input;
-
- public SortedSetDocValuesAnonymousInnerClassHelper<T1, T2, T3, T4>(MemoryDocValuesProducer outerInstance, org.MemoryDocValuesProducer.FSTEntry entry, BinaryDocValues docToOrds, FST<T1> fst, FST.BytesReader @in, FST.Arc<T2> firstArc, FST.Arc<T3> scratchArc, IntsRef scratchInts, BytesRefFSTEnum<T4> fstEnum, BytesRef @ref, ByteArrayDataInput input)
- {
- this.outerInstance = outerInstance;
- this.entry = entry;
- this.docToOrds = docToOrds;
- this.fst = fst;
- this.@in = @in;
- this.firstArc = firstArc;
- this.scratchArc = scratchArc;
- this.scratchInts = scratchInts;
- this.fstEnum = fstEnum;
- this.@ref = @ref;
- this.input = input;
- }
-
- internal long currentOrd;
-
- public override long nextOrd()
- {
- if (input.eof())
- {
- return NO_MORE_ORDS;
- }
- else
- {
- currentOrd += input.readVLong();
- return currentOrd;
- }
- }
-
- public override int Document
- {
- set
- {
- docToOrds.get(value, @ref);
- input.reset(@ref.bytes, @ref.offset, @ref.length);
- currentOrd = 0;
- }
- }
-
- public override void lookupOrd(long ord, BytesRef result)
- {
- try
- {
- @in.Position = 0;
- fst.getFirstArc(firstArc);
- IntsRef output = Util.getByOutput(fst, ord, @in, firstArc, scratchArc, scratchInts);
- result.bytes = new sbyte[output.length];
- result.offset = 0;
- result.length = 0;
- Util.toBytesRef(output, result);
- }
- catch (IOException bogus)
- {
- throw new Exception(bogus);
- }
- }
-
- public override long lookupTerm(BytesRef key)
- {
- try
- {
- BytesRefFSTEnum.InputOutput<long?> o = fstEnum.seekCeil(key);
- if (o == null)
- {
- return -ValueCount - 1;
- }
- else if (o.input.Equals(key))
- {
- return (int)o.output;
- }
- else
- {
- return -o.output - 1;
- }
- }
- catch (IOException bogus)
- {
- throw new Exception(bogus);
- }
- }
-
- public override long ValueCount
- {
- get
- {
- return entry.numOrds;
- }
- }
-
- public override TermsEnum termsEnum()
- {
- return new FSTTermsEnum(fst);
- }
- }
+ private BytesRefFSTEnum<long?> fstEnum;
+ private BytesRef @ref;
+ private ByteArrayDataInput input;
+
+ public SortedSetDocValuesAnonymousInnerClassHelper<T1, T2, T3, T4>
+ (
+ private MemoryDocValuesProducer outerInstance, org
+ .
+ private MemoryDocValuesProducer.FSTEntry entry, BinaryDocValues
+ private docToOrds
+ ,
+ private FST<T1> fst, FST
+ .
+ private BytesReader @in, FST
+ .
+ private Arc<T2> firstArc, FST
+ .
+ private Arc<T3> scratchArc, IntsRef
+ private scratchInts
+ ,
+ private BytesRefFSTEnum<T4> fstEnum, BytesRef
+ private @ref
+ ,
+ private ByteArrayDataInput input
+ )
+ {
+ this.outerInstance = outerInstance;
+ this.entry = entry;
+ this.docToOrds = docToOrds;
+ this.fst = fst;
+ this.@in = @in;
+ this.firstArc = firstArc;
+ this.scratchArc = scratchArc;
+ this.scratchInts = scratchInts;
+ this.fstEnum = fstEnum;
+ this.@ref = @ref;
+ this.input = input;
+ }
+
+ internal long currentOrd;
+
+ public override long nextOrd()
+ {
+ if (input.eof())
+ {
+ return NO_MORE_ORDS;
+ }
+ else
+ {
+ currentOrd += input.ReadVLong();
+ return currentOrd;
+ }
+ }
+
+ public override int Document
+ {
+ set
+ {
+ docToOrds.get(value, @ref);
+ input.reset(@ref.bytes, @ref.offset, @ref.length);
+ currentOrd = 0;
+ }
+ }
+
+ public override void lookupOrd(long ord, BytesRef result)
+ {
+ try
+ {
+ @in.Position = 0;
+ fst.getFirstArc(firstArc);
+ IntsRef output = Util.getByOutput(fst, ord, @in, firstArc, scratchArc, scratchInts);
+ result.bytes = new sbyte[output.length];
+ result.offset = 0;
+ result.length = 0;
+ Util.toBytesRef(output, result);
+ }
+ catch (IOException bogus)
+ {
+ throw new Exception(bogus);
+ }
+ }
+
+ public override long lookupTerm(BytesRef key)
+ {
+ try
+ {
+ BytesRefFSTEnum.InputOutput<long?> o = fstEnum.seekCeil(key);
+ if (o == null)
+ {
+ return -ValueCount - 1;
+ }
+ else if (o.input.Equals(key))
+ {
+ return (int) o.output;
+ }
+ else
+ {
+ return -o.output - 1;
+ }
+ }
+ catch (IOException bogus)
+ {
+ throw new Exception(bogus);
+ }
+ }
+
+ public override long ValueCount
+ {
+ get { return entry.numOrds; }
+ }
+
+ public override TermsEnum termsEnum()
+ {
+ return new FSTTermsEnum(fst);
+ }
+ }
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: private util.Bits getMissingBits(int fieldNumber, final long offset, final long length) throws java.io.IOException
//JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET:
- private Bits getMissingBits(int fieldNumber, long offset, long length)
- {
- if (offset == -1)
- {
- return new Bits.MatchAllBits(maxDoc);
- }
- else
- {
- Bits instance;
- lock (this)
- {
- instance = docsWithFieldInstances[fieldNumber];
- if (instance == null)
- {
- IndexInput data = this.data.clone();
- data.seek(offset);
- Debug.Assert(length % 8 == 0);
- long[] bits = new long[(int) length >> 3];
- for (int i = 0; i < bits.Length; i++)
- {
- bits[i] = data.readLong();
- }
- instance = new FixedBitSet(bits, maxDoc);
- docsWithFieldInstances[fieldNumber] = instance;
- }
- }
- return instance;
- }
- }
+ private Bits getMissingBits(int fieldNumber, long offset, long length)
+ {
+ if (offset == -1)
+ {
+ return new Bits.MatchAllBits(maxDoc);
+ }
+ else
+ {
+ Bits instance;
+ lock (this)
+ {
+ instance = docsWithFieldInstances[fieldNumber];
+ if (instance == null)
+ {
+ IndexInput data = this.data.clone();
+ data.seek(offset);
+ Debug.Assert(length%8 == 0);
+ long[] bits = new long[(int) length >> 3];
+ for (int i = 0; i < bits.Length; i++)
+ {
+ bits[i] = data.ReadLong();
+ }
+ instance = new FixedBitSet(bits, maxDoc);
+ docsWithFieldInstances[fieldNumber] = instance;
+ }
+ }
+ return instance;
+ }
+ }
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: @Override public util.Bits getDocsWithField(index.FieldInfo field) throws java.io.IOException
- public override Bits getDocsWithField(FieldInfo field)
- {
- switch (field.DocValuesType)
- {
- case SORTED_SET:
- return DocValues.docsWithValue(getSortedSet(field), maxDoc);
- case SORTED:
- return DocValues.docsWithValue(getSorted(field), maxDoc);
- case BINARY:
- BinaryEntry be = binaries[field.number];
- return getMissingBits(field.number, be.missingOffset, be.missingBytes);
- case NUMERIC:
- NumericEntry ne = numerics[field.number];
- return getMissingBits(field.number, ne.missingOffset, ne.missingBytes);
- default:
- throw new AssertionError();
- }
- }
+ public override Bits getDocsWithField(FieldInfo field)
+ {
+ switch (field.DocValuesType)
+ {
+ case SORTED_SET:
+ return DocValues.docsWithValue(getSortedSet(field), maxDoc);
+ case SORTED:
+ return DocValues.docsWithValue(getSorted(field), maxDoc);
+ case BINARY:
+ BinaryEntry be = binaries[field.number];
+ return getMissingBits(field.number, be.missingOffset, be.missingBytes);
+ case NUMERIC:
+ NumericEntry ne = numerics[field.number];
+ return getMissingBits(field.number, ne.missingOffset, ne.missingBytes);
+ default:
+ throw new AssertionError();
+ }
+ }
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: @Override public void close() throws java.io.IOException
- public override void close()
- {
- data.close();
- }
-
- internal class NumericEntry
- {
- internal long offset;
- internal long missingOffset;
- internal long missingBytes;
- internal sbyte format;
- internal int packedIntsVersion;
- }
-
- internal class BinaryEntry
- {
- internal long offset;
- internal long missingOffset;
- internal long missingBytes;
- internal long numBytes;
- internal int minLength;
- internal int maxLength;
- internal int packedIntsVersion;
- internal int blockSize;
- }
-
- internal class FSTEntry
- {
- internal long offset;
- internal long numOrds;
- }
-
- // exposes FSTEnum directly as a TermsEnum: avoids binary-search next()
- internal class FSTTermsEnum : TermsEnum
- {
- internal readonly BytesRefFSTEnum<long?> @in;
-
- // this is all for the complicated seek(ord)...
- // maybe we should add a FSTEnum that supports this operation?
- internal readonly FST<long?> fst;
- internal readonly FST.BytesReader bytesReader;
- internal readonly FST.Arc<long?> firstArc = new FST.Arc<long?>();
- internal readonly FST.Arc<long?> scratchArc = new FST.Arc<long?>();
- internal readonly IntsRef scratchInts = new IntsRef();
- internal readonly BytesRef scratchBytes = new BytesRef();
-
- internal FSTTermsEnum(FST<long?> fst)
- {
- this.fst = fst;
- @in = new BytesRefFSTEnum<>(fst);
- bytesReader = fst.BytesReader;
- }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public util.BytesRef next() throws java.io.IOException
- public override BytesRef next()
- {
- BytesRefFSTEnum.InputOutput<long?> io = @in.next();
- if (io == null)
- {
- return null;
- }
- else
- {
- return io.input;
- }
- }
-
- public override IComparer<BytesRef> Comparator
- {
- get
- {
- return BytesRef.UTF8SortedAsUnicodeComparator;
- }
- }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public SeekStatus seekCeil(util.BytesRef text) throws java.io.IOException
- public override SeekStatus seekCeil(BytesRef text)
- {
- if (@in.seekCeil(text) == null)
- {
- return SeekStatus.END;
- }
- else if (term().Equals(text))
- {
- // TODO: add SeekStatus to FSTEnum like in https://issues.apache.org/jira/browse/LUCENE-3729
- // to remove this comparision?
- return SeekStatus.FOUND;
- }
- else
- {
- return SeekStatus.NOT_FOUND;
- }
- }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public boolean seekExact(util.BytesRef text) throws java.io.IOException
- public override bool seekExact(BytesRef text)
- {
- if (@in.seekExact(text) == null)
- {
- return false;
- }
- else
- {
- return true;
- }
- }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public void seekExact(long ord) throws java.io.IOException
- public override void seekExact(long ord)
- {
- // TODO: would be better to make this simpler and faster.
- // but we dont want to introduce a bug that corrupts our enum state!
- bytesReader.Position = 0;
- fst.getFirstArc(firstArc);
- IntsRef output = Util.getByOutput(fst, ord, bytesReader, firstArc, scratchArc, scratchInts);
- scratchBytes.bytes = new sbyte[output.length];
- scratchBytes.offset = 0;
- scratchBytes.length = 0;
- Util.toBytesRef(output, scratchBytes);
- // TODO: we could do this lazily, better to try to push into FSTEnum though?
- @in.seekExact(scratchBytes);
- }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public util.BytesRef term() throws java.io.IOException
- public override BytesRef term()
- {
- return @in.current().input;
- }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public long ord() throws java.io.IOException
- public override long ord()
- {
- return @in.current().output;
- }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public int docFreq() throws java.io.IOException
- public override int docFreq()
- {
- throw new System.NotSupportedException();
- }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public long totalTermFreq() throws java.io.IOException
- public override long totalTermFreq()
- {
- throw new System.NotSupportedException();
- }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public index.DocsEnum docs(util.Bits liveDocs, index.DocsEnum reuse, int flags) throws java.io.IOException
- public override DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags)
- {
- throw new System.NotSupportedException();
- }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public index.DocsAndPositionsEnum docsAndPositions(util.Bits liveDocs, index.DocsAndPositionsEnum reuse, int flags) throws java.io.IOException
- public override DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags)
- {
- throw new System.NotSupportedException();
- }
- }
- }
+ public override void close()
+ {
+ data.close();
+ }
+
+ internal class NumericEntry
+ {
+ internal long offset;
+ internal long missingOffset;
+ internal long missingBytes;
+ internal sbyte format;
+ internal int packedIntsVersion;
+ }
+
+ internal class BinaryEntry
+ {
+ internal long offset;
+ internal long missingOffset;
+ internal long missingBytes;
+ internal long numBytes;
+ internal int minLength;
+ internal int maxLength;
+ internal int packedIntsVersion;
+ internal int blockSize;
+ }
+
+ internal class FSTEntry
+ {
+ internal long offset;
+ internal long numOrds;
+ }
+
+ // exposes FSTEnum directly as a TermsEnum: avoids binary-search next()
+ internal class FSTTermsEnum : TermsEnum
+ {
+ internal readonly BytesRefFSTEnum<long?> @in;
+
+ // this is all for the complicated seek(ord)...
+ // maybe we should add an FSTEnum that supports this operation?
+ internal readonly FST<long?> fst;
+ internal readonly FST.BytesReader bytesReader;
+ internal readonly FST.Arc<long?> firstArc = new FST.Arc<long?>();
+ internal readonly FST.Arc<long?> scratchArc = new FST.Arc<long?>();
+ internal readonly IntsRef scratchInts = new IntsRef();
+ internal readonly BytesRef scratchBytes = new BytesRef();
+
+ internal FSTTermsEnum(FST<long?> fst)
+ {
+ this.fst = fst;
+ @in = new BytesRefFSTEnum<>(fst);
+ bytesReader = fst.BytesReader;
+ }
+
+ public override BytesRef Next()
+ {
+ BytesRefFSTEnum.InputOutput<long?> io = @in.Next();
+ return io == null ? null : io.Input;
+ }
+
+ public override IComparer<BytesRef> Comparator
+ {
+ get { return BytesRef.UTF8SortedAsUnicodeComparer; }
+ }
+
+ public override SeekStatus SeekCeil(BytesRef text)
+ {
+ if (@in.SeekCeil(text) == null)
+ {
+ return SeekStatus.END;
+ }
+ else if (Term().Equals(text))
+ {
+ // TODO: add SeekStatus to FSTEnum like in https://issues.apache.org/jira/browse/LUCENE-3729
+ // to remove this comparison?
+ return SeekStatus.FOUND;
+ }
+ else
+ {
+ return SeekStatus.NOT_FOUND;
+ }
+ }
+
+ public override bool SeekExact(BytesRef text)
+ {
+ return @in.SeekExact(text) != null;
+ }
+
+ public override void SeekExact(long ord)
+ {
+ // TODO: would be better to make this simpler and faster.
+ // but we don't want to introduce a bug that corrupts our enum state!
+ bytesReader.Position = 0;
+ fst.GetFirstArc(firstArc);
+ IntsRef output = Util.GetByOutput(fst, ord, bytesReader, firstArc, scratchArc, scratchInts);
+ scratchBytes.Bytes = new sbyte[output.Length];
+ scratchBytes.Offset = 0;
+ scratchBytes.Length = 0;
+ Util.ToBytesRef(output, scratchBytes);
+ // TODO: we could do this lazily, better to try to push into FSTEnum though?
+ @in.SeekExact(scratchBytes);
+ }
+
+ public override BytesRef Term()
+ {
+ return @in.Current().Input;
+ }
+
+ public override long Ord()
+ {
+ return @in.Current().Output;
+ }
+
+ public override int DocFreq()
+ {
+ throw new NotSupportedException();
+ }
+
+ public override long TotalTermFreq()
+ {
+ throw new NotSupportedException();
+ }
+
+ public override DocsEnum Docs(Bits liveDocs, DocsEnum reuse, int flags)
+ {
+ throw new NotSupportedException();
+ }
+
+ public override DocsAndPositionsEnum DocsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags)
+ {
+ throw new NotSupportedException();
+ }
+ }
+ }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0cc0e7ec/src/Lucene.Net.Codecs/Memory/MemoryPostingsFormat.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Codecs/Memory/MemoryPostingsFormat.cs b/src/Lucene.Net.Codecs/Memory/MemoryPostingsFormat.cs
index b655f4e..0e1d4ac 100644
--- a/src/Lucene.Net.Codecs/Memory/MemoryPostingsFormat.cs
+++ b/src/Lucene.Net.Codecs/Memory/MemoryPostingsFormat.cs
@@ -50,7 +50,7 @@ namespace Lucene.Net.Codecs.Memory
using BytesRefFSTEnum = Util.Fst.BytesRefFSTEnum;
using FST = Util.Fst.FST;
using Util = Util.Fst.Util;
- using PackedInts = Util.packed.PackedInts;
+ using PackedInts = Util.Packed.PackedInts;
// TODO: would be nice to somehow allow this to act like
// InstantiatedIndex, by never writing to disk; ie you write
@@ -129,7 +129,7 @@ namespace Lucene.Net.Codecs.Memory
this.field = field;
this.doPackFST = doPackFST;
this.acceptableOverheadRatio = acceptableOverheadRatio;
- builder = new Builder<>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, int.MaxValue, outputs, null, doPackFST, acceptableOverheadRatio, true, 15);
+ builder = new Builder<BytesRef>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, int.MaxValue, outputs, null, doPackFST, acceptableOverheadRatio, true, 15);
}
private class PostingsWriter : PostingsConsumer
@@ -152,31 +152,26 @@ namespace Lucene.Net.Codecs.Memory
internal int lastOffsetLength;
internal int lastOffset;
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public void startDoc(int docID, int termDocFreq) throws java.io.IOException
- public override void startDoc(int docID, int termDocFreq)
+ public override void StartDoc(int docID, int termDocFreq)
{
- //System.out.println(" startDoc docID=" + docID + " freq=" + termDocFreq);
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final int delta = docID - lastDocID;
int delta = docID - lastDocID;
Debug.Assert(docID == 0 || delta > 0);
lastDocID = docID;
docCount++;
- if (outerInstance.field.IndexOptions == IndexOptions.DOCS_ONLY)
+ if (outerInstance.field.FieldIndexOptions == IndexOptions.DOCS_ONLY)
{
- buffer.writeVInt(delta);
+ buffer.WriteVInt(delta);
}
else if (termDocFreq == 1)
{
- buffer.writeVInt((delta << 1) | 1);
+ buffer.WriteVInt((delta << 1) | 1);
}
else
{
- buffer.writeVInt(delta << 1);
+ buffer.WriteVInt(delta << 1);
Debug.Assert(termDocFreq > 0);
- buffer.writeVInt(termDocFreq);
+ buffer.WriteVInt(termDocFreq);
}
lastPos = 0;
@@ -205,17 +200,17 @@ namespace Lucene.Net.Codecs.Memory
if (payloadLen != lastPayloadLen)
{
lastPayloadLen = payloadLen;
- buffer.writeVInt((delta << 1) | 1);
- buffer.writeVInt(payloadLen);
+ buffer.WriteVInt((delta << 1) | 1);
+ buffer.WriteVInt(payloadLen);
}
else
{
- buffer.writeVInt(delta << 1);
+ buffer.WriteVInt(delta << 1);
}
}
else
{
- buffer.writeVInt(delta);
+ buffer.WriteVInt(delta);
}
if (outerInstance.field.IndexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0)
@@ -226,12 +221,12 @@ namespace Lucene.Net.Codecs.Memory
int offsetLength = endOffset - startOffset;
if (offsetLength != lastOffsetLength)
{
- buffer.writeVInt(offsetDelta << 1 | 1);
- buffer.writeVInt(offsetLength);
+ buffer.WriteVInt(offsetDelta << 1 | 1);
+ buffer.WriteVInt(offsetLength);
}
else
{
- buffer.writeVInt(offsetDelta << 1);
+ buffer.WriteVInt(offsetDelta << 1);
}
lastOffset = startOffset;
lastOffsetLength = offsetLength;
@@ -239,7 +234,7 @@ namespace Lucene.Net.Codecs.Memory
if (payloadLen > 0)
{
- buffer.writeBytes(payload.bytes, payload.offset, payloadLen);
+ buffer.WriteBytes(payload.bytes, payload.offset, payloadLen);
}
}
@@ -282,13 +277,13 @@ namespace Lucene.Net.Codecs.Memory
Debug.Assert(buffer2.FilePointer == 0);
- buffer2.writeVInt(stats.docFreq);
+ buffer2.WriteVInt(stats.docFreq);
if (field.IndexOptions != IndexOptions.DOCS_ONLY)
{
- buffer2.writeVLong(stats.totalTermFreq - stats.docFreq);
+ buffer2.WriteVLong(stats.totalTermFreq - stats.docFreq);
}
int pos = (int) buffer2.FilePointer;
- buffer2.writeTo(finalBuffer, 0);
+ buffer2.WriteTo(finalBuffer, 0);
buffer2.reset();
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
@@ -298,7 +293,7 @@ namespace Lucene.Net.Codecs.Memory
{
finalBuffer = ArrayUtil.grow(finalBuffer, totalBytes);
}
- postingsWriter.buffer.writeTo(finalBuffer, pos);
+ postingsWriter.buffer.WriteTo(finalBuffer, pos);
postingsWriter.buffer.reset();
spare.bytes = finalBuffer;
@@ -319,14 +314,14 @@ namespace Lucene.Net.Codecs.Memory
{
if (termCount > 0)
{
- @out.writeVInt(termCount);
- @out.writeVInt(field.number);
+ @out.WriteVInt(termCount);
+ @out.WriteVInt(field.number);
if (field.IndexOptions != IndexOptions.DOCS_ONLY)
{
- @out.writeVLong(sumTotalTermFreq);
+ @out.WriteVLong(sumTotalTermFreq);
}
- @out.writeVLong(sumDocFreq);
- @out.writeVInt(docCount);
+ @out.WriteVLong(sumDocFreq);
+ @out.WriteVInt(docCount);
FST<BytesRef> fst = builder.finish();
fst.save(@out);
//System.out.println("finish field=" + field.name + " fp=" + out.getFilePointer());
@@ -361,14 +356,14 @@ namespace Lucene.Net.Codecs.Memory
bool success = false;
try
{
- CodecUtil.writeHeader(@out, CODEC_NAME, VERSION_CURRENT);
+ CodecUtil.WriteHeader(@out, CODEC_NAME, VERSION_CURRENT);
success = true;
}
finally
{
if (!success)
{
- IOUtils.closeWhileHandlingException(@out);
+ IOUtils.CloseWhileHandlingException(@out);
}
}
@@ -400,8 +395,8 @@ namespace Lucene.Net.Codecs.Memory
// EOF marker:
try
{
- @out.writeVInt(0);
- CodecUtil.writeFooter(@out);
+ @out.WriteVInt(0);
+ CodecUtil.WriteFooter(@out);
}
finally
{
@@ -1147,54 +1142,52 @@ namespace Lucene.Net.Codecs.Memory
private class FieldsProducerAnonymousInnerClassHelper : FieldsProducer
{
- private readonly MemoryPostingsFormat outerInstance;
-
- private SortedMap<string, TermsReader> fields;
+ private readonly Dictionary<string, TermsReader> _fields;
- public FieldsProducerAnonymousInnerClassHelper(MemoryPostingsFormat outerInstance, SortedMap<string, TermsReader> fields)
+ public FieldsProducerAnonymousInnerClassHelper(MemoryPostingsFormat outerInstance, Dictionary<string, TermsReader> fields)
{
- this.outerInstance = outerInstance;
- this.fields = fields;
+ _fields = fields;
}
- public override IEnumerator<string> iterator()
+ public override IEnumerator<string> GetEnumerator()
{
- return Collections.unmodifiableSet(fields.Keys).GetEnumerator();
+ return Collections.unmodifiableSet(_fields.Keys).GetEnumerator();
}
- public override Terms terms(string field)
+ public override Terms Terms(string field)
{
- return fields.get(field);
+ return _fields.Get(field);
}
- public override int size()
+ public override int Size
{
- return fields.size();
+ get
+ {
+ return _fields.Size ;
+ }
}
- public override void close()
+ public override void Dispose()
{
// Drop ref to FST:
- foreach (TermsReader termsReader in fields.values())
+ foreach (TermsReader termsReader in _fields)
{
termsReader.fst = null;
}
}
- public override long ramBytesUsed()
+ public override long RamBytesUsed()
{
long sizeInBytes = 0;
- foreach (KeyValuePair<string, TermsReader> entry in fields.entrySet())
+ foreach (KeyValuePair<string, TermsReader> entry in _fields.EntrySet())
{
- sizeInBytes += (entry.Key.length() * RamUsageEstimator.NUM_BYTES_CHAR);
+ sizeInBytes += (entry.Key.Length * RamUsageEstimator.NUM_BYTES_CHAR);
sizeInBytes += entry.Value.ramBytesUsed();
}
return sizeInBytes;
}
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public void checkIntegrity() throws java.io.IOException
- public override void checkIntegrity()
+ public override void CheckIntegrity()
{
}
}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0cc0e7ec/src/Lucene.Net.Core/Util/Fst/PositiveIntOutputs.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Util/Fst/PositiveIntOutputs.cs b/src/Lucene.Net.Core/Util/Fst/PositiveIntOutputs.cs
index e7ab34e..497d38c 100644
--- a/src/Lucene.Net.Core/Util/Fst/PositiveIntOutputs.cs
+++ b/src/Lucene.Net.Core/Util/Fst/PositiveIntOutputs.cs
@@ -29,7 +29,6 @@ namespace Lucene.Net.Util.Fst
///
/// @lucene.experimental
/// </summary>
-
public sealed class PositiveIntOutputs : Outputs<long>
{
private static readonly long NO_OUTPUT = new long();
[5/5] lucenenet git commit: close apache/lucene.net#5 - long ago fixed
Posted by pn...@apache.org.
close apache/lucene.net#5 - long ago fixed
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/bcff24dd
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/bcff24dd
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/bcff24dd
Branch: refs/heads/master
Commit: bcff24dd18b716d8db17b4702066bad4a3f04301
Parents: 0cc0e7e
Author: Prescott Nasser <pn...@apache.org>
Authored: Sat Nov 8 14:54:44 2014 -0800
Committer: Prescott Nasser <pn...@apache.org>
Committed: Sat Nov 8 14:54:44 2014 -0800
----------------------------------------------------------------------
----------------------------------------------------------------------
[3/5] lucenenet git commit: Updating Memory Codec
Posted by pn...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0cc0e7ec/src/Lucene.Net.Codecs/Memory/DirectPostingsFormat.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Codecs/Memory/DirectPostingsFormat.cs b/src/Lucene.Net.Codecs/Memory/DirectPostingsFormat.cs
index 36ba9d5..58edcc5 100644
--- a/src/Lucene.Net.Codecs/Memory/DirectPostingsFormat.cs
+++ b/src/Lucene.Net.Codecs/Memory/DirectPostingsFormat.cs
@@ -1,1542 +1,1533 @@
-using System;
-using System.Diagnostics;
-using System.Collections.Generic;
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
namespace Lucene.Net.Codecs.Memory
{
- /// <summary>
- /// Licensed to the Apache Software Foundation (ASF) under one or more
- /// contributor license agreements. See the NOTICE file distributed with
- /// this work for additional information regarding copyright ownership.
- /// The ASF licenses this file to You under the Apache License, Version 2.0
- /// (the "License"); you may not use this file except in compliance with
- /// the License. You may obtain a copy of the License at
- ///
- /// http://www.apache.org/licenses/LICENSE-2.0
- ///
- /// Unless required by applicable law or agreed to in writing, software
- /// distributed under the License is distributed on an "AS IS" BASIS,
- /// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- /// See the License for the specific language governing permissions and
- /// limitations under the License.
- /// </summary>
-
-
- using Lucene41PostingsFormat = codecs.lucene41.Lucene41PostingsFormat; // javadocs
- using DocsAndPositionsEnum = index.DocsAndPositionsEnum;
- using DocsEnum = index.DocsEnum;
- using IndexOptions = index.FieldInfo.IndexOptions;
- using FieldInfo = index.FieldInfo;
- using Fields = index.Fields;
- using OrdTermState = index.OrdTermState;
- using SegmentReadState = index.SegmentReadState;
- using SegmentWriteState = index.SegmentWriteState;
- using TermState = index.TermState;
- using Terms = index.Terms;
- using TermsEnum = index.TermsEnum;
- using IOContext = store.IOContext;
- using RAMOutputStream = store.RAMOutputStream;
- using ArrayUtil = util.ArrayUtil;
- using Bits = util.Bits;
- using BytesRef = util.BytesRef;
- using RamUsageEstimator = util.RamUsageEstimator;
- using CompiledAutomaton = util.automaton.CompiledAutomaton;
- using RunAutomaton = util.automaton.RunAutomaton;
- using Transition = util.automaton.Transition;
-
- // TODO:
- // - build depth-N prefix hash?
- // - or: longer dense skip lists than just next byte?
-
- /// <summary>
- /// Wraps <seealso cref="Lucene41PostingsFormat"/> format for on-disk
- /// storage, but then at read time loads and stores all
- /// terms & postings directly in RAM as byte[], int[].
- ///
- /// <para><b><font color=red>WARNING</font></b>: This is
- /// exceptionally RAM intensive: it makes no effort to
- /// compress the postings data, storing terms as separate
- /// byte[] and postings as separate int[], but as a result it
- /// gives substantial increase in search performance.
- ///
- /// </para>
- /// <para>This postings format supports <seealso cref="TermsEnum#ord"/>
- /// and <seealso cref="TermsEnum#seekExact(long)"/>.
- ///
- /// </para>
- /// <para>Because this holds all term bytes as a single
- /// byte[], you cannot have more than 2.1GB worth of term
- /// bytes in a single segment.
- ///
- /// @lucene.experimental
- /// </para>
- /// </summary>
-
- public sealed class DirectPostingsFormat : PostingsFormat
- {
-
- private readonly int minSkipCount;
- private readonly int lowFreqCutoff;
-
- private const int DEFAULT_MIN_SKIP_COUNT = 8;
- private const int DEFAULT_LOW_FREQ_CUTOFF = 32;
-
- //private static final boolean DEBUG = true;
-
- // TODO: allow passing/wrapping arbitrary postings format?
-
- public DirectPostingsFormat() : this(DEFAULT_MIN_SKIP_COUNT, DEFAULT_LOW_FREQ_CUTOFF)
- {
- }
-
- /// <summary>
- /// minSkipCount is how many terms in a row must have the
- /// same prefix before we put a skip pointer down. Terms
- /// with docFreq <= lowFreqCutoff will use a single int[]
- /// to hold all docs, freqs, position and offsets; terms
- /// with higher docFreq will use separate arrays.
- /// </summary>
- public DirectPostingsFormat(int minSkipCount, int lowFreqCutoff) : base("Direct")
- {
- this.minSkipCount = minSkipCount;
- this.lowFreqCutoff = lowFreqCutoff;
- }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public codecs.FieldsConsumer fieldsConsumer(index.SegmentWriteState state) throws java.io.IOException
- public override FieldsConsumer fieldsConsumer(SegmentWriteState state)
- {
- return PostingsFormat.forName("Lucene41").fieldsConsumer(state);
- }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public codecs.FieldsProducer fieldsProducer(index.SegmentReadState state) throws java.io.IOException
- public override FieldsProducer fieldsProducer(SegmentReadState state)
- {
- FieldsProducer postings = PostingsFormat.forName("Lucene41").fieldsProducer(state);
- if (state.context.context != IOContext.Context.MERGE)
- {
- FieldsProducer loadedPostings;
- try
- {
- postings.checkIntegrity();
- loadedPostings = new DirectFields(state, postings, minSkipCount, lowFreqCutoff);
- }
- finally
- {
- postings.close();
- }
- return loadedPostings;
- }
- else
- {
- // Don't load postings for merge:
- return postings;
- }
- }
-
- private sealed class DirectFields : FieldsProducer
- {
- internal readonly IDictionary<string, DirectField> fields = new SortedDictionary<string, DirectField>();
+ using System;
+ using System.Diagnostics;
+ using System.Collections.Generic;
+
+ using Lucene41PostingsFormat = Lucene41.Lucene41PostingsFormat;
+ using DocsAndPositionsEnum = Index.DocsAndPositionsEnum;
+ using DocsEnum = Index.DocsEnum;
+ using IndexOptions = Index.FieldInfo.IndexOptions;
+ using FieldInfo = Index.FieldInfo;
+ using Fields = Index.Fields;
+ using OrdTermState = Index.OrdTermState;
+ using SegmentReadState = Index.SegmentReadState;
+ using SegmentWriteState = Index.SegmentWriteState;
+ using TermState = Index.TermState;
+ using Terms = Index.Terms;
+ using TermsEnum = Index.TermsEnum;
+ using IOContext = Store.IOContext;
+ using RAMOutputStream = Store.RAMOutputStream;
+ using ArrayUtil = Util.ArrayUtil;
+ using Bits = Util.Bits;
+ using BytesRef = Util.BytesRef;
+ using RamUsageEstimator = Util.RamUsageEstimator;
+ using CompiledAutomaton = Util.Automaton.CompiledAutomaton;
+ using RunAutomaton = Util.Automaton.RunAutomaton;
+ using Transition = Util.Automaton.Transition;
+
+ // TODO:
+ // - build depth-N prefix hash?
+ // - or: longer dense skip lists than just next byte?
+
+ /// <summary>
+ /// Wraps <seealso cref="Lucene41PostingsFormat"/> format for on-disk
+ /// storage, but then at read time loads and stores all
+ /// terms &amp; postings directly in RAM as byte[], int[].
+ ///
+ /// <para><b>WARNING</b>: This is
+ /// exceptionally RAM intensive: it makes no effort to
+ /// compress the postings data, storing terms as separate
+ /// byte[] and postings as separate int[], but as a result it
+ /// gives substantial increase in search performance.
+ ///
+ /// </para>
+ /// <para>This postings format supports <seealso cref="TermsEnum#ord"/>
+ /// and <seealso cref="TermsEnum#seekExact(long)"/>.
+ ///
+ /// </para>
+ /// <para>Because this holds all term bytes as a single
+ /// byte[], you cannot have more than 2.1GB worth of term
+ /// bytes in a single segment.
+ ///
+ /// @lucene.experimental
+ /// </para>
+ /// </summary>
+
+ public sealed class DirectPostingsFormat : PostingsFormat
+ {
+
+ private readonly int _minSkipCount;
+ private readonly int _lowFreqCutoff;
+
+ private const int DEFAULT_MIN_SKIP_COUNT = 8;
+ private const int DEFAULT_LOW_FREQ_CUTOFF = 32;
+
+ // TODO: allow passing/wrapping arbitrary postings format?
+
+ public DirectPostingsFormat() : this(DEFAULT_MIN_SKIP_COUNT, DEFAULT_LOW_FREQ_CUTOFF)
+ {
+ }
+
+ /// <summary>
+ /// minSkipCount is how many terms in a row must have the
+ /// same prefix before we put a skip pointer down. Terms
+ /// with docFreq less than or equal to lowFreqCutoff will use a single int[]
+ /// to hold all docs, freqs, position and offsets; terms
+ /// with higher docFreq will use separate arrays.
+ /// </summary>
+ public DirectPostingsFormat(int minSkipCount, int lowFreqCutoff) : base("Direct")
+ {
+ _minSkipCount = minSkipCount;
+ _lowFreqCutoff = lowFreqCutoff;
+ }
+
+ public override FieldsConsumer FieldsConsumer(SegmentWriteState state)
+ {
+ return ForName("Lucene41").FieldsConsumer(state);
+ }
+
+ public override FieldsProducer FieldsProducer(SegmentReadState state)
+ {
+ var postings = ForName("Lucene41").FieldsProducer(state);
+ if (state.Context.Context != IOContext.Context.MERGE)
+ {
+ FieldsProducer loadedPostings;
+ try
+ {
+ postings.CheckIntegrity();
+ loadedPostings = new DirectFields(state, postings, _minSkipCount, _lowFreqCutoff);
+ }
+ finally
+ {
+ postings.Dispose();
+ }
+ return loadedPostings;
+ }
+ else
+ {
+ // Don't load postings for merge:
+ return postings;
+ }
+ }
+
+ private sealed class DirectFields : FieldsProducer
+ {
+ internal readonly IDictionary<string, DirectField> fields = new SortedDictionary<string, DirectField>();
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public DirectFields(index.SegmentReadState state, index.Fields fields, int minSkipCount, int lowFreqCutoff) throws java.io.IOException
- public DirectFields(SegmentReadState state, Fields fields, int minSkipCount, int lowFreqCutoff)
- {
- foreach (string field in fields)
- {
- this.fields[field] = new DirectField(state, field, fields.terms(field), minSkipCount, lowFreqCutoff);
- }
- }
-
- public override IEnumerator<string> iterator()
- {
- return Collections.unmodifiableSet(fields.Keys).GetEnumerator();
- }
-
- public override Terms terms(string field)
- {
- return fields[field];
- }
-
- public override int size()
- {
- return fields.Count;
- }
-
- public override long UniqueTermCount
- {
- get
- {
- long numTerms = 0;
- foreach (DirectField field in fields.Values)
- {
- numTerms += field.terms.Length;
- }
- return numTerms;
- }
- }
-
- public override void close()
- {
- }
-
- public override long ramBytesUsed()
- {
- long sizeInBytes = 0;
- foreach (KeyValuePair<string, DirectField> entry in fields.SetOfKeyValuePairs())
- {
- sizeInBytes += entry.Key.length() * RamUsageEstimator.NUM_BYTES_CHAR;
- sizeInBytes += entry.Value.ramBytesUsed();
- }
- return sizeInBytes;
- }
+ public DirectFields(SegmentReadState state, Fields fields, int minSkipCount, int lowFreqCutoff)
+ {
+ foreach (string field in fields)
+ {
+ this.fields[field] = new DirectField(state, field, fields.terms(field), minSkipCount, lowFreqCutoff);
+ }
+ }
+
+ public override IEnumerator<string> iterator()
+ {
+ return Collections.unmodifiableSet(fields.Keys).GetEnumerator();
+ }
+
+ public override Terms terms(string field)
+ {
+ return fields[field];
+ }
+
+ public override int size()
+ {
+ return fields.Count;
+ }
+
+ public override long UniqueTermCount
+ {
+ get
+ {
+ long numTerms = 0;
+ foreach (DirectField field in fields.Values)
+ {
+ numTerms += field.terms.Length;
+ }
+ return numTerms;
+ }
+ }
+
+ public override void close()
+ {
+ }
+
+ public override long ramBytesUsed()
+ {
+ long sizeInBytes = 0;
+ foreach (KeyValuePair<string, DirectField> entry in fields.SetOfKeyValuePairs())
+ {
+ sizeInBytes += entry.Key.length()*RamUsageEstimator.NUM_BYTES_CHAR;
+ sizeInBytes += entry.Value.ramBytesUsed();
+ }
+ return sizeInBytes;
+ }
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: @Override public void checkIntegrity() throws java.io.IOException
- public override void checkIntegrity()
- {
- // if we read entirely into ram, we already validated.
- // otherwise returned the raw postings reader
- }
- }
-
- private sealed class DirectField : Terms
- {
-
- private abstract class TermAndSkip
- {
- public int[] skips;
-
- /// <summary>
- /// Returns the approximate number of RAM bytes used </summary>
- public abstract long ramBytesUsed();
- }
-
- private sealed class LowFreqTerm : TermAndSkip
- {
- public readonly int[] postings;
- public readonly sbyte[] payloads;
- public readonly int docFreq;
- public readonly int totalTermFreq;
-
- public LowFreqTerm(int[] postings, sbyte[] payloads, int docFreq, int totalTermFreq)
- {
- this.postings = postings;
- this.payloads = payloads;
- this.docFreq = docFreq;
- this.totalTermFreq = totalTermFreq;
- }
-
- public override long ramBytesUsed()
- {
- return ((postings != null) ? RamUsageEstimator.sizeOf(postings) : 0) + ((payloads != null) ? RamUsageEstimator.sizeOf(payloads) : 0);
- }
- }
-
- // TODO: maybe specialize into prx/no-prx/no-frq cases?
- private sealed class HighFreqTerm : TermAndSkip
- {
- public readonly long totalTermFreq;
- public readonly int[] docIDs;
- public readonly int[] freqs;
- public readonly int[][] positions;
- public readonly sbyte[][][] payloads;
-
- public HighFreqTerm(int[] docIDs, int[] freqs, int[][] positions, sbyte[][][] payloads, long totalTermFreq)
- {
- this.docIDs = docIDs;
- this.freqs = freqs;
- this.positions = positions;
- this.payloads = payloads;
- this.totalTermFreq = totalTermFreq;
- }
-
- public override long ramBytesUsed()
- {
- long sizeInBytes = 0;
- sizeInBytes += (docIDs != null)? RamUsageEstimator.sizeOf(docIDs) : 0;
- sizeInBytes += (freqs != null)? RamUsageEstimator.sizeOf(freqs) : 0;
-
- if (positions != null)
- {
- foreach (int[] position in positions)
- {
- sizeInBytes += (position != null) ? RamUsageEstimator.sizeOf(position) : 0;
- }
- }
-
- if (payloads != null)
- {
- foreach (sbyte[][] payload in payloads)
- {
- if (payload != null)
- {
- foreach (sbyte[] pload in payload)
- {
- sizeInBytes += (pload != null) ? RamUsageEstimator.sizeOf(pload) : 0;
- }
- }
- }
- }
-
- return sizeInBytes;
- }
- }
-
- internal readonly sbyte[] termBytes;
- internal readonly int[] termOffsets;
-
- internal readonly int[] skips;
- internal readonly int[] skipOffsets;
-
- internal readonly TermAndSkip[] terms;
- internal readonly bool hasFreq;
- internal readonly bool hasPos;
- internal readonly bool hasOffsets_Renamed;
- internal readonly bool hasPayloads_Renamed;
- internal readonly long sumTotalTermFreq;
- internal readonly int docCount;
- internal readonly long sumDocFreq;
- internal int skipCount;
-
- // TODO: maybe make a separate builder? These are only
- // used during load:
- internal int count;
- internal int[] sameCounts = new int[10];
- internal readonly int minSkipCount;
-
- private sealed class IntArrayWriter
- {
- internal int[] ints = new int[10];
- internal int upto;
-
- public void add(int value)
- {
- if (ints.Length == upto)
- {
- ints = ArrayUtil.grow(ints);
- }
- ints[upto++] = value;
- }
-
- public int[] get()
- {
+ public override void checkIntegrity()
+ {
+ // if we read entirely into ram, we already validated.
+ // otherwise returned the raw postings reader
+ }
+ }
+
+ private sealed class DirectField : Terms
+ {
+
+ private abstract class TermAndSkip
+ {
+ public int[] skips;
+
+ /// <summary>
+ /// Returns the approximate number of RAM bytes used </summary>
+ public abstract long ramBytesUsed();
+ }
+
+ private sealed class LowFreqTerm : TermAndSkip
+ {
+ public readonly int[] postings;
+ public readonly sbyte[] payloads;
+ public readonly int docFreq;
+ public readonly int totalTermFreq;
+
+ public LowFreqTerm(int[] postings, sbyte[] payloads, int docFreq, int totalTermFreq)
+ {
+ this.postings = postings;
+ this.payloads = payloads;
+ this.docFreq = docFreq;
+ this.totalTermFreq = totalTermFreq;
+ }
+
+ public override long ramBytesUsed()
+ {
+ return ((postings != null) ? RamUsageEstimator.sizeOf(postings) : 0) +
+ ((payloads != null) ? RamUsageEstimator.sizeOf(payloads) : 0);
+ }
+ }
+
+ // TODO: maybe specialize into prx/no-prx/no-frq cases?
+ private sealed class HighFreqTerm : TermAndSkip
+ {
+ public readonly long totalTermFreq;
+ public readonly int[] docIDs;
+ public readonly int[] freqs;
+ public readonly int[][] positions;
+ public readonly sbyte[][][] payloads;
+
+ public HighFreqTerm(int[] docIDs, int[] freqs, int[][] positions, sbyte[][][] payloads,
+ long totalTermFreq)
+ {
+ this.docIDs = docIDs;
+ this.freqs = freqs;
+ this.positions = positions;
+ this.payloads = payloads;
+ this.totalTermFreq = totalTermFreq;
+ }
+
+ public override long ramBytesUsed()
+ {
+ long sizeInBytes = 0;
+ sizeInBytes += (docIDs != null) ? RamUsageEstimator.sizeOf(docIDs) : 0;
+ sizeInBytes += (freqs != null) ? RamUsageEstimator.sizeOf(freqs) : 0;
+
+ if (positions != null)
+ {
+ foreach (int[] position in positions)
+ {
+ sizeInBytes += (position != null) ? RamUsageEstimator.sizeOf(position) : 0;
+ }
+ }
+
+ if (payloads != null)
+ {
+ foreach (sbyte[][] payload in payloads)
+ {
+ if (payload != null)
+ {
+ foreach (sbyte[] pload in payload)
+ {
+ sizeInBytes += (pload != null) ? RamUsageEstimator.sizeOf(pload) : 0;
+ }
+ }
+ }
+ }
+
+ return sizeInBytes;
+ }
+ }
+
+ internal readonly sbyte[] termBytes;
+ internal readonly int[] termOffsets;
+
+ internal readonly int[] skips;
+ internal readonly int[] skipOffsets;
+
+ internal readonly TermAndSkip[] terms;
+ internal readonly bool hasFreq;
+ internal readonly bool hasPos;
+ internal readonly bool hasOffsets_Renamed;
+ internal readonly bool hasPayloads_Renamed;
+ internal readonly long sumTotalTermFreq;
+ internal readonly int docCount;
+ internal readonly long sumDocFreq;
+ internal int skipCount;
+
+ // TODO: maybe make a separate builder? These are only
+ // used during load:
+ internal int count;
+ internal int[] sameCounts = new int[10];
+ internal readonly int minSkipCount;
+
+ private sealed class IntArrayWriter
+ {
+ internal int[] ints = new int[10];
+ internal int upto;
+
+ public void add(int value)
+ {
+ if (ints.Length == upto)
+ {
+ ints = ArrayUtil.grow(ints);
+ }
+ ints[upto++] = value;
+ }
+
+ public int[] get()
+ {
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final int[] arr = new int[upto];
- int[] arr = new int[upto];
- Array.Copy(ints, 0, arr, 0, upto);
- upto = 0;
- return arr;
- }
- }
+ int[] arr = new int[upto];
+ Array.Copy(ints, 0, arr, 0, upto);
+ upto = 0;
+ return arr;
+ }
+ }
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public DirectField(index.SegmentReadState state, String field, index.Terms termsIn, int minSkipCount, int lowFreqCutoff) throws java.io.IOException
- public DirectField(SegmentReadState state, string field, Terms termsIn, int minSkipCount, int lowFreqCutoff)
- {
+ public DirectField(SegmentReadState state, string field, Terms termsIn, int minSkipCount, int lowFreqCutoff)
+ {
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final index.FieldInfo fieldInfo = state.fieldInfos.fieldInfo(field);
- FieldInfo fieldInfo = state.fieldInfos.fieldInfo(field);
+ FieldInfo fieldInfo = state.fieldInfos.fieldInfo(field);
- sumTotalTermFreq = termsIn.SumTotalTermFreq;
- sumDocFreq = termsIn.SumDocFreq;
- docCount = termsIn.DocCount;
+ sumTotalTermFreq = termsIn.SumTotalTermFreq;
+ sumDocFreq = termsIn.SumDocFreq;
+ docCount = termsIn.DocCount;
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final int numTerms = (int) termsIn.size();
- int numTerms = (int) termsIn.size();
- if (numTerms == -1)
- {
- throw new System.ArgumentException("codec does not provide Terms.size()");
- }
- terms = new TermAndSkip[numTerms];
- termOffsets = new int[1 + numTerms];
-
- sbyte[] termBytes = new sbyte[1024];
-
- this.minSkipCount = minSkipCount;
-
- hasFreq = fieldInfo.IndexOptions.compareTo(IndexOptions.DOCS_ONLY) > 0;
- hasPos = fieldInfo.IndexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) > 0;
- hasOffsets_Renamed = fieldInfo.IndexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) > 0;
- hasPayloads_Renamed = fieldInfo.hasPayloads();
-
- BytesRef term;
- DocsEnum docsEnum = null;
- DocsAndPositionsEnum docsAndPositionsEnum = null;
+ int numTerms = (int) termsIn.size();
+ if (numTerms == -1)
+ {
+ throw new System.ArgumentException("codec does not provide Terms.size()");
+ }
+ terms = new TermAndSkip[numTerms];
+ termOffsets = new int[1 + numTerms];
+
+ sbyte[] termBytes = new sbyte[1024];
+
+ this.minSkipCount = minSkipCount;
+
+ hasFreq = fieldInfo.IndexOptions.compareTo(IndexOptions.DOCS_ONLY) > 0;
+ hasPos = fieldInfo.IndexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) > 0;
+ hasOffsets_Renamed = fieldInfo.IndexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) > 0;
+ hasPayloads_Renamed = fieldInfo.hasPayloads();
+
+ BytesRef term;
+ DocsEnum docsEnum = null;
+ DocsAndPositionsEnum docsAndPositionsEnum = null;
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final index.TermsEnum termsEnum = termsIn.iterator(null);
- TermsEnum termsEnum = termsIn.iterator(null);
- int termOffset = 0;
+ TermsEnum termsEnum = termsIn.iterator(null);
+ int termOffset = 0;
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final IntArrayWriter scratch = new IntArrayWriter();
- IntArrayWriter scratch = new IntArrayWriter();
+ IntArrayWriter scratch = new IntArrayWriter();
- // Used for payloads, if any:
+ // Used for payloads, if any:
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final store.RAMOutputStream ros = new store.RAMOutputStream();
- RAMOutputStream ros = new RAMOutputStream();
+ RAMOutputStream ros = new RAMOutputStream();
- // if (DEBUG) {
- // System.out.println("\nLOAD terms seg=" + state.segmentInfo.name + " field=" + field + " hasOffsets=" + hasOffsets + " hasFreq=" + hasFreq + " hasPos=" + hasPos + " hasPayloads=" + hasPayloads);
- // }
+ // if (DEBUG) {
+ // System.out.println("\nLOAD terms seg=" + state.segmentInfo.name + " field=" + field + " hasOffsets=" + hasOffsets + " hasFreq=" + hasFreq + " hasPos=" + hasPos + " hasPayloads=" + hasPayloads);
+ // }
- while ((term = termsEnum.next()) != null)
- {
+ while ((term = termsEnum.next()) != null)
+ {
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final int docFreq = termsEnum.docFreq();
- int docFreq = termsEnum.docFreq();
+ int docFreq = termsEnum.docFreq();
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final long totalTermFreq = termsEnum.totalTermFreq();
- long totalTermFreq = termsEnum.totalTermFreq();
-
- // if (DEBUG) {
- // System.out.println(" term=" + term.utf8ToString());
- // }
-
- termOffsets[count] = termOffset;
-
- if (termBytes.Length < (termOffset + term.length))
- {
- termBytes = ArrayUtil.grow(termBytes, termOffset + term.length);
- }
- Array.Copy(term.bytes, term.offset, termBytes, termOffset, term.length);
- termOffset += term.length;
- termOffsets[count + 1] = termOffset;
-
- if (hasPos)
- {
- docsAndPositionsEnum = termsEnum.docsAndPositions(null, docsAndPositionsEnum);
- }
- else
- {
- docsEnum = termsEnum.docs(null, docsEnum);
- }
+ long totalTermFreq = termsEnum.totalTermFreq();
+
+ // if (DEBUG) {
+ // System.out.println(" term=" + term.utf8ToString());
+ // }
+
+ termOffsets[count] = termOffset;
+
+ if (termBytes.Length < (termOffset + term.length))
+ {
+ termBytes = ArrayUtil.grow(termBytes, termOffset + term.length);
+ }
+ Array.Copy(term.bytes, term.offset, termBytes, termOffset, term.length);
+ termOffset += term.length;
+ termOffsets[count + 1] = termOffset;
+
+ if (hasPos)
+ {
+ docsAndPositionsEnum = termsEnum.docsAndPositions(null, docsAndPositionsEnum);
+ }
+ else
+ {
+ docsEnum = termsEnum.docs(null, docsEnum);
+ }
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final TermAndSkip ent;
- TermAndSkip ent;
+ TermAndSkip ent;
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final index.DocsEnum docsEnum2;
- DocsEnum docsEnum2;
- if (hasPos)
- {
- docsEnum2 = docsAndPositionsEnum;
- }
- else
- {
- docsEnum2 = docsEnum;
- }
-
- int docID;
-
- if (docFreq <= lowFreqCutoff)
- {
-
- ros.reset();
-
- // Pack postings for low-freq terms into a single int[]:
- while ((docID = docsEnum2.nextDoc()) != DocsEnum.NO_MORE_DOCS)
- {
- scratch.add(docID);
- if (hasFreq)
- {
+ DocsEnum docsEnum2;
+ if (hasPos)
+ {
+ docsEnum2 = docsAndPositionsEnum;
+ }
+ else
+ {
+ docsEnum2 = docsEnum;
+ }
+
+ int docID;
+
+ if (docFreq <= lowFreqCutoff)
+ {
+
+ ros.reset();
+
+ // Pack postings for low-freq terms into a single int[]:
+ while ((docID = docsEnum2.nextDoc()) != DocsEnum.NO_MORE_DOCS)
+ {
+ scratch.add(docID);
+ if (hasFreq)
+ {
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final int freq = docsEnum2.freq();
- int freq = docsEnum2.freq();
- scratch.add(freq);
- if (hasPos)
- {
- for (int pos = 0;pos < freq;pos++)
- {
- scratch.add(docsAndPositionsEnum.nextPosition());
- if (hasOffsets_Renamed)
- {
- scratch.add(docsAndPositionsEnum.startOffset());
- scratch.add(docsAndPositionsEnum.endOffset());
- }
- if (hasPayloads_Renamed)
- {
+ int freq = docsEnum2.freq();
+ scratch.add(freq);
+ if (hasPos)
+ {
+ for (int pos = 0; pos < freq; pos++)
+ {
+ scratch.add(docsAndPositionsEnum.nextPosition());
+ if (hasOffsets_Renamed)
+ {
+ scratch.add(docsAndPositionsEnum.startOffset());
+ scratch.add(docsAndPositionsEnum.endOffset());
+ }
+ if (hasPayloads_Renamed)
+ {
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final util.BytesRef payload = docsAndPositionsEnum.getPayload();
- BytesRef payload = docsAndPositionsEnum.Payload;
- if (payload != null)
- {
- scratch.add(payload.length);
- ros.writeBytes(payload.bytes, payload.offset, payload.length);
- }
- else
- {
- scratch.add(0);
- }
- }
- }
- }
- }
- }
+ BytesRef payload = docsAndPositionsEnum.Payload;
+ if (payload != null)
+ {
+ scratch.add(payload.length);
+ ros.writeBytes(payload.bytes, payload.offset, payload.length);
+ }
+ else
+ {
+ scratch.add(0);
+ }
+ }
+ }
+ }
+ }
+ }
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final byte[] payloads;
- sbyte[] payloads;
- if (hasPayloads_Renamed)
- {
- ros.flush();
- payloads = new sbyte[(int) ros.length()];
- ros.writeTo(payloads, 0);
- }
- else
- {
- payloads = null;
- }
+ sbyte[] payloads;
+ if (hasPayloads_Renamed)
+ {
+ ros.flush();
+ payloads = new sbyte[(int) ros.length()];
+ ros.writeTo(payloads, 0);
+ }
+ else
+ {
+ payloads = null;
+ }
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final int[] postings = scratch.get();
- int[] postings = scratch.get();
+ int[] postings = scratch.get();
- ent = new LowFreqTerm(postings, payloads, docFreq, (int) totalTermFreq);
- }
- else
- {
+ ent = new LowFreqTerm(postings, payloads, docFreq, (int) totalTermFreq);
+ }
+ else
+ {
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final int[] docs = new int[docFreq];
- int[] docs = new int[docFreq];
+ int[] docs = new int[docFreq];
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final int[] freqs;
- int[] freqs;
+ int[] freqs;
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final int[][] positions;
- int[][] positions;
+ int[][] positions;
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final byte[][][] payloads;
- sbyte[][][] payloads;
- if (hasFreq)
- {
- freqs = new int[docFreq];
- if (hasPos)
- {
- positions = new int[docFreq][];
- if (hasPayloads_Renamed)
- {
- payloads = new sbyte[docFreq][][];
- }
- else
- {
- payloads = null;
- }
- }
- else
- {
- positions = null;
- payloads = null;
- }
- }
- else
- {
- freqs = null;
- positions = null;
- payloads = null;
- }
-
- // Use separate int[] for the postings for high-freq
- // terms:
- int upto = 0;
- while ((docID = docsEnum2.nextDoc()) != DocsEnum.NO_MORE_DOCS)
- {
- docs[upto] = docID;
- if (hasFreq)
- {
+ sbyte[][][] payloads;
+ if (hasFreq)
+ {
+ freqs = new int[docFreq];
+ if (hasPos)
+ {
+ positions = new int[docFreq][];
+ if (hasPayloads_Renamed)
+ {
+ payloads = new sbyte[docFreq][][];
+ }
+ else
+ {
+ payloads = null;
+ }
+ }
+ else
+ {
+ positions = null;
+ payloads = null;
+ }
+ }
+ else
+ {
+ freqs = null;
+ positions = null;
+ payloads = null;
+ }
+
+ // Use separate int[] for the postings for high-freq
+ // terms:
+ int upto = 0;
+ while ((docID = docsEnum2.nextDoc()) != DocsEnum.NO_MORE_DOCS)
+ {
+ docs[upto] = docID;
+ if (hasFreq)
+ {
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final int freq = docsEnum2.freq();
- int freq = docsEnum2.freq();
- freqs[upto] = freq;
- if (hasPos)
- {
+ int freq = docsEnum2.freq();
+ freqs[upto] = freq;
+ if (hasPos)
+ {
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final int mult;
- int mult;
- if (hasOffsets_Renamed)
- {
- mult = 3;
- }
- else
- {
- mult = 1;
- }
- if (hasPayloads_Renamed)
- {
- payloads[upto] = new sbyte[freq][];
- }
- positions[upto] = new int[mult * freq];
- int posUpto = 0;
- for (int pos = 0;pos < freq;pos++)
- {
- positions[upto][posUpto] = docsAndPositionsEnum.nextPosition();
- if (hasPayloads_Renamed)
- {
- BytesRef payload = docsAndPositionsEnum.Payload;
- if (payload != null)
- {
- sbyte[] payloadBytes = new sbyte[payload.length];
- Array.Copy(payload.bytes, payload.offset, payloadBytes, 0, payload.length);
- payloads[upto][pos] = payloadBytes;
- }
- }
- posUpto++;
- if (hasOffsets_Renamed)
- {
- positions[upto][posUpto++] = docsAndPositionsEnum.startOffset();
- positions[upto][posUpto++] = docsAndPositionsEnum.endOffset();
- }
- }
- }
- }
-
- upto++;
- }
- Debug.Assert(upto == docFreq);
- ent = new HighFreqTerm(docs, freqs, positions, payloads, totalTermFreq);
- }
-
- terms[count] = ent;
- setSkips(count, termBytes);
- count++;
- }
-
- // End sentinel:
- termOffsets[count] = termOffset;
-
- finishSkips();
-
- //System.out.println(skipCount + " skips: " + field);
-
- this.termBytes = new sbyte[termOffset];
- Array.Copy(termBytes, 0, this.termBytes, 0, termOffset);
-
- // Pack skips:
- this.skips = new int[skipCount];
- this.skipOffsets = new int[1 + numTerms];
-
- int skipOffset = 0;
- for (int i = 0;i < numTerms;i++)
- {
+ int mult;
+ if (hasOffsets_Renamed)
+ {
+ mult = 3;
+ }
+ else
+ {
+ mult = 1;
+ }
+ if (hasPayloads_Renamed)
+ {
+ payloads[upto] = new sbyte[freq][];
+ }
+ positions[upto] = new int[mult*freq];
+ int posUpto = 0;
+ for (int pos = 0; pos < freq; pos++)
+ {
+ positions[upto][posUpto] = docsAndPositionsEnum.nextPosition();
+ if (hasPayloads_Renamed)
+ {
+ BytesRef payload = docsAndPositionsEnum.Payload;
+ if (payload != null)
+ {
+ sbyte[] payloadBytes = new sbyte[payload.length];
+ Array.Copy(payload.bytes, payload.offset, payloadBytes, 0,
+ payload.length);
+ payloads[upto][pos] = payloadBytes;
+ }
+ }
+ posUpto++;
+ if (hasOffsets_Renamed)
+ {
+ positions[upto][posUpto++] = docsAndPositionsEnum.startOffset();
+ positions[upto][posUpto++] = docsAndPositionsEnum.endOffset();
+ }
+ }
+ }
+ }
+
+ upto++;
+ }
+ Debug.Assert(upto == docFreq);
+ ent = new HighFreqTerm(docs, freqs, positions, payloads, totalTermFreq);
+ }
+
+ terms[count] = ent;
+ setSkips(count, termBytes);
+ count++;
+ }
+
+ // End sentinel:
+ termOffsets[count] = termOffset;
+
+ finishSkips();
+
+ //System.out.println(skipCount + " skips: " + field);
+
+ this.termBytes = new sbyte[termOffset];
+ Array.Copy(termBytes, 0, this.termBytes, 0, termOffset);
+
+ // Pack skips:
+ this.skips = new int[skipCount];
+ this.skipOffsets = new int[1 + numTerms];
+
+ int skipOffset = 0;
+ for (int i = 0; i < numTerms; i++)
+ {
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final int[] termSkips = terms[i].skips;
- int[] termSkips = terms[i].skips;
- skipOffsets[i] = skipOffset;
- if (termSkips != null)
- {
- Array.Copy(termSkips, 0, skips, skipOffset, termSkips.Length);
- skipOffset += termSkips.Length;
- terms[i].skips = null;
- }
- }
- this.skipOffsets[numTerms] = skipOffset;
- Debug.Assert(skipOffset == skipCount);
- }
-
- /// <summary>
- /// Returns approximate RAM bytes used </summary>
- public long ramBytesUsed()
- {
- long sizeInBytes = 0;
- sizeInBytes += ((termBytes != null) ? RamUsageEstimator.sizeOf(termBytes) : 0);
- sizeInBytes += ((termOffsets != null) ? RamUsageEstimator.sizeOf(termOffsets) : 0);
- sizeInBytes += ((skips != null) ? RamUsageEstimator.sizeOf(skips) : 0);
- sizeInBytes += ((skipOffsets != null) ? RamUsageEstimator.sizeOf(skipOffsets) : 0);
- sizeInBytes += ((sameCounts != null) ? RamUsageEstimator.sizeOf(sameCounts) : 0);
-
- if (terms != null)
- {
- foreach (TermAndSkip termAndSkip in terms)
- {
- sizeInBytes += (termAndSkip != null) ? termAndSkip.ramBytesUsed() : 0;
- }
- }
-
- return sizeInBytes;
- }
-
- // Compares in unicode (UTF8) order:
- internal int compare(int ord, BytesRef other)
- {
+ int[] termSkips = terms[i].skips;
+ skipOffsets[i] = skipOffset;
+ if (termSkips != null)
+ {
+ Array.Copy(termSkips, 0, skips, skipOffset, termSkips.Length);
+ skipOffset += termSkips.Length;
+ terms[i].skips = null;
+ }
+ }
+ this.skipOffsets[numTerms] = skipOffset;
+ Debug.Assert(skipOffset == skipCount);
+ }
+
+ /// <summary>
+ /// Returns approximate RAM bytes used </summary>
+ public long ramBytesUsed()
+ {
+ long sizeInBytes = 0;
+ sizeInBytes += ((termBytes != null) ? RamUsageEstimator.sizeOf(termBytes) : 0);
+ sizeInBytes += ((termOffsets != null) ? RamUsageEstimator.sizeOf(termOffsets) : 0);
+ sizeInBytes += ((skips != null) ? RamUsageEstimator.sizeOf(skips) : 0);
+ sizeInBytes += ((skipOffsets != null) ? RamUsageEstimator.sizeOf(skipOffsets) : 0);
+ sizeInBytes += ((sameCounts != null) ? RamUsageEstimator.sizeOf(sameCounts) : 0);
+
+ if (terms != null)
+ {
+ foreach (TermAndSkip termAndSkip in terms)
+ {
+ sizeInBytes += (termAndSkip != null) ? termAndSkip.ramBytesUsed() : 0;
+ }
+ }
+
+ return sizeInBytes;
+ }
+
+ // Compares in unicode (UTF8) order:
+ internal int compare(int ord, BytesRef other)
+ {
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final byte[] otherBytes = other.bytes;
- sbyte[] otherBytes = other.bytes;
+ sbyte[] otherBytes = other.bytes;
- int upto = termOffsets[ord];
+ int upto = termOffsets[ord];
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final int termLen = termOffsets[1+ord] - upto;
- int termLen = termOffsets[1 + ord] - upto;
- int otherUpto = other.offset;
+ int termLen = termOffsets[1 + ord] - upto;
+ int otherUpto = other.offset;
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final int stop = upto + Math.min(termLen, other.length);
- int stop = upto + Math.Min(termLen, other.length);
- while (upto < stop)
- {
- int diff = (termBytes[upto++] & 0xFF) - (otherBytes[otherUpto++] & 0xFF);
- if (diff != 0)
- {
- return diff;
- }
- }
-
- // One is a prefix of the other, or, they are equal:
- return termLen - other.length;
- }
-
- internal void setSkips(int termOrd, sbyte[] termBytes)
- {
+ int stop = upto + Math.Min(termLen, other.length);
+ while (upto < stop)
+ {
+ int diff = (termBytes[upto++] & 0xFF) - (otherBytes[otherUpto++] & 0xFF);
+ if (diff != 0)
+ {
+ return diff;
+ }
+ }
+
+ // One is a prefix of the other, or, they are equal:
+ return termLen - other.length;
+ }
+
+ internal void setSkips(int termOrd, sbyte[] termBytes)
+ {
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final int termLength = termOffsets[termOrd+1] - termOffsets[termOrd];
- int termLength = termOffsets[termOrd + 1] - termOffsets[termOrd];
+ int termLength = termOffsets[termOrd + 1] - termOffsets[termOrd];
- if (sameCounts.Length < termLength)
- {
- sameCounts = ArrayUtil.grow(sameCounts, termLength);
- }
+ if (sameCounts.Length < termLength)
+ {
+ sameCounts = ArrayUtil.grow(sameCounts, termLength);
+ }
- // Update skip pointers:
- if (termOrd > 0)
- {
+ // Update skip pointers:
+ if (termOrd > 0)
+ {
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final int lastTermLength = termOffsets[termOrd] - termOffsets[termOrd-1];
- int lastTermLength = termOffsets[termOrd] - termOffsets[termOrd - 1];
+ int lastTermLength = termOffsets[termOrd] - termOffsets[termOrd - 1];
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final int limit = Math.min(termLength, lastTermLength);
- int limit = Math.Min(termLength, lastTermLength);
-
- int lastTermOffset = termOffsets[termOrd - 1];
- int termOffset = termOffsets[termOrd];
-
- int i = 0;
- for (;i < limit;i++)
- {
- if (termBytes[lastTermOffset++] == termBytes[termOffset++])
- {
- sameCounts[i]++;
- }
- else
- {
- for (;i < limit;i++)
- {
- if (sameCounts[i] >= minSkipCount)
- {
- // Go back and add a skip pointer:
- saveSkip(termOrd, sameCounts[i]);
- }
- sameCounts[i] = 1;
- }
- break;
- }
- }
-
- for (;i < lastTermLength;i++)
- {
- if (sameCounts[i] >= minSkipCount)
- {
- // Go back and add a skip pointer:
- saveSkip(termOrd, sameCounts[i]);
- }
- sameCounts[i] = 0;
- }
- for (int j = limit;j < termLength;j++)
- {
- sameCounts[j] = 1;
- }
- }
- else
- {
- for (int i = 0;i < termLength;i++)
- {
- sameCounts[i]++;
- }
- }
- }
-
- internal void finishSkips()
- {
- Debug.Assert(count == terms.Length);
- int lastTermOffset = termOffsets[count - 1];
- int lastTermLength = termOffsets[count] - lastTermOffset;
-
- for (int i = 0;i < lastTermLength;i++)
- {
- if (sameCounts[i] >= minSkipCount)
- {
- // Go back and add a skip pointer:
- saveSkip(count, sameCounts[i]);
- }
- }
-
- // Reverse the skip pointers so they are "nested":
- for (int termID = 0;termID < terms.Length;termID++)
- {
- TermAndSkip term = terms[termID];
- if (term.skips != null && term.skips.Length > 1)
- {
- for (int pos = 0;pos < term.skips.Length / 2;pos++)
- {
+ int limit = Math.Min(termLength, lastTermLength);
+
+ int lastTermOffset = termOffsets[termOrd - 1];
+ int termOffset = termOffsets[termOrd];
+
+ int i = 0;
+ for (; i < limit; i++)
+ {
+ if (termBytes[lastTermOffset++] == termBytes[termOffset++])
+ {
+ sameCounts[i]++;
+ }
+ else
+ {
+ for (; i < limit; i++)
+ {
+ if (sameCounts[i] >= minSkipCount)
+ {
+ // Go back and add a skip pointer:
+ saveSkip(termOrd, sameCounts[i]);
+ }
+ sameCounts[i] = 1;
+ }
+ break;
+ }
+ }
+
+ for (; i < lastTermLength; i++)
+ {
+ if (sameCounts[i] >= minSkipCount)
+ {
+ // Go back and add a skip pointer:
+ saveSkip(termOrd, sameCounts[i]);
+ }
+ sameCounts[i] = 0;
+ }
+ for (int j = limit; j < termLength; j++)
+ {
+ sameCounts[j] = 1;
+ }
+ }
+ else
+ {
+ for (int i = 0; i < termLength; i++)
+ {
+ sameCounts[i]++;
+ }
+ }
+ }
+
+ internal void finishSkips()
+ {
+ Debug.Assert(count == terms.Length);
+ int lastTermOffset = termOffsets[count - 1];
+ int lastTermLength = termOffsets[count] - lastTermOffset;
+
+ for (int i = 0; i < lastTermLength; i++)
+ {
+ if (sameCounts[i] >= minSkipCount)
+ {
+ // Go back and add a skip pointer:
+ saveSkip(count, sameCounts[i]);
+ }
+ }
+
+ // Reverse the skip pointers so they are "nested":
+ for (int termID = 0; termID < terms.Length; termID++)
+ {
+ TermAndSkip term = terms[termID];
+ if (term.skips != null && term.skips.Length > 1)
+ {
+ for (int pos = 0; pos < term.skips.Length/2; pos++)
+ {
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final int otherPos = term.skips.length-pos-1;
- int otherPos = term.skips.Length - pos - 1;
+ int otherPos = term.skips.Length - pos - 1;
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final int temp = term.skips[pos];
- int temp = term.skips[pos];
- term.skips[pos] = term.skips[otherPos];
- term.skips[otherPos] = temp;
- }
- }
- }
- }
-
- internal void saveSkip(int ord, int backCount)
- {
+ int temp = term.skips[pos];
+ term.skips[pos] = term.skips[otherPos];
+ term.skips[otherPos] = temp;
+ }
+ }
+ }
+ }
+
+ internal void saveSkip(int ord, int backCount)
+ {
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final TermAndSkip term = terms[ord - backCount];
- TermAndSkip term = terms[ord - backCount];
- skipCount++;
- if (term.skips == null)
- {
- term.skips = new int[] {ord};
- }
- else
- {
- // Normally we'd grow at a slight exponential... but
- // given that the skips themselves are already log(N)
- // we can grow by only 1 and still have amortized
- // linear time:
+ TermAndSkip term = terms[ord - backCount];
+ skipCount++;
+ if (term.skips == null)
+ {
+ term.skips = new int[] {ord};
+ }
+ else
+ {
+ // Normally we'd grow at a slight exponential... but
+ // given that the skips themselves are already log(N)
+ // we can grow by only 1 and still have amortized
+ // linear time:
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final int[] newSkips = new int[term.skips.length+1];
- int[] newSkips = new int[term.skips.Length + 1];
- Array.Copy(term.skips, 0, newSkips, 0, term.skips.Length);
- term.skips = newSkips;
- term.skips[term.skips.Length - 1] = ord;
- }
- }
-
- public override TermsEnum iterator(TermsEnum reuse)
- {
- DirectTermsEnum termsEnum;
- if (reuse != null && reuse is DirectTermsEnum)
- {
- termsEnum = (DirectTermsEnum) reuse;
- if (!termsEnum.canReuse(terms))
- {
- termsEnum = new DirectTermsEnum(this);
- }
- }
- else
- {
- termsEnum = new DirectTermsEnum(this);
- }
- termsEnum.reset();
- return termsEnum;
- }
+ int[] newSkips = new int[term.skips.Length + 1];
+ Array.Copy(term.skips, 0, newSkips, 0, term.skips.Length);
+ term.skips = newSkips;
+ term.skips[term.skips.Length - 1] = ord;
+ }
+ }
+
+ public override TermsEnum iterator(TermsEnum reuse)
+ {
+ DirectTermsEnum termsEnum;
+ if (reuse != null && reuse is DirectTermsEnum)
+ {
+ termsEnum = (DirectTermsEnum) reuse;
+ if (!termsEnum.canReuse(terms))
+ {
+ termsEnum = new DirectTermsEnum(this);
+ }
+ }
+ else
+ {
+ termsEnum = new DirectTermsEnum(this);
+ }
+ termsEnum.reset();
+ return termsEnum;
+ }
//JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET:
//ORIGINAL LINE: @Override public index.TermsEnum intersect(util.automaton.CompiledAutomaton compiled, final util.BytesRef startTerm)
- public override TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm)
- {
- return new DirectIntersectTermsEnum(this, compiled, startTerm);
- }
-
- public override long size()
- {
- return terms.Length;
- }
-
- public override long SumTotalTermFreq
- {
- get
- {
- return sumTotalTermFreq;
- }
- }
-
- public override long SumDocFreq
- {
- get
- {
- return sumDocFreq;
- }
- }
-
- public override int DocCount
- {
- get
- {
- return docCount;
- }
- }
-
- public override IComparer<BytesRef> Comparator
- {
- get
- {
- return BytesRef.UTF8SortedAsUnicodeComparator;
- }
- }
-
- public override bool hasFreqs()
- {
- return hasFreq;
- }
-
- public override bool hasOffsets()
- {
- return hasOffsets_Renamed;
- }
-
- public override bool hasPositions()
- {
- return hasPos;
- }
-
- public override bool hasPayloads()
- {
- return hasPayloads_Renamed;
- }
-
- private sealed class DirectTermsEnum : TermsEnum
- {
- private readonly DirectPostingsFormat.DirectField outerInstance;
-
- public DirectTermsEnum(DirectPostingsFormat.DirectField outerInstance)
- {
- this.outerInstance = outerInstance;
- }
-
-
- internal readonly BytesRef scratch = new BytesRef();
- internal int termOrd;
-
- internal bool canReuse(TermAndSkip[] other)
- {
- return outerInstance.terms == other;
- }
-
- internal BytesRef setTerm()
- {
- scratch.bytes = outerInstance.termBytes;
- scratch.offset = outerInstance.termOffsets[termOrd];
- scratch.length = outerInstance.termOffsets[termOrd + 1] - outerInstance.termOffsets[termOrd];
- return scratch;
- }
-
- public void reset()
- {
- termOrd = -1;
- }
-
- public override IComparer<BytesRef> Comparator
- {
- get
- {
- return BytesRef.UTF8SortedAsUnicodeComparator;
- }
- }
-
- public override BytesRef next()
- {
- termOrd++;
- if (termOrd < outerInstance.terms.Length)
- {
- return setTerm();
- }
- else
- {
- return null;
- }
- }
-
- public override TermState termState()
- {
- OrdTermState state = new OrdTermState();
- state.ord = termOrd;
- return state;
- }
-
- // If non-negative, exact match; else, -ord-1, where ord
- // is where you would insert the term.
- internal int findTerm(BytesRef term)
- {
-
- // Just do binary search: should be (constant factor)
- // faster than using the skip list:
- int low = 0;
- int high = outerInstance.terms.Length - 1;
-
- while (low <= high)
- {
- int mid = (int)((uint)(low + high) >> 1);
- int cmp = outerInstance.compare(mid, term);
- if (cmp < 0)
- {
- low = mid + 1;
- }
- else if (cmp > 0)
- {
- high = mid - 1;
- }
- else
- {
- return mid; // key found
- }
- }
-
- return -(low + 1); // key not found.
- }
-
- public override SeekStatus seekCeil(BytesRef term)
- {
- // TODO: we should use the skip pointers; should be
- // faster than bin search; we should also hold
- // & reuse current state so seeking forwards is
- // faster
+ public override TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm)
+ {
+ return new DirectIntersectTermsEnum(this, compiled, startTerm);
+ }
+
+ public override long size()
+ {
+ return terms.Length;
+ }
+
+ public override long SumTotalTermFreq
+ {
+ get { return sumTotalTermFreq; }
+ }
+
+ public override long SumDocFreq
+ {
+ get { return sumDocFreq; }
+ }
+
+ public override int DocCount
+ {
+ get { return docCount; }
+ }
+
+ public override IComparer<BytesRef> Comparator
+ {
+ get { return BytesRef.UTF8SortedAsUnicodeComparator; }
+ }
+
+ public override bool hasFreqs()
+ {
+ return hasFreq;
+ }
+
+ public override bool hasOffsets()
+ {
+ return hasOffsets_Renamed;
+ }
+
+ public override bool hasPositions()
+ {
+ return hasPos;
+ }
+
+ public override bool hasPayloads()
+ {
+ return hasPayloads_Renamed;
+ }
+
+ private sealed class DirectTermsEnum : TermsEnum
+ {
+ private readonly DirectPostingsFormat.DirectField outerInstance;
+
+ public DirectTermsEnum(DirectPostingsFormat.DirectField outerInstance)
+ {
+ this.outerInstance = outerInstance;
+ }
+
+
+ internal readonly BytesRef scratch = new BytesRef();
+ internal int termOrd;
+
+ internal bool canReuse(TermAndSkip[] other)
+ {
+ return outerInstance.terms == other;
+ }
+
+ internal BytesRef setTerm()
+ {
+ scratch.bytes = outerInstance.termBytes;
+ scratch.offset = outerInstance.termOffsets[termOrd];
+ scratch.length = outerInstance.termOffsets[termOrd + 1] - outerInstance.termOffsets[termOrd];
+ return scratch;
+ }
+
+ public void reset()
+ {
+ termOrd = -1;
+ }
+
+ public override IComparer<BytesRef> Comparator
+ {
+ get { return BytesRef.UTF8SortedAsUnicodeComparator; }
+ }
+
+ public override BytesRef next()
+ {
+ termOrd++;
+ if (termOrd < outerInstance.terms.Length)
+ {
+ return setTerm();
+ }
+ else
+ {
+ return null;
+ }
+ }
+
+ public override TermState termState()
+ {
+ OrdTermState state = new OrdTermState();
+ state.ord = termOrd;
+ return state;
+ }
+
+ // If non-negative, exact match; else, -ord-1, where ord
+ // is where you would insert the term.
+ internal int findTerm(BytesRef term)
+ {
+
+ // Just do binary search: should be (constant factor)
+ // faster than using the skip list:
+ int low = 0;
+ int high = outerInstance.terms.Length - 1;
+
+ while (low <= high)
+ {
+ int mid = (int) ((uint) (low + high) >> 1);
+ int cmp = outerInstance.compare(mid, term);
+ if (cmp < 0)
+ {
+ low = mid + 1;
+ }
+ else if (cmp > 0)
+ {
+ high = mid - 1;
+ }
+ else
+ {
+ return mid; // key found
+ }
+ }
+
+ return -(low + 1); // key not found.
+ }
+
+ public override SeekStatus seekCeil(BytesRef term)
+ {
+ // TODO: we should use the skip pointers; should be
+ // faster than bin search; we should also hold
+ // & reuse current state so seeking forwards is
+ // faster
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final int ord = findTerm(term);
- int ord = findTerm(term);
- // if (DEBUG) {
- // System.out.println(" find term=" + term.utf8ToString() + " ord=" + ord);
- // }
- if (ord >= 0)
- {
- termOrd = ord;
- setTerm();
- return SeekStatus.FOUND;
- }
- else if (ord == -outerInstance.terms.Length - 1)
- {
- return SeekStatus.END;
- }
- else
- {
- termOrd = -ord - 1;
- setTerm();
- return SeekStatus.NOT_FOUND;
- }
- }
-
- public override bool seekExact(BytesRef term)
- {
- // TODO: we should use the skip pointers; should be
- // faster than bin search; we should also hold
- // & reuse current state so seeking forwards is
- // faster
+ int ord = findTerm(term);
+ // if (DEBUG) {
+ // System.out.println(" find term=" + term.utf8ToString() + " ord=" + ord);
+ // }
+ if (ord >= 0)
+ {
+ termOrd = ord;
+ setTerm();
+ return SeekStatus.FOUND;
+ }
+ else if (ord == -outerInstance.terms.Length - 1)
+ {
+ return SeekStatus.END;
+ }
+ else
+ {
+ termOrd = -ord - 1;
+ setTerm();
+ return SeekStatus.NOT_FOUND;
+ }
+ }
+
+ public override bool seekExact(BytesRef term)
+ {
+ // TODO: we should use the skip pointers; should be
+ // faster than bin search; we should also hold
+ // & reuse current state so seeking forwards is
+ // faster
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final int ord = findTerm(term);
- int ord = findTerm(term);
- if (ord >= 0)
- {
- termOrd = ord;
- setTerm();
- return true;
- }
- else
- {
- return false;
- }
- }
-
- public override void seekExact(long ord)
- {
- termOrd = (int) ord;
- setTerm();
- }
+ int ord = findTerm(term);
+ if (ord >= 0)
+ {
+ termOrd = ord;
+ setTerm();
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+ }
+
+ public override void seekExact(long ord)
+ {
+ termOrd = (int) ord;
+ setTerm();
+ }
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: @Override public void seekExact(util.BytesRef term, index.TermState state) throws java.io.IOException
- public override void seekExact(BytesRef term, TermState state)
- {
- termOrd = (int)((OrdTermState) state).ord;
- setTerm();
- Debug.Assert(term.Equals(scratch));
- }
-
- public override BytesRef term()
- {
- return scratch;
- }
-
- public override long ord()
- {
- return termOrd;
- }
-
- public override int docFreq()
- {
- if (outerInstance.terms[termOrd] is LowFreqTerm)
- {
- return ((LowFreqTerm) outerInstance.terms[termOrd]).docFreq;
- }
- else
- {
- return ((HighFreqTerm) outerInstance.terms[termOrd]).docIDs.Length;
- }
- }
-
- public override long totalTermFreq()
- {
- if (outerInstance.terms[termOrd] is LowFreqTerm)
- {
- return ((LowFreqTerm) outerInstance.terms[termOrd]).totalTermFreq;
- }
- else
- {
- return ((HighFreqTerm) outerInstance.terms[termOrd]).totalTermFreq;
- }
- }
-
- public override DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags)
- {
- // TODO: implement reuse, something like Pulsing:
- // it's hairy!
-
- if (outerInstance.terms[termOrd] is LowFreqTerm)
- {
+ public override void seekExact(BytesRef term, TermState state)
+ {
+ termOrd = (int) ((OrdTermState) state).ord;
+ setTerm();
+ Debug.Assert(term.Equals(scratch));
+ }
+
+ public override BytesRef term()
+ {
+ return scratch;
+ }
+
+ public override long ord()
+ {
+ return termOrd;
+ }
+
+ public override int docFreq()
+ {
+ if (outerInstance.terms[termOrd] is LowFreqTerm)
+ {
+ return ((LowFreqTerm) outerInstance.terms[termOrd]).docFreq;
+ }
+ else
+ {
+ return ((HighFreqTerm) outerInstance.terms[termOrd]).docIDs.Length;
+ }
+ }
+
+ public override long totalTermFreq()
+ {
+ if (outerInstance.terms[termOrd] is LowFreqTerm)
+ {
+ return ((LowFreqTerm) outerInstance.terms[termOrd]).totalTermFreq;
+ }
+ else
+ {
+ return ((HighFreqTerm) outerInstance.terms[termOrd]).totalTermFreq;
+ }
+ }
+
+ public override DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags)
+ {
+ // TODO: implement reuse, something like Pulsing:
+ // it's hairy!
+
+ if (outerInstance.terms[termOrd] is LowFreqTerm)
+ {
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final int[] postings = ((LowFreqTerm) terms[termOrd]).postings;
- int[] postings = ((LowFreqTerm) outerInstance.terms[termOrd]).postings;
- if (outerInstance.hasFreq)
- {
- if (outerInstance.hasPos)
- {
- int posLen;
- if (outerInstance.hasOffsets_Renamed)
- {
- posLen = 3;
- }
- else
- {
- posLen = 1;
- }
- if (outerInstance.hasPayloads_Renamed)
- {
- posLen++;
- }
- LowFreqDocsEnum docsEnum;
- if (reuse is LowFreqDocsEnum)
- {
- docsEnum = (LowFreqDocsEnum) reuse;
- if (!docsEnum.canReuse(liveDocs, posLen))
- {
- docsEnum = new LowFreqDocsEnum(liveDocs, posLen);
- }
- }
- else
- {
- docsEnum = new LowFreqDocsEnum(liveDocs, posLen);
- }
-
- return docsEnum.reset(postings);
- }
- else
- {
- LowFreqDocsEnumNoPos docsEnum;
- if (reuse is LowFreqDocsEnumNoPos)
- {
- docsEnum = (LowFreqDocsEnumNoPos) reuse;
- if (!docsEnum.canReuse(liveDocs))
- {
- docsEnum = new LowFreqDocsEnumNoPos(liveDocs);
- }
- }
- else
- {
- docsEnum = new LowFreqDocsEnumNoPos(liveDocs);
- }
-
- return docsEnum.reset(postings);
- }
- }
- else
- {
- LowFreqDocsEnumNoTF docsEnum;
- if (reuse is LowFreqDocsEnumNoTF)
- {
- docsEnum = (LowFreqDocsEnumNoTF) reuse;
- if (!docsEnum.canReuse(liveDocs))
- {
- docsEnum = new LowFreqDocsEnumNoTF(liveDocs);
- }
- }
- else
- {
- docsEnum = new LowFreqDocsEnumNoTF(liveDocs);
- }
-
- return docsEnum.reset(postings);
- }
- }
- else
- {
+ int[] postings = ((LowFreqTerm) outerInstance.terms[termOrd]).postings;
+ if (outerInstance.hasFreq)
+ {
+ if (outerInstance.hasPos)
+ {
+ int posLen;
+ if (outerInstance.hasOffsets_Renamed)
+ {
+ posLen = 3;
+ }
+ else
+ {
+ posLen = 1;
+ }
+ if (outerInstance.hasPayloads_Renamed)
+ {
+ posLen++;
+ }
+ LowFreqDocsEnum docsEnum;
+ if (reuse is LowFreqDocsEnum)
+ {
+ docsEnum = (LowFreqDocsEnum) reuse;
+ if (!docsEnum.canReuse(liveDocs, posLen))
+ {
+ docsEnum = new LowFreqDocsEnum(liveDocs, posLen);
+ }
+ }
+ else
+ {
+ docsEnum = new LowFreqDocsEnum(liveDocs, posLen);
+ }
+
+ return docsEnum.reset(postings);
+ }
+ else
+ {
+ LowFreqDocsEnumNoPos docsEnum;
+ if (reuse is LowFreqDocsEnumNoPos)
+ {
+ docsEnum = (LowFreqDocsEnumNoPos) reuse;
+ if (!docsEnum.canReuse(liveDocs))
+ {
+ docsEnum = new LowFreqDocsEnumNoPos(liveDocs);
+ }
+ }
+ else
+ {
+ docsEnum = new LowFreqDocsEnumNoPos(liveDocs);
+ }
+
+ return docsEnum.reset(postings);
+ }
+ }
+ else
+ {
+ LowFreqDocsEnumNoTF docsEnum;
+ if (reuse is LowFreqDocsEnumNoTF)
+ {
+ docsEnum = (LowFreqDocsEnumNoTF) reuse;
+ if (!docsEnum.canReuse(liveDocs))
+ {
+ docsEnum = new LowFreqDocsEnumNoTF(liveDocs);
+ }
+ }
+ else
+ {
+ docsEnum = new LowFreqDocsEnumNoTF(liveDocs);
+ }
+
+ return docsEnum.reset(postings);
+ }
+ }
+ else
+ {
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final HighFreqTerm term = (HighFreqTerm) terms[termOrd];
- HighFreqTerm term = (HighFreqTerm) outerInstance.terms[termOrd];
-
- HighFreqDocsEnum docsEnum;
- if (reuse is HighFreqDocsEnum)
- {
- docsEnum = (HighFreqDocsEnum) reuse;
- if (!docsEnum.canReuse(liveDocs))
- {
- docsEnum = new HighFreqDocsEnum(liveDocs);
- }
- }
- else
- {
- docsEnum = new HighFreqDocsEnum(liveDocs);
- }
-
- //System.out.println(" DE for term=" + new BytesRef(terms[termOrd].term).utf8ToString() + ": " + term.docIDs.length + " docs");
- return docsEnum.reset(term.docIDs, term.freqs);
- }
- }
-
- public override DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags)
- {
- if (!outerInstance.hasPos)
- {
- return null;
- }
-
- // TODO: implement reuse, something like Pulsing:
- // it's hairy!
-
- if (outerInstance.terms[termOrd] is LowFreqTerm)
- {
+ HighFreqTerm term = (HighFreqTerm) outerInstance.terms[termOrd];
+
+ HighFreqDocsEnum docsEnum;
+ if (reuse is HighFreqDocsEnum)
+ {
+ docsEnum = (HighFreqDocsEnum) reuse;
+ if (!docsEnum.canReuse(liveDocs))
+ {
+ docsEnum = new HighFreqDocsEnum(liveDocs);
+ }
+ }
+ else
+ {
+ docsEnum = new HighFreqDocsEnum(liveDocs);
+ }
+
+ //System.out.println(" DE for term=" + new BytesRef(terms[termOrd].term).utf8ToString() + ": " + term.docIDs.length + " docs");
+ return docsEnum.reset(term.docIDs, term.freqs);
+ }
+ }
+
+ public override DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse,
+ int flags)
+ {
+ if (!outerInstance.hasPos)
+ {
+ return null;
+ }
+
+ // TODO: implement reuse, something like Pulsing:
+ // it's hairy!
+
+ if (outerInstance.terms[termOrd] is LowFreqTerm)
+ {
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final LowFreqTerm term = ((LowFreqTerm) terms[termOrd]);
- LowFreqTerm term = ((LowFreqTerm) outerInstance.terms[termOrd]);
+ LowFreqTerm term = ((LowFreqTerm) outerInstance.terms[termOrd]);
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final int[] postings = term.postings;
- int[] postings = term.postings;
+ int[] postings = term.postings;
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final byte[] payloads = term.payloads;
- sbyte[] payloads = term.payloads;
- return (new LowFreqDocsAndPositionsEnum(liveDocs, outerInstance.hasOffsets_Renamed, outerInstance.hasPayloads_Renamed)).reset(postings, payloads);
- }
- else
- {
+ sbyte[] payloads = term.payloads;
+ return
+ (new LowFreqDocsAndPositionsEnum(liveDocs, outerInstance.hasOffsets_Renamed,
+ outerInstance.hasPayloads_Renamed)).reset(postings, payloads);
+ }
+ else
+ {
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final HighFreqTerm term = (HighFreqTerm) terms[termOrd];
- HighFreqTerm term = (HighFreqTerm) outerInstance.terms[termOrd];
- return (new HighFreqDocsAndPositionsEnum(liveDocs, outerInstance.hasOffsets_Renamed)).reset(term.docIDs, term.freqs, term.positions, term.payloads);
- }
- }
- }
-
- private sealed class DirectIntersectTermsEnum : TermsEnum
- {
- private readonly DirectPostingsFormat.DirectField outerInstance;
-
- internal readonly RunAutomaton runAutomaton;
- internal readonly CompiledAutomaton compiledAutomaton;
- internal int termOrd;
- internal readonly BytesRef scratch = new BytesRef();
-
- private sealed class State
- {
- private readonly DirectPostingsFormat.DirectField.DirectIntersectTermsEnum outerInstance;
-
- public State(DirectPostingsFormat.DirectField.DirectIntersectTermsEnum outerInstance)
- {
- this.outerInstance = outerInstance;
- }
-
- internal int changeOrd;
- internal int state;
- internal Transition[] transitions;
- internal int transitionUpto;
- internal int transitionMax;
- internal int transitionMin;
- }
-
- internal State[] states;
- internal int stateUpto;
-
- public DirectIntersectTermsEnum(DirectPostingsFormat.DirectField outerInstance, CompiledAutomaton compiled, BytesRef startTerm)
- {
- this.outerInstance = outerInstance;
- runAutomaton = compiled.runAutomaton;
- compiledAutomaton = compiled;
- termOrd = -1;
- states = new State[1];
- states[0] = new State(this);
- states[0].changeOrd = outerInstance.terms.Length;
- states[0].state = runAutomaton.InitialState;
- states[0].transitions = compiledAutomaton.sortedTransitions[states[0].state];
- states[0].transitionUpto = -1;
- states[0].transitionMax = -1;
-
- //System.out.println("IE.init startTerm=" + startTerm);
-
- if (startTerm != null)
- {
- int skipUpto = 0;
- if (startTerm.length == 0)
- {
- if (outerInstance.terms.Length > 0 && outerInstance.termOffsets[1] == 0)
- {
- termOrd = 0;
- }
- }
- else
- {
- termOrd++;
-
- for (int i = 0;i < startTerm.length;i++)
- {
+ HighFreqTerm term = (HighFreqTerm) outerInstance.terms[termOrd];
+ return
+ (new HighFreqDocsAndPositionsEnum(liveDocs, outerInstance.hasOffsets_Renamed)).Reset(
+ term.docIDs, term.freqs, term.positions, term.payloads);
+ }
+ }
+ }
+
+ private sealed class DirectIntersectTermsEnum : TermsEnum
+ {
+ private readonly DirectPostingsFormat.DirectField outerInstance;
+
+ internal readonly RunAutomaton runAutomaton;
+ internal readonly CompiledAutomaton compiledAutomaton;
+ internal int termOrd;
+ internal readonly BytesRef scratch = new BytesRef();
+
+ private sealed class State
+ {
+ private readonly DirectPostingsFormat.DirectField.DirectIntersectTermsEnum outerInstance;
+
+ public State(DirectPostingsFormat.DirectField.DirectIntersectTermsEnum outerInstance)
+ {
+ this.outerInstance = outerInstance;
+ }
+
+ internal int changeOrd;
+ internal int state;
+ internal Transition[] transitions;
+ internal int transitionUpto;
+ internal int transitionMax;
+ internal int transitionMin;
+ }
+
+ internal State[] states;
+ internal int stateUpto;
+
+ public DirectIntersectTermsEnum(DirectPostingsFormat.DirectField outerInstance,
+ CompiledAutomaton compiled, BytesRef startTerm)
+ {
+ this.outerInstance = outerInstance;
+ runAutomaton = compiled.runAutomaton;
+ compiledAutomaton = compiled;
+ termOrd = -1;
+ states = new State[1];
+ states[0] = new State(this);
+ states[0].changeOrd = outerInstance.terms.Length;
+ states[0].state = runAutomaton.InitialState;
+ states[0].transitions = compiledAutomaton.sortedTransitions[states[0].state];
+ states[0].transitionUpto = -1;
+ states[0].transitionMax = -1;
+
+ //System.out.println("IE.init startTerm=" + startTerm);
+
+ if (startTerm != null)
+ {
+ int skipUpto = 0;
+ if (startTerm.length == 0)
+ {
+ if (outerInstance.terms.Length > 0 && outerInstance.termOffsets[1] == 0)
+ {
+ termOrd = 0;
+ }
+ }
+ else
+ {
+ termOrd++;
+
+ for (int i = 0; i < startTerm.length; i++)
+ {
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final int label = startTerm.bytes[startTerm.offset+i] & 0xFF;
- int label = startTerm.bytes[startTerm.offset + i] & 0xFF;
-
- while (label > states[i].transitionMax)
- {
- states[i].transitionUpto++;
- Debug.Assert(states[i].transitionUpto < states[i].transitions.Length);
- states[i].transitionMin = states[i].transitions[states[i].transitionUpto].Min;
- states[i].transitionMax = states[i].transitions[states[i].transitionUpto].Max;
- Debug.Assert(states[i].transitionMin >= 0);
- Debug.Assert(states[i].transitionMin <= 255);
- Debug.Assert(states[i].transitionMax >= 0);
- Debug.Assert(states[i].transitionMax <= 255);
- }
-
- // Skip forwards until we find a term matching
- // the label at this position:
- while (termOrd < outerInstance.terms.Length)
- {
+ int label = startTerm.bytes[startTerm.offset + i] & 0xFF;
+
+ while (label > states[i].transitionMax)
+ {
+ states[i].transitionUpto++;
+ Debug.Assert(states[i].transitionUpto < states[i].transitions.Length);
+ states[i].transitionMin = states[i].transitions[states[i].transitionUpto].Min;
+ states[i].transitionMax = states[i].transitions[states[i].transitionUpto].Max;
+ Debug.Assert(states[i].transitionMin >= 0);
+ Debug.Assert(states[i].transitionMin <= 255);
+ Debug.Assert(states[i].transitionMax >= 0);
+ Debug.Assert(states[i].transitionMax <= 255);
+ }
+
+ // Skip forwards until we find a term matching
+ // the label at this position:
+ while (termOrd < outerInstance.terms.Length)
+ {
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final int skipOffset = skipOffsets[termOrd];
- int skipOffset = outerInstance.skipOffsets[termOrd];
+ int skipOffset = outerInstance.skipOffsets[termOrd];
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final int numSkips = skipOffsets[termOrd+1] - skipOffset;
- int numSkips = outerInstance.skipOffsets[termOrd + 1] - skipOffset;
+ int numSkips = outerInstance.skipOffsets[termOrd + 1] - skipOffset;
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final int termOffset = termOffsets[termOrd];
- int termOffset = outerInstance.termOffsets[termOrd];
+ int termOffset = outerInstance.termOffsets[termOrd];
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final int termLength = termOffsets[1+termOrd] - termOffset;
- int termLength = outerInstance.termOffsets[1 + termOrd] - termOffset;
-
- // if (DEBUG) {
- // System.out.println(" check termOrd=" + termOrd + " term=" + new BytesRef(termBytes, termOffset, termLength).utf8ToString() + " skips=" + Arrays.toString(skips) + " i=" + i);
- // }
-
- if (termOrd == states[stateUpto].changeOrd)
- {
- // if (DEBUG) {
- // System.out.println("
<TRUNCATED>
[2/5] lucenenet git commit: Updating Memory Codec
Posted by pn...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0cc0e7ec/src/Lucene.Net.Codecs/Memory/FSTOrdPostingsFormat.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Codecs/Memory/FSTOrdPostingsFormat.cs b/src/Lucene.Net.Codecs/Memory/FSTOrdPostingsFormat.cs
index 5723f48..71b9a78 100644
--- a/src/Lucene.Net.Codecs/Memory/FSTOrdPostingsFormat.cs
+++ b/src/Lucene.Net.Codecs/Memory/FSTOrdPostingsFormat.cs
@@ -1,88 +1,81 @@
-namespace org.apache.lucene.codecs.memory
-{
-
-
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
- using Lucene41PostingsWriter = org.apache.lucene.codecs.lucene41.Lucene41PostingsWriter;
- using Lucene41PostingsReader = org.apache.lucene.codecs.lucene41.Lucene41PostingsReader;
- using IndexOptions = org.apache.lucene.index.FieldInfo.IndexOptions;
- using SegmentReadState = org.apache.lucene.index.SegmentReadState;
- using SegmentWriteState = org.apache.lucene.index.SegmentWriteState;
- using IOUtils = org.apache.lucene.util.IOUtils;
-
- /// <summary>
- /// FSTOrd term dict + Lucene41PBF
- /// </summary>
+namespace Lucene.Net.Codecs.Memory
+{
+ using Lucene41PostingsWriter = Lucene41.Lucene41PostingsWriter;
+ using Lucene41PostingsReader = Lucene41.Lucene41PostingsReader;
+ using SegmentReadState = Index.SegmentReadState;
+ using SegmentWriteState = Index.SegmentWriteState;
+ using IOUtils = Util.IOUtils;
- public sealed class FSTOrdPostingsFormat : PostingsFormat
- {
- public FSTOrdPostingsFormat() : base("FSTOrd41")
- {
- }
+ /// <summary>
+ /// FSTOrd term dict + Lucene41PBF
+ /// </summary>
- public override string ToString()
- {
- return Name;
- }
+ public sealed class FSTOrdPostingsFormat : PostingsFormat
+ {
+ public FSTOrdPostingsFormat() : base("FSTOrd41")
+ {
+ }
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public org.apache.lucene.codecs.FieldsConsumer fieldsConsumer(org.apache.lucene.index.SegmentWriteState state) throws java.io.IOException
- public override FieldsConsumer fieldsConsumer(SegmentWriteState state)
- {
- PostingsWriterBase postingsWriter = new Lucene41PostingsWriter(state);
+ public override string ToString()
+ {
+ return Name;
+ }
- bool success = false;
- try
- {
- FieldsConsumer ret = new FSTOrdTermsWriter(state, postingsWriter);
- success = true;
- return ret;
- }
- finally
- {
- if (!success)
- {
- IOUtils.closeWhileHandlingException(postingsWriter);
- }
- }
- }
+ public override FieldsConsumer FieldsConsumer(SegmentWriteState state)
+ {
+ PostingsWriterBase postingsWriter = new Lucene41PostingsWriter(state);
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public org.apache.lucene.codecs.FieldsProducer fieldsProducer(org.apache.lucene.index.SegmentReadState state) throws java.io.IOException
- public override FieldsProducer fieldsProducer(SegmentReadState state)
- {
- PostingsReaderBase postingsReader = new Lucene41PostingsReader(state.directory, state.fieldInfos, state.segmentInfo, state.context, state.segmentSuffix);
- bool success = false;
- try
- {
- FieldsProducer ret = new FSTOrdTermsReader(state, postingsReader);
- success = true;
- return ret;
- }
- finally
- {
- if (!success)
- {
- IOUtils.closeWhileHandlingException(postingsReader);
- }
- }
- }
- }
+ bool success = false;
+ try
+ {
+ FieldsConsumer ret = new FSTOrdTermsWriter(state, postingsWriter);
+ success = true;
+ return ret;
+ }
+ finally
+ {
+ if (!success)
+ {
+ IOUtils.CloseWhileHandlingException(postingsWriter);
+ }
+ }
+ }
+ public override FieldsProducer FieldsProducer(SegmentReadState state)
+ {
+ PostingsReaderBase postingsReader = new Lucene41PostingsReader(state.Directory, state.FieldInfos,
+ state.SegmentInfo, state.Context, state.SegmentSuffix);
+ bool success = false;
+ try
+ {
+ FieldsProducer ret = new FSTOrdTermsReader(state, postingsReader);
+ success = true;
+ return ret;
+ }
+ finally
+ {
+ if (!success)
+ {
+ IOUtils.CloseWhileHandlingException(postingsReader);
+ }
+ }
+ }
+ }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0cc0e7ec/src/Lucene.Net.Codecs/Memory/FSTOrdTermsWriter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Codecs/Memory/FSTOrdTermsWriter.cs b/src/Lucene.Net.Codecs/Memory/FSTOrdTermsWriter.cs
index 51c7278..4b06045 100644
--- a/src/Lucene.Net.Codecs/Memory/FSTOrdTermsWriter.cs
+++ b/src/Lucene.Net.Codecs/Memory/FSTOrdTermsWriter.cs
@@ -1,435 +1,406 @@
-using System;
-using System.Collections.Generic;
-
-namespace org.apache.lucene.codecs.memory
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System.IO;
+using Lucene.Net.Util.Fst;
+
+namespace Lucene.Net.Codecs.Memory
{
-
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
- using IndexOptions = org.apache.lucene.index.FieldInfo.IndexOptions;
- using FieldInfo = org.apache.lucene.index.FieldInfo;
- using FieldInfos = org.apache.lucene.index.FieldInfos;
- using IndexFileNames = org.apache.lucene.index.IndexFileNames;
- using SegmentWriteState = org.apache.lucene.index.SegmentWriteState;
- using DataOutput = org.apache.lucene.store.DataOutput;
- using IndexOutput = org.apache.lucene.store.IndexOutput;
- using RAMOutputStream = org.apache.lucene.store.RAMOutputStream;
- using ArrayUtil = org.apache.lucene.util.ArrayUtil;
- using BytesRef = org.apache.lucene.util.BytesRef;
- using IOUtils = org.apache.lucene.util.IOUtils;
- using IntsRef = org.apache.lucene.util.IntsRef;
- using Builder = org.apache.lucene.util.fst.Builder;
- using FST = org.apache.lucene.util.fst.FST;
- using PositiveIntOutputs = org.apache.lucene.util.fst.PositiveIntOutputs;
- using Util = org.apache.lucene.util.fst.Util;
-
- /// <summary>
- /// FST-based term dict, using ord as FST output.
- ///
- /// The FST holds the mapping between <term, ord>, and
- /// term's metadata is delta encoded into a single byte block.
- ///
- /// Typically the byte block consists of four parts:
- /// 1. term statistics: docFreq, totalTermFreq;
- /// 2. monotonic long[], e.g. the pointer to the postings list for that term;
- /// 3. generic byte[], e.g. other information customized by postings base.
- /// 4. single-level skip list to speed up metadata decoding by ord.
- ///
- /// <para>
- /// Files:
- /// <ul>
- /// <li><tt>.tix</tt>: <a href="#Termindex">Term Index</a></li>
- /// <li><tt>.tbk</tt>: <a href="#Termblock">Term Block</a></li>
- /// </ul>
- /// </para>
- ///
- /// <a name="Termindex" id="Termindex"></a>
- /// <h3>Term Index</h3>
- /// <para>
- /// The .tix contains a list of FSTs, one for each field.
- /// The FST maps a term to its corresponding order in current field.
- /// </para>
- ///
- /// <ul>
- /// <li>TermIndex(.tix) --> Header, TermFST<sup>NumFields</sup>, Footer</li>
- /// <li>TermFST --> <seealso cref="FST FST<long>"/></li>
- /// <li>Header --> <seealso cref="CodecUtil#writeHeader CodecHeader"/></li>
- /// <li>Footer --> <seealso cref="CodecUtil#writeFooter CodecFooter"/></li>
- /// </ul>
- ///
- /// <para>Notes:</para>
- /// <ul>
- /// <li>
- /// Since terms are already sorted before writing to <a href="#Termblock">Term Block</a>,
- /// their ords can directly used to seek term metadata from term block.
- /// </li>
- /// </ul>
- ///
- /// <a name="Termblock" id="Termblock"></a>
- /// <h3>Term Block</h3>
- /// <para>
- /// The .tbk contains all the statistics and metadata for terms, along with field summary (e.g.
- /// per-field data like number of documents in current field). For each field, there are four blocks:
- /// <ul>
- /// <li>statistics bytes block: contains term statistics; </li>
- /// <li>metadata longs block: delta-encodes monotonic part of metadata; </li>
- /// <li>metadata bytes block: encodes other parts of metadata; </li>
- /// <li>skip block: contains skip data, to speed up metadata seeking and decoding</li>
- /// </ul>
- /// </para>
- ///
- /// <para>File Format:</para>
- /// <ul>
- /// <li>TermBlock(.tbk) --> Header, <i>PostingsHeader</i>, FieldSummary, DirOffset</li>
- /// <li>FieldSummary --> NumFields, <FieldNumber, NumTerms, SumTotalTermFreq?, SumDocFreq,
- /// DocCount, LongsSize, DataBlock > <sup>NumFields</sup>, Footer</li>
- ///
- /// <li>DataBlock --> StatsBlockLength, MetaLongsBlockLength, MetaBytesBlockLength,
- /// SkipBlock, StatsBlock, MetaLongsBlock, MetaBytesBlock </li>
- /// <li>SkipBlock --> < StatsFPDelta, MetaLongsSkipFPDelta, MetaBytesSkipFPDelta,
- /// MetaLongsSkipDelta<sup>LongsSize</sup> ><sup>NumTerms</sup>
- /// <li>StatsBlock --> < DocFreq[Same?], (TotalTermFreq-DocFreq) ? > <sup>NumTerms</sup>
- /// <li>MetaLongsBlock --> < LongDelta<sup>LongsSize</sup>, BytesSize > <sup>NumTerms</sup>
- /// <li>MetaBytesBlock --> Byte <sup>MetaBytesBlockLength</sup>
- /// <li>Header --> <seealso cref="CodecUtil#writeHeader CodecHeader"/></li>
- /// <li>DirOffset --> <seealso cref="DataOutput#writeLong Uint64"/></li>
- /// <li>NumFields, FieldNumber, DocCount, DocFreq, LongsSize,
- /// FieldNumber, DocCount --> <seealso cref="DataOutput#writeVInt VInt"/></li>
- /// <li>NumTerms, SumTotalTermFreq, SumDocFreq, StatsBlockLength, MetaLongsBlockLength, MetaBytesBlockLength,
- /// StatsFPDelta, MetaLongsSkipFPDelta, MetaBytesSkipFPDelta, MetaLongsSkipStart, TotalTermFreq,
- /// LongDelta,--> <seealso cref="DataOutput#writeVLong VLong"/></li>
- /// <li>Footer --> <seealso cref="CodecUtil#writeFooter CodecFooter"/></li>
- /// </ul>
- /// <para>Notes: </para>
- /// <ul>
- /// <li>
- /// The format of PostingsHeader and MetaBytes are customized by the specific postings implementation:
- /// they contain arbitrary per-file data (such as parameters or versioning information), and per-term data
- /// (non-monotonic ones like pulsed postings data).
- /// </li>
- /// <li>
- /// During initialization the reader will load all the blocks into memory. SkipBlock will be decoded, so that during seek
- /// term dict can lookup file pointers directly. StatsFPDelta, MetaLongsSkipFPDelta, etc. are file offset
- /// for every SkipInterval's term. MetaLongsSkipDelta is the difference from previous one, which indicates
- /// the value of preceding metadata longs for every SkipInterval's term.
- /// </li>
- /// <li>
- /// DocFreq is the count of documents which contain the term. TotalTermFreq is the total number of occurrences of the term.
- /// Usually these two values are the same for long tail terms, therefore one bit is stole from DocFreq to check this case,
- /// so that encoding of TotalTermFreq may be omitted.
- /// </li>
- /// </ul>
- ///
- /// @lucene.experimental
- /// </summary>
-
- public class FSTOrdTermsWriter : FieldsConsumer
- {
- internal const string TERMS_INDEX_EXTENSION = "tix";
- internal const string TERMS_BLOCK_EXTENSION = "tbk";
- internal const string TERMS_CODEC_NAME = "FST_ORD_TERMS_DICT";
- public const int TERMS_VERSION_START = 0;
- public const int TERMS_VERSION_CHECKSUM = 1;
- public const int TERMS_VERSION_CURRENT = TERMS_VERSION_CHECKSUM;
- public const int SKIP_INTERVAL = 8;
-
- internal readonly PostingsWriterBase postingsWriter;
- internal readonly FieldInfos fieldInfos;
- internal readonly IList<FieldMetaData> fields = new List<FieldMetaData>();
- internal IndexOutput blockOut = null;
- internal IndexOutput indexOut = null;
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public FSTOrdTermsWriter(org.apache.lucene.index.SegmentWriteState state, org.apache.lucene.codecs.PostingsWriterBase postingsWriter) throws java.io.IOException
- public FSTOrdTermsWriter(SegmentWriteState state, PostingsWriterBase postingsWriter)
- {
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final String termsIndexFileName = org.apache.lucene.index.IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, TERMS_INDEX_EXTENSION);
- string termsIndexFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, TERMS_INDEX_EXTENSION);
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final String termsBlockFileName = org.apache.lucene.index.IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, TERMS_BLOCK_EXTENSION);
- string termsBlockFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, TERMS_BLOCK_EXTENSION);
-
- this.postingsWriter = postingsWriter;
- this.fieldInfos = state.fieldInfos;
-
- bool success = false;
- try
- {
- this.indexOut = state.directory.createOutput(termsIndexFileName, state.context);
- this.blockOut = state.directory.createOutput(termsBlockFileName, state.context);
- writeHeader(indexOut);
- writeHeader(blockOut);
- this.postingsWriter.init(blockOut);
- success = true;
- }
- finally
- {
- if (!success)
- {
- IOUtils.closeWhileHandlingException(indexOut, blockOut);
- }
- }
- }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public org.apache.lucene.codecs.TermsConsumer addField(org.apache.lucene.index.FieldInfo field) throws java.io.IOException
- public override TermsConsumer addField(FieldInfo field)
- {
- return new TermsWriter(this, field);
- }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public void close() throws java.io.IOException
- public override void close()
- {
- if (blockOut != null)
- {
- IOException ioe = null;
- try
- {
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final long blockDirStart = blockOut.getFilePointer();
- long blockDirStart = blockOut.FilePointer;
-
- // write field summary
- blockOut.writeVInt(fields.Count);
- foreach (FieldMetaData field in fields)
- {
- blockOut.writeVInt(field.fieldInfo.number);
- blockOut.writeVLong(field.numTerms);
- if (field.fieldInfo.IndexOptions != IndexOptions.DOCS_ONLY)
- {
- blockOut.writeVLong(field.sumTotalTermFreq);
- }
- blockOut.writeVLong(field.sumDocFreq);
- blockOut.writeVInt(field.docCount);
- blockOut.writeVInt(field.longsSize);
- blockOut.writeVLong(field.statsOut.FilePointer);
- blockOut.writeVLong(field.metaLongsOut.FilePointer);
- blockOut.writeVLong(field.metaBytesOut.FilePointer);
-
- field.skipOut.writeTo(blockOut);
- field.statsOut.writeTo(blockOut);
- field.metaLongsOut.writeTo(blockOut);
- field.metaBytesOut.writeTo(blockOut);
- field.dict.save(indexOut);
- }
- writeTrailer(blockOut, blockDirStart);
- CodecUtil.writeFooter(indexOut);
- CodecUtil.writeFooter(blockOut);
- }
- catch (IOException ioe2)
- {
- ioe = ioe2;
- }
- finally
- {
- IOUtils.closeWhileHandlingException(ioe, blockOut, indexOut, postingsWriter);
- blockOut = null;
- }
- }
- }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: private void writeHeader(org.apache.lucene.store.IndexOutput out) throws java.io.IOException
- private void writeHeader(IndexOutput @out)
- {
- CodecUtil.writeHeader(@out, TERMS_CODEC_NAME, TERMS_VERSION_CURRENT);
- }
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: private void writeTrailer(org.apache.lucene.store.IndexOutput out, long dirStart) throws java.io.IOException
- private void writeTrailer(IndexOutput @out, long dirStart)
- {
- @out.writeLong(dirStart);
- }
-
- private class FieldMetaData
- {
- public FieldInfo fieldInfo;
- public long numTerms;
- public long sumTotalTermFreq;
- public long sumDocFreq;
- public int docCount;
- public int longsSize;
- public FST<long?> dict;
-
- // TODO: block encode each part
-
- // vint encode next skip point (fully decoded when reading)
- public RAMOutputStream skipOut;
- // vint encode df, (ttf-df)
- public RAMOutputStream statsOut;
- // vint encode monotonic long[] and length for corresponding byte[]
- public RAMOutputStream metaLongsOut;
- // generic byte[]
- public RAMOutputStream metaBytesOut;
- }
-
- internal sealed class TermsWriter : TermsConsumer
- {
- private readonly FSTOrdTermsWriter outerInstance;
-
- internal readonly Builder<long?> builder;
- internal readonly PositiveIntOutputs outputs;
- internal readonly FieldInfo fieldInfo;
- internal readonly int longsSize;
- internal long numTerms;
-
- internal readonly IntsRef scratchTerm = new IntsRef();
- internal readonly RAMOutputStream statsOut = new RAMOutputStream();
- internal readonly RAMOutputStream metaLongsOut = new RAMOutputStream();
- internal readonly RAMOutputStream metaBytesOut = new RAMOutputStream();
-
- internal readonly RAMOutputStream skipOut = new RAMOutputStream();
- internal long lastBlockStatsFP;
- internal long lastBlockMetaLongsFP;
- internal long lastBlockMetaBytesFP;
- internal long[] lastBlockLongs;
-
- internal long[] lastLongs;
- internal long lastMetaBytesFP;
-
- internal TermsWriter(FSTOrdTermsWriter outerInstance, FieldInfo fieldInfo)
- {
- this.outerInstance = outerInstance;
- this.numTerms = 0;
- this.fieldInfo = fieldInfo;
- this.longsSize = outerInstance.postingsWriter.setField(fieldInfo);
- this.outputs = PositiveIntOutputs.Singleton;
- this.builder = new Builder<>(FST.INPUT_TYPE.BYTE1, outputs);
-
- this.lastBlockStatsFP = 0;
- this.lastBlockMetaLongsFP = 0;
- this.lastBlockMetaBytesFP = 0;
- this.lastBlockLongs = new long[longsSize];
-
- this.lastLongs = new long[longsSize];
- this.lastMetaBytesFP = 0;
- }
-
- public override IComparer<BytesRef> Comparator
- {
- get
- {
- return BytesRef.UTF8SortedAsUnicodeComparator;
- }
- }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public org.apache.lucene.codecs.PostingsConsumer startTerm(org.apache.lucene.util.BytesRef text) throws java.io.IOException
- public override PostingsConsumer startTerm(BytesRef text)
- {
- outerInstance.postingsWriter.startTerm();
- return outerInstance.postingsWriter;
- }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public void finishTerm(org.apache.lucene.util.BytesRef text, org.apache.lucene.codecs.TermStats stats) throws java.io.IOException
- public override void finishTerm(BytesRef text, TermStats stats)
- {
- if (numTerms > 0 && numTerms % SKIP_INTERVAL == 0)
- {
- bufferSkip();
- }
- // write term meta data into fst
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final long longs[] = new long[longsSize];
- long[] longs = new long[longsSize];
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final long delta = stats.totalTermFreq - stats.docFreq;
- long delta = stats.totalTermFreq - stats.docFreq;
- if (stats.totalTermFreq > 0)
- {
- if (delta == 0)
- {
- statsOut.writeVInt(stats.docFreq << 1 | 1);
- }
- else
- {
- statsOut.writeVInt(stats.docFreq << 1 | 0);
- statsOut.writeVLong(stats.totalTermFreq - stats.docFreq);
- }
- }
- else
- {
- statsOut.writeVInt(stats.docFreq);
- }
- BlockTermState state = outerInstance.postingsWriter.newTermState();
- state.docFreq = stats.docFreq;
- state.totalTermFreq = stats.totalTermFreq;
- outerInstance.postingsWriter.finishTerm(state);
- outerInstance.postingsWriter.encodeTerm(longs, metaBytesOut, fieldInfo, state, true);
- for (int i = 0; i < longsSize; i++)
- {
- metaLongsOut.writeVLong(longs[i] - lastLongs[i]);
- lastLongs[i] = longs[i];
- }
- metaLongsOut.writeVLong(metaBytesOut.FilePointer - lastMetaBytesFP);
-
- builder.add(Util.toIntsRef(text, scratchTerm), numTerms);
- numTerms++;
-
- lastMetaBytesFP = metaBytesOut.FilePointer;
- }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public void finish(long sumTotalTermFreq, long sumDocFreq, int docCount) throws java.io.IOException
- public override void finish(long sumTotalTermFreq, long sumDocFreq, int docCount)
- {
- if (numTerms > 0)
- {
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final FieldMetaData metadata = new FieldMetaData();
- FieldMetaData metadata = new FieldMetaData();
- metadata.fieldInfo = fieldInfo;
- metadata.numTerms = numTerms;
- metadata.sumTotalTermFreq = sumTotalTermFreq;
- metadata.sumDocFreq = sumDocFreq;
- metadata.docCount = docCount;
- metadata.longsSize = longsSize;
- metadata.skipOut = skipOut;
- metadata.statsOut = statsOut;
- metadata.metaLongsOut = metaLongsOut;
- metadata.metaBytesOut = metaBytesOut;
- metadata.dict = builder.finish();
- outerInstance.fields.Add(metadata);
- }
- }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: private void bufferSkip() throws java.io.IOException
- internal void bufferSkip()
- {
- skipOut.writeVLong(statsOut.FilePointer - lastBlockStatsFP);
- skipOut.writeVLong(metaLongsOut.FilePointer - lastBlockMetaLongsFP);
- skipOut.writeVLong(metaBytesOut.FilePointer - lastBlockMetaBytesFP);
- for (int i = 0; i < longsSize; i++)
- {
- skipOut.writeVLong(lastLongs[i] - lastBlockLongs[i]);
- }
- lastBlockStatsFP = statsOut.FilePointer;
- lastBlockMetaLongsFP = metaLongsOut.FilePointer;
- lastBlockMetaBytesFP = metaBytesOut.FilePointer;
- Array.Copy(lastLongs, 0, lastBlockLongs, 0, longsSize);
- }
- }
- }
-
+ using System;
+ using System.Collections.Generic;
+
+ using IndexOptions = Index.FieldInfo.IndexOptions;
+ using FieldInfo = Index.FieldInfo;
+ using FieldInfos = Index.FieldInfos;
+ using IndexFileNames = Index.IndexFileNames;
+ using SegmentWriteState = Index.SegmentWriteState;
+ using DataOutput = Store.DataOutput;
+ using IndexOutput = Store.IndexOutput;
+ using RAMOutputStream = Store.RAMOutputStream;
+ using BytesRef = Util.BytesRef;
+ using IOUtils = Util.IOUtils;
+ using IntsRef = Util.IntsRef;
+ using Builder = Util.Fst.Builder<long>;
+ using FST = FST;
+ using PositiveIntOutputs = Util.Fst.PositiveIntOutputs;
+ using Util = Util.Fst.Util;
+
+ /// <summary>
+ /// FST-based term dict, using ord as FST output.
+ ///
+ /// The FST holds the mapping between <term, ord>, and
+ /// term's metadata is delta encoded into a single byte block.
+ ///
+ /// Typically the byte block consists of four parts:
+ /// 1. term statistics: docFreq, totalTermFreq;
+ /// 2. monotonic long[], e.g. the pointer to the postings list for that term;
+ /// 3. generic byte[], e.g. other information customized by postings base.
+ /// 4. single-level skip list to speed up metadata decoding by ord.
+ ///
+ /// <para>
+ /// Files:
+ /// <ul>
+ /// <li><tt>.tix</tt>: <a href="#Termindex">Term Index</a></li>
+ /// <li><tt>.tbk</tt>: <a href="#Termblock">Term Block</a></li>
+ /// </ul>
+ /// </para>
+ ///
+ /// <a name="Termindex" id="Termindex"></a>
+ /// <h3>Term Index</h3>
+ /// <para>
+ /// The .tix contains a list of FSTs, one for each field.
+ /// The FST maps a term to its corresponding order in current field.
+ /// </para>
+ ///
+ /// <ul>
+ /// <li>TermIndex(.tix) --> Header, TermFST<sup>NumFields</sup>, Footer</li>
+ /// <li>TermFST --> <seealso cref="FST"/></li>
+ /// <li>Header --> <seealso cref="CodecUtil#writeHeader CodecHeader"/></li>
+ /// <li>Footer --> <seealso cref="CodecUtil#writeFooter CodecFooter"/></li>
+ /// </ul>
+ ///
+ /// <para>Notes:</para>
+ /// <ul>
+ /// <li>
+ /// Since terms are already sorted before writing to <a href="#Termblock">Term Block</a>,
+ /// their ords can directly used to seek term metadata from term block.
+ /// </li>
+ /// </ul>
+ ///
+ /// <a name="Termblock" id="Termblock"></a>
+ /// <h3>Term Block</h3>
+ /// <para>
+ /// The .tbk contains all the statistics and metadata for terms, along with field summary (e.g.
+ /// per-field data like number of documents in current field). For each field, there are four blocks:
+ /// <ul>
+ /// <li>statistics bytes block: contains term statistics; </li>
+ /// <li>metadata longs block: delta-encodes monotonic part of metadata; </li>
+ /// <li>metadata bytes block: encodes other parts of metadata; </li>
+ /// <li>skip block: contains skip data, to speed up metadata seeking and decoding</li>
+ /// </ul>
+ /// </para>
+ ///
+ /// <para>File Format:</para>
+ /// <ul>
+ /// <li>TermBlock(.tbk) --> Header, <i>PostingsHeader</i>, FieldSummary, DirOffset</li>
+ /// <li>FieldSummary --> NumFields, <FieldNumber, NumTerms, SumTotalTermFreq?, SumDocFreq,
+ /// DocCount, LongsSize, DataBlock > <sup>NumFields</sup>, Footer</li>
+ ///
+ /// <li>DataBlock --> StatsBlockLength, MetaLongsBlockLength, MetaBytesBlockLength,
+ /// SkipBlock, StatsBlock, MetaLongsBlock, MetaBytesBlock </li>
+ /// <li>SkipBlock --> < StatsFPDelta, MetaLongsSkipFPDelta, MetaBytesSkipFPDelta,
+ /// MetaLongsSkipDelta<sup>LongsSize</sup> ><sup>NumTerms</sup></li>
+ /// <li>StatsBlock --> < DocFreq[Same?], (TotalTermFreq-DocFreq) ? > <sup>NumTerms</sup></li>
+ /// <li>MetaLongsBlock --> < LongDelta<sup>LongsSize</sup>, BytesSize > <sup>NumTerms</sup></li>
+ /// <li>MetaBytesBlock --> Byte <sup>MetaBytesBlockLength</sup></li>
+ /// <li>Header --> <seealso cref="CodecUtil#writeHeader CodecHeader"/></li>
+ /// <li>DirOffset --> <seealso cref="DataOutput#writeLong Uint64"/></li>
+ /// <li>NumFields, FieldNumber, DocCount, DocFreq, LongsSize,
+ /// FieldNumber, DocCount --> <seealso cref="DataOutput#writeVInt VInt"/></li>
+ /// <li>NumTerms, SumTotalTermFreq, SumDocFreq, StatsBlockLength, MetaLongsBlockLength, MetaBytesBlockLength,
+ /// StatsFPDelta, MetaLongsSkipFPDelta, MetaBytesSkipFPDelta, MetaLongsSkipStart, TotalTermFreq,
+ /// LongDelta,--> <seealso cref="DataOutput#writeVLong VLong"/></li>
+ /// <li>Footer --> <seealso cref="CodecUtil#writeFooter CodecFooter"/></li>
+ /// </ul>
+ /// <para>Notes: </para>
+ /// <ul>
+ /// <li>
+ /// The format of PostingsHeader and MetaBytes are customized by the specific postings implementation:
+ /// they contain arbitrary per-file data (such as parameters or versioning information), and per-term data
+ /// (non-monotonic ones like pulsed postings data).
+ /// </li>
+ /// <li>
+ /// During initialization the reader will load all the blocks into memory. SkipBlock will be decoded, so that during seek
+ /// term dict can lookup file pointers directly. StatsFPDelta, MetaLongsSkipFPDelta, etc. are file offset
+ /// for every SkipInterval's term. MetaLongsSkipDelta is the difference from previous one, which indicates
+ /// the value of preceding metadata longs for every SkipInterval's term.
+ /// </li>
+ /// <li>
+ /// DocFreq is the count of documents which contain the term. TotalTermFreq is the total number of occurrences of the term.
+ /// Usually these two values are the same for long tail terms, therefore one bit is stolen from DocFreq to check this case,
+ /// so that encoding of TotalTermFreq may be omitted.
+ /// </li>
+ /// </ul>
+ ///
+ /// @lucene.experimental
+ /// </summary>
+
+ public class FSTOrdTermsWriter : FieldsConsumer
+ {
+ internal const string TERMS_INDEX_EXTENSION = "tix";
+ internal const string TERMS_BLOCK_EXTENSION = "tbk";
+ internal const string TERMS_CODEC_NAME = "FST_ORD_TERMS_DICT";
+ public const int TERMS_VERSION_START = 0;
+ public const int TERMS_VERSION_CHECKSUM = 1;
+ public const int TERMS_VERSION_CURRENT = TERMS_VERSION_CHECKSUM;
+ public const int SKIP_INTERVAL = 8;
+
+ internal readonly PostingsWriterBase postingsWriter;
+ internal readonly FieldInfos fieldInfos;
+ private readonly IList<FieldMetaData> _fields = new List<FieldMetaData>();
+ internal IndexOutput blockOut = null;
+ internal IndexOutput indexOut = null;
+
+        /// <summary>
+        /// Creates the terms index (.tix) and terms block (.tbk) outputs for the segment
+        /// described by <paramref name="state"/>, writes the codec header to each, and
+        /// initializes <paramref name="postingsWriter"/> on the block output.
+        /// </summary>
+        /// <param name="state">Supplies the directory, IO context, segment name/suffix and field infos.</param>
+        /// <param name="postingsWriter">Postings writer that shares the block output with this writer.</param>
+        public FSTOrdTermsWriter(SegmentWriteState state, PostingsWriterBase postingsWriter)
+        {
+            this.postingsWriter = postingsWriter;
+            fieldInfos = state.FieldInfos;
+
+            var indexFileName = IndexFileNames.SegmentFileName(
+                state.SegmentInfo.Name, state.SegmentSuffix, TERMS_INDEX_EXTENSION);
+            var blockFileName = IndexFileNames.SegmentFileName(
+                state.SegmentInfo.Name, state.SegmentSuffix, TERMS_BLOCK_EXTENSION);
+
+            var initialized = false;
+            try
+            {
+                indexOut = state.Directory.CreateOutput(indexFileName, state.Context);
+                blockOut = state.Directory.CreateOutput(blockFileName, state.Context);
+                WriteHeader(indexOut);
+                WriteHeader(blockOut);
+                postingsWriter.Init(blockOut);
+                initialized = true;
+            }
+            finally
+            {
+                // Any step above may have thrown; hand whatever outputs exist to IOUtils for cleanup.
+                if (!initialized)
+                {
+                    IOUtils.CloseWhileHandlingException(indexOut, blockOut);
+                }
+            }
+        }
+
+        /// <summary>
+        /// Returns a new <see cref="TermsWriter"/> bound to this writer for <paramref name="field"/>.
+        /// </summary>
+        public override TermsConsumer AddField(FieldInfo field)
+        {
+            return new TermsWriter(this, field);
+        }
+
+        /// <summary>
+        /// Writes the field summary (field count followed by one record per field), the
+        /// offset at which that summary starts, and the codec footers of both files.
+        /// An <see cref="IOException"/> raised while writing is captured and handed,
+        /// together with both outputs and the postings writer, to
+        /// <c>IOUtils.CloseWhileHandlingException</c>. Does nothing when the block output is null,
+        /// which is the state this method leaves behind.
+        /// </summary>
+        public override void Dispose()
+        {
+            if (blockOut != null)
+            {
+                IOException failure = null;
+                try
+                {
+                    var summaryStart = blockOut.FilePointer;
+
+                    // write field summary
+                    blockOut.WriteVInt(_fields.Count);
+                    foreach (var entry in _fields)
+                    {
+                        WriteFieldRecord(entry);
+                    }
+
+                    WriteTrailer(blockOut, summaryStart);
+                    CodecUtil.WriteFooter(indexOut);
+                    CodecUtil.WriteFooter(blockOut);
+                }
+                catch (IOException e)
+                {
+                    failure = e;
+                }
+                finally
+                {
+                    IOUtils.CloseWhileHandlingException(failure, blockOut, indexOut, postingsWriter);
+                    blockOut = null;
+                }
+            }
+        }
+
+        // Writes one field's statistics, the lengths of its stats/meta-longs/meta-bytes buffers and
+        // the four buffers themselves to blockOut, then saves the field's FST to indexOut.
+        private void WriteFieldRecord(FieldMetaData field)
+        {
+            var hasTotalTermFreq = field.FieldInfo.FieldIndexOptions != IndexOptions.DOCS_ONLY;
+
+            blockOut.WriteVInt(field.FieldInfo.Number);
+            blockOut.WriteVLong(field.NumTerms);
+            if (hasTotalTermFreq)
+            {
+                blockOut.WriteVLong(field.SumTotalTermFreq);
+            }
+            blockOut.WriteVLong(field.SumDocFreq);
+            blockOut.WriteVInt(field.DocCount);
+            blockOut.WriteVInt(field.LongsSize);
+
+            // buffer lengths (the skip buffer's length is not recorded)
+            blockOut.WriteVLong(field.StatsOut.FilePointer);
+            blockOut.WriteVLong(field.MetaLongsOut.FilePointer);
+            blockOut.WriteVLong(field.MetaBytesOut.FilePointer);
+
+            field.SkipOut.WriteTo(blockOut);
+            field.StatsOut.WriteTo(blockOut);
+            field.MetaLongsOut.WriteTo(blockOut);
+            field.MetaBytesOut.WriteTo(blockOut);
+            field.Dict.Save(indexOut);
+        }
+
+        /// <summary>
+        /// Writes the codec header (<see cref="TERMS_CODEC_NAME"/>, <see cref="TERMS_VERSION_CURRENT"/>)
+        /// to <paramref name="out"/>.
+        /// </summary>
+        private static void WriteHeader(IndexOutput @out)
+        {
+            CodecUtil.WriteHeader(@out, TERMS_CODEC_NAME, TERMS_VERSION_CURRENT);
+        }
+
+        /// <summary>
+        /// Writes <paramref name="dirStart"/> to <paramref name="output"/> with <c>WriteLong</c>.
+        /// The caller in this class passes the file pointer at which the field summary begins.
+        /// </summary>
+        private static void WriteTrailer(IndexOutput output, long dirStart)
+        {
+            output.WriteLong(dirStart);
+        }
+
+        /// <summary>
+        /// Holds what the writer keeps for one finished field until <c>Dispose</c> writes it out:
+        /// the field's statistics, its term FST and four in-memory buffers.
+        /// </summary>
+        private class FieldMetaData
+        {
+            // statistics written to the field summary
+            public FieldInfo FieldInfo { get; set; }
+            public long NumTerms { get; set; }
+            public long SumTotalTermFreq { get; set; }
+            public long SumDocFreq { get; set; }
+            public int DocCount { get; set; }
+            public int LongsSize { get; set; }
+
+            // term FST, saved to the index output
+            public FST<long> Dict { get; set; }
+
+            // TODO: block encode each part
+
+            /// <summary>vint encode next skip point (fully decoded when reading)</summary>
+            public RAMOutputStream SkipOut { get; set; }
+
+            /// <summary>vint encode df, (ttf-df)</summary>
+            public RAMOutputStream StatsOut { get; set; }
+
+            /// <summary>vint encode monotonic long[] and length for corresponding byte[]</summary>
+            public RAMOutputStream MetaLongsOut { get; set; }
+
+            /// <summary>generic byte[]</summary>
+            public RAMOutputStream MetaBytesOut { get; set; }
+        }
+
+        /// <summary>
+        /// Terms consumer for a single field. Each finished term is added to the FST with the
+        /// current term count as its output, while its statistics and postings metadata are
+        /// appended to in-memory buffers. <see cref="Finish"/> hands the buffers and the built
+        /// FST to the enclosing writer.
+        /// </summary>
+        internal sealed class TermsWriter : TermsConsumer
+        {
+            private readonly FSTOrdTermsWriter _outerInstance;
+            private readonly FieldInfo _fieldInfo;
+            private readonly int _longsSize;
+
+            private readonly PositiveIntOutputs _outputs;
+            private readonly Builder<long> _builder;
+            private readonly IntsRef _scratchTerm = new IntsRef();
+
+            // per-field buffers
+            private readonly RAMOutputStream _statsOut = new RAMOutputStream();
+            private readonly RAMOutputStream _metaLongsOut = new RAMOutputStream();
+            private readonly RAMOutputStream _metaBytesOut = new RAMOutputStream();
+            private readonly RAMOutputStream _skipOut = new RAMOutputStream();
+
+            // number of terms finished so far; also the FST output of the next term
+            private long _numTerms;
+
+            // buffer positions and metadata longs as of the last skip entry
+            private long _lastBlockStatsFp;
+            private long _lastBlockMetaLongsFp;
+            private long _lastBlockMetaBytesFp;
+            private readonly long[] _lastBlockLongs;
+
+            // metadata longs and meta-bytes position as of the last finished term
+            private readonly long[] _lastLongs;
+            private long _lastMetaBytesFp;
+
+            /// <summary>
+            /// Registers <paramref name="fieldInfo"/> with the postings writer (which reports how
+            /// many metadata longs each term carries) and creates an empty FST builder.
+            /// </summary>
+            internal TermsWriter(FSTOrdTermsWriter outerInstance, FieldInfo fieldInfo)
+            {
+                _outerInstance = outerInstance;
+                _fieldInfo = fieldInfo;
+
+                // counters and file-pointer trackers keep their default value of 0
+                _longsSize = outerInstance.postingsWriter.SetField(fieldInfo);
+                _outputs = PositiveIntOutputs.Singleton;
+                _builder = new Builder<long>(FST.INPUT_TYPE.BYTE1, _outputs);
+
+                _lastBlockLongs = new long[_longsSize];
+                _lastLongs = new long[_longsSize];
+            }
+
+            /// <summary>Returns <c>BytesRef.UTF8SortedAsUnicodeComparer</c>.</summary>
+            public override IComparer<BytesRef> Comparator
+            {
+                get { return BytesRef.UTF8SortedAsUnicodeComparer; }
+            }
+
+            /// <summary>
+            /// Notifies the postings writer that a term starts and returns it as the postings consumer.
+            /// </summary>
+            public override PostingsConsumer StartTerm(BytesRef text)
+            {
+                var postings = _outerInstance.postingsWriter;
+                postings.StartTerm();
+                return postings;
+            }
+
+            /// <summary>
+            /// Records one finished term: buffers a skip entry every <c>SKIP_INTERVAL</c> terms, writes the
+            /// term's statistics, lets the postings writer encode its metadata, delta-encodes that
+            /// metadata against the previous term, and adds the term to the FST.
+            /// </summary>
+            public override void FinishTerm(BytesRef text, TermStats stats)
+            {
+                var postings = _outerInstance.postingsWriter;
+
+                if (_numTerms > 0 && _numTerms % SKIP_INTERVAL == 0)
+                {
+                    BufferSkip();
+                }
+
+                // Stats: when TotalTermFreq is positive, the low bit of the DocFreq vint says
+                // whether TotalTermFreq equals DocFreq (1) or the difference follows (0).
+                if (stats.TotalTermFreq <= 0)
+                {
+                    _statsOut.WriteVInt(stats.DocFreq);
+                }
+                else
+                {
+                    long extraFreq = stats.TotalTermFreq - stats.DocFreq;
+                    if (extraFreq != 0)
+                    {
+                        _statsOut.WriteVInt(stats.DocFreq << 1);
+                        _statsOut.WriteVLong(extraFreq);
+                    }
+                    else
+                    {
+                        _statsOut.WriteVInt((stats.DocFreq << 1) | 1);
+                    }
+                }
+
+                // write term meta data
+                var termState = postings.NewTermState();
+                termState.DocFreq = stats.DocFreq;
+                termState.TotalTermFreq = stats.TotalTermFreq;
+                postings.FinishTerm(termState);
+
+                var metaLongs = new long[_longsSize];
+                postings.EncodeTerm(metaLongs, _metaBytesOut, _fieldInfo, termState, true);
+
+                for (var slot = 0; slot < _longsSize; slot++)
+                {
+                    var current = metaLongs[slot];
+                    _metaLongsOut.WriteVLong(current - _lastLongs[slot]);
+                    _lastLongs[slot] = current;
+                }
+
+                // number of meta bytes this term added
+                var metaBytesFp = _metaBytesOut.FilePointer;
+                _metaLongsOut.WriteVLong(metaBytesFp - _lastMetaBytesFp);
+
+                _builder.Add(Util.ToIntsRef(text, _scratchTerm), _numTerms);
+                _numTerms++;
+
+                _lastMetaBytesFp = metaBytesFp;
+            }
+
+            /// <summary>
+            /// Builds the FST and registers the field with the enclosing writer.
+            /// Fields for which no term was finished are not registered.
+            /// </summary>
+            public override void Finish(long sumTotalTermFreq, long sumDocFreq, int docCount)
+            {
+                if (_numTerms > 0)
+                {
+                    var dict = _builder.Finish();
+                    _outerInstance._fields.Add(new FieldMetaData
+                    {
+                        FieldInfo = _fieldInfo,
+                        NumTerms = _numTerms,
+                        SumTotalTermFreq = sumTotalTermFreq,
+                        SumDocFreq = sumDocFreq,
+                        DocCount = docCount,
+                        LongsSize = _longsSize,
+                        Dict = dict,
+                        SkipOut = _skipOut,
+                        StatsOut = _statsOut,
+                        MetaLongsOut = _metaLongsOut,
+                        MetaBytesOut = _metaBytesOut
+                    });
+                }
+            }
+
+            /// <summary>
+            /// Appends a skip entry: how far each of the stats, meta-longs and meta-bytes buffers has
+            /// grown, and how much each metadata long has changed, since the previous skip entry.
+            /// The current positions and longs then become the new baseline.
+            /// </summary>
+            internal void BufferSkip()
+            {
+                var statsFp = _statsOut.FilePointer;
+                var metaLongsFp = _metaLongsOut.FilePointer;
+                var metaBytesFp = _metaBytesOut.FilePointer;
+
+                _skipOut.WriteVLong(statsFp - _lastBlockStatsFp);
+                _skipOut.WriteVLong(metaLongsFp - _lastBlockMetaLongsFp);
+                _skipOut.WriteVLong(metaBytesFp - _lastBlockMetaBytesFp);
+                for (var slot = 0; slot < _longsSize; slot++)
+                {
+                    _skipOut.WriteVLong(_lastLongs[slot] - _lastBlockLongs[slot]);
+                }
+
+                _lastBlockStatsFp = statsFp;
+                _lastBlockMetaLongsFp = metaLongsFp;
+                _lastBlockMetaBytesFp = metaBytesFp;
+                Array.Copy(_lastLongs, 0, _lastBlockLongs, 0, _longsSize);
+            }
+        }
+ }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0cc0e7ec/src/Lucene.Net.Codecs/Memory/FSTPostingsFormat.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Codecs/Memory/FSTPostingsFormat.cs b/src/Lucene.Net.Codecs/Memory/FSTPostingsFormat.cs
index f41001b..432fcdd 100644
--- a/src/Lucene.Net.Codecs/Memory/FSTPostingsFormat.cs
+++ b/src/Lucene.Net.Codecs/Memory/FSTPostingsFormat.cs
@@ -1,88 +1,82 @@
-namespace org.apache.lucene.codecs.memory
-{
-
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
- using Lucene41PostingsWriter = org.apache.lucene.codecs.lucene41.Lucene41PostingsWriter;
- using Lucene41PostingsReader = org.apache.lucene.codecs.lucene41.Lucene41PostingsReader;
- using IndexOptions = org.apache.lucene.index.FieldInfo.IndexOptions;
- using SegmentReadState = org.apache.lucene.index.SegmentReadState;
- using SegmentWriteState = org.apache.lucene.index.SegmentWriteState;
- using IOUtils = org.apache.lucene.util.IOUtils;
+namespace Lucene.Net.Codecs.Memory
+{
- /// <summary>
- /// FST term dict + Lucene41PBF
- /// </summary>
+ using Lucene41PostingsWriter = Lucene41.Lucene41PostingsWriter;
+ using Lucene41PostingsReader = Lucene41.Lucene41PostingsReader;
+ using SegmentReadState = Index.SegmentReadState;
+ using SegmentWriteState = Index.SegmentWriteState;
+ using IOUtils = Util.IOUtils;
- public sealed class FSTPostingsFormat : PostingsFormat
- {
- public FSTPostingsFormat() : base("FST41")
- {
- }
+ /// <summary>
+ /// FST term dict + Lucene41PBF
+ /// </summary>
- public override string ToString()
- {
- return Name;
- }
+ public sealed class FSTPostingsFormat : PostingsFormat
+ {
+        /// <summary>
+        /// Passes the format name "FST41" to the base <see cref="PostingsFormat"/> constructor.
+        /// </summary>
+        public FSTPostingsFormat()
+            : base("FST41")
+        {
+        }
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public org.apache.lucene.codecs.FieldsConsumer fieldsConsumer(org.apache.lucene.index.SegmentWriteState state) throws java.io.IOException
- public override FieldsConsumer fieldsConsumer(SegmentWriteState state)
- {
- PostingsWriterBase postingsWriter = new Lucene41PostingsWriter(state);
+        /// <summary>Returns the format's <c>Name</c>.</summary>
+        public override string ToString()
+        {
+            return Name;
+        }
- bool success = false;
- try
- {
- FieldsConsumer ret = new FSTTermsWriter(state, postingsWriter);
- success = true;
- return ret;
- }
- finally
- {
- if (!success)
- {
- IOUtils.closeWhileHandlingException(postingsWriter);
- }
- }
- }
+ public override FieldsConsumer FieldsConsumer(SegmentWriteState state)
+ {
+ PostingsWriterBase postingsWriter = new Lucene41PostingsWriter(state);
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public org.apache.lucene.codecs.FieldsProducer fieldsProducer(org.apache.lucene.index.SegmentReadState state) throws java.io.IOException
- public override FieldsProducer fieldsProducer(SegmentReadState state)
- {
- PostingsReaderBase postingsReader = new Lucene41PostingsReader(state.directory, state.fieldInfos, state.segmentInfo, state.context, state.segmentSuffix);
- bool success = false;
- try
- {
- FieldsProducer ret = new FSTTermsReader(state, postingsReader);
- success = true;
- return ret;
- }
- finally
- {
- if (!success)
- {
- IOUtils.closeWhileHandlingException(postingsReader);
- }
- }
- }
- }
+ bool success = false;
+ try
+ {
+ FieldsConsumer ret = new FSTTermsWriter(state, postingsWriter);
+ success = true;
+ return ret;
+ }
+ finally
+ {
+ if (!success)
+ {
+ IOUtils.CloseWhileHandlingException(postingsWriter);
+ }
+ }
+ }
+        /// <summary>
+        /// Creates a <c>Lucene41PostingsReader</c> for the segment described by
+        /// <paramref name="state"/> and wraps it in an <c>FSTTermsReader</c>. If constructing the
+        /// terms reader throws, the postings reader is handed to
+        /// <c>IOUtils.CloseWhileHandlingException</c> before the exception propagates.
+        /// </summary>
+        public override FieldsProducer FieldsProducer(SegmentReadState state)
+        {
+            PostingsReaderBase postingsReader = new Lucene41PostingsReader(
+                state.Directory,
+                state.FieldInfos,
+                state.SegmentInfo,
+                state.Context,
+                state.SegmentSuffix);
+
+            // Stays null only when the FSTTermsReader constructor throws.
+            FieldsProducer termsReader = null;
+            try
+            {
+                termsReader = new FSTTermsReader(state, postingsReader);
+                return termsReader;
+            }
+            finally
+            {
+                if (termsReader == null)
+                {
+                    IOUtils.CloseWhileHandlingException(postingsReader);
+                }
+            }
+        }
+ }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0cc0e7ec/src/Lucene.Net.Codecs/Memory/FSTTermsWriter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Codecs/Memory/FSTTermsWriter.cs b/src/Lucene.Net.Codecs/Memory/FSTTermsWriter.cs
index 55b0631..0bd16df 100644
--- a/src/Lucene.Net.Codecs/Memory/FSTTermsWriter.cs
+++ b/src/Lucene.Net.Codecs/Memory/FSTTermsWriter.cs
@@ -269,18 +269,18 @@ namespace Lucene.Net.Codecs.Memory
var meta = new FSTTermOutputs.TermData
{
- longs = new long[_longsSize],
- bytes = null,
- docFreq = state.DocFreq = stats.DocFreq,
- totalTermFreq = state.TotalTermFreq = stats.TotalTermFreq
+ LONGS = new long[_longsSize],
+ BYTES = null,
+ DOC_FREQ = state.DocFreq = stats.DocFreq,
+ TOTAL_TERM_FREQ = state.TotalTermFreq = stats.TotalTermFreq
};
_outerInstance._postingsWriter.FinishTerm(state);
- _outerInstance._postingsWriter.EncodeTerm(meta.longs, _metaWriter, _fieldInfo, state, true);
+ _outerInstance._postingsWriter.EncodeTerm(meta.LONGS, _metaWriter, _fieldInfo, state, true);
var bytesSize = (int) _metaWriter.FilePointer;
if (bytesSize > 0)
{
- meta.bytes = new sbyte[bytesSize];
- _metaWriter.WriteTo(meta.bytes, 0);
+ meta.BYTES = new sbyte[bytesSize];
+ _metaWriter.WriteTo(meta.BYTES, 0);
_metaWriter.Reset();
}
_builder.Add(Util.ToIntsRef(text, _scratchTerm), meta);
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0cc0e7ec/src/Lucene.Net.Codecs/Memory/MemoryDocValuesConsumer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Codecs/Memory/MemoryDocValuesConsumer.cs b/src/Lucene.Net.Codecs/Memory/MemoryDocValuesConsumer.cs
index a51df00..4613d28 100644
--- a/src/Lucene.Net.Codecs/Memory/MemoryDocValuesConsumer.cs
+++ b/src/Lucene.Net.Codecs/Memory/MemoryDocValuesConsumer.cs
@@ -1,13 +1,4 @@
-using System;
-using System.Diagnostics;
-using System.Collections.Generic;
-using Lucene.Net.Codecs.Memory;
-using Lucene.Net.Index;
-
-namespace Lucene.Net.Codecs.Memory
-{
-
- /*
+ /*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -24,524 +15,472 @@ namespace Lucene.Net.Codecs.Memory
* limitations under the License.
*/
+using System;
+using System.Collections;
+using System.Diagnostics;
+using System.Collections.Generic;
+using Lucene.Net.Codecs.Memory;
+using Lucene.Net.Index;
+using Lucene.Net.Util.Fst;
+
+namespace Lucene.Net.Codecs.Memory
+{
- using FieldInfo = index.FieldInfo;
- using IndexFileNames = index.IndexFileNames;
- using SegmentWriteState = index.SegmentWriteState;
- using ByteArrayDataOutput = store.ByteArrayDataOutput;
- using IndexOutput = store.IndexOutput;
- using ArrayUtil = util.ArrayUtil;
- using BytesRef = util.BytesRef;
- using IOUtils = util.IOUtils;
- using IntsRef = util.IntsRef;
- using MathUtil = util.MathUtil;
- using Builder = util.fst.Builder;
- using INPUT_TYPE = util.fst.FST.INPUT_TYPE;
- using FST = util.fst.FST;
- using PositiveIntOutputs = util.fst.PositiveIntOutputs;
- using Util = util.fst.Util;
- using BlockPackedWriter = util.packed.BlockPackedWriter;
- using MonotonicBlockPackedWriter = util.packed.MonotonicBlockPackedWriter;
- using FormatAndBits = util.packed.PackedInts.FormatAndBits;
- using PackedInts = util.packed.PackedInts;
-
-//JAVA TO C# CONVERTER TODO TASK: This Java 'import static' statement cannot be converted to .NET:
- import static Lucene.Net.Codecs.Memory.MemoryDocValuesProducer.VERSION_CURRENT;
-//JAVA TO C# CONVERTER TODO TASK: This Java 'import static' statement cannot be converted to .NET:
- import static Lucene.Net.Codecs.Memory.MemoryDocValuesProducer.BLOCK_SIZE;
-//JAVA TO C# CONVERTER TODO TASK: This Java 'import static' statement cannot be converted to .NET:
- import static Lucene.Net.Codecs.Memory.MemoryDocValuesProducer.BYTES;
-//JAVA TO C# CONVERTER TODO TASK: This Java 'import static' statement cannot be converted to .NET:
- import static Lucene.Net.Codecs.Memory.MemoryDocValuesProducer.NUMBER;
-//JAVA TO C# CONVERTER TODO TASK: This Java 'import static' statement cannot be converted to .NET:
- import static Lucene.Net.Codecs.Memory.MemoryDocValuesProducer.FST;
-//JAVA TO C# CONVERTER TODO TASK: This Java 'import static' statement cannot be converted to .NET:
- import static Lucene.Net.Codecs.Memory.MemoryDocValuesProducer.DELTA_COMPRESSED;
-//JAVA TO C# CONVERTER TODO TASK: This Java 'import static' statement cannot be converted to .NET:
- import static Lucene.Net.Codecs.Memory.MemoryDocValuesProducer.GCD_COMPRESSED;
-//JAVA TO C# CONVERTER TODO TASK: This Java 'import static' statement cannot be converted to .NET:
- import static Lucene.Net.Codecs.Memory.MemoryDocValuesProducer.TABLE_COMPRESSED;
-//JAVA TO C# CONVERTER TODO TASK: This Java 'import static' statement cannot be converted to .NET:
- import static Lucene.Net.Codecs.Memory.MemoryDocValuesProducer.UNCOMPRESSED;
-
- /// <summary>
- /// Writer for <seealso cref="MemoryDocValuesFormat"/>
- /// </summary>
- internal class MemoryDocValuesConsumer : DocValuesConsumer
- {
- internal IndexOutput data, meta;
- internal readonly int maxDoc;
- internal readonly float acceptableOverheadRatio;
-
- internal MemoryDocValuesConsumer(SegmentWriteState state, string dataCodec, string dataExtension, string metaCodec, string metaExtension, float acceptableOverheadRatio)
- {
- this.acceptableOverheadRatio = acceptableOverheadRatio;
- maxDoc = state.segmentInfo.DocCount;
- bool success = false;
- try
- {
- string dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension);
- data = state.directory.createOutput(dataName, state.context);
- CodecUtil.writeHeader(data, dataCodec, VERSION_CURRENT);
- string metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension);
- meta = state.directory.createOutput(metaName, state.context);
- CodecUtil.writeHeader(meta, metaCodec, VERSION_CURRENT);
- success = true;
- }
- finally
- {
- if (!success)
- {
- IOUtils.closeWhileHandlingException(this);
- }
- }
- }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public void addNumericField(index.FieldInfo field, Iterable<Number> values) throws java.io.IOException
- public override void addNumericField(FieldInfo field, IEnumerable<Number> values)
- {
- addNumericField(field, values, true);
- }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: void addNumericField(index.FieldInfo field, Iterable<Number> values, boolean optimizeStorage) throws java.io.IOException
- internal virtual void addNumericField(FieldInfo field, IEnumerable<Number> values, bool optimizeStorage)
- {
- meta.writeVInt(field.number);
- meta.writeByte(NUMBER);
- meta.writeLong(data.FilePointer);
- long minValue = long.MaxValue;
- long maxValue = long.MinValue;
- long gcd = 0;
- bool missing = false;
- // TODO: more efficient?
- HashSet<long?> uniqueValues = null;
- if (optimizeStorage)
- {
- uniqueValues = new HashSet<>();
-
- long count = 0;
- foreach (Number nv in values)
- {
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final long v;
- long v;
- if (nv == null)
- {
- v = 0;
- missing = true;
- }
- else
- {
- v = (long)nv;
- }
-
- if (gcd != 1)
- {
- if (v < long.MinValue / 2 || v > long.MaxValue / 2)
- {
- // in that case v - minValue might overflow and make the GCD computation return
- // wrong results. Since these extreme values are unlikely, we just discard
- // GCD computation for them
- gcd = 1;
- } // minValue needs to be set first
- else if (count != 0)
- {
- gcd = MathUtil.gcd(gcd, v - minValue);
- }
- }
-
- minValue = Math.Min(minValue, v);
- maxValue = Math.Max(maxValue, v);
-
- if (uniqueValues != null)
- {
- if (uniqueValues.Add(v))
- {
- if (uniqueValues.Count > 256)
- {
- uniqueValues = null;
- }
- }
- }
-
- ++count;
- }
- Debug.Assert(count == maxDoc);
- }
-
- if (missing)
- {
- long start = data.FilePointer;
- writeMissingBitset(values);
- meta.writeLong(start);
- meta.writeLong(data.FilePointer - start);
- }
- else
- {
- meta.writeLong(-1L);
- }
-
- if (uniqueValues != null)
- {
- // small number of unique values
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final int bitsPerValue = util.packed.PackedInts.bitsRequired(uniqueValues.size()-1);
- int bitsPerValue = PackedInts.bitsRequired(uniqueValues.Count - 1);
- FormatAndBits formatAndBits = PackedInts.fastestFormatAndBits(maxDoc, bitsPerValue, acceptableOverheadRatio);
- if (formatAndBits.bitsPerValue == 8 && minValue >= sbyte.MinValue && maxValue <= sbyte.MaxValue)
- {
- meta.writeByte(UNCOMPRESSED); // uncompressed
- foreach (Number nv in values)
- {
- data.writeByte(nv == null ? 0 : (long)(sbyte) nv);
- }
- }
- else
- {
- meta.writeByte(TABLE_COMPRESSED); // table-compressed
- long?[] decode = uniqueValues.toArray(new long?[uniqueValues.Count]);
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final java.util.HashMap<Long,Integer> encode = new java.util.HashMap<>();
- Dictionary<long?, int?> encode = new Dictionary<long?, int?>();
- data.writeVInt(decode.Length);
- for (int i = 0; i < decode.Length; i++)
- {
- data.writeLong(decode[i]);
- encode[decode[i]] = i;
- }
-
- meta.writeVInt(PackedInts.VERSION_CURRENT);
- data.writeVInt(formatAndBits.format.Id);
- data.writeVInt(formatAndBits.bitsPerValue);
-
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final util.packed.PackedInts.Writer writer = util.packed.PackedInts.getWriterNoHeader(data, formatAndBits.format, maxDoc, formatAndBits.bitsPerValue, util.packed.PackedInts.DEFAULT_BUFFER_SIZE);
- PackedInts.Writer writer = PackedInts.getWriterNoHeader(data, formatAndBits.format, maxDoc, formatAndBits.bitsPerValue, PackedInts.DEFAULT_BUFFER_SIZE);
- foreach (Number nv in values)
- {
- writer.add(encode[nv == null ? 0 : (long)nv]);
- }
- writer.finish();
- }
- }
- else if (gcd != 0 && gcd != 1)
- {
- meta.writeByte(GCD_COMPRESSED);
- meta.writeVInt(PackedInts.VERSION_CURRENT);
- data.writeLong(minValue);
- data.writeLong(gcd);
- data.writeVInt(BLOCK_SIZE);
-
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final util.packed.BlockPackedWriter writer = new util.packed.BlockPackedWriter(data, BLOCK_SIZE);
- BlockPackedWriter writer = new BlockPackedWriter(data, BLOCK_SIZE);
- foreach (Number nv in values)
- {
- long value = nv == null ? 0 : (long)nv;
- writer.add((value - minValue) / gcd);
- }
- writer.finish();
- }
- else
- {
- meta.writeByte(DELTA_COMPRESSED); // delta-compressed
-
- meta.writeVInt(PackedInts.VERSION_CURRENT);
- data.writeVInt(BLOCK_SIZE);
-
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final util.packed.BlockPackedWriter writer = new util.packed.BlockPackedWriter(data, BLOCK_SIZE);
- BlockPackedWriter writer = new BlockPackedWriter(data, BLOCK_SIZE);
- foreach (Number nv in values)
- {
- writer.add(nv == null ? 0 : (long)nv);
- }
- writer.finish();
- }
- }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public void close() throws java.io.IOException
- public override void close()
- {
- bool success = false;
- try
- {
- if (meta != null)
- {
- meta.writeVInt(-1); // write EOF marker
- CodecUtil.writeFooter(meta); // write checksum
- }
- if (data != null)
- {
- CodecUtil.writeFooter(data);
- }
- success = true;
- }
- finally
- {
- if (success)
- {
- IOUtils.close(data, meta);
- }
- else
- {
- IOUtils.closeWhileHandlingException(data, meta);
- }
- data = meta = null;
- }
- }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public void addBinaryField(index.FieldInfo field, final Iterable<util.BytesRef> values) throws java.io.IOException
-//JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET:
- public override void addBinaryField(FieldInfo field, IEnumerable<BytesRef> values)
- {
- // write the byte[] data
- meta.writeVInt(field.number);
- meta.writeByte(BYTES);
- int minLength = int.MaxValue;
- int maxLength = int.MinValue;
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final long startFP = data.getFilePointer();
- long startFP = data.FilePointer;
- bool missing = false;
- foreach (BytesRef v in values)
- {
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final int length;
- int length;
- if (v == null)
- {
- length = 0;
- missing = true;
- }
- else
- {
- length = v.length;
- }
- if (length > MemoryDocValuesFormat.MAX_BINARY_FIELD_LENGTH)
- {
- throw new System.ArgumentException("DocValuesField \"" + field.name + "\" is too large, must be <= " + MemoryDocValuesFormat.MAX_BINARY_FIELD_LENGTH);
- }
- minLength = Math.Min(minLength, length);
- maxLength = Math.Max(maxLength, length);
- if (v != null)
- {
- data.writeBytes(v.bytes, v.offset, v.length);
- }
- }
- meta.writeLong(startFP);
- meta.writeLong(data.FilePointer - startFP);
- if (missing)
- {
- long start = data.FilePointer;
- writeMissingBitset(values);
- meta.writeLong(start);
- meta.writeLong(data.FilePointer - start);
- }
- else
- {
- meta.writeLong(-1L);
- }
- meta.writeVInt(minLength);
- meta.writeVInt(maxLength);
-
- // if minLength == maxLength, its a fixed-length byte[], we are done (the addresses are implicit)
- // otherwise, we need to record the length fields...
- if (minLength != maxLength)
- {
- meta.writeVInt(PackedInts.VERSION_CURRENT);
- meta.writeVInt(BLOCK_SIZE);
-
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final util.packed.MonotonicBlockPackedWriter writer = new util.packed.MonotonicBlockPackedWriter(data, BLOCK_SIZE);
- MonotonicBlockPackedWriter writer = new MonotonicBlockPackedWriter(data, BLOCK_SIZE);
- long addr = 0;
- foreach (BytesRef v in values)
- {
- if (v != null)
- {
- addr += v.length;
- }
- writer.add(addr);
- }
- writer.finish();
- }
- }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: private void writeFST(index.FieldInfo field, Iterable<util.BytesRef> values) throws java.io.IOException
- private void writeFST(FieldInfo field, IEnumerable<BytesRef> values)
- {
- meta.writeVInt(field.number);
- meta.writeByte(FST);
- meta.writeLong(data.FilePointer);
- PositiveIntOutputs outputs = PositiveIntOutputs.Singleton;
- Builder<long?> builder = new Builder<long?>(INPUT_TYPE.BYTE1, outputs);
- IntsRef scratch = new IntsRef();
- long ord = 0;
- foreach (BytesRef v in values)
- {
- builder.add(Util.toIntsRef(v, scratch), ord);
- ord++;
- }
- FST<long?> fst = builder.finish();
- if (fst != null)
- {
- fst.save(data);
- }
- meta.writeVLong(ord);
- }
-
- // TODO: in some cases representing missing with minValue-1 wouldn't take up additional space and so on,
- // but this is very simple, and algorithms only check this for values of 0 anyway (doesnt slow down normal decode)
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: void writeMissingBitset(Iterable<?> values) throws java.io.IOException
- internal virtual void writeMissingBitset<T1>(IEnumerable<T1> values)
- {
- long bits = 0;
- int count = 0;
- foreach (object v in values)
- {
- if (count == 64)
- {
- data.writeLong(bits);
- count = 0;
- bits = 0;
- }
- if (v != null)
- {
- bits |= 1L << (count & 0x3f);
- }
- count++;
- }
- if (count > 0)
- {
- data.writeLong(bits);
- }
- }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public void addSortedField(index.FieldInfo field, Iterable<util.BytesRef> values, Iterable<Number> docToOrd) throws java.io.IOException
- public override void addSortedField(FieldInfo field, IEnumerable<BytesRef> values, IEnumerable<Number> docToOrd)
- {
- // write the ordinals as numerics
- addNumericField(field, docToOrd, false);
-
- // write the values as FST
- writeFST(field, values);
- }
-
- // note: this might not be the most efficient... but its fairly simple
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public void addSortedSetField(index.FieldInfo field, Iterable<util.BytesRef> values, final Iterable<Number> docToOrdCount, final Iterable<Number> ords) throws java.io.IOException
-//JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET:
- public override void addSortedSetField(FieldInfo field, IEnumerable<BytesRef> values, IEnumerable<Number> docToOrdCount, IEnumerable<Number> ords)
- {
- // write the ordinals as a binary field
- addBinaryField(field, new IterableAnonymousInnerClassHelper(this, docToOrdCount, ords));
-
- // write the values as FST
- writeFST(field, values);
- }
-
- private class IterableAnonymousInnerClassHelper : IEnumerable<BytesRef>
- {
- private readonly MemoryDocValuesConsumer outerInstance;
-
- private IEnumerable<Number> docToOrdCount;
- private IEnumerable<Number> ords;
-
- public IterableAnonymousInnerClassHelper(MemoryDocValuesConsumer outerInstance, IEnumerable<Number> docToOrdCount, IEnumerable<Number> ords)
- {
- this.outerInstance = outerInstance;
- this.docToOrdCount = docToOrdCount;
- this.ords = ords;
- }
-
- public virtual IEnumerator<BytesRef> GetEnumerator()
- {
- return new SortedSetIterator(docToOrdCount.GetEnumerator(), ords.GetEnumerator());
- }
- }
-
- // per-document vint-encoded byte[]
- internal class SortedSetIterator : IEnumerator<BytesRef>
- {
- internal sbyte[] buffer = new sbyte[10];
- internal ByteArrayDataOutput @out = new ByteArrayDataOutput();
- internal BytesRef @ref = new BytesRef();
-
- internal readonly IEnumerator<Number> counts;
- internal readonly IEnumerator<Number> ords;
-
- internal SortedSetIterator(IEnumerator<Number> counts, IEnumerator<Number> ords)
- {
- this.counts = counts;
- this.ords = ords;
- }
-
- public override bool hasNext()
- {
+
+ using FieldInfo = Index.FieldInfo;
+ using IndexFileNames = Index.IndexFileNames;
+ using SegmentWriteState = Index.SegmentWriteState;
+ using ByteArrayDataOutput = Store.ByteArrayDataOutput;
+ using IndexOutput = Store.IndexOutput;
+ using ArrayUtil = Util.ArrayUtil;
+ using BytesRef = Util.BytesRef;
+ using IOUtils = Util.IOUtils;
+ using IntsRef = Util.IntsRef;
+ using MathUtil = Util.MathUtil;
+ using Builder = Util.Fst.Builder;
+ using INPUT_TYPE = Util.Fst.FST.INPUT_TYPE;
+ using FST = Util.Fst.FST;
+ using PositiveIntOutputs = Util.Fst.PositiveIntOutputs;
+ using Util = Util.Fst.Util;
+ using BlockPackedWriter = Util.Packed.BlockPackedWriter;
+ using MonotonicBlockPackedWriter = Util.Packed.MonotonicBlockPackedWriter;
+ using FormatAndBits = Util.Packed.PackedInts.FormatAndBits;
+ using PackedInts = Util.Packed.PackedInts;
+
+ /// <summary>
+ /// Writer for <seealso cref="MemoryDocValuesFormat"/>
+ /// </summary>
+ internal class MemoryDocValuesConsumer : DocValuesConsumer
+ {
+ internal IndexOutput data, meta;
+ internal readonly int maxDoc;
+ internal readonly float acceptableOverheadRatio;
+
+        /// <summary>
+        /// Creates the data and metadata outputs for the segment described by
+        /// <paramref name="state"/> and writes a codec header to each of them.
+        /// </summary>
+        /// <param name="state">Supplies the segment name, suffix, directory, IO context and document count.</param>
+        /// <param name="dataCodec">Codec name written into the data file header.</param>
+        /// <param name="dataExtension">File extension of the data file.</param>
+        /// <param name="metaCodec">Codec name written into the metadata file header.</param>
+        /// <param name="metaExtension">File extension of the metadata file.</param>
+        /// <param name="acceptableOverheadRatio">Stored and later handed to <c>PackedInts.FastestFormatAndBits</c>.</param>
+        internal MemoryDocValuesConsumer(SegmentWriteState state, string dataCodec, string dataExtension,
+            string metaCodec,
+            string metaExtension, float acceptableOverheadRatio)
+        {
+            this.acceptableOverheadRatio = acceptableOverheadRatio;
+            maxDoc = state.SegmentInfo.DocCount;
+
+            bool opened = false;
+            try
+            {
+                string dataFileName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, dataExtension);
+                data = state.Directory.CreateOutput(dataFileName, state.Context);
+                CodecUtil.WriteHeader(data, dataCodec, MemoryDocValuesProducer.VERSION_CURRENT);
+
+                string metaFileName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, metaExtension);
+                meta = state.Directory.CreateOutput(metaFileName, state.Context);
+                CodecUtil.WriteHeader(meta, metaCodec, MemoryDocValuesProducer.VERSION_CURRENT);
+
+                opened = true;
+            }
+            finally
+            {
+                // Any failure above leaves the consumer half-built: release whatever was
+                // opened, without letting a secondary close error mask the original one.
+                if (!opened)
+                {
+                    IOUtils.CloseWhileHandlingException(this);
+                }
+            }
+        }
+
+ /// <summary>
+ /// Writes a numeric field by delegating to the three-argument overload with
+ /// <c>optimizeStorage</c> set to <c>true</c>.
+ /// </summary>
+ /// <param name="field">Field whose values are written.</param>
+ /// <param name="values">The values to write; passed through unchanged.</param>
+ public override void AddNumericField(FieldInfo field, IEnumerable<long> values)
+ {
+ AddNumericField(field, values, true);
+ }
+
+        /// <summary>
+        /// Writes one numeric field: a metadata entry (field number, the <c>NUMBER</c> type byte and
+        /// the current data file pointer) followed by the values in one of four encodings.
+        /// </summary>
+        /// <remarks>
+        /// <para>
+        /// <paramref name="values"/> is enumerated more than once (a statistics pass when
+        /// <paramref name="optimizeStorage"/> is set, then the write pass), so it must be re-enumerable.
+        /// </para>
+        /// <para>
+        /// Encoding selection, in order: at most 256 distinct values use either one raw byte per value
+        /// (when the packed format needs 8 bits and every value fits in a signed byte) or a lookup table
+        /// plus packed ordinals; otherwise a GCD other than 0 or 1 selects GCD compression; everything
+        /// else is delta-compressed. When <paramref name="optimizeStorage"/> is <c>false</c> no
+        /// statistics are gathered and the values are always delta-compressed.
+        /// </para>
+        /// </remarks>
+        /// <param name="field">Field whose number is recorded in the metadata.</param>
+        /// <param name="values">Values to write.</param>
+        /// <param name="optimizeStorage">Whether to scan the values first to pick a compact encoding.</param>
+        internal virtual void AddNumericField(FieldInfo field, IEnumerable<long> values, bool optimizeStorage)
+        {
+            meta.WriteVInt(field.Number);
+            meta.WriteByte(MemoryDocValuesProducer.NUMBER);
+            meta.WriteLong(data.FilePointer);
+            long minValue = long.MaxValue;
+            long maxValue = long.MinValue;
+            long gcd = 0;
+            // TODO: more efficient?
+            HashSet<long> uniqueValues = null;
+            if (optimizeStorage)
+            {
+                uniqueValues = new HashSet<long>();
+
+                long count = 0;
+                foreach (long v in values)
+                {
+                    if (gcd != 1)
+                    {
+                        if (v < long.MinValue/2 || v > long.MaxValue/2)
+                        {
+                            // in that case v - minValue might overflow and make the GCD computation return
+                            // wrong results. Since these extreme values are unlikely, we just discard
+                            // GCD computation for them
+                            gcd = 1;
+                        } // minValue needs to be set first
+                        else if (count != 0)
+                        {
+                            gcd = MathUtil.Gcd(gcd, v - minValue);
+                        }
+                    }
+
+                    minValue = Math.Min(minValue, v);
+                    maxValue = Math.Max(maxValue, v);
+
+                    // Stop tracking distinct values once there are too many for the table encoding.
+                    if (uniqueValues != null && uniqueValues.Add(v) && uniqueValues.Count > 256)
+                    {
+                        uniqueValues = null;
+                    }
+
+                    ++count;
+                }
+                Debug.Assert(count == maxDoc);
+            }
+
+            // The values are non-nullable longs, so no document can be flagged as missing here:
+            // write -1 in place of a missing-bitset start pointer (the same marker AddBinaryField
+            // writes when it saw no null value).
+            meta.WriteLong(-1L);
+
+            if (uniqueValues != null)
+            {
+                // small number of unique values
+
+                int bitsPerValue = PackedInts.BitsRequired(uniqueValues.Count - 1);
+                FormatAndBits formatAndBits = PackedInts.FastestFormatAndBits(maxDoc, bitsPerValue,
+                    acceptableOverheadRatio);
+                if (formatAndBits.bitsPerValue == 8 && minValue >= sbyte.MinValue && maxValue <= sbyte.MaxValue)
+                {
+                    meta.WriteByte(MemoryDocValuesProducer.UNCOMPRESSED); // uncompressed
+                    foreach (long v in values)
+                    {
+                        // Every value fits in a signed byte (checked above); keep its low 8 bits.
+                        // NOTE(review): assumes IndexOutput.WriteByte takes a byte - confirm.
+                        data.WriteByte(unchecked((byte) v));
+                    }
+                }
+                else
+                {
+                    meta.WriteByte(MemoryDocValuesProducer.TABLE_COMPRESSED); // table-compressed
+
+                    // decode: ordinal -> value (written to the data file); encode: value -> ordinal.
+                    var decode = new long[uniqueValues.Count];
+                    uniqueValues.CopyTo(decode);
+
+                    var encode = new Dictionary<long, int>(decode.Length);
+                    data.WriteVInt(decode.Length);
+                    for (int i = 0; i < decode.Length; i++)
+                    {
+                        data.WriteLong(decode[i]);
+                        encode[decode[i]] = i;
+                    }
+
+                    meta.WriteVInt(PackedInts.VERSION_CURRENT);
+                    data.WriteVInt(formatAndBits.format.Id);
+                    data.WriteVInt(formatAndBits.bitsPerValue);
+
+                    PackedInts.Writer writer = PackedInts.GetWriterNoHeader(data, formatAndBits.format, maxDoc,
+                        formatAndBits.bitsPerValue, PackedInts.DEFAULT_BUFFER_SIZE);
+                    foreach (long v in values)
+                    {
+                        writer.Add(encode[v]);
+                    }
+                    writer.Finish();
+                }
+            }
+            else if (gcd != 0 && gcd != 1)
+            {
+                meta.WriteByte(MemoryDocValuesProducer.GCD_COMPRESSED);
+                meta.WriteVInt(PackedInts.VERSION_CURRENT);
+                data.WriteLong(minValue);
+                data.WriteLong(gcd);
+                data.WriteVInt(MemoryDocValuesProducer.BLOCK_SIZE);
+
+                var writer = new BlockPackedWriter(data, MemoryDocValuesProducer.BLOCK_SIZE);
+                foreach (long v in values)
+                {
+                    writer.Add((v - minValue)/gcd);
+                }
+                writer.Finish();
+            }
+            else
+            {
+                meta.WriteByte(MemoryDocValuesProducer.DELTA_COMPRESSED); // delta-compressed
+
+                meta.WriteVInt(PackedInts.VERSION_CURRENT);
+                data.WriteVInt(MemoryDocValuesProducer.BLOCK_SIZE);
+
+                var writer = new BlockPackedWriter(data, MemoryDocValuesProducer.BLOCK_SIZE);
+                foreach (long v in values)
+                {
+                    writer.Add(v);
+                }
+                writer.Finish();
+            }
+        }
+
+        /// <summary>
+        /// Finishes both files (EOF marker and footer on the metadata output, footer on the data
+        /// output) and closes them.
+        /// </summary>
+        /// <param name="disposing">
+        /// <c>true</c> when called from an explicit dispose; only then are the outputs finished and
+        /// closed. With <c>false</c> nothing is done.
+        /// </param>
+        protected override void Dispose(bool disposing)
+        {
+            // The guard used to be inverted ("if (disposing) return;"), which skipped all of the
+            // work below on a normal dispose and left both files unfinished and open.
+            if (!disposing)
+            {
+                return;
+            }
+
+            var success = false;
+            try
+            {
+                if (meta != null)
+                {
+                    meta.WriteVInt(-1); // write EOF marker
+                    CodecUtil.WriteFooter(meta); // write checksum
+                }
+                if (data != null)
+                {
+                    CodecUtil.WriteFooter(data);
+                }
+                success = true;
+            }
+            finally
+            {
+                if (success)
+                {
+                    IOUtils.Close(data, meta);
+                }
+                else
+                {
+                    // Already failing: close without letting a close error replace the original one.
+                    IOUtils.CloseWhileHandlingException(data, meta);
+                }
+                // Null the fields so a repeated dispose finds nothing left to finish.
+                data = meta = null;
+            }
+        }
+
+        /// <summary>
+        /// Writes one binary field: the raw bytes of every non-null value back to back, an optional
+        /// missing-values bitset, and - when the values differ in length - their cumulative end offsets.
+        /// </summary>
+        /// <remarks>
+        /// <paramref name="values"/> is enumerated up to three times, so it must be re-enumerable.
+        /// A <c>null</c> element counts as length 0 and marks the field as having missing values.
+        /// </remarks>
+        /// <param name="field">Field whose number and name are used for the metadata and error message.</param>
+        /// <param name="values">Values to write; elements may be <c>null</c>.</param>
+        /// <exception cref="ArgumentException">
+        /// A value is longer than <c>MemoryDocValuesFormat.MAX_BINARY_FIELD_LENGTH</c>.
+        /// </exception>
+        public override void AddBinaryField(FieldInfo field, IEnumerable<BytesRef> values)
+        {
+            meta.WriteVInt(field.Number);
+            meta.WriteByte(MemoryDocValuesProducer.BYTES);
+
+            int shortest = int.MaxValue;
+            int longest = int.MinValue;
+            long bytesStart = data.FilePointer;
+            bool sawNull = false;
+
+            // Pass 1: copy the bytes and collect the length range.
+            foreach (BytesRef value in values)
+            {
+                int len = 0;
+                if (value == null)
+                {
+                    sawNull = true;
+                }
+                else
+                {
+                    len = value.Length;
+                }
+
+                if (len > MemoryDocValuesFormat.MAX_BINARY_FIELD_LENGTH)
+                {
+                    throw new ArgumentException("DocValuesField \"" + field.Name + "\" is too large, must be <= " +
+                                                MemoryDocValuesFormat.MAX_BINARY_FIELD_LENGTH);
+                }
+
+                if (len < shortest)
+                {
+                    shortest = len;
+                }
+                if (len > longest)
+                {
+                    longest = len;
+                }
+
+                if (value != null)
+                {
+                    data.WriteBytes(value.Bytes, value.Offset, value.Length);
+                }
+            }
+
+            meta.WriteLong(bytesStart);
+            meta.WriteLong(data.FilePointer - bytesStart);
+
+            // Pass 2 (only when a null was seen): bitset start and size, otherwise -1.
+            if (!sawNull)
+            {
+                meta.WriteLong(-1L);
+            }
+            else
+            {
+                long bitsetStart = data.FilePointer;
+                WriteMissingBitset(values);
+                meta.WriteLong(bitsetStart);
+                meta.WriteLong(data.FilePointer - bitsetStart);
+            }
+
+            meta.WriteVInt(shortest);
+            meta.WriteVInt(longest);
+
+            // Equal bounds mean fixed-length values: the addresses are implicit and nothing more is written.
+            if (shortest == longest)
+            {
+                return;
+            }
+
+            // Pass 3: variable-length values need their end offsets recorded.
+            meta.WriteVInt(PackedInts.VERSION_CURRENT);
+            meta.WriteVInt(MemoryDocValuesProducer.BLOCK_SIZE);
+
+            var addresses = new MonotonicBlockPackedWriter(data, MemoryDocValuesProducer.BLOCK_SIZE);
+            long runningEnd = 0;
+            foreach (BytesRef value in values)
+            {
+                if (value != null)
+                {
+                    runningEnd += value.Length;
+                }
+                addresses.Add(runningEnd);
+            }
+            addresses.Finish();
+        }
+
+        /// <summary>
+        /// Writes the terms of a sorted or sorted-set field as an FST mapping each term to its
+        /// ordinal (its position in <paramref name="values"/>), followed by the term count in the
+        /// metadata.
+        /// </summary>
+        /// <param name="field">Field whose number is recorded in the metadata.</param>
+        /// <param name="values">Terms to add, in enumeration order; the n-th term gets ordinal n.</param>
+        private void WriteFST(FieldInfo field, IEnumerable<BytesRef> values)
+        {
+            meta.WriteVInt(field.Number);
+            // A bare "FST" resolves to the Util.Fst.FST type alias in this file, not to a byte;
+            // qualify the entry-type constant the same way NUMBER and BYTES are qualified.
+            // NOTE(review): assumes MemoryDocValuesProducer declares FST next to NUMBER/BYTES - confirm.
+            meta.WriteByte(MemoryDocValuesProducer.FST);
+            meta.WriteLong(data.FilePointer);
+            PositiveIntOutputs outputs = PositiveIntOutputs.Singleton;
+            var builder = new Builder<long?>(INPUT_TYPE.BYTE1, outputs);
+            var scratch = new IntsRef();
+            long ord = 0;
+            foreach (BytesRef v in values)
+            {
+                builder.Add(Util.ToIntsRef(v, scratch), ord);
+                ord++;
+            }
+            FST<long?> fst = builder.Finish();
+            // Finish() may return null (checked here); in that case no FST bytes are written
+            // and only the count below records the outcome.
+            if (fst != null)
+            {
+                fst.Save(data);
+            }
+            meta.WriteVLong(ord);
+        }
+
+ // TODO: in some cases representing missing values with minValue-1 wouldn't take up additional space, and so on,
+ // but this is very simple, and algorithms only check this for values of 0 anyway (it doesn't slow down normal decode)
+
+        /// <summary>
+        /// Writes one bit per element of <paramref name="values"/> to the data output, packed 64 to a
+        /// long with the first element in the lowest bit; a bit is set when its element is not null.
+        /// </summary>
+        /// <typeparam name="T1">Element type; elements are only tested against <c>null</c>.</typeparam>
+        /// <param name="values">Elements to test. An empty sequence writes nothing.</param>
+        internal virtual void WriteMissingBitset<T1>(IEnumerable<T1> values)
+        {
+            long word = 0;
+            int filled = 0;
+            foreach (object item in values)
+            {
+                if (item != null)
+                {
+                    word |= 1L << filled;
+                }
+                filled++;
+
+                // A complete word is flushed as soon as its 64th bit has been decided.
+                if (filled == 64)
+                {
+                    data.WriteLong(word);
+                    word = 0;
+                    filled = 0;
+                }
+            }
+
+            // Flush the trailing, partially filled word (its unused high bits stay 0).
+            if (filled != 0)
+            {
+                data.WriteLong(word);
+            }
+        }
+
+ /// <summary>
+ /// Writes a sorted field as two entries: the per-document ordinals as a numeric field
+ /// (written with <c>optimizeStorage</c> set to <c>false</c>), then the values as an FST.
+ /// </summary>
+ /// <param name="field">Field being written; both entries carry its number.</param>
+ /// <param name="values">The values, passed to <c>WriteFST</c>.</param>
+ /// <param name="docToOrd">The ordinals, passed to <c>AddNumericField</c>.</param>
+ public override void AddSortedField(FieldInfo field, IEnumerable<BytesRef> values, IEnumerable<long> docToOrd)
+ {
+ // write the ordinals as numerics
+ AddNumericField(field, docToOrd, false);
+
+ // write the values as FST
+ WriteFST(field, values);
+ }
+
+ // note: this might not be the most efficient... but it's fairly simple
+ /// <summary>
+ /// Writes a sorted-set field as two entries: the ordinals as a binary field whose values come
+ /// from a <c>SortedSetIterator</c> over <paramref name="docToOrdCount"/> and
+ /// <paramref name="ords"/>, then the values as an FST.
+ /// </summary>
+ /// <param name="field">Field being written; both entries carry its number.</param>
+ /// <param name="values">The values, passed to <c>WriteFST</c>.</param>
+ /// <param name="docToOrdCount">Counts consumed by the iterator, one per binary value produced.</param>
+ /// <param name="ords">Ordinals consumed by the iterator, <c>count</c> at a time.</param>
+ public override void AddSortedSetField(FieldInfo field, IEnumerable<BytesRef> values,
+ IEnumerable<long> docToOrdCount, IEnumerable<long> ords)
+ {
+ // write the ordinals as a binary field
+ AddBinaryField(field, new IterableAnonymousInnerClassHelper(this, docToOrdCount, ords));
+
+ // write the values as FST
+ WriteFST(field, values);
+ }
+
+        /// <summary>
+        /// Re-enumerable view that pairs a sequence of counts with a sequence of ordinals; each
+        /// enumeration starts a fresh <see cref="SortedSetIterator"/> over new enumerators of both.
+        /// </summary>
+        private class IterableAnonymousInnerClassHelper : IEnumerable<BytesRef>
+        {
+            private readonly IEnumerable<long> _docToOrdCount;
+            private readonly IEnumerable<long> _ords;
+
+            /// <param name="outerInstance">Not used; kept so existing construction sites keep compiling.</param>
+            /// <param name="docToOrdCount">Counts handed to the iterator.</param>
+            /// <param name="ords">Ordinals handed to the iterator.</param>
+            public IterableAnonymousInnerClassHelper(MemoryDocValuesConsumer outerInstance,
+                IEnumerable<long> docToOrdCount, IEnumerable<long> ords)
+            {
+                _docToOrdCount = docToOrdCount;
+                _ords = ords;
+            }
+
+            /// <summary>Creates a new iterator over fresh enumerators of both sequences.</summary>
+            public IEnumerator<BytesRef> GetEnumerator()
+            {
+                return new SortedSetIterator(_docToOrdCount.GetEnumerator(), _ords.GetEnumerator());
+            }
+
+            // IEnumerable<T> extends the non-generic IEnumerable, whose GetEnumerator must be
+            // implemented as well; without it the class does not satisfy the interface.
+            System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator()
+            {
+                return GetEnumerator();
+            }
+        }
+
-//JAVA TO C# CONVERTER TODO TASK: Java iterators are only converted within the context of 'while' and 'for' loops:
- return counts.hasNext();
- }
- public override BytesRef next()
- {
- if (!hasNext())
- {
- throw new NoSuchElementException();
- }
-//JAVA TO C# CONVERTER TODO TASK: Java iterators are only converted within the context of 'while' and 'for' loops:
- int count = (int)counts.next();
- int maxSize = count * 9; // worst case
- if (maxSize > buffer.Length)
- {
- buffer = ArrayUtil.grow(buffer, maxSize);
- }
-
- try
- {
- encodeValues(count);
- }
- catch (IOException bogus)
- {
- throw new Exception(bogus);
- }
-
- @ref.bytes = buffer;
- @ref.offset = 0;
- @ref.length = @out.Position;
-
- return @ref;
- }
-
- // encodes count values to buffer
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: private void encodeValues(int count) throws java.io.IOException
- internal virtual void encodeValues(int count)
- {
- @out.reset(buffer);
- long lastOrd = 0;
- for (int i = 0; i < count; i++)
- {
-//JAVA TO C# CONVERTER TODO TASK: Java iterators are only converted within the context of 'while' and 'for' loops:
- long ord = (long)ords.next();
- @out.writeVLong(ord - lastOrd);
- lastOrd = ord;
- }
- }
-
- public override void remove()
- {
- throw new System.NotSupportedException();
- }
- }
- }
-
+        // per-document vint-encoded byte[]
+        /// <summary>
+        /// Enumerator that, for each count read from <c>counts</c>, consumes that many ordinals from
+        /// <c>ords</c> and exposes them as one <see cref="BytesRef"/> holding the vLong-encoded
+        /// differences between consecutive ordinals (the first is encoded relative to 0).
+        /// </summary>
+        /// <remarks>
+        /// The same <see cref="BytesRef"/> instance and buffer are reused for every element, so
+        /// <see cref="Current"/> is only valid until the next <see cref="MoveNext"/>.
+        /// The Java-style <c>HasNext</c>/<c>Next</c>/<c>Remove</c> members are replaced by the
+        /// <c>IEnumerator&lt;T&gt;</c> members the class declares it implements.
+        /// </remarks>
+        internal class SortedSetIterator : IEnumerator<BytesRef>
+        {
+            internal sbyte[] buffer = new sbyte[10];
+            internal ByteArrayDataOutput @out = new ByteArrayDataOutput();
+            internal BytesRef @ref = new BytesRef();
+
+            internal readonly IEnumerator<long> counts;
+            internal readonly IEnumerator<long> ords;
+
+            internal SortedSetIterator(IEnumerator<long> counts, IEnumerator<long> ords)
+            {
+                this.counts = counts;
+                this.ords = ords;
+            }
+
+            /// <summary>The most recently encoded value (the shared, reused instance).</summary>
+            public BytesRef Current
+            {
+                get { return @ref; }
+            }
+
+            object System.Collections.IEnumerator.Current
+            {
+                get { return Current; }
+            }
+
+            /// <summary>
+            /// Advances to the next count and encodes its ordinals into <see cref="Current"/>.
+            /// </summary>
+            /// <returns><c>false</c> once <c>counts</c> is exhausted.</returns>
+            public bool MoveNext()
+            {
+                if (!counts.MoveNext())
+                {
+                    return false;
+                }
+
+                int count = (int) counts.Current;
+                int maxSize = count*9; // worst case
+                if (maxSize > buffer.Length)
+                {
+                    buffer = ArrayUtil.Grow(buffer, maxSize);
+                }
+
+                EncodeValues(count);
+
+                @ref.Bytes = buffer;
+                @ref.Offset = 0;
+                @ref.Length = @out.Position;
+
+                return true;
+            }
+
+            // encodes count values to buffer
+            /// <exception cref="InvalidOperationException">
+            /// <c>ords</c> ran out before <paramref name="count"/> ordinals were read.
+            /// </exception>
+            internal virtual void EncodeValues(int count)
+            {
+                @out.Reset(buffer);
+                long lastOrd = 0;
+                for (int i = 0; i < count; i++)
+                {
+                    // IEnumerator.Current is undefined after a failed MoveNext, so fail loudly
+                    // instead of encoding a stale ordinal.
+                    if (!ords.MoveNext())
+                    {
+                        throw new InvalidOperationException("ords ended before the expected number of ordinals was read");
+                    }
+                    long ord = ords.Current;
+                    @out.WriteVLong(ord - lastOrd);
+                    lastOrd = ord;
+                }
+            }
+
+            /// <summary>Not supported: the underlying enumerators are consumed only once.</summary>
+            public void Reset()
+            {
+                throw new NotSupportedException();
+            }
+
+            /// <summary>Disposes both underlying enumerators.</summary>
+            public void Dispose()
+            {
+                counts.Dispose();
+                ords.Dispose();
+            }
+        }
+ }
}
\ No newline at end of file
[4/5] lucenenet git commit: Updating Memory Codec
Posted by pn...@apache.org.
Updating Memory Codec
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/0cc0e7ec
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/0cc0e7ec
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/0cc0e7ec
Branch: refs/heads/master
Commit: 0cc0e7eccc1d41dbfc193d271bc2818db5c7376b
Parents: 9971717
Author: Prescott Nasser <pn...@apache.org>
Authored: Sat Nov 8 14:53:37 2014 -0800
Committer: Prescott Nasser <pn...@apache.org>
Committed: Sat Nov 8 14:53:37 2014 -0800
----------------------------------------------------------------------
.../Memory/DirectDocValuesConsumer.cs | 16 +-
.../Memory/DirectPostingsFormat.cs | 5343 +++++++++---------
.../Memory/FSTOrdPostingsFormat.cs | 153 +-
.../Memory/FSTOrdTermsWriter.cs | 837 ++-
.../Memory/FSTPostingsFormat.cs | 152 +-
src/Lucene.Net.Codecs/Memory/FSTTermsWriter.cs | 14 +-
.../Memory/MemoryDocValuesConsumer.cs | 987 ++--
.../Memory/MemoryDocValuesProducer.cs | 1717 +++---
.../Memory/MemoryPostingsFormat.cs | 101 +-
.../Util/Fst/PositiveIntOutputs.cs | 1 -
10 files changed, 4577 insertions(+), 4744 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0cc0e7ec/src/Lucene.Net.Codecs/Memory/DirectDocValuesConsumer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Codecs/Memory/DirectDocValuesConsumer.cs b/src/Lucene.Net.Codecs/Memory/DirectDocValuesConsumer.cs
index 23e7a16..b9b157e 100644
--- a/src/Lucene.Net.Codecs/Memory/DirectDocValuesConsumer.cs
+++ b/src/Lucene.Net.Codecs/Memory/DirectDocValuesConsumer.cs
@@ -46,11 +46,11 @@ namespace Lucene.Net.Codecs.Memory
string dataName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix,
dataExtension);
data = state.Directory.CreateOutput(dataName, state.Context);
- CodecUtil.WriteHeader(data, dataCodec, VERSION_CURRENT);
+ CodecUtil.WriteHeader(data, dataCodec, MemoryDocValuesProducer.VERSION_CURRENT);
string metaName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix,
metaExtension);
meta = state.Directory.CreateOutput(metaName, state.Context);
- CodecUtil.WriteHeader(meta, metaCodec, VERSION_CURRENT);
+ CodecUtil.WriteHeader(meta, metaCodec, MemoryDocValuesProducer.VERSION_CURRENT);
success = true;
}
finally
@@ -65,7 +65,7 @@ namespace Lucene.Net.Codecs.Memory
public override void AddNumericField(FieldInfo field, IEnumerable<long> values)
{
meta.WriteVInt(field.Number);
- meta.WriteByte(NUMBER);
+ meta.WriteByte(MemoryDocValuesProducer.NUMBER);
AddNumericFieldValues(field, values);
}
@@ -194,7 +194,7 @@ namespace Lucene.Net.Codecs.Memory
public override void AddBinaryField(FieldInfo field, IEnumerable<BytesRef> values)
{
meta.WriteVInt(field.Number);
- meta.WriteByte(BYTES);
+ meta.WriteByte(MemoryDocValuesProducer.BYTES);
AddBinaryFieldValues(field, values);
}
@@ -213,7 +213,7 @@ namespace Lucene.Net.Codecs.Memory
totalBytes += v.Length;
if (totalBytes > DirectDocValuesFormat.MAX_TOTAL_BYTES_LENGTH)
{
- throw new ArgumentException("DocValuesField \"" + field.name +
+ throw new ArgumentException("DocValuesField \"" + field.Name +
"\" is too large, cannot have more than DirectDocValuesFormat.MAX_TOTAL_BYTES_LENGTH (" +
DirectDocValuesFormat.MAX_TOTAL_BYTES_LENGTH + ") bytes");
}
@@ -310,7 +310,7 @@ namespace Lucene.Net.Codecs.Memory
AddBinaryFieldValues(field, values);
}
- private class IterableAnonymousInnerClassHelper : IEnumerable<Number>
+ private class IterableAnonymousInnerClassHelper : IEnumerable<long>
{
private readonly DirectDocValuesConsumer _outerInstance;
private readonly IEnumerable<long> _docToOrdCount;
@@ -322,11 +322,9 @@ namespace Lucene.Net.Codecs.Memory
_docToOrdCount = docToOrdCount;
}
-
// Just aggregates the count values so they become
// "addresses", and adds one more value in the end
// (the final sum):
-
public virtual IEnumerator<long> GetEnumerator()
{
var iter = _docToOrdCount.GetEnumerator();
@@ -335,13 +333,11 @@ namespace Lucene.Net.Codecs.Memory
private class IteratorAnonymousInnerClassHelper : IEnumerator<long>
{
- private readonly IterableAnonymousInnerClassHelper outerInstance;
private readonly IEnumerator<long> _iter;
public IteratorAnonymousInnerClassHelper(IterableAnonymousInnerClassHelper outerInstance,
IEnumerator<long> iter)
{
- this.outerInstance = outerInstance;
_iter = iter;
}