Posted to commits@lucenenet.apache.org by mh...@apache.org on 2013/09/24 20:32:46 UTC
[10/50] [abbrv] git commit: nearing completion - this file 50%, overall 90% on namespace
nearing completion - this file 50%, overall 90% on namespace
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/d9ad1fea
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/d9ad1fea
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/d9ad1fea
Branch: refs/heads/branch_4x
Commit: d9ad1fea5fe5fb1a72a7248fee1e2c04d0a20253
Parents: e47e663
Author: Mike Potts <mi...@feature23.com>
Authored: Sun Jul 14 12:03:09 2013 -0400
Committer: Mike Potts <mi...@feature23.com>
Committed: Sun Jul 14 12:03:09 2013 -0400
----------------------------------------------------------------------
.../CompressingStoredFieldsIndexReader.cs | 2 +-
.../CompressingStoredFieldsIndexWriter.cs | 167 ++++++
.../Compressing/CompressingTermVectorsFormat.cs | 28 +
.../Compressing/CompressingTermVectorsReader.cs | 507 +++++++++++++++++++
src/core/Lucene.Net.csproj | 3 +
5 files changed, 706 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/d9ad1fea/src/core/Codecs/Compressing/CompressingStoredFieldsIndexReader.cs
----------------------------------------------------------------------
diff --git a/src/core/Codecs/Compressing/CompressingStoredFieldsIndexReader.cs b/src/core/Codecs/Compressing/CompressingStoredFieldsIndexReader.cs
index f981b32..d5a16df 100644
--- a/src/core/Codecs/Compressing/CompressingStoredFieldsIndexReader.cs
+++ b/src/core/Codecs/Compressing/CompressingStoredFieldsIndexReader.cs
@@ -147,7 +147,7 @@ namespace Lucene.Net.Codecs.Compressing
return hi;
}
- private long getStartPointer(int docID)
+ public long GetStartPointer(int docID)
{
if (docID < 0 || docID >= maxDoc) {
throw new ArgumentException("docID out of range [0-" + maxDoc + "]: " + docID);
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/d9ad1fea/src/core/Codecs/Compressing/CompressingStoredFieldsIndexWriter.cs
----------------------------------------------------------------------
diff --git a/src/core/Codecs/Compressing/CompressingStoredFieldsIndexWriter.cs b/src/core/Codecs/Compressing/CompressingStoredFieldsIndexWriter.cs
new file mode 100644
index 0000000..ece363a
--- /dev/null
+++ b/src/core/Codecs/Compressing/CompressingStoredFieldsIndexWriter.cs
@@ -0,0 +1,167 @@
+using Lucene.Net.Store;
+using Lucene.Net.Util.Packed;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+namespace Lucene.Net.Codecs.Compressing
+{
+ public sealed class CompressingStoredFieldsIndexWriter : IDisposable
+ {
+ static readonly int BLOCK_SIZE = 1024; // number of chunks to serialize at once
+
+ private IndexOutput fieldsIndexOut;
+ private int totalDocs;
+ private int blockDocs;
+ private int blockChunks;
+ private long firstStartPointer;
+ private long maxStartPointer;
+ private int[] docBaseDeltas;
+ private long[] startPointerDeltas;
+
+ static long moveSignToLowOrderBit(long n)
+ {
+ return (n >> 63) ^ (n << 1);
+ }
+
+ internal CompressingStoredFieldsIndexWriter(IndexOutput indexOutput)
+ {
+ this.fieldsIndexOut = indexOutput;
+ reset();
+ totalDocs = 0;
+ docBaseDeltas = new int[BLOCK_SIZE];
+ startPointerDeltas = new long[BLOCK_SIZE];
+ fieldsIndexOut.WriteVInt(PackedInts.VERSION_CURRENT);
+ }
+
+ private void reset()
+ {
+ blockChunks = 0;
+ blockDocs = 0;
+ firstStartPointer = -1; // means unset
+ }
+
+ private void writeBlock()
+ {
+ fieldsIndexOut.WriteVInt(blockChunks);
+
+ // The trick here is that we only store the difference from the average start
+ // pointer or doc base, this helps save bits per value.
+ // And in order to prevent a few chunks that would be far from the average to
+ // raise the number of bits per value for all of them, we only encode blocks
+ // of 1024 chunks at once
+ // See LUCENE-4512
+
+ // doc bases
+ int avgChunkDocs;
+ if (blockChunks == 1)
+ {
+ avgChunkDocs = 0;
+ }
+ else
+ {
+ //hackmp - TODO - This needs review. The function as a whole is designed with an int as the core value,
+ //including contracts on other methods. I NEVER like casting from double to int, but for now...
+ avgChunkDocs = (int)Math.Round((float)(blockDocs - docBaseDeltas[blockChunks - 1]) / (blockChunks - 1));
+ }
+ fieldsIndexOut.WriteVInt(totalDocs - blockDocs); // docBase
+ fieldsIndexOut.WriteVInt(avgChunkDocs);
+ int docBase = 0;
+ long maxDelta = 0;
+ for (int i = 0; i < blockChunks; ++i)
+ {
+ int delta = docBase - avgChunkDocs * i;
+ maxDelta |= moveSignToLowOrderBit(delta);
+ docBase += docBaseDeltas[i];
+ }
+
+ int bitsPerDocBase = PackedInts.BitsRequired(maxDelta);
+ fieldsIndexOut.WriteVInt(bitsPerDocBase);
+ PackedInts.Writer writer = PackedInts.GetWriterNoHeader(fieldsIndexOut,
+ PackedInts.Format.PACKED, blockChunks, bitsPerDocBase, 1);
+ docBase = 0;
+ for (int i = 0; i < blockChunks; ++i)
+ {
+ long delta = docBase - avgChunkDocs * i;
+ writer.Add(moveSignToLowOrderBit(delta));
+ docBase += docBaseDeltas[i];
+ }
+ writer.Finish();
+
+ // start pointers
+ fieldsIndexOut.WriteVLong(firstStartPointer);
+ long avgChunkSize;
+ if (blockChunks == 1)
+ {
+ avgChunkSize = 0;
+ }
+ else
+ {
+ avgChunkSize = (maxStartPointer - firstStartPointer) / (blockChunks - 1);
+ }
+ fieldsIndexOut.WriteVLong(avgChunkSize);
+ long startPointer = 0;
+ maxDelta = 0;
+ for (int i = 0; i < blockChunks; ++i)
+ {
+ startPointer += startPointerDeltas[i];
+ long delta = startPointer - avgChunkSize * i;
+ maxDelta |= moveSignToLowOrderBit(delta);
+ }
+
+ int bitsPerStartPointer = PackedInts.BitsRequired(maxDelta);
+ fieldsIndexOut.WriteVInt(bitsPerStartPointer);
+ writer = PackedInts.GetWriterNoHeader(fieldsIndexOut, PackedInts.Format.PACKED,
+ blockChunks, bitsPerStartPointer, 1);
+ startPointer = 0;
+ for (int i = 0; i < blockChunks; ++i)
+ {
+ startPointer += startPointerDeltas[i];
+ long delta = startPointer - avgChunkSize * i;
+ writer.Add(moveSignToLowOrderBit(delta));
+ }
+ writer.Finish();
+ }
+
+ internal void writeIndex(int numDocs, long startPointer)
+ {
+ if (blockChunks == BLOCK_SIZE)
+ {
+ writeBlock();
+ reset();
+ }
+
+ if (firstStartPointer == -1)
+ {
+ firstStartPointer = maxStartPointer = startPointer;
+ }
+
+ docBaseDeltas[blockChunks] = numDocs;
+ startPointerDeltas[blockChunks] = startPointer - maxStartPointer;
+
+ ++blockChunks;
+ blockDocs += numDocs;
+ totalDocs += numDocs;
+ maxStartPointer = startPointer;
+ }
+
+ internal void finish(int numDocs)
+ {
+ if (numDocs != totalDocs)
+ {
+ throw new ArgumentOutOfRangeException("numDocs", "Expected " + numDocs + " docs, but got " + totalDocs);
+ }
+ if (blockChunks > 0)
+ {
+ writeBlock();
+ }
+ fieldsIndexOut.WriteVInt(0); // end marker
+ }
+
+ public void Dispose()
+ {
+ fieldsIndexOut.Dispose();
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/d9ad1fea/src/core/Codecs/Compressing/CompressingTermVectorsFormat.cs
----------------------------------------------------------------------
diff --git a/src/core/Codecs/Compressing/CompressingTermVectorsFormat.cs b/src/core/Codecs/Compressing/CompressingTermVectorsFormat.cs
new file mode 100644
index 0000000..0a2afd1
--- /dev/null
+++ b/src/core/Codecs/Compressing/CompressingTermVectorsFormat.cs
@@ -0,0 +1,28 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+namespace Lucene.Net.Codecs.Compressing
+{
+ public class CompressingTermVectorsFormat : TermVectorsFormat
+ {
+ private string formatName;
+ private string segmentSuffix;
+ private CompressionMode compressionMode;
+ private int chunkSize;
+
+ public CompressingTermVectorsFormat(String formatName, String segmentSuffix,
+ CompressionMode compressionMode, int chunkSize)
+ {
+ this.formatName = formatName;
+ this.segmentSuffix = segmentSuffix;
+ this.compressionMode = compressionMode;
+ if (chunkSize < 1)
+ {
+ throw new ArgumentException("chunkSize must be >= 1");
+ }
+ this.chunkSize = chunkSize;
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/d9ad1fea/src/core/Codecs/Compressing/CompressingTermVectorsReader.cs
----------------------------------------------------------------------
diff --git a/src/core/Codecs/Compressing/CompressingTermVectorsReader.cs b/src/core/Codecs/Compressing/CompressingTermVectorsReader.cs
new file mode 100644
index 0000000..0de0f4f
--- /dev/null
+++ b/src/core/Codecs/Compressing/CompressingTermVectorsReader.cs
@@ -0,0 +1,507 @@
+using Lucene.Net.Index;
+using Lucene.Net.Store;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using Lucene.Net.Util.Packed;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+namespace Lucene.Net.Codecs.Compressing
+{
+ public sealed class CompressingTermVectorsReader : TermVectorsReader, IDisposable
+ {
+ private FieldInfos fieldInfos;
+ CompressingStoredFieldsIndexReader indexReader;
+ IndexInput vectorsStream;
+ private int packedIntsVersion;
+ private CompressionMode compressionMode;
+ private Decompressor decompressor;
+ private int chunkSize;
+ private int numDocs;
+ private bool closed;
+ private BlockPackedReaderIterator reader;
+
+ private CompressingTermVectorsReader(CompressingTermVectorsReader reader)
+ {
+ this.fieldInfos = reader.fieldInfos;
+ this.vectorsStream = (IndexInput)reader.vectorsStream.Clone();
+ this.indexReader = (CompressingStoredFieldsIndexReader)reader.indexReader.Clone();
+ this.packedIntsVersion = reader.packedIntsVersion;
+ this.compressionMode = reader.compressionMode;
+ this.decompressor = (Decompressor)reader.decompressor.Clone();
+ this.chunkSize = reader.chunkSize;
+ this.numDocs = reader.numDocs;
+ this.reader = new BlockPackedReaderIterator(vectorsStream, packedIntsVersion, BLOCK_SIZE, 0);
+ this.closed = false;
+ }
+
+ /// <summary>Sole constructor.</summary>
+ public CompressingTermVectorsReader(Directory d, SegmentInfo si, String segmentSuffix, FieldInfos fn,
+ IOContext context, String formatName, CompressionMode compressionMode)
+ {
+ this.compressionMode = compressionMode;
+ string segment = si.Name;
+ bool success = false;
+ fieldInfos = fn;
+ numDocs = si.DocCount;
+ IndexInput indexStream = null;
+ try {
+ vectorsStream = d.OpenInput(IndexFileNames.SegmentFileName(segment, segmentSuffix, VECTORS_EXTENSION), context);
+ string indexStreamFN = IndexFileNames.SegmentFileName(segment, segmentSuffix, VECTORS_INDEX_EXTENSION);
+ indexStream = d.OpenInput(indexStreamFN, context);
+
+ string codecNameIdx = formatName + CODEC_SFX_IDX;
+ string codecNameDat = formatName + CODEC_SFX_DAT;
+ CodecUtil.CheckHeader(indexStream, codecNameIdx, VERSION_START, VERSION_CURRENT);
+ CodecUtil.CheckHeader(vectorsStream, codecNameDat, VERSION_START, VERSION_CURRENT);
+
+ indexReader = new CompressingStoredFieldsIndexReader(indexStream, si);
+ indexStream = null;
+
+ packedIntsVersion = vectorsStream.ReadVInt();
+ chunkSize = vectorsStream.ReadVInt();
+ decompressor = compressionMode.NewDecompressor();
+ this.reader = new BlockPackedReaderIterator(vectorsStream, packedIntsVersion, BLOCK_SIZE, 0);
+
+ success = true;
+ } finally {
+ if (!success) {
+ IOUtils.CloseWhileHandlingException(this, indexStream);
+ }
+ }
+ }
+
+ CompressionMode getCompressionMode()
+ {
+ return compressionMode;
+ }
+
+ int getChunkSize() {
+ return chunkSize;
+ }
+
+ int getPackedIntsVersion() {
+ return packedIntsVersion;
+ }
+
+ CompressingStoredFieldsIndexReader getIndex() {
+ return indexReader;
+ }
+
+ IndexInput getVectorsStream() {
+ return vectorsStream;
+ }
+
+ /// <exception cref="AlreadyClosedException">if this TermVectorsReader is closed</exception>
+ private void ensureOpen()
+ {
+ if (closed) {
+ throw new AlreadyClosedException("this FieldsReader is closed");
+ }
+ }
+
+
+ public void Dispose()
+ {
+ if (!closed)
+ {
+ IOUtils.Close(vectorsStream, indexReader);
+ closed = true;
+ }
+ }
+
+ public override Index.Fields Get(int doc)
+ {
+ ensureOpen();
+
+ // seek to the right place
+ {
+ long startPointer = indexReader.GetStartPointer(doc);
+ vectorsStream.Seek(startPointer);
+ }
+
+ // decode
+ // - docBase: first doc ID of the chunk
+ // - chunkDocs: number of docs of the chunk
+ int docBase = vectorsStream.ReadVInt();
+ int chunkDocs = vectorsStream.ReadVInt();
+ if (doc < docBase || doc >= docBase + chunkDocs || docBase + chunkDocs > numDocs) {
+ throw new CorruptIndexException("docBase=" + docBase + ",chunkDocs=" + chunkDocs + ",doc=" + doc);
+ }
+
+ long skip; // number of fields to skip
+ long numFields; // number of fields of the document we're looking for
+ long totalFields; // total number of fields of the chunk (sum for all docs)
+ if (chunkDocs == 1) {
+ skip = 0;
+ numFields = totalFields = vectorsStream.ReadVInt();
+ } else {
+ reader.Reset(vectorsStream, chunkDocs);
+ long sum = 0;
+ for (int i = docBase; i < doc; ++i) {
+ sum += reader.Next();
+ }
+ skip = sum;
+ numFields = (int) reader.Next();
+ sum += numFields;
+ for (int i = doc + 1; i < docBase + chunkDocs; ++i) {
+ sum += reader.Next();
+ }
+ totalFields = sum;
+ }
+
+ if (numFields == 0) {
+ // no vectors
+ return null;
+ }
+
+ // read field numbers that have term vectors
+ int[] fieldNums;
+ {
+ int token = vectorsStream.ReadByte() & 0xFF;
+ int bitsPerFieldNum = token & 0x1F;
+ int totalDistinctFields = Number.URShift(token, 5);
+ if (totalDistinctFields == 0x07) {
+ totalDistinctFields += vectorsStream.ReadVInt();
+ }
+ ++totalDistinctFields;
+ PackedInts.ReaderIterator it = PackedInts.GetReaderIteratorNoHeader(vectorsStream, PackedInts.Format.PACKED, packedIntsVersion, totalDistinctFields, bitsPerFieldNum, 1);
+ fieldNums = new int[totalDistinctFields];
+ for (int i = 0; i < totalDistinctFields; ++i) {
+ fieldNums[i] = (int) it.Next();
+ }
+ }
+
+ // read field numbers and flags
+ int[] fieldNumOffs = new int[numFields];
+ PackedInts.Reader flags;
+ {
+ int bitsPerOff = PackedInts.BitsRequired(fieldNums.Length - 1);
+ PackedInts.Reader allFieldNumOffs = PackedInts.GetReaderNoHeader(vectorsStream, PackedInts.Format.PACKED, packedIntsVersion, (int)totalFields, bitsPerOff);
+ switch (vectorsStream.ReadVInt()) {
+ case 0:
+ PackedInts.Reader fieldFlags = PackedInts.GetReaderNoHeader(vectorsStream, PackedInts.Format.PACKED, packedIntsVersion, fieldNums.Length, FLAGS_BITS);
+ PackedInts.Mutable f = PackedInts.GetMutable((int)totalFields, FLAGS_BITS, PackedInts.COMPACT);
+ for (int i = 0; i < totalFields; ++i) {
+ int fieldNumOff = (int) allFieldNumOffs.Get(i);
+ int fgs = (int) fieldFlags.Get(fieldNumOff);
+ f.Set(i, fgs);
+ }
+ flags = f;
+ break;
+ case 1:
+ flags = PackedInts.GetReaderNoHeader(vectorsStream, PackedInts.Format.PACKED, packedIntsVersion, (int)totalFields, FLAGS_BITS);
+ break;
+ default:
+ throw new InvalidOperationException(); // unreachable: the on-disk flags format is 0 or 1
+ }
+ for (int i = 0; i < numFields; ++i) {
+ //hackmp - TODO - NEEDS REVIEW
+ //Here again, changing all of the ints to longs has a larger impact than simply casting. Will need Paul to review...
+ fieldNumOffs[i] = (int) allFieldNumOffs.Get((int)skip + i);
+ }
+ }
+
+ // number of terms per field for all fields
+ PackedInts.Reader numTerms;
+ long totalTerms;
+ {
+ int bitsRequired = vectorsStream.ReadVInt();
+ numTerms = PackedInts.GetReaderNoHeader(vectorsStream, PackedInts.Format.PACKED, packedIntsVersion, (int)totalFields, bitsRequired);
+ long sum = 0;
+ for (int i = 0; i < totalFields; ++i) {
+ sum += numTerms.Get(i);
+ }
+ totalTerms = sum;
+ }
+
+ // term lengths
+ long docOff = 0, docLen = 0, totalLen;
+ int[] fieldLengths = new int[numFields];
+ int[][] prefixLengths = new int[numFields][];
+ int[][] suffixLengths = new int[numFields][];
+ {
+ reader.Reset(vectorsStream, totalTerms);
+ // skip
+ long toSkip = 0;
+ for (int i = 0; i < skip; ++i) {
+ toSkip += numTerms.Get(i);
+ }
+ reader.Skip(toSkip);
+ // read prefix lengths
+ for (int i = 0; i < numFields; ++i) {
+ //hackmp - TODO - NEEDS REVIEW
+ //casting long to int
+ int termCount = (int) numTerms.Get((int)skip + i);
+ int[] fieldPrefixLengths = new int[termCount];
+ prefixLengths[i] = fieldPrefixLengths;
+ for (int j = 0; j < termCount; ) {
+ LongsRef next = reader.Next(termCount - j);
+ for (int k = 0; k < next.length; ++k) {
+ fieldPrefixLengths[j++] = (int) next.longs[next.offset + k];
+ }
+ }
+ }
+ reader.Skip(totalTerms - reader.Ord);
+
+ reader.Reset(vectorsStream, totalTerms);
+ // skip
+ toSkip = 0;
+ for (int i = 0; i < skip; ++i) {
+ for (int j = 0; j < numTerms.Get(i); ++j) {
+ docOff += reader.Next();
+ }
+ }
+ for (int i = 0; i < numFields; ++i) {
+ //HACKMP - TODO - NEEDS REVIEW
+ //..and again, casting long to int
+ int termCount = (int) numTerms.Get((int)skip + i);
+ int[] fieldSuffixLengths = new int[termCount];
+ suffixLengths[i] = fieldSuffixLengths;
+ for (int j = 0; j < termCount; ) {
+ LongsRef next = reader.Next(termCount - j);
+ for (int k = 0; k < next.length; ++k) {
+ fieldSuffixLengths[j++] = (int) next.longs[next.offset + k];
+ }
+ }
+ fieldLengths[i] = suffixLengths[i].Sum(); // via System.Linq; replaces the Java port's private sum(int[]) helper
+ docLen += fieldLengths[i];
+ }
+ totalLen = docOff + docLen;
+ for (long i = skip + numFields; i < totalFields; ++i) {
+ //hackmp - TODO - NEEDS REVIEW
+ //long > int
+ for (int j = 0; j < numTerms.Get((int)i); ++j)
+ {
+ totalLen += reader.Next();
+ }
+ }
+ }
+
+ // term freqs
+ int[] termFreqs = new int[totalTerms];
+ {
+ reader.Reset(vectorsStream, totalTerms);
+ for (int i = 0; i < totalTerms; ) {
+ //hackmp - TODO - NEEDS REVIEW
+ //long > int
+ LongsRef next = reader.Next((int)totalTerms - i);
+ for (int k = 0; k < next.length; ++k) {
+ termFreqs[i++] = 1 + (int) next.longs[next.offset + k];
+ }
+ }
+ }
+
+ // total number of positions, offsets and payloads
+ int totalPositions = 0, totalOffsets = 0, totalPayloads = 0;
+ for (int i = 0, termIndex = 0; i < totalFields; ++i)
+ {
+ int f = (int) flags.Get(i);
+ int termCount = (int) numTerms.Get(i);
+ for (int j = 0; j < termCount; ++j) {
+ int freq = termFreqs[termIndex++];
+ if ((f & POSITIONS) != 0) {
+ totalPositions += freq;
+ }
+ if ((f & OFFSETS) != 0) {
+ totalOffsets += freq;
+ }
+ if ((f & PAYLOADS) != 0) {
+ totalPayloads += freq;
+ }
+ }
+ }
+
+ int[][] positionIndex = this.positionIndex(skip, numFields, numTerms, termFreqs); // 'this.' keeps the call from resolving to the local being declared
+ int[][] positions, startOffsets, lengths;
+ if (totalPositions > 0) {
+ positions = readPositions(skip, numFields, flags, numTerms, termFreqs, POSITIONS, totalPositions, positionIndex);
+ } else {
+ positions = new int[numFields][];
+ }
+
+ if (totalOffsets > 0) {
+ // average number of chars per term
+ float[] charsPerTerm = new float[fieldNums.Length];
+ for (int i = 0; i < charsPerTerm.Length; ++i) {
+ charsPerTerm[i] = Number.IntBitsToFloat(vectorsStream.ReadInt());
+ }
+ startOffsets = readPositions(skip, numFields, flags, numTerms, termFreqs, OFFSETS, totalOffsets, positionIndex);
+ lengths = readPositions(skip, numFields, flags, numTerms, termFreqs, OFFSETS, totalOffsets, positionIndex);
+
+ for (int i = 0; i < numFields; ++i) {
+ int[] fStartOffsets = startOffsets[i];
+ int[] fPositions = positions[i];
+ // patch offsets from positions
+ if (fStartOffsets != null && fPositions != null) {
+ float fieldCharsPerTerm = charsPerTerm[fieldNumOffs[i]];
+ for (int j = 0; j < startOffsets[i].Length; ++j) {
+ fStartOffsets[j] += (int) (fieldCharsPerTerm * fPositions[j]);
+ }
+ }
+ if (fStartOffsets != null) {
+ int[] fPrefixLengths = prefixLengths[i];
+ int[] fSuffixLengths = suffixLengths[i];
+ int[] fLengths = lengths[i];
+ //hackmp - TODO - NEEDS REVIEW
+ //long > int
+ for (int j = 0, end = (int) numTerms.Get((int)skip + i); j < end; ++j) {
+ // delta-decode start offsets and patch lengths using term lengths
+ int termLength = fPrefixLengths[j] + fSuffixLengths[j];
+ lengths[i][positionIndex[i][j]] += termLength;
+ for (int k = positionIndex[i][j] + 1; k < positionIndex[i][j + 1]; ++k) {
+ fStartOffsets[k] += fStartOffsets[k - 1];
+ fLengths[k] += termLength;
+ }
+ }
+ }
+ }
+ } else {
+ startOffsets = lengths = new int[numFields][];
+ }
+ if (totalPositions > 0) {
+ // delta-decode positions
+ for (int i = 0; i < numFields; ++i) {
+ int[] fPositions = positions[i];
+ int[] fpositionIndex = positionIndex[i];
+ if (fPositions != null) {
+ //hackmp - TODO - NEED REVIEW
+ //long > int
+ for (int j = 0, end = (int) numTerms.Get((int)skip + i); j < end; ++j) {
+ // delta-decode start offsets
+ for (int k = fpositionIndex[j] + 1; k < fpositionIndex[j + 1]; ++k) {
+ fPositions[k] += fPositions[k - 1];
+ }
+ }
+ }
+ }
+ }
+
+ // payload lengths
+ int[][] payloadIndex = new int[numFields][];
+ long totalPayloadLength = 0;
+ int payloadOff = 0;
+ int payloadLen = 0;
+ if (totalPayloads > 0) {
+ reader.Reset(vectorsStream, totalPayloads);
+ // skip
+ int termIndex = 0;
+ for (int i = 0; i < skip; ++i) {
+ int f = (int) flags.Get(i);
+ int termCount = (int) numTerms.Get(i);
+ if ((f & PAYLOADS) != 0) {
+ for (int j = 0; j < termCount; ++j) {
+ int freq = termFreqs[termIndex + j];
+ for (int k = 0; k < freq; ++k) {
+ int l = (int) reader.Next();
+ payloadOff += l;
+ }
+ }
+ }
+ termIndex += termCount;
+ }
+ totalPayloadLength = payloadOff;
+ // read doc payload lengths
+ for (int i = 0; i < numFields; ++i) {
+ //hackmp - TODO - NEEDS REVIEW
+ //long > int
+ int f = (int) flags.Get((int)skip + i);
+ int termCount = (int) numTerms.Get((int)skip + i);
+ if ((f & PAYLOADS) != 0) {
+ int totalFreq = positionIndex[i][termCount];
+ payloadIndex[i] = new int[totalFreq + 1];
+ int posIdx = 0;
+ payloadIndex[i][posIdx] = payloadLen;
+ for (int j = 0; j < termCount; ++j) {
+ int freq = termFreqs[termIndex + j];
+ for (int k = 0; k < freq; ++k) {
+ int payloadLength = (int) reader.Next();
+ payloadLen += payloadLength;
+ payloadIndex[i][posIdx+1] = payloadLen;
+ ++posIdx;
+ }
+ }
+ }
+ termIndex += termCount;
+ }
+ totalPayloadLength += payloadLen;
+ for (long i = skip + numFields; i < totalFields; ++i) {
+ //hackmp - TODO - NEEDS REVIEW
+ //long > int
+ int f = (int) flags.Get((int)i);
+ int termCount = (int) numTerms.Get((int)i);
+ if ((f & PAYLOADS) != 0) {
+ for (int j = 0; j < termCount; ++j) {
+ int freq = termFreqs[termIndex + j];
+ for (int k = 0; k < freq; ++k) {
+ totalPayloadLength += reader.Next();
+ }
+ }
+ }
+ termIndex += termCount;
+ }
+ }
+
+ // decompress data
+ BytesRef suffixBytes = new BytesRef();
+ //hackmp - TODO - NEEDS REVIEW
+ //long > int
+ decompressor.Decompress(vectorsStream, (int)totalLen + (int)totalPayloadLength, (int)docOff + (int)payloadOff, (int)docLen + payloadLen, suffixBytes);
+ suffixBytes.length = (int)docLen;
+ BytesRef payloadBytes = new BytesRef(suffixBytes.bytes, suffixBytes.offset + (int)docLen, payloadLen);
+
+ int[] fieldFlags = new int[numFields];
+ for (int i = 0; i < numFields; ++i) {
+ //hackmp - TODO - NEEDS REVIEW
+ //long > int
+ fieldFlags[i] = (int) flags.Get((int)skip + i);
+ }
+
+ int[] fieldNumTerms = new int[numFields];
+ for (int i = 0; i < numFields; ++i) {
+ //hackmp - TODO - NEEDS REVIEW
+ fieldNumTerms[i] = (int) numTerms.Get((int)skip + i);
+ }
+
+ int[][] fieldTermFreqs = new int[numFields][];
+ {
+ long termIdx = 0;
+ for (int i = 0; i < skip; ++i) {
+ termIdx += numTerms.Get(i);
+ }
+ for (int i = 0; i < numFields; ++i) {
+ //hackmp - TODO - NEEDS REVIEW
+ //long > int
+ int termCount = (int) numTerms.Get((int)skip + i);
+ fieldTermFreqs[i] = new int[termCount];
+ for (int j = 0; j < termCount; ++j) {
+ fieldTermFreqs[i][j] = termFreqs[termIdx++];
+ }
+ }
+ }
+
+ return new TVFields(fieldNums, fieldFlags, fieldNumOffs, fieldNumTerms, fieldLengths,
+ prefixLengths, suffixLengths, fieldTermFreqs,
+ positionIndex, positions, startOffsets, lengths,
+ payloadBytes, payloadIndex,
+ suffixBytes);
+ }
+
+ public override object Clone()
+ {
+ return new CompressingTermVectorsReader(this);
+ }
+
+ protected override void Dispose(bool disposing)
+ {
+ if (disposing && !closed)
+ {
+ IOUtils.Close(vectorsStream, indexReader);
+ closed = true;
+ }
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/d9ad1fea/src/core/Lucene.Net.csproj
----------------------------------------------------------------------
diff --git a/src/core/Lucene.Net.csproj b/src/core/Lucene.Net.csproj
index 85f9818..306396c 100644
--- a/src/core/Lucene.Net.csproj
+++ b/src/core/Lucene.Net.csproj
@@ -188,8 +188,11 @@
<Compile Include="Codecs\CodecUtil.cs" />
<Compile Include="Codecs\Compressing\CompressingStoredFieldsFormat.cs" />
<Compile Include="Codecs\Compressing\CompressingStoredFieldsIndexReader.cs" />
+ <Compile Include="Codecs\Compressing\CompressingStoredFieldsIndexWriter.cs" />
<Compile Include="Codecs\Compressing\CompressingStoredFieldsReader.cs" />
<Compile Include="Codecs\Compressing\CompressingStoredFieldsWriter.cs" />
+ <Compile Include="Codecs\Compressing\CompressingTermVectorsFormat.cs" />
+ <Compile Include="Codecs\Compressing\CompressingTermVectorsReader.cs" />
<Compile Include="Codecs\Compressing\CompressionMode.cs" />
<Compile Include="Codecs\Compressing\Compressor.cs" />
<Compile Include="Codecs\Compressing\Decompressor.cs" />