Posted to commits@lucenenet.apache.org by mh...@apache.org on 2013/09/24 20:32:38 UTC
[02/50] [abbrv] git commit: another set of files
another set of files
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/64c13f3c
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/64c13f3c
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/64c13f3c
Branch: refs/heads/branch_4x
Commit: 64c13f3c5cf452252c0e45ff88860ebca5da0174
Parents: 666d83e
Author: Mike Potts <mi...@feature23.com>
Authored: Wed Jul 10 23:15:58 2013 -0400
Committer: Mike Potts <mi...@feature23.com>
Committed: Wed Jul 10 23:15:58 2013 -0400
----------------------------------------------------------------------
.../CompressingStoredFieldsIndexReader.cs | 175 +++++
.../CompressingStoredFieldsReader.cs | 2 +-
.../CompressingStoredFieldsWriter.cs | 756 +++++++++----------
src/core/Codecs/Compressing/Compressor.cs | 2 +-
.../Compressing/GrowableByteArrayDataOutput.cs | 34 +-
src/core/Lucene.Net.csproj | 2 +
6 files changed, 575 insertions(+), 396 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/64c13f3c/src/core/Codecs/Compressing/CompressingStoredFieldsIndexReader.cs
----------------------------------------------------------------------
diff --git a/src/core/Codecs/Compressing/CompressingStoredFieldsIndexReader.cs b/src/core/Codecs/Compressing/CompressingStoredFieldsIndexReader.cs
new file mode 100644
index 0000000..f981b32
--- /dev/null
+++ b/src/core/Codecs/Compressing/CompressingStoredFieldsIndexReader.cs
@@ -0,0 +1,175 @@
+using Lucene.Net.Index;
+using Lucene.Net.Store;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using Lucene.Net.Util.Packed;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+namespace Lucene.Net.Codecs.Compressing
+{
+ public sealed class CompressingStoredFieldsIndexReader: ICloneable //Closable??
+ {
+ int maxDoc;
+ int[] docBases;
+ long[] startPointers;
+ int[] avgChunkDocs;
+ long[] avgChunkSizes;
+ PackedInts.Reader[] docBasesDeltas; // delta from the avg
+ PackedInts.Reader[] startPointersDeltas; // delta from the avg
+
+ IndexInput fieldsIndexIn;
+
+ static long MoveLowOrderBitToSign(long n)
+ {
+ return ((Number.URShift(n, 1) ^ -(n & 1)));
+ }
+
+ public CompressingStoredFieldsIndexReader(IndexInput fieldsIndexIn, SegmentInfo si)
+ {
+ this.fieldsIndexIn = fieldsIndexIn;
+ maxDoc = si.DocCount;
+ int[] docBases = new int[16];
+ long[] startPointers = new long[16];
+ int[] avgChunkDocs = new int[16];
+ long[] avgChunkSizes = new long[16];
+ PackedInts.Reader[] docBasesDeltas = new PackedInts.Reader[16];
+ PackedInts.Reader[] startPointersDeltas = new PackedInts.Reader[16];
+
+ int packedIntsVersion = fieldsIndexIn.ReadVInt();
+
+ int blockCount = 0;
+
+ for (;;) {
+ int numChunks = fieldsIndexIn.ReadVInt();
+ if (numChunks == 0) {
+ break;
+ }
+
+ if (blockCount == docBases.Length) {
+ int newSize = ArrayUtil.Oversize(blockCount + 1, 8);
+ docBases = Arrays.CopyOf(docBases, newSize);
+ startPointers = Arrays.CopyOf(startPointers, newSize);
+ avgChunkDocs = Arrays.CopyOf(avgChunkDocs, newSize);
+ avgChunkSizes = Arrays.CopyOf(avgChunkSizes, newSize);
+ docBasesDeltas = Arrays.CopyOf(docBasesDeltas, newSize);
+ startPointersDeltas = Arrays.CopyOf(startPointersDeltas, newSize);
+ }
+
+ // doc bases
+ docBases[blockCount] = fieldsIndexIn.ReadVInt();
+ avgChunkDocs[blockCount] = fieldsIndexIn.ReadVInt();
+ int bitsPerDocBase = fieldsIndexIn.ReadVInt();
+ if (bitsPerDocBase > 32) {
+ throw new CorruptIndexException("Corrupted");
+ }
+ docBasesDeltas[blockCount] = (Lucene.Net.Util.Packed.PackedInts.Reader)PackedInts.GetReaderNoHeader(fieldsIndexIn, PackedInts.Format.PACKED, packedIntsVersion, numChunks, bitsPerDocBase);
+
+ // start pointers
+ startPointers[blockCount] = fieldsIndexIn.ReadVLong();
+ avgChunkSizes[blockCount] = fieldsIndexIn.ReadVLong();
+ int bitsPerStartPointer = fieldsIndexIn.ReadVInt();
+ if (bitsPerStartPointer > 64) {
+ throw new CorruptIndexException("Corrupted");
+ }
+ startPointersDeltas[blockCount] = (Lucene.Net.Util.Packed.PackedInts.Reader)PackedInts.GetReaderNoHeader(fieldsIndexIn, PackedInts.Format.PACKED, packedIntsVersion, numChunks, bitsPerStartPointer);
+
+ ++blockCount;
+ }
+
+ this.docBases = Arrays.CopyOf(docBases, blockCount);
+ this.startPointers = Arrays.CopyOf(startPointers, blockCount);
+ this.avgChunkDocs = Arrays.CopyOf(avgChunkDocs, blockCount);
+ this.avgChunkSizes = Arrays.CopyOf(avgChunkSizes, blockCount);
+ this.docBasesDeltas = Arrays.CopyOf(docBasesDeltas, blockCount);
+ this.startPointersDeltas = Arrays.CopyOf(startPointersDeltas, blockCount);
+ }
+
+ private CompressingStoredFieldsIndexReader(CompressingStoredFieldsIndexReader other)
+ {
+ this.fieldsIndexIn = null;
+ this.maxDoc = other.maxDoc;
+ this.docBases = other.docBases;
+ this.startPointers = other.startPointers;
+ this.avgChunkDocs = other.avgChunkDocs;
+ this.avgChunkSizes = other.avgChunkSizes;
+ this.docBasesDeltas = other.docBasesDeltas;
+ this.startPointersDeltas = other.startPointersDeltas;
+ }
+
+ private int Block(int docID)
+ {
+ int lo = 0, hi = docBases.Length - 1;
+ while (lo <= hi) {
+ int mid = Number.URShift(lo + hi, 1);
+ int midValue = docBases[mid];
+ if (midValue == docID) {
+ return mid;
+ } else if (midValue < docID) {
+ lo = mid + 1;
+ } else {
+ hi = mid - 1;
+ }
+ }
+ return hi;
+ }
+
+ private int relativeDocBase(int block, int relativeChunk)
+ {
+ int expected = avgChunkDocs[block] * relativeChunk;
+ long delta = MoveLowOrderBitToSign(docBasesDeltas[block].Get(relativeChunk));
+ return expected + (int) delta;
+ }
+
+ private long relativeStartPointer(int block, int relativeChunk)
+ {
+ long expected = avgChunkSizes[block] * relativeChunk;
+ long delta = MoveLowOrderBitToSign(startPointersDeltas[block].Get(relativeChunk));
+ return expected + delta;
+ }
+
+ private int relativeChunk(int block, int relativeDoc)
+ {
+ int lo = 0, hi = docBasesDeltas[block].Size() - 1;
+ while (lo <= hi) {
+ int mid = Number.URShift(lo + hi, 1);
+ int midValue = relativeDocBase(block, mid);
+ if (midValue == relativeDoc) {
+ return mid;
+ } else if (midValue < relativeDoc) {
+ lo = mid + 1;
+ } else {
+ hi = mid - 1;
+ }
+ }
+ return hi;
+ }
+
+ private long getStartPointer(int docID)
+ {
+ if (docID < 0 || docID >= maxDoc) {
+ throw new ArgumentException("docID out of range [0-" + maxDoc + "]: " + docID);
+ }
+ int block = Block(docID);
+ int relativeChunk = this.relativeChunk(block, docID - docBases[block]);
+ return startPointers[block] + relativeStartPointer(block, relativeChunk);
+ }
+
+ public object Clone()
+ {
+ if (fieldsIndexIn == null) {
+ return this;
+ } else {
+ return new CompressingStoredFieldsIndexReader(this);
+ }
+ }
+
+ public void Close()
+ {
+ IOUtils.Close(fieldsIndexIn);
+ }
+
+ }
+}
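Note on the format this new reader decodes: each block stores a first doc base, an average chunk doc count and chunk size, and per-chunk deltas from those averages, with the deltas zigzag-encoded (MoveLowOrderBitToSign undoes the zigzag step). The following is a minimal standalone C# sketch of that arithmetic only; the class and method names are illustrative and it does not use any of the Lucene.NET types above.

using System;

static class ChunkIndexSketch
{
    // Undo zigzag encoding, the same transform as MoveLowOrderBitToSign:
    // 0 -> 0, 1 -> -1, 2 -> 1, 3 -> -2, ...
    static long Decode(long n)
    {
        return (long)((ulong)n >> 1) ^ -(n & 1);
    }

    // Inverse transform, used here only to build test data.
    static long Encode(long n)
    {
        return (n << 1) ^ (n >> 63);
    }

    // Absolute start pointer of chunk i in a block that stores an average
    // chunk size plus a per-chunk zigzag delta (compare relativeStartPointer above).
    static long StartPointer(long blockStart, long avgChunkSize, long[] deltas, int i)
    {
        return blockStart + avgChunkSize * i + Decode(deltas[i]);
    }

    static void Main()
    {
        // three chunks of roughly 100 bytes starting at file offset 1024
        long[] deltas = { Encode(0), Encode(-5), Encode(10) };
        for (int i = 0; i < deltas.Length; i++)
        {
            Console.WriteLine(StartPointer(1024, 100, deltas, i)); // 1024, 1119, 1234
        }
    }
}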
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/64c13f3c/src/core/Codecs/Compressing/CompressingStoredFieldsReader.cs
----------------------------------------------------------------------
diff --git a/src/core/Codecs/Compressing/CompressingStoredFieldsReader.cs b/src/core/Codecs/Compressing/CompressingStoredFieldsReader.cs
index ad909ce..9c55e07 100644
--- a/src/core/Codecs/Compressing/CompressingStoredFieldsReader.cs
+++ b/src/core/Codecs/Compressing/CompressingStoredFieldsReader.cs
@@ -59,7 +59,7 @@ public sealed class CompressingStoredFieldsReader: StoredFieldsReader {
string segment = si.name;
bool success = false;
fieldInfos = fn;
- numDocs = si.getDocCount();
+ numDocs = si.DocCount;
IndexInput indexStream = null;
try {
fieldsStream = d.OpenInput(IndexFileNames.SegmentFileName(segment, segmentSuffix, FIELDS_EXTENSION), context);
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/64c13f3c/src/core/Codecs/Compressing/CompressingStoredFieldsWriter.cs
----------------------------------------------------------------------
diff --git a/src/core/Codecs/Compressing/CompressingStoredFieldsWriter.cs b/src/core/Codecs/Compressing/CompressingStoredFieldsWriter.cs
index 54882fd..1543196 100644
--- a/src/core/Codecs/Compressing/CompressingStoredFieldsWriter.cs
+++ b/src/core/Codecs/Compressing/CompressingStoredFieldsWriter.cs
@@ -1,413 +1,391 @@
-package org.apache.lucene.codecs.compressing;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import static org.apache.lucene.codecs.lucene40.Lucene40StoredFieldsWriter.FIELDS_EXTENSION;
-import static org.apache.lucene.codecs.lucene40.Lucene40StoredFieldsWriter.FIELDS_INDEX_EXTENSION;
-
-import java.io.IOException;
-import java.util.Arrays;
-
-import org.apache.lucene.codecs.CodecUtil;
-import org.apache.lucene.codecs.StoredFieldsReader;
-import org.apache.lucene.codecs.StoredFieldsWriter;
-import org.apache.lucene.codecs.compressing.CompressingStoredFieldsReader.ChunkIterator;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.index.AtomicReader;
-import org.apache.lucene.index.CorruptIndexException;
-import org.apache.lucene.index.FieldInfo;
-import org.apache.lucene.index.FieldInfos;
-import org.apache.lucene.index.IndexFileNames;
-import org.apache.lucene.index.IndexableField;
-import org.apache.lucene.index.MergeState;
-import org.apache.lucene.index.SegmentInfo;
-import org.apache.lucene.index.SegmentReader;
-import org.apache.lucene.store.DataOutput;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.IOContext;
-import org.apache.lucene.store.IndexOutput;
-import org.apache.lucene.util.ArrayUtil;
-import org.apache.lucene.util.Bits;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.IOUtils;
-import org.apache.lucene.util.packed.PackedInts;
-
-/**
- * {@link StoredFieldsWriter} impl for {@link CompressingStoredFieldsFormat}.
- * @lucene.experimental
- */
-public final class CompressingStoredFieldsWriter extends StoredFieldsWriter {
-
- // hard limit on the maximum number of documents per chunk
- static final int MAX_DOCUMENTS_PER_CHUNK = 128;
-
- static final int STRING = 0x00;
- static final int BYTE_ARR = 0x01;
- static final int NUMERIC_INT = 0x02;
- static final int NUMERIC_FLOAT = 0x03;
- static final int NUMERIC_LONG = 0x04;
- static final int NUMERIC_DOUBLE = 0x05;
-
- static final int TYPE_BITS = PackedInts.bitsRequired(NUMERIC_DOUBLE);
- static final int TYPE_MASK = (int) PackedInts.maxValue(TYPE_BITS);
-
- static final String CODEC_SFX_IDX = "Index";
- static final String CODEC_SFX_DAT = "Data";
- static final int VERSION_START = 0;
- static final int VERSION_CURRENT = VERSION_START;
-
- private final Directory directory;
- private final String segment;
- private final String segmentSuffix;
- private CompressingStoredFieldsIndexWriter indexWriter;
- private IndexOutput fieldsStream;
-
- private final CompressionMode compressionMode;
- private final Compressor compressor;
- private final int chunkSize;
-
- private final GrowableByteArrayDataOutput bufferedDocs;
- private int[] numStoredFields; // number of stored fields
- private int[] endOffsets; // end offsets in bufferedDocs
- private int docBase; // doc ID at the beginning of the chunk
- private int numBufferedDocs; // docBase + numBufferedDocs == current doc ID
-
- /** Sole constructor. */
- public CompressingStoredFieldsWriter(Directory directory, SegmentInfo si, String segmentSuffix, IOContext context,
- String formatName, CompressionMode compressionMode, int chunkSize) throws IOException {
- assert directory != null;
- this.directory = directory;
- this.segment = si.name;
- this.segmentSuffix = segmentSuffix;
- this.compressionMode = compressionMode;
- this.compressor = compressionMode.newCompressor();
- this.chunkSize = chunkSize;
- this.docBase = 0;
- this.bufferedDocs = new GrowableByteArrayDataOutput(chunkSize);
- this.numStoredFields = new int[16];
- this.endOffsets = new int[16];
- this.numBufferedDocs = 0;
-
- boolean success = false;
- IndexOutput indexStream = directory.createOutput(IndexFileNames.segmentFileName(segment, segmentSuffix, FIELDS_INDEX_EXTENSION), context);
- try {
- fieldsStream = directory.createOutput(IndexFileNames.segmentFileName(segment, segmentSuffix, FIELDS_EXTENSION), context);
-
- final String codecNameIdx = formatName + CODEC_SFX_IDX;
- final String codecNameDat = formatName + CODEC_SFX_DAT;
- CodecUtil.writeHeader(indexStream, codecNameIdx, VERSION_CURRENT);
- CodecUtil.writeHeader(fieldsStream, codecNameDat, VERSION_CURRENT);
- assert CodecUtil.headerLength(codecNameDat) == fieldsStream.getFilePointer();
- assert CodecUtil.headerLength(codecNameIdx) == indexStream.getFilePointer();
-
- indexWriter = new CompressingStoredFieldsIndexWriter(indexStream);
- indexStream = null;
-
- fieldsStream.writeVInt(PackedInts.VERSION_CURRENT);
-
- success = true;
- } finally {
- if (!success) {
- IOUtils.closeWhileHandlingException(indexStream);
- abort();
- }
- }
- }
-
- @Override
- public void close() throws IOException {
- try {
- IOUtils.close(fieldsStream, indexWriter);
- } finally {
- fieldsStream = null;
- indexWriter = null;
- }
- }
-
- @Override
- public void startDocument(int numStoredFields) throws IOException {
- if (numBufferedDocs == this.numStoredFields.length) {
- final int newLength = ArrayUtil.oversize(numBufferedDocs + 1, 4);
- this.numStoredFields = Arrays.copyOf(this.numStoredFields, newLength);
- endOffsets = Arrays.copyOf(endOffsets, newLength);
- }
- this.numStoredFields[numBufferedDocs] = numStoredFields;
- ++numBufferedDocs;
- }
-
- @Override
- public void finishDocument() throws IOException {
- endOffsets[numBufferedDocs - 1] = bufferedDocs.length;
- if (triggerFlush()) {
- flush();
- }
- }
-
- private static void saveInts(int[] values, int length, DataOutput out) throws IOException {
- assert length > 0;
- if (length == 1) {
- out.writeVInt(values[0]);
- } else {
- boolean allEqual = true;
- for (int i = 1; i < length; ++i) {
- if (values[i] != values[0]) {
- allEqual = false;
- break;
- }
- }
- if (allEqual) {
- out.writeVInt(0);
- out.writeVInt(values[0]);
- } else {
- long max = 0;
- for (int i = 0; i < length; ++i) {
- max |= values[i];
+using Lucene.Net.Documents;
+using Lucene.Net.Index;
+using Lucene.Net.Store;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using Lucene.Net.Util.Packed;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+namespace Lucene.Net.Codecs.Compressing
+{
+ public sealed class CompressingStoredFieldsWriter : StoredFieldsWriter
+ {
+ static readonly int MAX_DOCUMENTS_PER_CHUNK = 128;
+ static readonly int STRING = 0x00;
+ static readonly int BYTE_ARR = 0x01;
+ static readonly int NUMERIC_INT = 0x02;
+ static readonly int NUMERIC_FLOAT = 0x03;
+ static readonly int NUMERIC_LONG = 0x04;
+ static readonly int NUMERIC_DOUBLE = 0x05;
+
+ static readonly int TYPE_BITS = PackedInts.BitsRequired(NUMERIC_DOUBLE);
+ static readonly int TYPE_MASK = (int)PackedInts.MaxValue(TYPE_BITS);
+
+ static readonly String CODEC_SFX_IDX = "Index";
+ static readonly String CODEC_SFX_DAT = "Data";
+ static readonly int VERSION_START = 0;
+ static readonly int VERSION_CURRENT = VERSION_START;
+
+ private Directory directory;
+ private string segment;
+ private string segmentSuffix;
+ private CompressingStoredFieldsIndexWriter indexWriter;
+ private IndexOutput fieldsStream;
+
+ private CompressionMode compressionMode;
+ private Compressor compressor;
+ private int chunkSize;
+
+ private GrowableByteArrayDataOutput bufferedDocs;
+ private int[] numStoredFields; // number of stored fields
+ private int[] endOffsets; // end offsets in bufferedDocs
+ private int docBase; // doc ID at the beginning of the chunk
+ private int numBufferedDocs; // docBase + numBufferedDocs == current doc ID
+
+ public CompressingStoredFieldsWriter(Directory directory, SegmentInfo si, string segmentSuffix, IOContext context, string formatName, CompressionMode compressionMode, int chunkSize)
+ {
+ this.directory = directory;
+ this.segment = si.name;
+ this.segmentSuffix = segmentSuffix;
+ this.compressionMode = compressionMode;
+ this.compressor = compressionMode.newCompressor();
+ this.chunkSize = chunkSize;
+ this.docBase = 0;
+ this.bufferedDocs = new GrowableByteArrayDataOutput(chunkSize);
+ this.numStoredFields = new int[16];
+ this.endOffsets = new int[16];
+ this.numBufferedDocs = 0;
+
+ bool success = false;
+ IndexOutput indexStream = directory.CreateOutput(IndexFileNames.SegmentFileName(segment, segmentSuffix, FIELDS_INDEX_EXTENSION), context);
+ try
+ {
+ fieldsStream = directory.CreateOutput(IndexFileNames.SegmentFileName(segment, segmentSuffix, FIELDS_EXTENSION), context);
+
+ string codecNameIdx = formatName + CODEC_SFX_IDX;
+ string codecNameDat = formatName + CODEC_SFX_DAT;
+ CodecUtil.WriteHeader(indexStream, codecNameIdx, VERSION_CURRENT);
+ CodecUtil.WriteHeader(fieldsStream, codecNameDat, VERSION_CURRENT);
+
+ indexWriter = new CompressingStoredFieldsIndexWriter(indexStream);
+ indexStream = null;
+
+ fieldsStream.WriteVInt(PackedInts.VERSION_CURRENT);
+
+ success = true;
+ }
+ finally
+ {
+ if (!success) {
+ IOUtils.CloseWhileHandlingException(indexStream);
+ Abort();
+ }
+ }
}
- final int bitsRequired = PackedInts.bitsRequired(max);
- out.writeVInt(bitsRequired);
- final PackedInts.Writer w = PackedInts.getWriterNoHeader(out, PackedInts.Format.PACKED, length, bitsRequired, 1);
- for (int i = 0; i < length; ++i) {
- w.add(values[i]);
+
+ public override void Close()
+ {
+ try
+ {
+ IOUtils.Close(fieldsStream, indexWriter);
+ }
+ finally
+ {
+ fieldsStream = null;
+ indexWriter = null;
+ }
}
- w.finish();
- }
- }
- }
- private void writeHeader(int docBase, int numBufferedDocs, int[] numStoredFields, int[] lengths) throws IOException {
- // save docBase and numBufferedDocs
- fieldsStream.writeVInt(docBase);
- fieldsStream.writeVInt(numBufferedDocs);
+ public override void StartDocument(int numStoredFields)
+ {
+ if (numBufferedDocs == this.numStoredFields.Length)
+ {
+ int newLength = ArrayUtil.Oversize(numBufferedDocs + 1, 4);
+ this.numStoredFields = Arrays.CopyOf(this.numStoredFields, newLength);
+ endOffsets = Arrays.CopyOf(endOffsets, newLength);
+ }
+ this.numStoredFields[numBufferedDocs] = numStoredFields;
+ ++numBufferedDocs;
+ }
- // save numStoredFields
- saveInts(numStoredFields, numBufferedDocs, fieldsStream);
+ public override void FinishDocument()
+ {
+ endOffsets[numBufferedDocs - 1] = bufferedDocs.Length;
+ if (TriggerFlush())
+ {
+ Flush();
+ }
+ }
- // save lengths
- saveInts(lengths, numBufferedDocs, fieldsStream);
- }
+ private static void saveInts(int[] values, int length, DataOutput output)
+ {
+ if (length == 1)
+ {
+ output.WriteVInt(values[0]);
+ }
+ else
+ {
+ bool allEqual = true;
+ for (int i = 1; i < length; ++i) {
+ if (values[i] != values[0]) {
+ allEqual = false;
+ break;
+ }
+ }
+ if (allEqual) {
+ output.WriteVInt(0);
+ output.WriteVInt(values[0]);
+ }
+ else
+ {
+ long max = 0;
+ for (int i = 0; i < length; ++i) {
+ max |= values[i];
+ }
+ int bitsRequired = PackedInts.BitsRequired(max);
+ output.WriteVInt(bitsRequired);
+ PackedInts.Writer w = PackedInts.GetWriterNoHeader(output, PackedInts.Format.PACKED, length, bitsRequired, 1);
+ for (int i = 0; i < length; ++i) {
+ w.Add(values[i]);
+ }
+ w.Finish();
+ }
+ }
+ }
- private boolean triggerFlush() {
- return bufferedDocs.length >= chunkSize || // chunks of at least chunkSize bytes
- numBufferedDocs >= MAX_DOCUMENTS_PER_CHUNK;
- }
+ private void WriteHeader(int docBase, int numBufferedDocs, int[] numStoredFields, int[] lengths)
+ {
+ // save docBase and numBufferedDocs
+ fieldsStream.WriteVInt(docBase);
+ fieldsStream.WriteVInt(numBufferedDocs);
- private void flush() throws IOException {
- indexWriter.writeIndex(numBufferedDocs, fieldsStream.getFilePointer());
+ // save numStoredFields
+ saveInts(numStoredFields, numBufferedDocs, fieldsStream);
- // transform end offsets into lengths
- final int[] lengths = endOffsets;
- for (int i = numBufferedDocs - 1; i > 0; --i) {
- lengths[i] = endOffsets[i] - endOffsets[i - 1];
- assert lengths[i] >= 0;
- }
- writeHeader(docBase, numBufferedDocs, numStoredFields, lengths);
-
- // compress stored fields to fieldsStream
- compressor.compress(bufferedDocs.bytes, 0, bufferedDocs.length, fieldsStream);
-
- // reset
- docBase += numBufferedDocs;
- numBufferedDocs = 0;
- bufferedDocs.length = 0;
- }
-
- @Override
- public void writeField(FieldInfo info, IndexableField field)
- throws IOException {
- int bits = 0;
- final BytesRef bytes;
- final String string;
-
- Number number = field.numericValue();
- if (number != null) {
- if (number instanceof Byte || number instanceof Short || number instanceof Integer) {
- bits = NUMERIC_INT;
- } else if (number instanceof Long) {
- bits = NUMERIC_LONG;
- } else if (number instanceof Float) {
- bits = NUMERIC_FLOAT;
- } else if (number instanceof Double) {
- bits = NUMERIC_DOUBLE;
- } else {
- throw new IllegalArgumentException("cannot store numeric type " + number.getClass());
- }
- string = null;
- bytes = null;
- } else {
- bytes = field.binaryValue();
- if (bytes != null) {
- bits = BYTE_ARR;
- string = null;
- } else {
- bits = STRING;
- string = field.stringValue();
- if (string == null) {
- throw new IllegalArgumentException("field " + field.name() + " is stored but does not have binaryValue, stringValue nor numericValue");
+ // save lengths
+ saveInts(lengths, numBufferedDocs, fieldsStream);
}
- }
- }
- final long infoAndBits = (((long) info.number) << TYPE_BITS) | bits;
- bufferedDocs.writeVLong(infoAndBits);
-
- if (bytes != null) {
- bufferedDocs.writeVInt(bytes.length);
- bufferedDocs.writeBytes(bytes.bytes, bytes.offset, bytes.length);
- } else if (string != null) {
- bufferedDocs.writeString(field.stringValue());
- } else {
- if (number instanceof Byte || number instanceof Short || number instanceof Integer) {
- bufferedDocs.writeInt(number.intValue());
- } else if (number instanceof Long) {
- bufferedDocs.writeLong(number.longValue());
- } else if (number instanceof Float) {
- bufferedDocs.writeInt(Float.floatToIntBits(number.floatValue()));
- } else if (number instanceof Double) {
- bufferedDocs.writeLong(Double.doubleToLongBits(number.doubleValue()));
- } else {
- throw new AssertionError("Cannot get here");
- }
- }
- }
-
- @Override
- public void abort() {
- IOUtils.closeWhileHandlingException(this);
- IOUtils.deleteFilesIgnoringExceptions(directory,
- IndexFileNames.segmentFileName(segment, segmentSuffix, FIELDS_EXTENSION),
- IndexFileNames.segmentFileName(segment, segmentSuffix, FIELDS_INDEX_EXTENSION));
- }
-
- @Override
- public void finish(FieldInfos fis, int numDocs) throws IOException {
- if (numBufferedDocs > 0) {
- flush();
- } else {
- assert bufferedDocs.length == 0;
- }
- if (docBase != numDocs) {
- throw new RuntimeException("Wrote " + docBase + " docs, finish called with numDocs=" + numDocs);
- }
- indexWriter.finish(numDocs);
- assert bufferedDocs.length == 0;
- }
-
- @Override
- public int merge(MergeState mergeState) throws IOException {
- int docCount = 0;
- int idx = 0;
-
- for (AtomicReader reader : mergeState.readers) {
- final SegmentReader matchingSegmentReader = mergeState.matchingSegmentReaders[idx++];
- CompressingStoredFieldsReader matchingFieldsReader = null;
- if (matchingSegmentReader != null) {
- final StoredFieldsReader fieldsReader = matchingSegmentReader.getFieldsReader();
- // we can only bulk-copy if the matching reader is also a CompressingStoredFieldsReader
- if (fieldsReader != null && fieldsReader instanceof CompressingStoredFieldsReader) {
- matchingFieldsReader = (CompressingStoredFieldsReader) fieldsReader;
+ private bool TriggerFlush()
+ {
+ return bufferedDocs.Length >= chunkSize || // chunks of at least chunkSize bytes
+ numBufferedDocs >= MAX_DOCUMENTS_PER_CHUNK;
}
- }
-
- final int maxDoc = reader.maxDoc();
- final Bits liveDocs = reader.getLiveDocs();
-
- if (matchingFieldsReader == null) {
- // naive merge...
- for (int i = nextLiveDoc(0, liveDocs, maxDoc); i < maxDoc; i = nextLiveDoc(i + 1, liveDocs, maxDoc)) {
- Document doc = reader.document(i);
- addDocument(doc, mergeState.fieldInfos);
- ++docCount;
- mergeState.checkAbort.work(300);
+
+ private void Flush()
+ {
+ indexWriter.WriteIndex(numBufferedDocs, fieldsStream.FilePointer);
+
+ // transform end offsets into lengths
+ int[] lengths = endOffsets;
+ for (int i = numBufferedDocs - 1; i > 0; --i)
+ {
+ lengths[i] = endOffsets[i] - endOffsets[i - 1];
+ }
+
+ WriteHeader(docBase, numBufferedDocs, numStoredFields, lengths);
+
+ // compress stored fields to fieldsStream
+ compressor.Compress(bufferedDocs.Bytes, 0, bufferedDocs.Length, fieldsStream);
+
+ // reset
+ docBase += numBufferedDocs;
+ numBufferedDocs = 0;
+ bufferedDocs.Length = 0;
}
- } else {
- int docID = nextLiveDoc(0, liveDocs, maxDoc);
- if (docID < maxDoc) {
- // not all docs were deleted
- final ChunkIterator it = matchingFieldsReader.chunkIterator(docID);
- int[] startOffsets = new int[0];
- do {
- // go to the next chunk that contains docID
- it.next(docID);
- // transform lengths into offsets
- if (startOffsets.length < it.chunkDocs) {
- startOffsets = new int[ArrayUtil.oversize(it.chunkDocs, 4)];
+
+ public override void WriteField(FieldInfo info, IndexableField field)
+ {
+ int bits = 0;
+ BytesRef bytes;
+ string str;
+
+ object number = field.numericValue();
+ if (number != null) {
+ if (number is byte || number is short || number is int) {
+ bits = NUMERIC_INT;
+ } else if (number is long) {
+ bits = NUMERIC_LONG;
+ } else if (number is float) {
+ bits = NUMERIC_FLOAT;
+ } else if (number is double) {
+ bits = NUMERIC_DOUBLE;
+ } else {
+ throw new ArgumentException("cannot store numeric type " + number.GetType());
+ }
+ str = null;
+ bytes = null;
+ } else {
+ bytes = field.binaryValue();
+ if (bytes != null) {
+ bits = BYTE_ARR;
+ str = null;
+ } else {
+ bits = STRING;
+ str = field.stringValue();
+ if (str == null) {
+ throw new ArgumentException("field " + field.name() + " is stored but does not have binaryValue, stringValue nor numericValue");
+ }
}
- for (int i = 1; i < it.chunkDocs; ++i) {
- startOffsets[i] = startOffsets[i - 1] + it.lengths[i - 1];
+ }
+
+ long infoAndBits = (((long) info.number) << TYPE_BITS) | bits;
+ bufferedDocs.WriteVLong(infoAndBits);
+
+ if (bytes != null) {
+ bufferedDocs.WriteVInt(bytes.length);
+ bufferedDocs.WriteBytes(bytes.bytes, bytes.offset, bytes.length);
+ } else if (str != null) {
+ bufferedDocs.WriteString(field.stringValue());
+ } else {
+ if (number is byte || number is short || number is int) {
+ bufferedDocs.WriteInt(Convert.ToInt32(number));
+ } else if (number is long) {
+ bufferedDocs.WriteLong((long)number);
+ } else if (number is float) {
+ bufferedDocs.WriteInt(BitConverter.ToInt32(BitConverter.GetBytes((float)number), 0));
+ } else if (number is double) {
+ bufferedDocs.WriteLong(BitConverter.DoubleToInt64Bits((double)number));
+ } else {
+ throw new InvalidOperationException("Cannot get here");
}
+ }
+ }
- if (compressionMode == matchingFieldsReader.getCompressionMode() // same compression mode
- && numBufferedDocs == 0 // starting a new chunk
- && startOffsets[it.chunkDocs - 1] < chunkSize // chunk is small enough
- && startOffsets[it.chunkDocs - 1] + it.lengths[it.chunkDocs - 1] >= chunkSize // chunk is large enough
- && nextDeletedDoc(it.docBase, liveDocs, it.docBase + it.chunkDocs) == it.docBase + it.chunkDocs) { // no deletion in the chunk
- assert docID == it.docBase;
-
- // no need to decompress, just copy data
- indexWriter.writeIndex(it.chunkDocs, fieldsStream.getFilePointer());
- writeHeader(this.docBase, it.chunkDocs, it.numStoredFields, it.lengths);
- it.copyCompressedData(fieldsStream);
- this.docBase += it.chunkDocs;
- docID = nextLiveDoc(it.docBase + it.chunkDocs, liveDocs, maxDoc);
- docCount += it.chunkDocs;
- mergeState.checkAbort.work(300 * it.chunkDocs);
- } else {
- // decompress
- it.decompress();
- if (startOffsets[it.chunkDocs - 1] + it.lengths[it.chunkDocs - 1] != it.bytes.length) {
- throw new CorruptIndexException("Corrupted: expected chunk size=" + startOffsets[it.chunkDocs - 1] + it.lengths[it.chunkDocs - 1] + ", got " + it.bytes.length);
+ public override void Abort() {
+ IOUtils.CloseWhileHandlingException(this);
+ IOUtils.DeleteFilesIgnoringExceptions(directory,
+ IndexFileNames.SegmentFileName(segment, segmentSuffix, FIELDS_EXTENSION),
+ IndexFileNames.SegmentFileName(segment, segmentSuffix, FIELDS_INDEX_EXTENSION));
+ }
+
+ public override void Finish(FieldInfos fis, int numDocs)
+ {
+ if (numBufferedDocs > 0) {
+ Flush();
+ } else {
+ //assert bufferedDocs.length == 0;
+ }
+ if (docBase != numDocs) {
+ throw new InvalidOperationException("Wrote " + docBase + " docs, finish called with numDocs=" + numDocs);
+ }
+ indexWriter.Finish(numDocs);
+ }
+
+ public override int Merge(MergeState mergeState)
+ {
+ int docCount = 0;
+ int idx = 0;
+
+ foreach (AtomicReader reader in mergeState.readers)
+ {
+ SegmentReader matchingSegmentReader = mergeState.matchingSegmentReaders[idx++];
+ CompressingStoredFieldsReader matchingFieldsReader = null;
+ if (matchingSegmentReader != null)
+ {
+ StoredFieldsReader fieldsReader = matchingSegmentReader.FieldsReader;
+ // we can only bulk-copy if the matching reader is also a CompressingStoredFieldsReader
+ if (fieldsReader != null && fieldsReader is CompressingStoredFieldsReader)
+ {
+ matchingFieldsReader = (CompressingStoredFieldsReader) fieldsReader;
}
- // copy non-deleted docs
- for (; docID < it.docBase + it.chunkDocs; docID = nextLiveDoc(docID + 1, liveDocs, maxDoc)) {
- final int diff = docID - it.docBase;
- startDocument(it.numStoredFields[diff]);
- bufferedDocs.writeBytes(it.bytes.bytes, it.bytes.offset + startOffsets[diff], it.lengths[diff]);
- finishDocument();
+ }
+
+ int maxDoc = reader.MaxDoc;
+ IBits liveDocs = reader.LiveDocs;
+
+ if (matchingFieldsReader == null) {
+ // naive merge...
+ for (int i = NextLiveDoc(0, liveDocs, maxDoc); i < maxDoc; i = NextLiveDoc(i + 1, liveDocs, maxDoc)) {
+ Document doc = reader.Document(i);
+ AddDocument(doc, mergeState.fieldInfos);
++docCount;
- mergeState.checkAbort.work(300);
+ mergeState.checkAbort.Work(300);
+ }
+ } else {
+ int docID = NextLiveDoc(0, liveDocs, maxDoc);
+ if (docID < maxDoc) {
+ // not all docs were deleted
+ ChunkIterator it = matchingFieldsReader.ChunkIterator(docID);
+ int[] startOffsets = new int[0];
+ do {
+ // go to the next chunk that contains docID
+ it.next(docID);
+ // transform lengths into offsets
+ if (startOffsets.Length < it.chunkDocs) {
+ startOffsets = new int[ArrayUtil.Oversize(it.chunkDocs, 4)];
+ }
+ for (int i = 1; i < it.chunkDocs; ++i) {
+ startOffsets[i] = startOffsets[i - 1] + it.lengths[i - 1];
+ }
+
+ if (compressionMode == matchingFieldsReader.getCompressionMode() // same compression mode
+ && numBufferedDocs == 0 // starting a new chunk
+ && startOffsets[it.chunkDocs - 1] < chunkSize // chunk is small enough
+ && startOffsets[it.chunkDocs - 1] + it.lengths[it.chunkDocs - 1] >= chunkSize // chunk is large enough
+ && nextDeletedDoc(it.docBase, liveDocs, it.docBase + it.chunkDocs) == it.docBase + it.chunkDocs) { // no deletion in the chunk
+
+ // no need to decompress, just copy data
+ indexWriter.WriteIndex(it.chunkDocs, fieldsStream.FilePointer);
+ WriteHeader(this.docBase, it.chunkDocs, it.numStoredFields, it.lengths);
+ it.copyCompressedData(fieldsStream);
+ this.docBase += it.chunkDocs;
+ docID = NextLiveDoc(it.docBase + it.chunkDocs, liveDocs, maxDoc);
+ docCount += it.chunkDocs;
+ mergeState.checkAbort.Work(300 * it.chunkDocs);
+ } else {
+ // decompress
+ it.decompress();
+ if (startOffsets[it.chunkDocs - 1] + it.lengths[it.chunkDocs - 1] != it.bytes.length) {
+ throw new CorruptIndexException("Corrupted: expected chunk size=" + startOffsets[it.chunkDocs - 1] + it.lengths[it.chunkDocs - 1] + ", got " + it.bytes.length);
+ }
+ // copy non-deleted docs
+ for (; docID < it.docBase + it.chunkDocs; docID = NextLiveDoc(docID + 1, liveDocs, maxDoc)) {
+ int diff = docID - it.docBase;
+ StartDocument(it.numStoredFields[diff]);
+ bufferedDocs.WriteBytes(it.bytes.bytes, it.bytes.offset + startOffsets[diff], it.lengths[diff]);
+ FinishDocument();
+ ++docCount;
+ mergeState.checkAbort.Work(300);
+ }
+ }
+ } while (docID < maxDoc);
}
}
- } while (docID < maxDoc);
+ }
+
+ Finish(mergeState.fieldInfos, docCount);
+ return docCount;
}
- }
- }
- finish(mergeState.fieldInfos, docCount);
- return docCount;
- }
- private static int nextLiveDoc(int doc, Bits liveDocs, int maxDoc) {
- if (liveDocs == null) {
- return doc;
- }
- while (doc < maxDoc && !liveDocs.get(doc)) {
- ++doc;
- }
- return doc;
- }
+ private static int NextLiveDoc(int doc, IBits liveDocs, int maxDoc)
+ {
+ if (liveDocs == null)
+ {
+ return doc;
+ }
+ while (doc < maxDoc && !liveDocs[doc])
+ {
+ ++doc;
+ }
+ return doc;
+ }
- private static int nextDeletedDoc(int doc, Bits liveDocs, int maxDoc) {
- if (liveDocs == null) {
- return maxDoc;
- }
- while (doc < maxDoc && liveDocs.get(doc)) {
- ++doc;
- }
- return doc;
- }
+ private static int nextDeletedDoc(int doc, IBits liveDocs, int maxDoc)
+ {
+ if (liveDocs == null)
+ {
+ return maxDoc;
+ }
+ while (doc < maxDoc && liveDocs[doc])
+ {
+ ++doc;
+ }
+ return doc;
+ }
+ }
}
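Note on the chunk header written above: saveInts picks one of three encodings for the per-document field counts and lengths — a single vint when there is one value, a 0 marker plus the value when all values are equal, otherwise the minimum bit width followed by the values packed with PackedInts.GetWriterNoHeader. A standalone C# sketch of just that decision follows; it uses no Lucene.NET types and only describes what would be written.

using System;

static class SaveIntsSketch
{
    // Mirrors the three cases of saveInts in the writer above; purely illustrative.
    static string Describe(int[] values)
    {
        if (values.Length == 1)
            return "single vint " + values[0];

        bool allEqual = true;
        for (int i = 1; i < values.Length; i++)
        {
            if (values[i] != values[0]) { allEqual = false; break; }
        }
        if (allEqual)
            return "marker 0, then vint " + values[0];

        long max = 0;
        foreach (int v in values)
            max |= (uint)v;                         // OR of the values bounds the needed bit width
        int bits = 0;
        while (max != 0) { bits++; max >>= 1; }
        return bits + " bits per value, packed with no header";
    }

    static void Main()
    {
        Console.WriteLine(Describe(new[] { 7 }));          // single vint 7
        Console.WriteLine(Describe(new[] { 3, 3, 3 }));    // marker 0, then vint 3
        Console.WriteLine(Describe(new[] { 1, 200, 50 })); // 8 bits per value, packed with no header
    }
}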
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/64c13f3c/src/core/Codecs/Compressing/Compressor.cs
----------------------------------------------------------------------
diff --git a/src/core/Codecs/Compressing/Compressor.cs b/src/core/Codecs/Compressing/Compressor.cs
index 48fdb74..00c0053 100644
--- a/src/core/Codecs/Compressing/Compressor.cs
+++ b/src/core/Codecs/Compressing/Compressor.cs
@@ -32,7 +32,7 @@ namespace Lucene.Net.Codecs.Compressing
* compressor to add all necessary information so that a {@link Decompressor}
* will know when to stop decompressing bytes from the stream.
*/
- public abstract void Compress(byte[] bytes, int off, int len, DataOutput output);
+ public abstract void Compress(sbyte[] bytes, int off, int len, DataOutput output);
}
}
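Note on the contract stated in the comment above: Compress must write whatever framing the matching Decompressor needs in order to know where the compressed block ends. A minimal sketch of one such scheme follows, using plain .NET streams rather than the Lucene.NET Compressor/DataOutput types; the 4-byte length prefix here merely stands in for whatever the real CompressionMode implementations write.

using System;
using System.IO;
using System.IO.Compression;   // System.IO.Compression.CompressionMode, not the Lucene class

static class FramingSketch
{
    // Frame: 4-byte original length, then the deflated payload.
    static byte[] Compress(byte[] input)
    {
        using (var ms = new MemoryStream())
        {
            ms.Write(BitConverter.GetBytes(input.Length), 0, 4);
            using (var deflate = new DeflateStream(ms, CompressionMode.Compress, leaveOpen: true))
            {
                deflate.Write(input, 0, input.Length);
            }
            return ms.ToArray();
        }
    }

    // The decompressor reads the length first, so it knows exactly when to stop.
    static byte[] Decompress(byte[] framed)
    {
        using (var ms = new MemoryStream(framed))
        {
            var lenBuf = new byte[4];
            ms.Read(lenBuf, 0, 4);
            var result = new byte[BitConverter.ToInt32(lenBuf, 0)];
            using (var inflate = new DeflateStream(ms, CompressionMode.Decompress))
            {
                int off = 0, n;
                while (off < result.Length && (n = inflate.Read(result, off, result.Length - off)) > 0)
                {
                    off += n;
                }
            }
            return result;
        }
    }
}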
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/64c13f3c/src/core/Codecs/Compressing/GrowableByteArrayDataOutput.cs
----------------------------------------------------------------------
diff --git a/src/core/Codecs/Compressing/GrowableByteArrayDataOutput.cs b/src/core/Codecs/Compressing/GrowableByteArrayDataOutput.cs
index a0b8eba..d6b873d 100644
--- a/src/core/Codecs/Compressing/GrowableByteArrayDataOutput.cs
+++ b/src/core/Codecs/Compressing/GrowableByteArrayDataOutput.cs
@@ -25,13 +25,37 @@ namespace Lucene.Net.Codecs.Compressing
{
internal sealed class GrowableByteArrayDataOutput : DataOutput
{
- sbyte[] bytes;
- int length;
+ private sbyte[] _bytes;
+ private int _length;
- GrowableByteArrayDataOutput(int cp)
+ public GrowableByteArrayDataOutput(int cp)
{
- this.bytes = new sbyte[ArrayUtil.Oversize(cp, 1)];
- this.length = 0;
+ Bytes = new sbyte[ArrayUtil.Oversize(cp, 1)];
+ Length = 0;
+ }
+
+ public sbyte[] Bytes
+ {
+ get
+ {
+ return _bytes;
+ }
+ set
+ {
+ _bytes = value;
+ }
+ }
+
+ public int Length
+ {
+ get
+ {
+ return _length;
+ }
+ set
+ {
+ _length = value;
+ }
}
public override void WriteByte(byte b)
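Note: GrowableByteArrayDataOutput is just a DataOutput over an array that grows on demand; the constructor above hints at this by oversizing the initial allocation with ArrayUtil.Oversize. A standalone sketch of the growth pattern such a buffer relies on (plain doubling here, purely illustrative, not the class above):

using System;

sealed class GrowableBufferSketch
{
    public byte[] Bytes = new byte[16];
    public int Length;

    public void WriteByte(byte b)
    {
        if (Length == Bytes.Length)
        {
            // geometric growth keeps repeated writes amortized O(1);
            // the real class uses ArrayUtil.Oversize instead of doubling
            Array.Resize(ref Bytes, Bytes.Length * 2);
        }
        Bytes[Length++] = b;
    }
}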
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/64c13f3c/src/core/Lucene.Net.csproj
----------------------------------------------------------------------
diff --git a/src/core/Lucene.Net.csproj b/src/core/Lucene.Net.csproj
index ce097bb..85f9818 100644
--- a/src/core/Lucene.Net.csproj
+++ b/src/core/Lucene.Net.csproj
@@ -187,7 +187,9 @@
<Compile Include="Codecs\Codec.cs" />
<Compile Include="Codecs\CodecUtil.cs" />
<Compile Include="Codecs\Compressing\CompressingStoredFieldsFormat.cs" />
+ <Compile Include="Codecs\Compressing\CompressingStoredFieldsIndexReader.cs" />
<Compile Include="Codecs\Compressing\CompressingStoredFieldsReader.cs" />
+ <Compile Include="Codecs\Compressing\CompressingStoredFieldsWriter.cs" />
<Compile Include="Codecs\Compressing\CompressionMode.cs" />
<Compile Include="Codecs\Compressing\Compressor.cs" />
<Compile Include="Codecs\Compressing\Decompressor.cs" />