You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by pn...@apache.org on 2014/09/28 10:50:09 UTC
[05/10] Lucene.Net.Codecs/Sep fully ported,
work done on SimpleText and Memory as well
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/d852d5b0/src/Lucene.Net.Codecs/Memory/MemoryDocValuesConsumer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Codecs/Memory/MemoryDocValuesConsumer.cs b/src/Lucene.Net.Codecs/Memory/MemoryDocValuesConsumer.cs
index 285c243..26ee255 100644
--- a/src/Lucene.Net.Codecs/Memory/MemoryDocValuesConsumer.cs
+++ b/src/Lucene.Net.Codecs/Memory/MemoryDocValuesConsumer.cs
@@ -1,408 +1,548 @@
-package codecs.memory;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.NoSuchElementException;
-
-import codecs.CodecUtil;
-import codecs.DocValuesConsumer;
-import index.FieldInfo;
-import index.IndexFileNames;
-import index.SegmentWriteState;
-import store.ByteArrayDataOutput;
-import store.IndexOutput;
-import util.ArrayUtil;
-import util.BytesRef;
-import util.IOUtils;
-import util.IntsRef;
-import util.MathUtil;
-import util.fst.Builder;
-import util.fst.FST.INPUT_TYPE;
-import util.fst.FST;
-import util.fst.PositiveIntOutputs;
-import util.fst.Util;
-import util.packed.BlockPackedWriter;
-import util.packed.MonotonicBlockPackedWriter;
-import util.packed.PackedInts.FormatAndBits;
-import util.packed.PackedInts;
-
-import static codecs.memory.MemoryDocValuesProducer.VERSION_CURRENT;
-import static codecs.memory.MemoryDocValuesProducer.BLOCK_SIZE;
-import static codecs.memory.MemoryDocValuesProducer.BYTES;
-import static codecs.memory.MemoryDocValuesProducer.NUMBER;
-import static codecs.memory.MemoryDocValuesProducer.FST;
-import static codecs.memory.MemoryDocValuesProducer.DELTA_COMPRESSED;
-import static codecs.memory.MemoryDocValuesProducer.GCD_COMPRESSED;
-import static codecs.memory.MemoryDocValuesProducer.TABLE_COMPRESSED;
-import static codecs.memory.MemoryDocValuesProducer.UNCOMPRESSED;
-
-/**
- * Writer for {@link MemoryDocValuesFormat}
- */
-class MemoryDocValuesConsumer extends DocValuesConsumer {
- IndexOutput data, meta;
- final int maxDoc;
- final float acceptableOverheadRatio;
-
- MemoryDocValuesConsumer(SegmentWriteState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension, float acceptableOverheadRatio) {
- this.acceptableOverheadRatio = acceptableOverheadRatio;
- maxDoc = state.segmentInfo.getDocCount();
- bool success = false;
- try {
- String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension);
- data = state.directory.createOutput(dataName, state.context);
- CodecUtil.writeHeader(data, dataCodec, VERSION_CURRENT);
- String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension);
- meta = state.directory.createOutput(metaName, state.context);
- CodecUtil.writeHeader(meta, metaCodec, VERSION_CURRENT);
- success = true;
- } finally {
- if (!success) {
- IOUtils.closeWhileHandlingException(this);
- }
- }
- }
-
- @Override
- public void addNumericField(FieldInfo field, Iterable<Number> values) {
- addNumericField(field, values, true);
- }
-
- void addNumericField(FieldInfo field, Iterable<Number> values, bool optimizeStorage) {
- meta.writeVInt(field.number);
- meta.writeByte(NUMBER);
- meta.writeLong(data.getFilePointer());
- long minValue = Long.MAX_VALUE;
- long maxValue = Long.MIN_VALUE;
- long gcd = 0;
- bool missing = false;
- // TODO: more efficient?
- HashSet<Long> uniqueValues = null;
- if (optimizeStorage) {
- uniqueValues = new HashSet<>();
-
- long count = 0;
- for (Number nv : values) {
- final long v;
- if (nv == null) {
- v = 0;
- missing = true;
- } else {
- v = nv.longValue();
- }
-
- if (gcd != 1) {
- if (v < Long.MIN_VALUE / 2 || v > Long.MAX_VALUE / 2) {
- // in that case v - minValue might overflow and make the GCD computation return
- // wrong results. Since these extreme values are unlikely, we just discard
- // GCD computation for them
- gcd = 1;
- } else if (count != 0) { // minValue needs to be set first
- gcd = MathUtil.gcd(gcd, v - minValue);
- }
- }
-
- minValue = Math.min(minValue, v);
- maxValue = Math.max(maxValue, v);
-
- if (uniqueValues != null) {
- if (uniqueValues.add(v)) {
- if (uniqueValues.size() > 256) {
- uniqueValues = null;
- }
- }
- }
-
- ++count;
- }
- Debug.Assert( count == maxDoc;
- }
-
- if (missing) {
- long start = data.getFilePointer();
- writeMissingBitset(values);
- meta.writeLong(start);
- meta.writeLong(data.getFilePointer() - start);
- } else {
- meta.writeLong(-1L);
- }
-
- if (uniqueValues != null) {
- // small number of unique values
- final int bitsPerValue = PackedInts.bitsRequired(uniqueValues.size()-1);
- FormatAndBits formatAndBits = PackedInts.fastestFormatAndBits(maxDoc, bitsPerValue, acceptableOverheadRatio);
- if (formatAndBits.bitsPerValue == 8 && minValue >= Byte.MIN_VALUE && maxValue <= Byte.MAX_VALUE) {
- meta.writeByte(UNCOMPRESSED); // uncompressed
- for (Number nv : values) {
- data.writeByte(nv == null ? 0 : (byte) nv.longValue());
- }
- } else {
- meta.writeByte(TABLE_COMPRESSED); // table-compressed
- Long[] decode = uniqueValues.toArray(new Long[uniqueValues.size()]);
- final HashMap<Long,Integer> encode = new HashMap<>();
- data.writeVInt(decode.length);
- for (int i = 0; i < decode.length; i++) {
- data.writeLong(decode[i]);
- encode.put(decode[i], i);
- }
-
- meta.writeVInt(PackedInts.VERSION_CURRENT);
- data.writeVInt(formatAndBits.format.getId());
- data.writeVInt(formatAndBits.bitsPerValue);
-
- final PackedInts.Writer writer = PackedInts.getWriterNoHeader(data, formatAndBits.format, maxDoc, formatAndBits.bitsPerValue, PackedInts.DEFAULT_BUFFER_SIZE);
- for(Number nv : values) {
- writer.add(encode.get(nv == null ? 0 : nv.longValue()));
- }
- writer.finish();
- }
- } else if (gcd != 0 && gcd != 1) {
- meta.writeByte(GCD_COMPRESSED);
- meta.writeVInt(PackedInts.VERSION_CURRENT);
- data.writeLong(minValue);
- data.writeLong(gcd);
- data.writeVInt(BLOCK_SIZE);
-
- final BlockPackedWriter writer = new BlockPackedWriter(data, BLOCK_SIZE);
- for (Number nv : values) {
- long value = nv == null ? 0 : nv.longValue();
- writer.add((value - minValue) / gcd);
- }
- writer.finish();
- } else {
- meta.writeByte(DELTA_COMPRESSED); // delta-compressed
-
- meta.writeVInt(PackedInts.VERSION_CURRENT);
- data.writeVInt(BLOCK_SIZE);
-
- final BlockPackedWriter writer = new BlockPackedWriter(data, BLOCK_SIZE);
- for (Number nv : values) {
- writer.add(nv == null ? 0 : nv.longValue());
- }
- writer.finish();
- }
- }
-
- @Override
- public void close() {
- bool success = false;
- try {
- if (meta != null) {
- meta.writeVInt(-1); // write EOF marker
- CodecUtil.writeFooter(meta); // write checksum
- }
- if (data != null) {
- CodecUtil.writeFooter(data);
- }
- success = true;
- } finally {
- if (success) {
- IOUtils.close(data, meta);
- } else {
- IOUtils.closeWhileHandlingException(data, meta);
- }
- data = meta = null;
- }
- }
-
- @Override
- public void addBinaryField(FieldInfo field, final Iterable<BytesRef> values) {
- // write the byte[] data
- meta.writeVInt(field.number);
- meta.writeByte(BYTES);
- int minLength = Integer.MAX_VALUE;
- int maxLength = Integer.MIN_VALUE;
- final long startFP = data.getFilePointer();
- bool missing = false;
- for(BytesRef v : values) {
- final int length;
- if (v == null) {
- length = 0;
- missing = true;
- } else {
- length = v.length;
- }
- if (length > MemoryDocValuesFormat.MAX_BINARY_FIELD_LENGTH) {
- throw new IllegalArgumentException("DocValuesField \"" + field.name + "\" is too large, must be <= " + MemoryDocValuesFormat.MAX_BINARY_FIELD_LENGTH);
- }
- minLength = Math.min(minLength, length);
- maxLength = Math.max(maxLength, length);
- if (v != null) {
- data.writeBytes(v.bytes, v.offset, v.length);
- }
- }
- meta.writeLong(startFP);
- meta.writeLong(data.getFilePointer() - startFP);
- if (missing) {
- long start = data.getFilePointer();
- writeMissingBitset(values);
- meta.writeLong(start);
- meta.writeLong(data.getFilePointer() - start);
- } else {
- meta.writeLong(-1L);
- }
- meta.writeVInt(minLength);
- meta.writeVInt(maxLength);
-
- // if minLength == maxLength, its a fixed-length byte[], we are done (the addresses are implicit)
- // otherwise, we need to record the length fields...
- if (minLength != maxLength) {
- meta.writeVInt(PackedInts.VERSION_CURRENT);
- meta.writeVInt(BLOCK_SIZE);
-
- final MonotonicBlockPackedWriter writer = new MonotonicBlockPackedWriter(data, BLOCK_SIZE);
- long addr = 0;
- for (BytesRef v : values) {
- if (v != null) {
- addr += v.length;
- }
- writer.add(addr);
- }
- writer.finish();
- }
- }
-
- private void writeFST(FieldInfo field, Iterable<BytesRef> values) {
- meta.writeVInt(field.number);
- meta.writeByte(FST);
- meta.writeLong(data.getFilePointer());
- PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
- Builder<Long> builder = new Builder<>(INPUT_TYPE.BYTE1, outputs);
- IntsRef scratch = new IntsRef();
- long ord = 0;
- for (BytesRef v : values) {
- builder.add(Util.toIntsRef(v, scratch), ord);
- ord++;
- }
- FST<Long> fst = builder.finish();
- if (fst != null) {
- fst.save(data);
- }
- meta.writeVLong(ord);
- }
-
- // TODO: in some cases representing missing with minValue-1 wouldn't take up additional space and so on,
- // but this is very simple, and algorithms only check this for values of 0 anyway (doesnt slow down normal decode)
- void writeMissingBitset(Iterable<?> values) {
- long bits = 0;
- int count = 0;
- for (Object v : values) {
- if (count == 64) {
- data.writeLong(bits);
- count = 0;
- bits = 0;
- }
- if (v != null) {
- bits |= 1L << (count & 0x3f);
- }
- count++;
- }
- if (count > 0) {
- data.writeLong(bits);
- }
- }
-
- @Override
- public void addSortedField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrd) {
- // write the ordinals as numerics
- addNumericField(field, docToOrd, false);
-
- // write the values as FST
- writeFST(field, values);
- }
-
- // note: this might not be the most efficient... but its fairly simple
- @Override
- public void addSortedSetField(FieldInfo field, Iterable<BytesRef> values, final Iterable<Number> docToOrdCount, final Iterable<Number> ords) {
- // write the ordinals as a binary field
- addBinaryField(field, new Iterable<BytesRef>() {
- @Override
- public Iterator<BytesRef> iterator() {
- return new SortedSetIterator(docToOrdCount.iterator(), ords.iterator());
- }
- });
-
- // write the values as FST
- writeFST(field, values);
- }
-
- // per-document vint-encoded byte[]
- static class SortedSetIterator implements Iterator<BytesRef> {
- byte[] buffer = new byte[10];
- ByteArrayDataOutput out = new ByteArrayDataOutput();
- BytesRef ref = new BytesRef();
-
- final Iterator<Number> counts;
- final Iterator<Number> ords;
-
- SortedSetIterator(Iterator<Number> counts, Iterator<Number> ords) {
- this.counts = counts;
- this.ords = ords;
- }
-
- @Override
- public bool hasNext() {
- return counts.hasNext();
- }
-
- @Override
- public BytesRef next() {
- if (!hasNext()) {
- throw new NoSuchElementException();
- }
-
- int count = counts.next().intValue();
- int maxSize = count*9; // worst case
- if (maxSize > buffer.length) {
- buffer = ArrayUtil.grow(buffer, maxSize);
- }
-
- try {
- encodeValues(count);
- } catch (IOException bogus) {
- throw new RuntimeException(bogus);
- }
-
- ref.bytes = buffer;
- ref.offset = 0;
- ref.length = out.getPosition();
-
- return ref;
- }
-
- // encodes count values to buffer
- private void encodeValues(int count) {
- out.reset(buffer);
- long lastOrd = 0;
- for (int i = 0; i < count; i++) {
- long ord = ords.next().longValue();
- out.writeVLong(ord - lastOrd);
- lastOrd = ord;
- }
- }
-
- @Override
- public void remove() {
- throw new UnsupportedOperationException();
- }
- }
-}
+using System;
+using System.Diagnostics;
+using System.Collections.Generic;
+using Lucene.Net.Codecs.Memory;
+
+namespace org.apache.lucene.codecs.memory
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ using FieldInfo = org.apache.lucene.index.FieldInfo;
+ using IndexFileNames = org.apache.lucene.index.IndexFileNames;
+ using SegmentWriteState = org.apache.lucene.index.SegmentWriteState;
+ using ByteArrayDataOutput = org.apache.lucene.store.ByteArrayDataOutput;
+ using IndexOutput = org.apache.lucene.store.IndexOutput;
+ using ArrayUtil = org.apache.lucene.util.ArrayUtil;
+ using BytesRef = org.apache.lucene.util.BytesRef;
+ using IOUtils = org.apache.lucene.util.IOUtils;
+ using IntsRef = org.apache.lucene.util.IntsRef;
+ using MathUtil = org.apache.lucene.util.MathUtil;
+ using Builder = org.apache.lucene.util.fst.Builder;
+ using INPUT_TYPE = org.apache.lucene.util.fst.FST.INPUT_TYPE;
+ using FST = org.apache.lucene.util.fst.FST;
+ using PositiveIntOutputs = org.apache.lucene.util.fst.PositiveIntOutputs;
+ using Util = org.apache.lucene.util.fst.Util;
+ using BlockPackedWriter = org.apache.lucene.util.packed.BlockPackedWriter;
+ using MonotonicBlockPackedWriter = org.apache.lucene.util.packed.MonotonicBlockPackedWriter;
+ using FormatAndBits = org.apache.lucene.util.packed.PackedInts.FormatAndBits;
+ using PackedInts = org.apache.lucene.util.packed.PackedInts;
+
+//JAVA TO C# CONVERTER TODO TASK: This Java 'import static' statement cannot be converted to .NET:
+ import static org.apache.lucene.codecs.memory.MemoryDocValuesProducer.VERSION_CURRENT;
+//JAVA TO C# CONVERTER TODO TASK: This Java 'import static' statement cannot be converted to .NET:
+ import static org.apache.lucene.codecs.memory.MemoryDocValuesProducer.BLOCK_SIZE;
+//JAVA TO C# CONVERTER TODO TASK: This Java 'import static' statement cannot be converted to .NET:
+ import static org.apache.lucene.codecs.memory.MemoryDocValuesProducer.BYTES;
+//JAVA TO C# CONVERTER TODO TASK: This Java 'import static' statement cannot be converted to .NET:
+ import static org.apache.lucene.codecs.memory.MemoryDocValuesProducer.NUMBER;
+//JAVA TO C# CONVERTER TODO TASK: This Java 'import static' statement cannot be converted to .NET:
+ import static org.apache.lucene.codecs.memory.MemoryDocValuesProducer.FST;
+//JAVA TO C# CONVERTER TODO TASK: This Java 'import static' statement cannot be converted to .NET:
+ import static org.apache.lucene.codecs.memory.MemoryDocValuesProducer.DELTA_COMPRESSED;
+//JAVA TO C# CONVERTER TODO TASK: This Java 'import static' statement cannot be converted to .NET:
+ import static org.apache.lucene.codecs.memory.MemoryDocValuesProducer.GCD_COMPRESSED;
+//JAVA TO C# CONVERTER TODO TASK: This Java 'import static' statement cannot be converted to .NET:
+ import static org.apache.lucene.codecs.memory.MemoryDocValuesProducer.TABLE_COMPRESSED;
+//JAVA TO C# CONVERTER TODO TASK: This Java 'import static' statement cannot be converted to .NET:
+ import static org.apache.lucene.codecs.memory.MemoryDocValuesProducer.UNCOMPRESSED;
+
+ /// <summary>
+ /// Writer for <seealso cref="MemoryDocValuesFormat"/>
+ /// </summary>
+ internal class MemoryDocValuesConsumer : DocValuesConsumer
+ {
+ internal IndexOutput data, meta;
+ internal readonly int maxDoc;
+ internal readonly float acceptableOverheadRatio;
+
+ /// <summary>Opens the segment's data and meta outputs and writes a codec header (name plus MemoryDocValuesProducer.VERSION_CURRENT) to each.</summary>
+ /// <remarks>If any step throws, this instance is handed to IOUtils.closeWhileHandlingException before the exception leaves the constructor.</remarks>
+ internal MemoryDocValuesConsumer(SegmentWriteState state, string dataCodec, string dataExtension, string metaCodec, string metaExtension, float acceptableOverheadRatio)
+ {
+ this.acceptableOverheadRatio = acceptableOverheadRatio;
+ maxDoc = state.segmentInfo.DocCount;
+ bool success = false;
+ try
+ {
+ string dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension);
+ data = state.directory.createOutput(dataName, state.context);
+ CodecUtil.writeHeader(data, dataCodec, MemoryDocValuesProducer.VERSION_CURRENT);
+ string metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension);
+ meta = state.directory.createOutput(metaName, state.context);
+ CodecUtil.writeHeader(meta, metaCodec, MemoryDocValuesProducer.VERSION_CURRENT);
+ success = true;
+ }
+ finally
+ {
+ if (!success)
+ {
+ IOUtils.closeWhileHandlingException(this);
+ }
+ }
+ }
+
+ /// <summary>Writes a numeric doc-values field, allowing the storage-optimizing scan.</summary>
+ /// <remarks>Delegates to the three-argument overload with optimizeStorage = true.</remarks>
+ public override void addNumericField(FieldInfo field, IEnumerable<Number> values)
+ {
+ addNumericField(field, values, true);
+ }
+
+ /// <summary>Writes the field's metadata, an optional missing-value bitset, and the values as raw bytes, a value table, GCD-compressed blocks or delta-compressed blocks.</summary>
+ /// <param name="optimizeStorage">When false the min/max/GCD/unique-value scan is skipped, so no missing bitset is written and the values are delta-compressed.</param>
+ internal virtual void addNumericField(FieldInfo field, IEnumerable<Number> values, bool optimizeStorage)
+ {
+ meta.writeVInt(field.number);
+ meta.writeByte(MemoryDocValuesProducer.NUMBER);
+ meta.writeLong(data.FilePointer);
+ long minValue = long.MaxValue;
+ long maxValue = long.MinValue;
+ long gcd = 0;
+ bool missing = false;
+ // TODO: more efficient?
+ HashSet<long> uniqueValues = null;
+ if (optimizeStorage)
+ {
+ uniqueValues = new HashSet<long>();
+
+ long count = 0;
+ foreach (Number nv in values)
+ {
+ // A null entry is a document without a value: it is counted as 0 here
+ // and flagged so that the missing bitset gets written below.
+ long v;
+ if (nv == null)
+ {
+ v = 0;
+ missing = true;
+ }
+ else
+ {
+ v = (long)nv;
+ }
+
+ if (gcd != 1)
+ {
+ if (v < long.MinValue / 2 || v > long.MaxValue / 2)
+ {
+ // in that case v - minValue might overflow and make the GCD computation return
+ // wrong results. Since these extreme values are unlikely, we just discard
+ // GCD computation for them
+ gcd = 1;
+ } // minValue needs to be set first
+ else if (count != 0)
+ {
+ gcd = MathUtil.gcd(gcd, v - minValue);
+ }
+ }
+
+ minValue = Math.Min(minValue, v);
+ maxValue = Math.Max(maxValue, v);
+
+ if (uniqueValues != null)
+ {
+ if (uniqueValues.Add(v))
+ {
+ if (uniqueValues.Count > 256)
+ {
+ uniqueValues = null;
+ }
+ }
+ }
+
+ ++count;
+ }
+ Debug.Assert(count == maxDoc);
+ }
+
+ if (missing)
+ {
+ long start = data.FilePointer;
+ writeMissingBitset(values);
+ meta.writeLong(start);
+ meta.writeLong(data.FilePointer - start);
+ }
+ else
+ {
+ meta.writeLong(-1L);
+ }
+
+ if (uniqueValues != null)
+ {
+ // small number of unique values
+ // bitsRequired is asked about the largest table index (Count - 1); the
+ // format and width actually used are whatever fastestFormatAndBits returns.
+ int bitsPerValue = PackedInts.bitsRequired(uniqueValues.Count - 1);
+ FormatAndBits formatAndBits = PackedInts.fastestFormatAndBits(maxDoc, bitsPerValue, acceptableOverheadRatio);
+ if (formatAndBits.bitsPerValue == 8 && minValue >= sbyte.MinValue && maxValue <= sbyte.MaxValue)
+ {
+ meta.writeByte(MemoryDocValuesProducer.UNCOMPRESSED); // uncompressed
+ foreach (Number nv in values)
+ {
+ data.writeByte(nv == null ? (sbyte)0 : (sbyte)(long)nv);
+ }
+ }
+ else
+ {
+ meta.writeByte(MemoryDocValuesProducer.TABLE_COMPRESSED); // table-compressed
+ long[] decode = new List<long>(uniqueValues).ToArray();
+ // Write the lookup table (ordinal -> value) and build the reverse map
+ // (value -> ordinal) that the per-document loop below encodes with.
+ Dictionary<long, int> encode = new Dictionary<long, int>();
+ data.writeVInt(decode.Length);
+ for (int i = 0; i < decode.Length; i++)
+ {
+ data.writeLong(decode[i]);
+ encode[decode[i]] = i;
+ }
+
+ meta.writeVInt(PackedInts.VERSION_CURRENT);
+ data.writeVInt(formatAndBits.format.Id);
+ data.writeVInt(formatAndBits.bitsPerValue);
+
+ // One table ordinal per document; a missing document uses the ordinal of 0,
+ // which the scan above put into uniqueValues when it saw the null.
+ PackedInts.Writer writer = PackedInts.getWriterNoHeader(data, formatAndBits.format, maxDoc, formatAndBits.bitsPerValue, PackedInts.DEFAULT_BUFFER_SIZE);
+ foreach (Number nv in values)
+ {
+ writer.add(encode[nv == null ? 0 : (long)nv]);
+ }
+ writer.finish();
+ }
+ }
+ else if (gcd != 0 && gcd != 1)
+ {
+ meta.writeByte(MemoryDocValuesProducer.GCD_COMPRESSED);
+ meta.writeVInt(PackedInts.VERSION_CURRENT);
+ data.writeLong(minValue);
+ data.writeLong(gcd);
+ data.writeVInt(MemoryDocValuesProducer.BLOCK_SIZE);
+
+ // Each value is stored as (value - minValue) / gcd; minValue and gcd were
+ // written just above (presumably so the reader can undo it - TODO confirm in the producer).
+ BlockPackedWriter writer = new BlockPackedWriter(data, MemoryDocValuesProducer.BLOCK_SIZE);
+ foreach (Number nv in values)
+ {
+ long value = nv == null ? 0 : (long)nv;
+ writer.add((value - minValue) / gcd);
+ }
+ writer.finish();
+ }
+ else
+ {
+ meta.writeByte(MemoryDocValuesProducer.DELTA_COMPRESSED); // delta-compressed
+
+ meta.writeVInt(PackedInts.VERSION_CURRENT);
+ data.writeVInt(MemoryDocValuesProducer.BLOCK_SIZE);
+
+ // Fallback: no value table and no usable common divisor, so the values go
+ // into a BlockPackedWriter unchanged (null entries as 0).
+ BlockPackedWriter writer = new BlockPackedWriter(data, MemoryDocValuesProducer.BLOCK_SIZE);
+ foreach (Number nv in values)
+ {
+ writer.add(nv == null ? 0 : (long)nv);
+ }
+ writer.finish();
+ }
+ }
+
+ /// <summary>Finishes both files (EOF marker and footer on meta, footer on data) and then closes them.</summary>
+ /// <remarks>If finishing throws, the outputs are still passed to IOUtils.closeWhileHandlingException; both fields are set to null either way.</remarks>
+ public override void close()
+ {
+ bool footersWritten = false;
+ try
+ {
+ if (meta != null)
+ {
+ meta.writeVInt(-1); // EOF marker
+ CodecUtil.writeFooter(meta); // checksum
+ }
+ if (data != null)
+ {
+ CodecUtil.writeFooter(data);
+ }
+ footersWritten = true;
+ }
+ finally
+ {
+ if (!footersWritten)
+ {
+ IOUtils.closeWhileHandlingException(data, meta);
+ }
+ else
+ {
+ IOUtils.close(data, meta);
+ }
+ meta = null; data = null;
+ }
+ }
+
+ /// <summary>Writes a binary doc-values field: the concatenated bytes, an optional missing bitset, and per-document end offsets when the lengths vary.</summary>
+ /// <param name="values">Enumerated up to three times (bytes, missing bitset, offsets), so it must be re-enumerable; a null entry means the document has no value.</param>
+ /// <exception cref="System.ArgumentException">A value is longer than MemoryDocValuesFormat.MAX_BINARY_FIELD_LENGTH.</exception>
+ public override void addBinaryField(FieldInfo field, IEnumerable<BytesRef> values)
+ {
+ // write the byte[] data
+ meta.writeVInt(field.number);
+ meta.writeByte(MemoryDocValuesProducer.BYTES);
+ int minLength = int.MaxValue;
+ int maxLength = int.MinValue;
+ // All values are concatenated starting at startFP; meta records the start
+ // and the total byte length of that region once the loop is done.
+ long startFP = data.FilePointer;
+ bool missing = false;
+ foreach (BytesRef v in values)
+ {
+ // A null value contributes zero bytes and is only remembered through the
+ // missing flag (and, further down, the missing bitset).
+ int length;
+ if (v == null)
+ {
+ length = 0;
+ missing = true;
+ }
+ else
+ {
+ length = v.length;
+ }
+ if (length > MemoryDocValuesFormat.MAX_BINARY_FIELD_LENGTH)
+ {
+ throw new System.ArgumentException("DocValuesField \"" + field.name + "\" is too large, must be <= " + MemoryDocValuesFormat.MAX_BINARY_FIELD_LENGTH);
+ }
+ minLength = Math.Min(minLength, length);
+ maxLength = Math.Max(maxLength, length);
+ if (v != null)
+ {
+ data.writeBytes(v.bytes, v.offset, v.length);
+ }
+ }
+ meta.writeLong(startFP);
+ meta.writeLong(data.FilePointer - startFP);
+ if (missing)
+ {
+ long start = data.FilePointer;
+ writeMissingBitset(values);
+ meta.writeLong(start);
+ meta.writeLong(data.FilePointer - start);
+ }
+ else
+ {
+ meta.writeLong(-1L);
+ }
+ meta.writeVInt(minLength);
+ meta.writeVInt(maxLength);
+
+ // if minLength == maxLength, its a fixed-length byte[], we are done (the addresses are implicit)
+ // otherwise, we need to record the length fields...
+ if (minLength != maxLength)
+ {
+ meta.writeVInt(PackedInts.VERSION_CURRENT);
+ meta.writeVInt(MemoryDocValuesProducer.BLOCK_SIZE);
+
+ // One running end offset per document (unchanged for a null value), written
+ // through a MonotonicBlockPackedWriter.
+ MonotonicBlockPackedWriter writer = new MonotonicBlockPackedWriter(data, MemoryDocValuesProducer.BLOCK_SIZE);
+ long addr = 0;
+ foreach (BytesRef v in values)
+ {
+ if (v != null)
+ {
+ addr += v.length;
+ }
+ writer.add(addr);
+ }
+ writer.finish();
+ }
+ }
+
+ /// <summary>Builds an FST that maps each value to its position in <paramref name="values"/> (0, 1, 2, ...), saves it to data and records the number of values in meta.</summary>
+ /// <remarks>The type byte is written as MemoryDocValuesProducer.FST because the bare name FST is also this file's alias for the FST class. A null result from builder.finish() is not saved.</remarks>
+ private void writeFST(FieldInfo field, IEnumerable<BytesRef> values)
+ {
+ meta.writeVInt(field.number);
+ meta.writeByte(MemoryDocValuesProducer.FST);
+ meta.writeLong(data.FilePointer);
+ PositiveIntOutputs outputs = PositiveIntOutputs.Singleton;
+ Builder<long?> builder = new Builder<long?>(INPUT_TYPE.BYTE1, outputs);
+ IntsRef scratch = new IntsRef();
+ long ord = 0;
+ foreach (BytesRef v in values)
+ {
+ builder.add(Util.toIntsRef(v, scratch), ord);
+ ord++;
+ }
+ FST<long?> fst = builder.finish();
+ if (fst != null)
+ {
+ fst.save(data);
+ }
+ meta.writeVLong(ord);
+ }
+
+ // TODO: in some cases representing missing with minValue-1 wouldn't take up additional space and so on,
+ // but this is very simple, and algorithms only check this for values of 0 anyway (doesnt slow down normal decode)
+ /// <summary>Writes one bit per element of values to data, packed into longs: bit (i mod 64) of the (i / 64)-th long is set when element i is not null.</summary>
+ /// <remarks>A final, partially filled long is written as well; an empty sequence writes nothing.</remarks>
+ internal virtual void writeMissingBitset<T1>(IEnumerable<T1> values)
+ {
+ int filled = 0;
+ long word = 0;
+ foreach (object entry in values)
+ {
+ if (filled == 64)
+ {
+ data.writeLong(word);
+ word = 0;
+ filled = 0;
+ }
+ if (entry != null)
+ {
+ word |= 1L << (filled & 0x3f);
+ }
+ filled++;
+ }
+ if (filled > 0)
+ {
+ data.writeLong(word);
+ }
+ }
+
+ /// <summary>Writes a sorted field as two entries under the same field number: the per-document ordinals, then the values as an FST.</summary>
+ /// <remarks>The ordinals go through addNumericField with optimizeStorage = false; the order of the two writes is part of the output.</remarks>
+ public override void addSortedField(FieldInfo field, IEnumerable<BytesRef> values, IEnumerable<Number> docToOrd)
+ {
+ // write the ordinals as numerics
+ addNumericField(field, docToOrd, false);
+
+ // write the values as FST
+ writeFST(field, values);
+ }
+
+ // note: this might not be the most efficient... but its fairly simple
+ /// <summary>Writes a sorted-set field: each document's ordinals as one binary value, then the values as an FST.</summary>
+ /// <param name="docToOrdCount">Number of ordinals per document; SortedSetIterator reads one count per produced value.</param>
+ /// <param name="ords">The ordinals themselves, read sequentially by SortedSetIterator (count-many per document).</param>
+ public override void addSortedSetField(FieldInfo field, IEnumerable<BytesRef> values, IEnumerable<Number> docToOrdCount, IEnumerable<Number> ords)
+ {
+ // write the ordinals as a binary field
+ addBinaryField(field, new IterableAnonymousInnerClassHelper(this, docToOrdCount, ords));
+
+ // write the values as FST
+ writeFST(field, values);
+ }
+
+ // Wraps the two ordinal streams as the IEnumerable<BytesRef> that is handed to addBinaryField.
+ private class IterableAnonymousInnerClassHelper : IEnumerable<BytesRef>
+ {
+ private readonly IEnumerable<Number> docToOrdCount;
+ private readonly IEnumerable<Number> ords;
+ // outerInstance stays in the signature for the existing call site; nothing in this class reads it.
+ public IterableAnonymousInnerClassHelper(MemoryDocValuesConsumer outerInstance, IEnumerable<Number> docToOrdCount, IEnumerable<Number> ords)
+ {
+ this.docToOrdCount = docToOrdCount;
+ this.ords = ords;
+ }
+
+ public virtual IEnumerator<BytesRef> GetEnumerator()
+ {
+ return new SortedSetIterator(docToOrdCount.GetEnumerator(), ords.GetEnumerator());
+ }
+ // IEnumerable<T> extends the non-generic IEnumerable, so this member is required as well.
+ System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator() { return GetEnumerator(); }
+ }
+
+ // per-document vint-encoded byte[]
+ /// <summary>
+ /// Produces one BytesRef per document holding that document's ordinals as delta-encoded VLongs.
+ /// The same BytesRef instance and buffer are reused for every element.
+ /// </summary>
+ internal class SortedSetIterator : IEnumerator<BytesRef>
+ {
+ internal sbyte[] buffer = new sbyte[10];
+ internal ByteArrayDataOutput @out = new ByteArrayDataOutput();
+ internal BytesRef @ref = new BytesRef();
+
+ internal readonly IEnumerator<Number> counts;
+ internal readonly IEnumerator<Number> ords;
+
+ internal SortedSetIterator(IEnumerator<Number> counts, IEnumerator<Number> ords)
+ {
+ this.counts = counts;
+ this.ords = ords;
+ }
+
+ /// <summary>The value encoded by the most recent successful MoveNext.</summary>
+ public BytesRef Current { get { return @ref; } }
+ object System.Collections.IEnumerator.Current { get { return @ref; } }
+
+ /// <summary>Encodes the next document's ordinals into the shared buffer.</summary>
+ /// <returns>false once counts is exhausted.</returns>
+ public bool MoveNext()
+ {
+ if (!counts.MoveNext())
+ {
+ return false;
+ }
+
+ int count = (int)counts.Current;
+ int maxSize = count * 9; // worst case
+ if (maxSize > buffer.Length)
+ {
+ buffer = ArrayUtil.grow(buffer, maxSize);
+ }
+
+ // No wrapper needed here: C# has no checked exceptions, so an I/O failure simply propagates.
+ encodeValues(count);
+
+ @ref.bytes = buffer;
+ @ref.offset = 0;
+ @ref.length = @out.Position;
+ return true;
+ }
+
+ // encodes count values to buffer
+ internal virtual void encodeValues(int count)
+ {
+ @out.reset(buffer);
+ long lastOrd = 0;
+ for (int i = 0; i < count; i++)
+ {
+ if (!ords.MoveNext())
+ {
+ throw new InvalidOperationException("ords ended before all counted ordinals were read");
+ }
+ long ord = (long)ords.Current;
+ @out.writeVLong(ord - lastOrd);
+ lastOrd = ord;
+ }
+ }
+
+ public void Reset() { throw new System.NotSupportedException(); }
+
+ public void Dispose()
+ {
+ counts.Dispose();
+ ords.Dispose();
+ }
+ }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/d852d5b0/src/Lucene.Net.Codecs/Memory/MemoryDocValuesFormat.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Codecs/Memory/MemoryDocValuesFormat.cs b/src/Lucene.Net.Codecs/Memory/MemoryDocValuesFormat.cs
index 34b360c..8b36c0f 100644
--- a/src/Lucene.Net.Codecs/Memory/MemoryDocValuesFormat.cs
+++ b/src/Lucene.Net.Codecs/Memory/MemoryDocValuesFormat.cs
@@ -1,72 +1,73 @@
-package codecs.memory;
+using org.apache.lucene.codecs.memory;
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
+namespace Lucene.Net.Codecs.Memory
+{
-import java.io.IOException;
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+ /// <summary>
+ /// In-memory docvalues format </summary>
+ public class MemoryDocValuesFormat : DocValuesFormat
+ {
-import codecs.DocValuesConsumer;
-import codecs.DocValuesProducer;
-import codecs.DocValuesFormat;
-import index.SegmentReadState;
-import index.SegmentWriteState;
-import util.packed.PackedInts;
+ /// <summary>
+ /// Maximum length for each binary doc values field (32766, i.e. 2^15 - 2). </summary>
+ public static readonly int MAX_BINARY_FIELD_LENGTH = (1 << 15) - 2;
-/** In-memory docvalues format */
-public class MemoryDocValuesFormat extends DocValuesFormat {
+ /// <summary>
+ /// Compression parameter set by the constructor and passed to the
+ /// MemoryDocValuesConsumer created in fieldsConsumer. </summary>
+ internal readonly float acceptableOverheadRatio;
- /** Maximum length for each binary doc values field. */
- public static final int MAX_BINARY_FIELD_LENGTH = (1 << 15) - 2;
-
- final float acceptableOverheadRatio;
-
- /**
- * Calls {@link #MemoryDocValuesFormat(float)
- * MemoryDocValuesFormat(PackedInts.DEFAULT)}
- */
- public MemoryDocValuesFormat() {
- this(PackedInts.DEFAULT);
- }
-
- /**
- * Creates a new MemoryDocValuesFormat with the specified
- * <code>acceptableOverheadRatio</code> for NumericDocValues.
- * @param acceptableOverheadRatio compression parameter for numerics.
- * Currently this is only used when the number of unique values is small.
- *
- * @lucene.experimental
- */
- public MemoryDocValuesFormat(float acceptableOverheadRatio) {
- super("Memory");
- this.acceptableOverheadRatio = acceptableOverheadRatio;
- }
+ /// <summary>
+ /// Calls <see cref="MemoryDocValuesFormat(float)"/> with
+ /// PackedInts.DEFAULT as the acceptable overhead ratio.
+ /// </summary>
+ public MemoryDocValuesFormat() : this(PackedInts.DEFAULT)
+ {
+ }
- @Override
- public DocValuesConsumer fieldsConsumer(SegmentWriteState state) {
- return new MemoryDocValuesConsumer(state, DATA_CODEC, DATA_EXTENSION, METADATA_CODEC, METADATA_EXTENSION, acceptableOverheadRatio);
- }
-
- @Override
- public DocValuesProducer fieldsProducer(SegmentReadState state) {
- return new MemoryDocValuesProducer(state, DATA_CODEC, DATA_EXTENSION, METADATA_CODEC, METADATA_EXTENSION);
- }
-
- static final String DATA_CODEC = "MemoryDocValuesData";
- static final String DATA_EXTENSION = "mdvd";
- static final String METADATA_CODEC = "MemoryDocValuesMetadata";
- static final String METADATA_EXTENSION = "mdvm";
-}
+ /// <summary>
+ /// Creates a new MemoryDocValuesFormat with the specified
+ /// <code>acceptableOverheadRatio</code> for NumericDocValues.
+ /// <para/>
+ /// @lucene.experimental
+ /// </summary>
+ /// <param name="acceptableOverheadRatio"> compression parameter for numerics.
+ /// Currently this is only used when the number of unique values is small. </param>
+ public MemoryDocValuesFormat(float acceptableOverheadRatio) : base("Memory")
+ {
+ this.acceptableOverheadRatio = acceptableOverheadRatio;
+ }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public org.apache.lucene.codecs.DocValuesConsumer fieldsConsumer(org.apache.lucene.index.SegmentWriteState state) throws java.io.IOException
+ /// <summary>
+ /// Creates a MemoryDocValuesConsumer for the given segment write state, configured with
+ /// this format's data/metadata codec names and extensions and its acceptableOverheadRatio.
+ /// </summary>
+ /// <param name="state"> segment write state forwarded to the consumer </param>
+ /// <returns> the newly created consumer </returns>
+ public override DocValuesConsumer fieldsConsumer(SegmentWriteState state)
+ {
+ DocValuesConsumer consumer = new MemoryDocValuesConsumer(state, DATA_CODEC, DATA_EXTENSION, METADATA_CODEC, METADATA_EXTENSION, acceptableOverheadRatio);
+ return consumer;
+ }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public org.apache.lucene.codecs.DocValuesProducer fieldsProducer(org.apache.lucene.index.SegmentReadState state) throws java.io.IOException
+ /// <summary>
+ /// Creates a MemoryDocValuesProducer for the given segment read state, configured with
+ /// this format's data/metadata codec names and extensions.
+ /// </summary>
+ /// <param name="state"> segment read state forwarded to the producer </param>
+ /// <returns> the newly created producer </returns>
+ public override DocValuesProducer fieldsProducer(SegmentReadState state)
+ {
+ DocValuesProducer producer = new MemoryDocValuesProducer(state, DATA_CODEC, DATA_EXTENSION, METADATA_CODEC, METADATA_EXTENSION);
+ return producer;
+ }
+
+ // Codec names and extensions passed to the MemoryDocValuesConsumer and
+ // MemoryDocValuesProducer constructors; presumably they identify the data and
+ // metadata files of this format -- confirm against those classes.
+ internal const string DATA_CODEC = "MemoryDocValuesData";
+ internal const string DATA_EXTENSION = "mdvd";
+ internal const string METADATA_CODEC = "MemoryDocValuesMetadata";
+ internal const string METADATA_EXTENSION = "mdvm";
+ }
+
+}
\ No newline at end of file