You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by pn...@apache.org on 2014/12/02 18:41:38 UTC
[2/4] lucenenet git commit: More updates to IntBlock and SimpleText codecs
More updates to IntBlock and SimpleText codecs
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/a33ca521
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/a33ca521
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/a33ca521
Branch: refs/heads/master
Commit: a33ca521eea62cf14dd50b44cfc7be27afc67a3d
Parents: a7d71c2
Author: Prescott Nasser <pn...@apache.org>
Authored: Sun Nov 23 14:36:47 2014 -0800
Committer: Prescott Nasser <pn...@apache.org>
Committed: Sun Nov 23 14:36:47 2014 -0800
----------------------------------------------------------------------
.../Intblock/FixedIntBlockIndexInput.cs | 43 +-
.../Intblock/FixedIntBlockIndexOutput.cs | 189 +++----
src/Lucene.Net.Codecs/Intblock/Index.cs | 69 ++-
src/Lucene.Net.Codecs/Intblock/Reader.cs | 94 ++--
.../Intblock/VariableIntBlockIndexInput.cs | 18 +-
.../Intblock/VariableIntBlockIndexOutput.cs | 235 +++++----
.../SimpleText/SimpleTextCodec.cs | 106 ++--
.../SimpleText/SimpleTextDocValuesReader.cs | 504 +++++++++----------
.../SimpleText/SimpleTextDocValuesWriter.cs | 39 +-
.../SimpleText/SimpleTextFieldsReader.cs | 15 +-
10 files changed, 668 insertions(+), 644 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/a33ca521/src/Lucene.Net.Codecs/Intblock/FixedIntBlockIndexInput.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Codecs/Intblock/FixedIntBlockIndexInput.cs b/src/Lucene.Net.Codecs/Intblock/FixedIntBlockIndexInput.cs
index 3594005..dc7b558 100644
--- a/src/Lucene.Net.Codecs/Intblock/FixedIntBlockIndexInput.cs
+++ b/src/Lucene.Net.Codecs/Intblock/FixedIntBlockIndexInput.cs
@@ -15,11 +15,12 @@
* limitations under the License.
*/
+using Lucene.Net.Codecs.Sep;
using Lucene.Net.Store;
namespace Lucene.Net.Codecs.Intblock
{
-
+
/// <summary>
/// Naive int block API that writes vInts. This is
/// expected to give poor performance; it's really only for
@@ -33,36 +34,38 @@ namespace Lucene.Net.Codecs.Intblock
///
/// @lucene.experimental
/// </summary>
-public abstract class FixedIntBlockIndexInput : IntIndexInput {
+ public abstract class FixedIntBlockIndexInput : IntIndexInput
+ {
- private readonly IndexInput input;
- protected readonly int BlockSize;
+ private readonly IndexInput _input;
+ private readonly int _blockSize;
protected FixedIntBlockIndexInput(IndexInput input)
{
- this.input = input;
- BlockSize = input.ReadVInt();
+ _input = input;
+ _blockSize = input.ReadVInt();
}
- public override IntIndexInput.Reader reader() {
- final int[] buffer = new int[BlockSize];
- final IndexInput clone = in.clone();
+ public override IntIndexInputReader Reader()
+ {
+ var buffer = new int[_blockSize];
+ var clone = (IndexInput)_input.Clone();
- // TODO: can this be simplified?
- return new Reader(clone, buffer, this.GetBlockReader(clone, buffer));
- }
+ // TODO: can this be simplified?
+ return new Reader(clone, buffer, BlockReader(clone, buffer));
+ }
- public override void Close()
+ public override void Dispose()
{
- input.Close();
+ _input.Dispose();
}
- public override IntIndexInput.Index Index() {
- return new Index();
- }
+ public override IntIndexInputIndex Index()
+ {
+ return new Index();
+ }
- protected abstract BlockReader getBlockReader(IndexInput in, int[] buffer);
+ protected abstract VariableIntBlockIndexInput.BlockReader BlockReader(IndexInput input, int[] buffer);
-
-}
+ }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/a33ca521/src/Lucene.Net.Codecs/Intblock/FixedIntBlockIndexOutput.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Codecs/Intblock/FixedIntBlockIndexOutput.cs b/src/Lucene.Net.Codecs/Intblock/FixedIntBlockIndexOutput.cs
index db74098..b19a0a3 100644
--- a/src/Lucene.Net.Codecs/Intblock/FixedIntBlockIndexOutput.cs
+++ b/src/Lucene.Net.Codecs/Intblock/FixedIntBlockIndexOutput.cs
@@ -1,5 +1,3 @@
-package codecs.intblock;
-
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -21,108 +19,123 @@ package codecs.intblock;
* expected to give poor performance; it's really only for
* testing the pluggability. One should typically use pfor instead. */
-import java.io.IOException;
-
-import codecs.sep.IntIndexOutput;
-import store.DataOutput;
-import store.IndexOutput;
-
-/** Abstract base class that writes fixed-size blocks of ints
- * to an IndexOutput. While this is a simple approach, a
- * more performant approach would directly create an impl
- * of IntIndexOutput inside Directory. Wrapping a generic
- * IndexInput will likely cost performance.
- *
- * @lucene.experimental
- */
-public abstract class FixedIntBlockIndexOutput extends IntIndexOutput {
-
- protected final IndexOutput out;
- private final int blockSize;
- protected final int[] buffer;
+using System;
+using System.Diagnostics;
+using Lucene.Net.Codecs.Sep;
+using Lucene.Net.Store;
+
+
+/// <summary>
+/// Abstract base class that writes fixed-size blocks of ints to an
+/// IndexOutput. While this is a simple approach, a more
+/// performant approach would directly create an impl of
+/// IntIndexOutput inside Directory. Wrapping a generic IndexInput
+/// will likely cost performance.
+///
+/// @lucene.experimental
+/// </summary>
+public abstract class FixedIntBlockIndexOutput : IntIndexOutput {
+
+ protected IndexOutput out;
+ private int blockSize;
+ protected int int[] buffer;
private int upto;
protected FixedIntBlockIndexOutput(IndexOutput out, int fixedBlockSize) {
blockSize = fixedBlockSize;
this.out = out;
- out.writeVInt(blockSize);
+ out.WriteVInt(blockSize);
buffer = new int[blockSize];
}
protected abstract void flushBlock() ;
- @Override
- public IntIndexOutput.Index index() {
+ public override IntIndexOutputIndex index() {
return new Index();
}
- private class Index extends IntIndexOutput.Index {
- long fp;
- int upto;
- long lastFP;
- int lastUpto;
-
- @Override
- public void mark() {
- fp = out.getFilePointer();
- upto = FixedIntBlockIndexOutput.this.upto;
- }
-
- @Override
- public void copyFrom(IntIndexOutput.Index other, bool copyLast) {
- Index idx = (Index) other;
- fp = idx.fp;
- upto = idx.upto;
- if (copyLast) {
- lastFP = fp;
- lastUpto = upto;
- }
- }
-
- @Override
- public void write(DataOutput indexOut, bool absolute) {
- if (absolute) {
- indexOut.writeVInt(upto);
- indexOut.writeVLong(fp);
- } else if (fp == lastFP) {
- // same block
- Debug.Assert( upto >= lastUpto;
- int uptoDelta = upto - lastUpto;
- indexOut.writeVInt(uptoDelta << 1 | 1);
- } else {
- // new block
- indexOut.writeVInt(upto << 1);
- indexOut.writeVLong(fp - lastFP);
- }
- lastUpto = upto;
- lastFP = fp;
- }
-
- @Override
- public String toString() {
- return "fp=" + fp + " upto=" + upto;
+ private class Index : IntIndexOutputIndex
+ {
+ private long fp;
+ private int upto;
+ private long lastFP;
+ private int lastUpto;
+
+ public override void Mark()
+ {
+ fp =out.
+ FilePointer;
+ upto = FixedIntBlockIndexOutput.
+ this.upto;
+ }
+
+ public override void CopyFrom(IntIndexOutputIndex other, bool copyLast)
+ {
+ Index idx = (Index) other;
+ fp = idx.fp;
+ upto = idx.upto;
+ if (copyLast)
+ {
+ lastFP = fp;
+ lastUpto = upto;
+ }
+ }
+
+ public override void Write(DataOutput indexOut, bool absolute)
+ {
+ if (absolute)
+ {
+ indexOut.WriteVInt(upto);
+ indexOut.WriteVLong(fp);
+ }
+ else if (fp == lastFP)
+ {
+ // same block
+ Debug.Assert(upto >= lastUpto);
+ var uptoDelta = upto - lastUpto;
+ indexOut.WriteVInt(uptoDelta << 1 | 1);
+ }
+ else
+ {
+ // new block
+ indexOut.WriteVInt(upto << 1);
+ indexOut.WriteVLong(fp - lastFP);
+ }
+ lastUpto = upto;
+ lastFP = fp;
+ }
+
+ public override String ToString()
+ {
+ return String.Format("fp={0} upto={1}", fp, upto);
+ }
}
- }
- @Override
- public void write(int v) {
- buffer[upto++] = v;
- if (upto == blockSize) {
- flushBlock();
- upto = 0;
+ public override void Write(int v)
+ {
+ buffer[upto++] = v;
+ if (upto == blockSize)
+ {
+ flushBlock();
+ upto = 0;
+ }
}
- }
- @Override
- public void close() {
- try {
- if (upto > 0) {
- // NOTE: entries in the block after current upto are
- // invalid
- flushBlock();
- }
- } finally {
- out.close();
+ public override void Dispose()
+ {
+ try
+ {
+ if (upto > 0)
+ {
+ // NOTE: entries in the block after current upto are
+ // invalid
+ flushBlock();
+ }
+ }
+ finally
+ {
+ out.
+ Dispose();
+ }
}
- }
}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/a33ca521/src/Lucene.Net.Codecs/Intblock/Index.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Codecs/Intblock/Index.cs b/src/Lucene.Net.Codecs/Intblock/Index.cs
index 67710a6..d2bd9a9 100644
--- a/src/Lucene.Net.Codecs/Intblock/Index.cs
+++ b/src/Lucene.Net.Codecs/Intblock/Index.cs
@@ -17,64 +17,61 @@
namespace Lucene.Net.Codecs.Intblock
{
+ using Sep;
+ using Store;
using System;
- using Lucene.Net.Codecs.Intblock;
-
- internal class Index : IntIndexInput.Index
+ using System.Diagnostics;
+
+ internal class Index : IntIndexInputIndex
{
- private long fp;
- private int upto;
+ private long _fp;
+ private int _upto;
- public override void Read(final DataInput indexIn, final bool absolute)
- {
- if (absolute)
+ public override void Read(DataInput indexIn, bool absolute)
{
- upto = indexIn.readVInt();
- fp = indexIn.readVLong();
- }
- else
- {
- final
- int uptoDelta = indexIn.readVInt();
- if ((uptoDelta & 1) == 1)
+ if (absolute)
{
- // same block
- upto += uptoDelta >> > 1;
+ _upto = indexIn.ReadVInt();
+ _fp = indexIn.ReadVLong();
}
else
{
- // new block
- upto = uptoDelta >> > 1;
- fp += indexIn.readVLong();
+ var uptoDelta = indexIn.ReadVInt();
+ if ((uptoDelta & 1) == 1)
+ {
+ // same block
+ _upto += (int)((uint)uptoDelta >> 1);
+ }
+ else
+ {
+ // new block
+ _upto = (int)((uint)uptoDelta >> 1);
+ _fp += indexIn.ReadVLong();
+ }
}
+ Debug.Assert(_upto < BlockSize);
}
- Debug.Assert(
- upto < blockSize;
- }
- public override void Seek(final IntIndexInput .Reader other)
+ public override void Seek(IntIndexInputReader other)
{
- ((Reader) other).seek(fp, upto);
+ ((Reader) other).Seek(_fp, _upto);
}
- public override void CopyFrom(IntIndexInput.Index other)
+ public override void CopyFrom(IntIndexInputIndex other)
{
- Index idx = (Index) other;
- fp = idx.fp;
- upto = idx.upto;
+ var idx = (Index) other;
+ _fp = idx._fp;
+ _upto = idx._upto;
}
- public override Index Clone()
+ public override IntIndexInputIndex Clone()
{
- Index other = new Index();
- other.fp = fp;
- other.upto = upto;
- return other;
+ return new Index {_fp = _fp, _upto = _upto};
}
public override String ToString()
{
- return "fp=" + fp + " upto=" + upto;
+ return String.Format("fp={0} upto={1}", _fp, _upto);
}
}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/a33ca521/src/Lucene.Net.Codecs/Intblock/Reader.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Codecs/Intblock/Reader.cs b/src/Lucene.Net.Codecs/Intblock/Reader.cs
index 8e0eb1d..4966c14 100644
--- a/src/Lucene.Net.Codecs/Intblock/Reader.cs
+++ b/src/Lucene.Net.Codecs/Intblock/Reader.cs
@@ -15,53 +15,67 @@
* limitations under the License.
*/
+using System.Diagnostics;
+using Lucene.Net.Codecs.Sep;
+using Lucene.Net.Store;
+
namespace Lucene.Net.Codecs.Intblock
{
- internal static class Reader : IntIndexInput.Reader
+ internal class Reader : IntIndexInputReader
{
- private final IndexInput in;
- private final BlockReader blockReader;
- private final int blockSize;
- private final int[] pending;
+ private readonly IndexInput _input;
+ private readonly VariableIntBlockIndexInput.BlockReader _blockReader;
+ private readonly int _blockSize;
+ private readonly int[] _pending;
- private int upto;
- private bool seekPending;
- private long pendingFP;
- private long lastBlockFP = -1;
+ private int _upto;
+ private bool _seekPending;
+ private long _pendingFp;
+ private long _lastBlockFp = -1;
- public Reader(final IndexInput in, final int[] pending, final BlockReader blockReader) {
- this.in = in;
- this.pending = pending;
- this.blockSize = pending.length;
- this.blockReader = blockReader;
- upto = blockSize;
- }
+ public Reader(IndexInput input, int[] pending, VariableIntBlockIndexInput.BlockReader blockReader)
+ {
+ _input = input;
+ _pending = pending;
+ _blockSize = pending.Length;
+ _blockReader = blockReader;
+ _upto = _blockSize;
+ }
- void Seek(final long fp, final int upto) {
- Debug.Assert( upto < blockSize;
- if (seekPending || fp != lastBlockFP) {
- pendingFP = fp;
- seekPending = true;
- }
- this.upto = upto;
- }
+ private void Seek(long fp, int upto)
+ {
+ Debug.Assert(upto < _blockSize);
+
+ if (_seekPending || fp != _lastBlockFp)
+ {
+ _pendingFp = fp;
+ _seekPending = true;
+ }
+
+ _upto = upto;
+ }
- public override int Next() {
- if (seekPending) {
- // Seek & load new block
- in.seek(pendingFP);
- lastBlockFP = pendingFP;
- blockReader.readBlock();
- seekPending = false;
- } else if (upto == blockSize) {
- // Load new block
- lastBlockFP = in.getFilePointer();
- blockReader.readBlock();
- upto = 0;
- }
- return pending[upto++];
- }
- }
+ public override int Next()
+ {
+ if (_seekPending)
+ {
+ // Seek & load new block
+ _input.Seek(_pendingFp);
+ _lastBlockFp = _pendingFp;
+ _blockReader.readBlock();
+ _seekPending = false;
+ }
+ else if (_upto == _blockSize)
+ {
+ // Load new block
+ _lastBlockFp = _input.FilePointer;
+ _blockReader.readBlock();
+ _upto = 0;
+ }
+
+ return _pending[_upto++];
+ }
}
}
+
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/a33ca521/src/Lucene.Net.Codecs/Intblock/VariableIntBlockIndexInput.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Codecs/Intblock/VariableIntBlockIndexInput.cs b/src/Lucene.Net.Codecs/Intblock/VariableIntBlockIndexInput.cs
index 1ce9712..cc0b6f1 100644
--- a/src/Lucene.Net.Codecs/Intblock/VariableIntBlockIndexInput.cs
+++ b/src/Lucene.Net.Codecs/Intblock/VariableIntBlockIndexInput.cs
@@ -1,5 +1,3 @@
-package codecs.intblock;
-
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -21,12 +19,6 @@ package codecs.intblock;
* expected to give poor performance; it's really only for
* testing the pluggability. One should typically use pfor instead. */
-import java.io.IOException;
-
-import codecs.sep.IntIndexInput;
-import store.DataInput;
-import store.IndexInput;
-
// TODO: much of this can be shared code w/ the fixed case
/** Abstract base class that reads variable-size blocks of ints
@@ -37,7 +29,11 @@ import store.IndexInput;
*
* @lucene.experimental
*/
-public abstract class VariableIntBlockIndexInput extends IntIndexInput {
+
+using Lucene.Net.Codecs.Sep;
+using Lucene.Net.Store;
+
+public abstract class VariableIntBlockIndexInput : IntIndexInput {
protected final IndexInput in;
protected final int maxBlockSize;
@@ -77,7 +73,7 @@ public abstract class VariableIntBlockIndexInput extends IntIndexInput {
public void seek(long pos) ;
}
- private static class Reader extends IntIndexInput.Reader {
+ private static class Reader : IntIndexInputReader {
private final IndexInput in;
public final int[] pending;
@@ -145,7 +141,7 @@ public abstract class VariableIntBlockIndexInput extends IntIndexInput {
}
}
- private class Index extends IntIndexInput.Index {
+ private class Index : IntIndexInputIndex {
private long fp;
private int upto;
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/a33ca521/src/Lucene.Net.Codecs/Intblock/VariableIntBlockIndexOutput.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Codecs/Intblock/VariableIntBlockIndexOutput.cs b/src/Lucene.Net.Codecs/Intblock/VariableIntBlockIndexOutput.cs
index b76f69e..a5de4ca 100644
--- a/src/Lucene.Net.Codecs/Intblock/VariableIntBlockIndexOutput.cs
+++ b/src/Lucene.Net.Codecs/Intblock/VariableIntBlockIndexOutput.cs
@@ -1,5 +1,3 @@
-package codecs.intblock;
-
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -17,120 +15,137 @@ package codecs.intblock;
* limitations under the License.
*/
-/** Naive int block API that writes vInts. This is
- * expected to give poor performance; it's really only for
- * testing the pluggability. One should typically use pfor instead. */
-
-import java.io.IOException;
-
-import codecs.sep.IntIndexOutput;
-import store.DataOutput;
-import store.IndexOutput;
-
-// TODO: much of this can be shared code w/ the fixed case
-
-/** Abstract base class that writes variable-size blocks of ints
- * to an IndexOutput. While this is a simple approach, a
- * more performant approach would directly create an impl
- * of IntIndexOutput inside Directory. Wrapping a generic
- * IndexInput will likely cost performance.
- *
- * @lucene.experimental
- */
-public abstract class VariableIntBlockIndexOutput extends IntIndexOutput {
-
- protected final IndexOutput out;
-
- private int upto;
- private bool hitExcDuringWrite;
-
- // TODO what Var-Var codecs exist in practice... and what are there blocksizes like?
- // if its less than 128 we should set that as max and use byte?
-
- /** NOTE: maxBlockSize must be the maximum block size
- * plus the max non-causal lookahead of your codec. EG Simple9
- * requires lookahead=1 because on seeing the Nth value
- * it knows it must now encode the N-1 values before it. */
- protected VariableIntBlockIndexOutput(IndexOutput out, int maxBlockSize) {
- this.out = out;
- out.writeInt(maxBlockSize);
- }
-
- /** Called one value at a time. Return the number of
- * buffered input values that have been written to out. */
- protected abstract int add(int value) ;
-
- @Override
- public IntIndexOutput.Index index() {
- return new Index();
- }
+namespace Lucene.Net.Codecs.Intblock
+{
+
+ using System.Diagnostics;
+ using Store;
+ using Sep;
+
+ /// <summary>
+ /// Naive int block API that writes vInts. This is expected to give poor
+ /// performance; it's really only for testing the pluggability. One
+ /// should typically use pfor instead.
+ ///
+ /// TODO: much of this can be shared code w/ the fixed case
+ ///
+ /// Abstract base class that writes variable-size blocks of ints
+ /// to an IndexOutput. While this is a simple approach, a
+ /// more performant approach would directly create an impl
+ /// of IntIndexOutput inside Directory. Wrapping a generic
+ /// IndexInput will likely cost performance.
+ ///
+ /// @lucene.experimental
+ /// </summary>
+ public abstract class VariableIntBlockIndexOutput : IntIndexOutput
+ {
+ private readonly IndexOutput _output;
+ private int _upto;
+ private bool _hitExcDuringWrite;
+
+ /// <remarks>
+ /// TODO what Var-Var codecs exist in practice, and what are their blocksizes like?
+ /// If it's less than 128 should we set that as max and use byte?
+ ///
+ /// NOTE: maxBlockSize must be the maximum block size plus the max non-causal lookahead
+ /// of your codec. EG Simple9 requires lookahead=1 because on seeing the Nth value it
+ /// knows it must now encode the N-1 values before it.
+ /// </remarks>
+ protected VariableIntBlockIndexOutput(IndexOutput output, int maxBlockSize)
+ {
+ _output = output;
+ _output.WriteInt(maxBlockSize);
+ }
- private class Index extends IntIndexOutput.Index {
- long fp;
- int upto;
- long lastFP;
- int lastUpto;
+ /// <summary>
+ /// Called one value at a time. Return the number of
+ /// buffered input values that have been written out
+ /// </summary>
+ protected abstract int Add(int value);
- @Override
- public void mark() {
- fp = out.getFilePointer();
- upto = VariableIntBlockIndexOutput.this.upto;
- }
+ public override IntIndexOutputIndex Index()
+ {
+ return new VariableIntBlockIndexOutputIndex();
+ }
- @Override
- public void copyFrom(IntIndexOutput.Index other, bool copyLast) {
- Index idx = (Index) other;
- fp = idx.fp;
- upto = idx.upto;
- if (copyLast) {
- lastFP = fp;
- lastUpto = upto;
- }
- }
+ public override void Write(int v)
+ {
+ _hitExcDuringWrite = true;
+ _upto -= Add(v) - 1;
+ _hitExcDuringWrite = false;
+ Debug.Assert(_upto >= 0);
+ }
- @Override
- public void write(DataOutput indexOut, bool absolute) {
- Debug.Assert( upto >= 0;
- if (absolute) {
- indexOut.writeVInt(upto);
- indexOut.writeVLong(fp);
- } else if (fp == lastFP) {
- // same block
- Debug.Assert( upto >= lastUpto;
- int uptoDelta = upto - lastUpto;
- indexOut.writeVInt(uptoDelta << 1 | 1);
- } else {
- // new block
- indexOut.writeVInt(upto << 1);
- indexOut.writeVLong(fp - lastFP);
- }
- lastUpto = upto;
- lastFP = fp;
+ public override void Dispose()
+ {
+ try
+ {
+ if (_hitExcDuringWrite) return;
+
+ // stuff 0s in until the "real" data is flushed:
+ var stuffed = 0;
+ while (_upto > stuffed)
+ {
+ _upto -= Add(0) - 1;
+ Debug.Assert(_upto >= 0);
+ stuffed += 1;
+ }
+ }
+ finally
+ {
+ _output.Dispose();
+ }
+ }
}
- }
- @Override
- public void write(int v) {
- hitExcDuringWrite = true;
- upto -= add(v)-1;
- hitExcDuringWrite = false;
- Debug.Assert( upto >= 0;
- }
+ internal class VariableIntBlockIndexOutputIndex : IntIndexOutputIndex
+ {
+ private long _fp;
+ private int _upto;
+ private long _lastFp;
+ private int _lastUpto;
+
+ public override void Mark()
+ {
+ _fp = output.FilePointer;
+ _upto = VariableIntBlockIndexOutput.
+ this._upto;
+ }
+
+ public override void CopyFrom(IntIndexOutputIndex other, bool copyLast)
+ {
+ var idx = (Index)other;
+ _fp = idx.fp;
+ _upto = idx.upto;
+ if (!copyLast) return;
+
+ _lastFp = _fp;
+ _lastUpto = _upto;
+ }
- @Override
- public void close() {
- try {
- if (!hitExcDuringWrite) {
- // stuff 0s in until the "real" data is flushed:
- int stuffed = 0;
- while(upto > stuffed) {
- upto -= add(0)-1;
- Debug.Assert( upto >= 0;
- stuffed += 1;
+ public override void Write(DataOutput indexOut, bool absolute)
+ {
+ Debug.Assert(_upto >= 0);
+ if (absolute)
+ {
+ indexOut.WriteVInt(_upto);
+ indexOut.WriteVLong(_fp);
+ }
+ else if (_fp == _lastFp)
+ {
+ // same block
+ Debug.Assert(_upto >= _lastUpto);
+ var uptoDelta = _upto - _lastUpto;
+ indexOut.WriteVInt(uptoDelta << 1 | 1);
+ }
+ else
+ {
+ // new block
+ indexOut.WriteVInt(_upto << 1);
+ indexOut.WriteVLong(_fp - _lastFp);
+ }
+ _lastUpto = _upto;
+ _lastFp = _fp;
}
- }
- } finally {
- out.close();
}
- }
-}
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/a33ca521/src/Lucene.Net.Codecs/SimpleText/SimpleTextCodec.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Codecs/SimpleText/SimpleTextCodec.cs b/src/Lucene.Net.Codecs/SimpleText/SimpleTextCodec.cs
index d173137..1141939 100644
--- a/src/Lucene.Net.Codecs/SimpleText/SimpleTextCodec.cs
+++ b/src/Lucene.Net.Codecs/SimpleText/SimpleTextCodec.cs
@@ -18,67 +18,67 @@
namespace Lucene.Net.Codecs.SimpleText
{
- /// <summary>
- /// plain text index format.
- /// <para>
- /// <b><font color="red">FOR RECREATIONAL USE ONLY</font></B>
- /// @lucene.experimental
- /// </para>
- /// </summary>
- public sealed class SimpleTextCodec : Codec
- {
- private readonly PostingsFormat _postings = new SimpleTextPostingsFormat();
- private readonly StoredFieldsFormat _storedFields = new SimpleTextStoredFieldsFormat();
- private readonly SegmentInfoFormat _segmentInfos = new SimpleTextSegmentInfoFormat();
- private readonly FieldInfosFormat _fieldInfosFormatRenamed = new SimpleTextFieldInfosFormat();
- private readonly TermVectorsFormat _vectorsFormat = new SimpleTextTermVectorsFormat();
- private readonly NormsFormat _normsFormatRenamed = new SimpleTextNormsFormat();
- private readonly LiveDocsFormat _liveDocs = new SimpleTextLiveDocsFormat();
- private readonly DocValuesFormat _dvFormat = new SimpleTextDocValuesFormat();
+ /// <summary>
+ /// plain text index format.
+ /// <para>
+ /// <b><font color="red">FOR RECREATIONAL USE ONLY</font></b>
+ /// @lucene.experimental
+ /// </para>
+ /// </summary>
+ public sealed class SimpleTextCodec : Codec
+ {
+ private readonly PostingsFormat _postings = new SimpleTextPostingsFormat();
+ private readonly StoredFieldsFormat _storedFields = new SimpleTextStoredFieldsFormat();
+ private readonly SegmentInfoFormat _segmentInfos = new SimpleTextSegmentInfoFormat();
+ private readonly FieldInfosFormat _fieldInfosFormatRenamed = new SimpleTextFieldInfosFormat();
+ private readonly TermVectorsFormat _vectorsFormat = new SimpleTextTermVectorsFormat();
+ private readonly NormsFormat _normsFormatRenamed = new SimpleTextNormsFormat();
+ private readonly LiveDocsFormat _liveDocs = new SimpleTextLiveDocsFormat();
+ private readonly DocValuesFormat _dvFormat = new SimpleTextDocValuesFormat();
- public SimpleTextCodec() : base("SimpleText")
- {
- }
+ public SimpleTextCodec() : base("SimpleText")
+ {
+ }
- public override PostingsFormat PostingsFormat()
- {
- return _postings;
- }
+ public override PostingsFormat PostingsFormat()
+ {
+ return _postings;
+ }
- public override StoredFieldsFormat StoredFieldsFormat()
- {
- return _storedFields;
- }
+ public override StoredFieldsFormat StoredFieldsFormat()
+ {
+ return _storedFields;
+ }
- public override TermVectorsFormat TermVectorsFormat()
- {
- return _vectorsFormat;
- }
+ public override TermVectorsFormat TermVectorsFormat()
+ {
+ return _vectorsFormat;
+ }
- public override FieldInfosFormat FieldInfosFormat()
- {
- return _fieldInfosFormatRenamed;
- }
+ public override FieldInfosFormat FieldInfosFormat()
+ {
+ return _fieldInfosFormatRenamed;
+ }
- public override SegmentInfoFormat SegmentInfoFormat()
- {
- return _segmentInfos;
- }
+ public override SegmentInfoFormat SegmentInfoFormat()
+ {
+ return _segmentInfos;
+ }
- public override NormsFormat NormsFormat()
- {
- return _normsFormatRenamed;
- }
+ public override NormsFormat NormsFormat()
+ {
+ return _normsFormatRenamed;
+ }
- public override LiveDocsFormat LiveDocsFormat()
- {
- return _liveDocs;
- }
+ public override LiveDocsFormat LiveDocsFormat()
+ {
+ return _liveDocs;
+ }
- public override DocValuesFormat DocValuesFormat()
- {
- return _dvFormat;
- }
- }
+ public override DocValuesFormat DocValuesFormat()
+ {
+ return _dvFormat;
+ }
+ }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/a33ca521/src/Lucene.Net.Codecs/SimpleText/SimpleTextDocValuesReader.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Codecs/SimpleText/SimpleTextDocValuesReader.cs b/src/Lucene.Net.Codecs/SimpleText/SimpleTextDocValuesReader.cs
index 4ac5623..478f69b 100644
--- a/src/Lucene.Net.Codecs/SimpleText/SimpleTextDocValuesReader.cs
+++ b/src/Lucene.Net.Codecs/SimpleText/SimpleTextDocValuesReader.cs
@@ -42,7 +42,6 @@ namespace Lucene.Net.Codecs.SimpleText
public class SimpleTextDocValuesReader : DocValuesProducer
{
-
internal class OneField
{
public long DataStartFilePointer { get; set; }
@@ -151,47 +150,66 @@ namespace Lucene.Net.Codecs.SimpleText
return new NumericDocValuesAnonymousInnerClassHelper(this, field, @in, scratch);
}
- private class NumericDocValuesAnonymousInnerClassHelper : NumericDocValues
+ public override BinaryDocValues GetBinary(FieldInfo fieldInfo)
{
- private readonly SimpleTextDocValuesReader _outerInstance;
+ var field = FIELDS[fieldInfo.Name];
+ Debug.Assert(field != null);
+ var input = (IndexInput)DATA.Clone();
+ var scratch = new BytesRef();
- private readonly OneField _field;
- private readonly IndexInput _input;
- private readonly BytesRef _scratch;
+ return new BinaryDocValuesAnonymousInnerClassHelper(this, field, input, scratch);
+ }
- public NumericDocValuesAnonymousInnerClassHelper(SimpleTextDocValuesReader outerInstance,
- OneField field, IndexInput @in, BytesRef scratch)
- {
- _outerInstance = outerInstance;
- _field = field;
- _input = @in;
- _scratch = scratch;
- }
+ public override SortedDocValues GetSorted(FieldInfo fieldInfo)
+ {
+ var field = FIELDS[fieldInfo.Name];
- public override long Get(int docId)
- {
- if (docId < 0 || docId >= _outerInstance.MAX_DOC)
- throw new IndexOutOfRangeException("docID must be 0 .. " + (_outerInstance.MAX_DOC - 1) +
- "; got " + docId);
+ // SegmentCoreReaders already verifies this field is valid:
+ Debug.Assert(field != null);
+ var input = (IndexInput)DATA.Clone();
+ var scratch = new BytesRef();
- _input.Seek(_field.DataStartFilePointer + (1 + _field.Pattern.Length + 2)*docId);
- SimpleTextUtil.ReadLine(_input, _scratch);
+ return new SortedDocValuesAnonymousInnerClassHelper(this, field, input, scratch);
+ }
- long bd;
- try
- {
- bd = long.Parse(_scratch.Utf8ToString());
- }
- catch (FormatException ex)
- {
- throw new CorruptIndexException("failed to parse long value (resource=" + _input + ")", ex);
- }
+ public override SortedSetDocValues GetSortedSet(FieldInfo fieldInfo)
+ {
+ var field = FIELDS[fieldInfo.Name];
- SimpleTextUtil.ReadLine(_input, _scratch); // read the line telling us if its real or not
- return _field.MinValue + bd;
+ // SegmentCoreReaders already verifies this field is
+ // valid:
+ Debug.Assert(field != null);
+
+ var input = (IndexInput) DATA.Clone();
+ var scratch = new BytesRef();
+
+ return new SortedSetDocValuesAnonymousInnerClassHelper(this, field, input, scratch);
+ }
+
+ public override Bits GetDocsWithField(FieldInfo field)
+ {
+ switch (field.DocValuesType)
+ {
+ case FieldInfo.DocValuesType_e.SORTED_SET:
+ return DocValues.DocsWithValue(GetSortedSet(field), MAX_DOC);
+ case FieldInfo.DocValuesType_e.SORTED:
+ return DocValues.DocsWithValue(GetSorted(field), MAX_DOC);
+ case FieldInfo.DocValuesType_e.BINARY:
+ return GetBinaryDocsWithField(field);
+ case FieldInfo.DocValuesType_e.NUMERIC:
+ return GetNumericDocsWithField(field);
+ default:
+ throw new InvalidEnumArgumentException();
}
}
+ protected override void Dispose(bool disposing)
+ {
+ if (disposing) return;
+
+ DATA.Dispose();
+ }
+
private Bits GetNumericDocsWithField(FieldInfo fieldInfo)
{
var field = FIELDS[fieldInfo.Name];
@@ -200,17 +218,57 @@ namespace Lucene.Net.Codecs.SimpleText
return new BitsAnonymousInnerClassHelper(this, field, input, scratch);
}
- public override BinaryDocValues GetBinary(FieldInfo fieldInfo)
+ private Bits GetBinaryDocsWithField(FieldInfo fieldInfo)
{
var field = FIELDS[fieldInfo.Name];
- Debug.Assert(field != null);
var input = (IndexInput)DATA.Clone();
var scratch = new BytesRef();
-
- return new BinaryDocValuesAnonymousInnerClassHelper(this, field, input, scratch);
+
+ return new BitsAnonymousInnerClassHelper2(this, field, input, scratch);
}
- private class BinaryDocValuesAnonymousInnerClassHelper : BinaryDocValues
+ /// <summary> Used only in ctor: </summary>
+ private void ReadLine()
+ {
+ SimpleTextUtil.ReadLine(DATA, SCRATCH);
+ }
+
+ /// <summary> Used only in ctor: </summary>
+ private bool StartsWith(BytesRef prefix)
+ {
+ return StringHelper.StartsWith(SCRATCH, prefix);
+ }
+
+ /// <summary> Used only in ctor: </summary>
+ private string StripPrefix(BytesRef prefix)
+ {
+ return SCRATCH.Bytes.SubList(SCRATCH.Offset + prefix.Length, SCRATCH.Length - prefix.Length).ToString();
+ }
+
+ public override long RamBytesUsed()
+ {
+ return 0;
+ }
+
+ public override void CheckIntegrity()
+ {
+ var iScratch = new BytesRef();
+ var clone = (IndexInput) DATA.Clone();
+ clone.Seek(0);
+ ChecksumIndexInput input = new BufferedChecksumIndexInput(clone);
+ while (true)
+ {
+ SimpleTextUtil.ReadLine(input, iScratch);
+ if (!iScratch.Equals(SimpleTextDocValuesWriter.END)) continue;
+
+ SimpleTextUtil.CheckFooter(input);
+ break;
+ }
+ }
+
+
+
+ private class BitsAnonymousInnerClassHelper : Bits
{
private readonly SimpleTextDocValuesReader _outerInstance;
@@ -218,50 +276,27 @@ namespace Lucene.Net.Codecs.SimpleText
private readonly IndexInput _input;
private readonly BytesRef _scratch;
- public BinaryDocValuesAnonymousInnerClassHelper(SimpleTextDocValuesReader outerInstance, OneField field,
- IndexInput input, BytesRef scratch)
+ public BitsAnonymousInnerClassHelper(SimpleTextDocValuesReader outerInstance,
+ OneField field, IndexInput @in, BytesRef scratch)
{
_outerInstance = outerInstance;
_field = field;
- _input = input;
+ _input = @in;
_scratch = scratch;
}
- public override void Get(int docId, BytesRef result)
+ public bool Get(int index)
{
- if (docId < 0 || docId >= _outerInstance.MAX_DOC)
- throw new IndexOutOfRangeException("docID must be 0 .. " + (_outerInstance.MAX_DOC - 1) +
- "; got " + docId);
-
- _input.Seek(_field.DataStartFilePointer + (9 + _field.Pattern.Length + _field.MaxLength + 2)*docId);
- SimpleTextUtil.ReadLine(_input, _scratch);
- Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextDocValuesWriter.LENGTH));
- int len;
- try
- {
- len = int.Parse(_scratch.Bytes.SubList(
- _scratch.Offset + SimpleTextDocValuesWriter.LENGTH.Length,
- _scratch.Length - SimpleTextDocValuesWriter.LENGTH.Length).ToString());
- }
- catch (FormatException ex)
- {
- throw new CorruptIndexException("failed to parse int value (resource=" + _input + ")", ex);
- }
-
- result.Bytes = new sbyte[len];
- result.Offset = 0;
- result.Length = len;
- _input.ReadBytes(result.Bytes, 0, len);
+ _input.Seek(_field.DataStartFilePointer + (1 + _field.Pattern.Length + 2) * index);
+ SimpleTextUtil.ReadLine(_input, _scratch); // data
+ SimpleTextUtil.ReadLine(_input, _scratch); // 'T' or 'F'
+ return _scratch.Bytes[_scratch.Offset] == (sbyte)'T';
}
- }
- private Bits GetBinaryDocsWithField(FieldInfo fieldInfo)
- {
- var field = FIELDS[fieldInfo.Name];
- var input = (IndexInput)DATA.Clone();
- var scratch = new BytesRef();
-
- return new BitsAnonymousInnerClassHelper2(this, field, input, scratch);
+ public int Length()
+ {
+ return _outerInstance.MAX_DOC;
+ }
}
private class BitsAnonymousInnerClassHelper2 : Bits
@@ -272,7 +307,7 @@ namespace Lucene.Net.Codecs.SimpleText
private readonly IndexInput _input;
private readonly BytesRef _scratch;
- public BitsAnonymousInnerClassHelper2(SimpleTextDocValuesReader outerInstance, OneField field,
+ public BitsAnonymousInnerClassHelper2(SimpleTextDocValuesReader outerInstance, OneField field,
IndexInput input, BytesRef scratch)
{
_outerInstance = outerInstance;
@@ -283,13 +318,13 @@ namespace Lucene.Net.Codecs.SimpleText
public bool Get(int index)
{
- _input.Seek(_field.DataStartFilePointer + (9 + _field.Pattern.Length + _field.MaxLength + 2)*index);
+ _input.Seek(_field.DataStartFilePointer + (9 + _field.Pattern.Length + _field.MaxLength + 2) * index);
SimpleTextUtil.ReadLine(_input, _scratch);
Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextDocValuesWriter.LENGTH));
int len;
try
{
- len = int.Parse(_scratch.Bytes.SubList( _scratch.Offset + SimpleTextDocValuesWriter.LENGTH.Length,
+ len = int.Parse(_scratch.Bytes.SubList(_scratch.Offset + SimpleTextDocValuesWriter.LENGTH.Length,
_scratch.Length - SimpleTextDocValuesWriter.LENGTH.Length).ToString());
}
catch (FormatException ex)
@@ -302,7 +337,7 @@ namespace Lucene.Net.Codecs.SimpleText
_input.ReadBytes(bytes, 0, len);
SimpleTextUtil.ReadLine(_input, _scratch); // newline
SimpleTextUtil.ReadLine(_input, _scratch); // 'T' or 'F'
- return _scratch.Bytes[_scratch.Offset] == (sbyte) 'T';
+ return _scratch.Bytes[_scratch.Offset] == (sbyte)'T';
}
public int Length()
@@ -311,279 +346,214 @@ namespace Lucene.Net.Codecs.SimpleText
}
}
- public override SortedDocValues GetSorted(FieldInfo fieldInfo)
- {
- var field = FIELDS[fieldInfo.Name];
-
- // SegmentCoreReaders already verifies this field is valid:
- Debug.Assert(field != null);
- IndexInput @in = (IndexInput)DATA.Clone();
- BytesRef scratch = new BytesRef();
-
- DecimalFormat decoder = new DecimalFormat(field.Pattern, new DecimalFormatSymbols(Locale.ROOT));
- DecimalFormat ordDecoder = new DecimalFormat(field.OrdPattern, new DecimalFormatSymbols(Locale.ROOT));
-
- return new SortedDocValuesAnonymousInnerClassHelper(this, field, @in, scratch, decoder, ordDecoder);
- }
-
private class SortedDocValuesAnonymousInnerClassHelper : SortedDocValues
{
- private readonly SimpleTextDocValuesReader outerInstance;
+ private readonly SimpleTextDocValuesReader _outerInstance;
- private Lucene.Net.Codecs.SimpleText.SimpleTextDocValuesReader.OneField field;
- private IndexInput @in;
- private BytesRef scratch;
- private DecimalFormat decoder;
- private DecimalFormat ordDecoder;
+ private readonly OneField _field;
+ private readonly IndexInput _input;
+ private readonly BytesRef _scratch;
+ private readonly string _decoderFormat;
+ private readonly string _ordDecoderFormat;
public SortedDocValuesAnonymousInnerClassHelper(SimpleTextDocValuesReader outerInstance,
- Lucene.Net.Codecs.SimpleText.SimpleTextDocValuesReader.OneField field, IndexInput @in, BytesRef scratch,
- DecimalFormat decoder, DecimalFormat ordDecoder)
+ OneField field, IndexInput input, BytesRef scratch)
{
- this.outerInstance = outerInstance;
- this.field = field;
- this.@in = @in;
- this.scratch = scratch;
- this.decoder = decoder;
- this.ordDecoder = ordDecoder;
+ _outerInstance = outerInstance;
+ _field = field;
+ _input = input;
+ _scratch = scratch;
+ _decoderFormat = field.Pattern;
+ _ordDecoderFormat = field.OrdPattern;
}
- public override int GetOrd(int docID)
+ public override int GetOrd(int docId) // returns the ord recorded for docId; throws CorruptIndexException if the ord line cannot be parsed
{
- if (docID < 0 || docID >= outerInstance.MAX_DOC)
+ if (docId < 0 || docId >= _outerInstance.MAX_DOC)
{
- throw new IndexOutOfRangeException("docID must be 0 .. " + (outerInstance.MAX_DOC - 1) + "; got " +
- docID);
+ throw new IndexOutOfRangeException("docID must be 0 .. " + (_outerInstance.MAX_DOC - 1) + "; got " +
+ docId);
}
- @in.Seek(field.DataStartFilePointer + field.NumValues*(9 + field.Pattern.Length + field.MaxLength) +
- docID*(1 + field.OrdPattern.Length));
- SimpleTextUtil.ReadLine(@in, scratch);
+ _input.Seek(_field.DataStartFilePointer + _field.NumValues * (9 + _field.Pattern.Length + _field.MaxLength) +
+ docId * (1 + _field.OrdPattern.Length)); // skip the value entries, then docId fixed-width ord lines
+ SimpleTextUtil.ReadLine(_input, _scratch);
try
{
- return (long) (int) ordDecoder.Parse(scratch.Utf8ToString()) - 1;
+ return int.Parse(_scratch.Utf8ToString()) - 1; // the writer stores ord + 1, so subtract 1 to recover the ord
}
- catch (ParseException pe)
+ catch (Exception pe)
{
- CorruptIndexException e = new CorruptIndexException("failed to parse ord (resource=" + @in + ")");
- e.initCause(pe);
+ var e = new CorruptIndexException("failed to parse ord (resource=" + _input + ")", pe);
throw e;
}
}
public override void LookupOrd(int ord, BytesRef result)
{
- if (ord < 0 || ord >= field.NumValues)
+ if (ord < 0 || ord >= _field.NumValues)
{
- throw new System.IndexOutOfRangeException("ord must be 0 .. " + (field.NumValues - 1) + "; got " +
+ throw new IndexOutOfRangeException("ord must be 0 .. " + (_field.NumValues - 1) + "; got " +
ord);
}
- @in.Seek(field.DataStartFilePointer + ord*(9 + field.Pattern.Length + field.MaxLength));
- SimpleTextUtil.ReadLine(@in, scratch);
- Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextDocValuesWriter.LENGTH),
- "got " + scratch.Utf8ToString() + " in=" + @in);
+ _input.Seek(_field.DataStartFilePointer + ord * (9 + _field.Pattern.Length + _field.MaxLength)); // value entries are fixed width, so seek straight to entry #ord
+ SimpleTextUtil.ReadLine(_input, _scratch);
+ Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextDocValuesWriter.LENGTH),
+ "got " + _scratch.Utf8ToString() + " in=" + _input);
int len;
try
{
len =
(int)
- decoder.parse(scratch.Bytes.SubList(
- scratch.Offset + SimpleTextDocValuesWriter.LENGTH.Length,
- scratch.Length - SimpleTextDocValuesWriter.LENGTH.Length).ToString());
+ int.Parse(_scratch.Bytes.SubList( // parse the digits after the LENGTH prefix, as the binary readers in this file do
+ _scratch.Offset + SimpleTextDocValuesWriter.LENGTH.Length,
+ _scratch.Length - SimpleTextDocValuesWriter.LENGTH.Length).ToString()); // NOTE(review): assumes SubList(...).ToString() yields the digit text - confirm
}
- catch (ParseException pe)
+ catch (Exception pe)
{
- CorruptIndexException e =
- new CorruptIndexException("failed to parse int length (resource=" + @in + ")");
- e.initCause(pe);
+ var e = new CorruptIndexException("failed to parse int length (resource=" + _input + ")", pe);
throw e;
}
+
result.Bytes = new sbyte[len];
result.Offset = 0;
result.Length = len;
- @in.ReadBytes(result.Bytes, 0, len);
+ _input.ReadBytes(result.Bytes, 0, len);
}
public override int ValueCount
{
- get { return (int) field.NumValues; }
+ get { return (int)_field.NumValues; }
}
}
- public override SortedSetDocValues GetSortedSet(FieldInfo fieldInfo)
- {
- OneField field = FIELDS[fieldInfo.Name];
-
- // SegmentCoreReaders already verifies this field is
- // valid:
- Debug.Assert(field != null);
-
- IndexInput @in = (IndexInput) DATA.Clone();
- BytesRef scratch = new BytesRef();
- DecimalFormat decoder = new DecimalFormat(field.Pattern, new DecimalFormatSymbols(Locale.ROOT));
-
- return new SortedSetDocValuesAnonymousInnerClassHelper(this, field, @in, scratch, decoder);
- }
-
private class SortedSetDocValuesAnonymousInnerClassHelper : SortedSetDocValues
{
- private readonly SimpleTextDocValuesReader outerInstance;
+ private readonly SimpleTextDocValuesReader _outerInstance;
- private Lucene.Net.Codecs.SimpleText.SimpleTextDocValuesReader.OneField field;
- private IndexInput @in;
- private BytesRef scratch;
- private DecimalFormat decoder;
+ private readonly OneField _field;
+ private readonly IndexInput _input;
+ private readonly BytesRef _scratch;
+ private readonly string _decoderFormat;
public SortedSetDocValuesAnonymousInnerClassHelper(SimpleTextDocValuesReader outerInstance,
- Lucene.Net.Codecs.SimpleText.SimpleTextDocValuesReader.OneField field, IndexInput @in, BytesRef scratch,
- DecimalFormat decoder)
+ OneField field, IndexInput input, BytesRef scratch)
{
- this.outerInstance = outerInstance;
- this.field = field;
- this.@in = @in;
- this.scratch = scratch;
- this.decoder = decoder;
- currentOrds = new string[0];
- currentIndex = 0;
+ _outerInstance = outerInstance;
+ _field = field;
+ _input = input;
+ _scratch = scratch;
+ _decoderFormat = field.Pattern;
+ _currentOrds = new string[0];
+ _currentIndex = 0;
}
- internal string[] currentOrds;
- internal int currentIndex;
+ private string[] _currentOrds;
+ private int _currentIndex;
public override long NextOrd()
{
- return currentIndex == currentOrds.Length ? NO_MORE_ORDS : Convert.ToInt64(currentOrds[currentIndex++]);
+ return _currentIndex == _currentOrds.Length ? NO_MORE_ORDS : Convert.ToInt64(_currentOrds[_currentIndex++]);
}
public override int Document
{
set
{
- if (value < 0 || value >= outerInstance.MAX_DOC)
- throw new IndexOutOfRangeException("docID must be 0 .. " + (outerInstance.MAX_DOC - 1) + "; got " +
+ if (value < 0 || value >= _outerInstance.MAX_DOC)
+ throw new IndexOutOfRangeException("docID must be 0 .. " + (_outerInstance.MAX_DOC - 1) + "; got " +
value);
- @in.Seek(field.DataStartFilePointer + field.NumValues*(9 + field.Pattern.Length + field.MaxLength) +
- value*(1 + field.OrdPattern.Length));
- SimpleTextUtil.ReadLine(@in, scratch);
- string ordList = scratch.Utf8ToString().Trim();
- if (ordList.Length == 0)
- {
- currentOrds = new string[0];
- }
- else
- {
- currentOrds = ordList.Split(",", true);
- }
- currentIndex = 0;
+ _input.Seek(_field.DataStartFilePointer + _field.NumValues * (9 + _field.Pattern.Length + _field.MaxLength) +
+ value * (1 + _field.OrdPattern.Length)); // skip the value entries, then one fixed-width ord-list line per preceding doc
+ SimpleTextUtil.ReadLine(_input, _scratch);
+ var ordList = _scratch.Utf8ToString().Trim();
+ _currentOrds = ordList.Length == 0 ? new string[0] : ordList.Split(','); // comma-separated ords; an empty line means the doc has none
+ _currentIndex = 0; // restart NextOrd() iteration for the new document
}
}
public override void LookupOrd(long ord, BytesRef result)
{
- if (ord < 0 || ord >= field.NumValues)
+ if (ord < 0 || ord >= _field.NumValues)
{
- throw new IndexOutOfRangeException("ord must be 0 .. " + (field.NumValues - 1) + "; got " + ord);
+ throw new IndexOutOfRangeException("ord must be 0 .. " + (_field.NumValues - 1) + "; got " + ord);
}
- @in.Seek(field.DataStartFilePointer + ord*(9 + field.Pattern.Length + field.MaxLength));
- SimpleTextUtil.ReadLine(@in, scratch);
- Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextDocValuesWriter.LENGTH),
- "got " + scratch.Utf8ToString() + " in=" + @in);
+ _input.Seek(_field.DataStartFilePointer + ord * (9 + _field.Pattern.Length + _field.MaxLength)); // value entries are fixed width, so seek straight to entry #ord
+ SimpleTextUtil.ReadLine(_input, _scratch);
+ Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextDocValuesWriter.LENGTH),
+ "got " + _scratch.Utf8ToString() + " in=" + _input);
int len;
try
{
len =
(int)
- decoder.parse(scratch.Bytes.SubList(
- scratch.Offset + SimpleTextDocValuesWriter.LENGTH.Length,
- scratch.Length - SimpleTextDocValuesWriter.LENGTH.Length).ToString());
+ int.Parse(_scratch.Bytes.SubList( // parse the digits after the LENGTH prefix, as the binary readers in this file do
+ _scratch.Offset + SimpleTextDocValuesWriter.LENGTH.Length,
+ _scratch.Length - SimpleTextDocValuesWriter.LENGTH.Length).ToString()); // NOTE(review): assumes SubList(...).ToString() yields the digit text - confirm
}
- catch (ParseException pe)
+ catch (Exception pe)
{
- CorruptIndexException e =
- new CorruptIndexException("failed to parse int length (resource=" + @in + ")");
- e.initCause(pe);
+ var e = new CorruptIndexException("failed to parse int length (resource=" + _input + ")", pe);
throw e;
}
+
result.Bytes = new sbyte[len];
result.Offset = 0;
result.Length = len;
- @in.ReadBytes(result.Bytes, 0, len);
-
+ _input.ReadBytes(result.Bytes, 0, len);
}
public override long ValueCount
{
- get { return field.NumValues; }
+ get { return _field.NumValues; }
}
}
- public override Bits GetDocsWithField(FieldInfo field)
- {
- switch (field.DocValuesType)
- {
- case FieldInfo.DocValuesType_e.SORTED_SET:
- return DocValues.DocsWithValue(GetSortedSet(field), MAX_DOC);
- case FieldInfo.DocValuesType_e.SORTED:
- return DocValues.DocsWithValue(GetSorted(field), MAX_DOC);
- case FieldInfo.DocValuesType_e.BINARY:
- return GetBinaryDocsWithField(field);
- case FieldInfo.DocValuesType_e.NUMERIC:
- return GetNumericDocsWithField(field);
- default:
- throw new InvalidEnumArgumentException();
- }
- }
-
- protected override void Dispose(bool disposing)
+ private class NumericDocValuesAnonymousInnerClassHelper : NumericDocValues
{
- if (disposing) return;
-
- DATA.Dispose();
- }
+ private readonly SimpleTextDocValuesReader _outerInstance;
- /// <summary> Used only in ctor: </summary>
- private void ReadLine()
- {
- SimpleTextUtil.ReadLine(DATA, SCRATCH);
- }
+ private readonly OneField _field;
+ private readonly IndexInput _input;
+ private readonly BytesRef _scratch;
- /// <summary> Used only in ctor: </summary>
- private bool StartsWith(BytesRef prefix)
- {
- return StringHelper.StartsWith(SCRATCH, prefix);
- }
+ public NumericDocValuesAnonymousInnerClassHelper(SimpleTextDocValuesReader outerInstance,
+ OneField field, IndexInput input, BytesRef scratch)
+ {
+ _outerInstance = outerInstance;
+ _field = field;
+ _input = input;
+ _scratch = scratch;
+ }
- /// <summary> Used only in ctor: </summary>
- private string StripPrefix(BytesRef prefix)
- {
- return SCRATCH.Bytes.SubList(SCRATCH.Offset + prefix.Length, SCRATCH.Length - prefix.Length).ToString();
- }
+ public override long Get(int docId)
+ {
+ if (docId < 0 || docId >= _outerInstance.MAX_DOC)
+ throw new IndexOutOfRangeException("docID must be 0 .. " + (_outerInstance.MAX_DOC - 1) +
+ "; got " + docId);
- public override long RamBytesUsed()
- {
- return 0;
- }
+ _input.Seek(_field.DataStartFilePointer + (1 + _field.Pattern.Length + 2) * docId);
+ SimpleTextUtil.ReadLine(_input, _scratch);
- public override void CheckIntegrity()
- {
- var iScratch = new BytesRef();
- var clone = (IndexInput) DATA.Clone();
- clone.Seek(0);
- ChecksumIndexInput input = new BufferedChecksumIndexInput(clone);
- while (true)
- {
- SimpleTextUtil.ReadLine(input, iScratch);
- if (!iScratch.Equals(SimpleTextDocValuesWriter.END)) continue;
+ long bd;
+ try
+ {
+ bd = long.Parse(_scratch.Utf8ToString());
+ }
+ catch (FormatException ex)
+ {
+ throw new CorruptIndexException("failed to parse long value (resource=" + _input + ")", ex);
+ }
- SimpleTextUtil.CheckFooter(input);
- break;
+ SimpleTextUtil.ReadLine(_input, _scratch); // read the line telling us if its real or not
+ return _field.MinValue + bd;
}
}
- private class BitsAnonymousInnerClassHelper : Bits
+ private class BinaryDocValuesAnonymousInnerClassHelper : BinaryDocValues
{
private readonly SimpleTextDocValuesReader _outerInstance;
@@ -591,26 +561,40 @@ namespace Lucene.Net.Codecs.SimpleText
private readonly IndexInput _input;
private readonly BytesRef _scratch;
- public BitsAnonymousInnerClassHelper(SimpleTextDocValuesReader outerInstance,
- OneField field, IndexInput @in, BytesRef scratch)
+ public BinaryDocValuesAnonymousInnerClassHelper(SimpleTextDocValuesReader outerInstance, OneField field,
+ IndexInput input, BytesRef scratch)
{
_outerInstance = outerInstance;
_field = field;
- _input = @in;
+ _input = input;
_scratch = scratch;
}
- public bool Get(int index)
+ public override void Get(int docId, BytesRef result)
{
- _input.Seek(_field.DataStartFilePointer + (1 + _field.Pattern.Length + 2) * index);
- SimpleTextUtil.ReadLine(_input, _scratch); // data
- SimpleTextUtil.ReadLine(_input, _scratch); // 'T' or 'F'
- return _scratch.Bytes[_scratch.Offset] == (sbyte)'T';
- }
+ if (docId < 0 || docId >= _outerInstance.MAX_DOC)
+ throw new IndexOutOfRangeException("docID must be 0 .. " + (_outerInstance.MAX_DOC - 1) +
+ "; got " + docId);
- public int Length()
- {
- return _outerInstance.MAX_DOC;
+ _input.Seek(_field.DataStartFilePointer + (9 + _field.Pattern.Length + _field.MaxLength + 2) * docId);
+ SimpleTextUtil.ReadLine(_input, _scratch);
+ Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextDocValuesWriter.LENGTH));
+ int len;
+ try
+ {
+ len = int.Parse(_scratch.Bytes.SubList(
+ _scratch.Offset + SimpleTextDocValuesWriter.LENGTH.Length,
+ _scratch.Length - SimpleTextDocValuesWriter.LENGTH.Length).ToString());
+ }
+ catch (FormatException ex)
+ {
+ throw new CorruptIndexException("failed to parse int value (resource=" + _input + ")", ex);
+ }
+
+ result.Bytes = new sbyte[len];
+ result.Offset = 0;
+ result.Length = len;
+ _input.ReadBytes(result.Bytes, 0, len);
}
}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/a33ca521/src/Lucene.Net.Codecs/SimpleText/SimpleTextDocValuesWriter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Codecs/SimpleText/SimpleTextDocValuesWriter.cs b/src/Lucene.Net.Codecs/SimpleText/SimpleTextDocValuesWriter.cs
index c3f8fa4..b322f1d 100644
--- a/src/Lucene.Net.Codecs/SimpleText/SimpleTextDocValuesWriter.cs
+++ b/src/Lucene.Net.Codecs/SimpleText/SimpleTextDocValuesWriter.cs
@@ -99,8 +99,7 @@ namespace Lucene.Net.Codecs.SimpleText
SimpleTextUtil.WriteNewline(data);
var patternString = sb.ToString();
- DecimalFormat encoder = new DecimalFormat(patternString, new DecimalFormatSymbols(Locale.ROOT));
-
+
int numDocsWritten = 0;
// second pass to write the values
@@ -109,7 +108,7 @@ namespace Lucene.Net.Codecs.SimpleText
Debug.Assert(value >= minValue);
var delta = value - minValue;
- string s = encoder.format(delta);
+ string s = delta.ToString(patternString);
Debug.Assert(s.Length == patternString.Length);
SimpleTextUtil.Write(data, s, scratch);
SimpleTextUtil.WriteNewline(data);
@@ -151,14 +150,13 @@ namespace Lucene.Net.Codecs.SimpleText
SimpleTextUtil.Write(data, sb.ToString(), scratch);
SimpleTextUtil.WriteNewline(data);
- DecimalFormat encoder = new DecimalFormat(sb.ToString(), new DecimalFormatSymbols(Locale.ROOT));
-
+
int numDocsWritten = 0;
foreach (BytesRef value in values)
{
int length = value == null ? 0 : value.Length;
SimpleTextUtil.Write(data, LENGTH);
- SimpleTextUtil.Write(data, encoder.format(length), scratch);
+ SimpleTextUtil.Write(data, length.ToString(sb.ToString()), scratch);
SimpleTextUtil.WriteNewline(data);
// write bytes -- don't use SimpleText.Write
@@ -217,8 +215,8 @@ namespace Lucene.Net.Codecs.SimpleText
SimpleTextUtil.Write(data, PATTERN);
SimpleTextUtil.Write(data, sb.ToString(), scratch);
SimpleTextUtil.WriteNewline(data);
-
- DecimalFormat encoder = new DecimalFormat(sb.ToString(), new DecimalFormatSymbols(Locale.ROOT));
+
+ var encoderFormat = sb.ToString();
int maxOrdBytes = Convert.ToString(valueCount + 1L).Length;
sb.Length = 0;
@@ -231,8 +229,8 @@ namespace Lucene.Net.Codecs.SimpleText
SimpleTextUtil.Write(data, ORDPATTERN);
SimpleTextUtil.Write(data, sb.ToString(), scratch);
SimpleTextUtil.WriteNewline(data);
-
- DecimalFormat ordEncoder = new DecimalFormat(sb.ToString(), new DecimalFormatSymbols(Locale.ROOT));
+
+ var ordEncoderFormat = sb.ToString();
// for asserts:
int valuesSeen = 0;
@@ -241,7 +239,7 @@ namespace Lucene.Net.Codecs.SimpleText
{
// write length
SimpleTextUtil.Write(data, LENGTH);
- SimpleTextUtil.Write(data, encoder.format(value.Length), scratch);
+ SimpleTextUtil.Write(data, value.Length.ToString(encoderFormat), scratch);
SimpleTextUtil.WriteNewline(data);
// write bytes -- don't use SimpleText.Write
@@ -262,7 +260,7 @@ namespace Lucene.Net.Codecs.SimpleText
foreach (var ord in docToOrd)
{
- SimpleTextUtil.Write(data, ordEncoder.format(ord + 1), scratch);
+ SimpleTextUtil.Write(data, (ord + 1).ToString(ordEncoderFormat), scratch);
SimpleTextUtil.WriteNewline(data);
}
}
@@ -276,7 +274,7 @@ namespace Lucene.Net.Codecs.SimpleText
long valueCount = 0;
int maxLength = 0;
- foreach (BytesRef value in values)
+ foreach (var value in values)
{
maxLength = Math.Max(maxLength, value.Length);
valueCount++;
@@ -304,7 +302,7 @@ namespace Lucene.Net.Codecs.SimpleText
SimpleTextUtil.Write(data, sb.ToString(), scratch);
SimpleTextUtil.WriteNewline(data);
- DecimalFormat encoder = new DecimalFormat(sb.ToString(), new DecimalFormatSymbols(Locale.ROOT));
+ string encoderFormat = sb.ToString();
// compute ord pattern: this is funny, we encode all values for all docs to find the maximum length
var maxOrdListLength = 0;
@@ -316,7 +314,9 @@ namespace Lucene.Net.Codecs.SimpleText
int count = (int) n;
for (int i = 0; i < count; i++)
{
- long ord = (long) ordStream.next();
+ ordStream.MoveNext();
+
+ var ord = ordStream.Current;
if (sb2.Length > 0)
{
sb2.Append(",");
@@ -340,11 +340,11 @@ namespace Lucene.Net.Codecs.SimpleText
// for asserts:
long valuesSeen = 0;
- foreach (BytesRef value in values)
+ foreach (var value in values)
{
// write length
SimpleTextUtil.Write(data, LENGTH);
- SimpleTextUtil.Write(data, encoder.format(value.Length), scratch);
+ SimpleTextUtil.Write(data, value.Length.ToString(encoderFormat), scratch);
SimpleTextUtil.WriteNewline(data);
// write bytes -- don't use SimpleText.Write
@@ -352,7 +352,7 @@ namespace Lucene.Net.Codecs.SimpleText
data.WriteBytes(value.Bytes, value.Offset, value.Length);
// pad to fit
- for (int i = value.Length; i < maxLength; i++)
+ for (var i = value.Length; i < maxLength; i++)
{
data.WriteByte((sbyte) ' ');
}
@@ -372,7 +372,8 @@ namespace Lucene.Net.Codecs.SimpleText
var count = (int) n;
for (var i = 0; i < count; i++)
{
- var ord = (long) ordStream.Next();
+ ordStream.MoveNext();
+ var ord = ordStream.Current;
if (sb2.Length > 0)
sb2.Append(",");
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/a33ca521/src/Lucene.Net.Codecs/SimpleText/SimpleTextFieldsReader.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Codecs/SimpleText/SimpleTextFieldsReader.cs b/src/Lucene.Net.Codecs/SimpleText/SimpleTextFieldsReader.cs
index b896ac5..9d2f6ff 100644
--- a/src/Lucene.Net.Codecs/SimpleText/SimpleTextFieldsReader.cs
+++ b/src/Lucene.Net.Codecs/SimpleText/SimpleTextFieldsReader.cs
@@ -45,7 +45,7 @@ namespace Lucene.Net.Codecs.SimpleText
using IntsRef = Util.IntsRef;
using StringHelper = Util.StringHelper;
using UnicodeUtil = Util.UnicodeUtil;
- using BytesRefFSTEnum = Util.Fst.BytesRefFSTEnum<Util.Fst.PairOutputs<long,long>.Pair<long, Util.Fst.PairOutputs<long,long>.Pair<long, long>>>;
+ using BytesRefFSTEnum = Util.Fst.BytesRefFSTEnum<Util.Fst.PairOutputs<long,long>.Pair>;
using FST = Util.Fst.FST;
using PairOutputs = Util.Fst.PairOutputs<long,long>;
using PositiveIntOutputs = Util.Fst.PositiveIntOutputs;
@@ -115,14 +115,14 @@ namespace Lucene.Net.Codecs.SimpleText
private long _totalTermFreq;
private long _docsStart;
- private readonly BytesRefFSTEnum<PairOutputs<long, PairOutputs.Pair<long, long>>.Pair<long, PairOutputs.Pair<long, long>>> _fstEnum;
+ private readonly BytesRefFSTEnum<PairOutputs<long, PairOutputs.Pair>.Pair> _fstEnum;
public SimpleTextTermsEnum(SimpleTextFieldsReader outerInstance,
- FST<PairOutputs<long, PairOutputs.Pair<long, long>>.Pair<long, PairOutputs.Pair<long, long>>> fst, IndexOptions indexOptions)
+ FST<PairOutputs<long, PairOutputs.Pair>.Pair> fst, IndexOptions indexOptions)
{
_outerInstance = outerInstance;
_indexOptions = indexOptions;
- _fstEnum = new BytesRefFSTEnum<PairOutputs<long, PairOutputs.Pair<long, long>>.Pair<long, PairOutputs.Pair<long, long>>>(fst);
+ _fstEnum = new BytesRefFSTEnum<PairOutputs<long, PairOutputs.Pair>.Pair>(fst);
}
public override bool SeekExact(BytesRef text)
@@ -583,7 +583,7 @@ namespace Lucene.Net.Codecs.SimpleText
private long _sumTotalTermFreq;
private long _sumDocFreq;
private int _docCount;
- private FST<PairOutputs<long, PairOutputs.Pair<long, long>>.Pair<long, PairOutputs.Pair<long, long>>> _fst;
+ private FST<PairOutputs<long, PairOutputs.Pair>.Pair> _fst;
private int _termCount;
private readonly BytesRef _scratch = new BytesRef(10);
private readonly CharsRef _scratchUtf16 = new CharsRef(10);
@@ -601,10 +601,11 @@ namespace Lucene.Net.Codecs.SimpleText
{
var posIntOutputs = PositiveIntOutputs.Singleton;
var outputsInner = new PairOutputs<long, long>(posIntOutputs, posIntOutputs);
- var outputs = new PairOutputs<long, PairOutputs.Pair<long, long>>(posIntOutputs, outputsInner);
+ //var outputs = new PairOutputs<long, PairOutputs.Pair<long, long>>(posIntOutputs, outputsInner);
+ var outputs = new PairOutputs<long, PairOutputs.Pair>(posIntOutputs, outputsInner);
// honestly, wtf kind of generic mess is this.
- var b = new Builder<PairOutputs<long, PairOutputs.Pair<long, long>>.Pair<long, PairOutputs.Pair<long, long>>>(FST.INPUT_TYPE.BYTE1, outputs);
+ var b = new Builder<PairOutputs<long, PairOutputs.Pair>.Pair>(FST.INPUT_TYPE.BYTE1, outputs);
var input = (IndexInput) _outerInstance._input.Clone();
input.Seek(_termsStart);