You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by ni...@apache.org on 2020/02/12 17:14:31 UTC
[lucenenet] 05/05: Lucene.Net.Store (FSDirectory +
BufferedIndexOutput): Refactored FSDirectory.FSIndexOutput to utilize the
FileStream buffer only,
rather than using both a FileStream buffer and the buffer in
BufferedIndexOutput.
This is an automated email from the ASF dual-hosted git repository.
nightowl888 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucenenet.git
commit 08e35ce8ae4ed5d47da37bf1157ad3a17928cc98
Author: Shad Storhaug <sh...@shadstorhaug.com>
AuthorDate: Tue Feb 11 16:57:53 2020 +0700
Lucene.Net.Store (FSDirectory + BufferedIndexOutput): Refactored FSDirectory.FSIndexOutput to utilize the FileStream buffer only, rather than using both a FileStream buffer and the buffer in BufferedIndexOutput.
---
src/Lucene.Net/Store/BufferedIndexOutput.cs | 27 ++++++-----
src/Lucene.Net/Store/FSDirectory.cs | 75 +++++++++++++++++++++++------
2 files changed, 74 insertions(+), 28 deletions(-)
diff --git a/src/Lucene.Net/Store/BufferedIndexOutput.cs b/src/Lucene.Net/Store/BufferedIndexOutput.cs
index 6f3f12d..4b59b01 100644
--- a/src/Lucene.Net/Store/BufferedIndexOutput.cs
+++ b/src/Lucene.Net/Store/BufferedIndexOutput.cs
@@ -30,10 +30,10 @@ namespace Lucene.Net.Store
public const int DEFAULT_BUFFER_SIZE = 16384;
private readonly int bufferSize;
- private readonly byte[] buffer;
+ private byte[] buffer;
private long bufferStart = 0; // position in file of buffer
private int bufferPosition = 0; // position in buffer
- private readonly CRC32 crc = new CRC32();
+ private readonly CRC32 crc;
/// <summary>
/// Creates a new <see cref="BufferedIndexOutput"/> with the default buffer size
@@ -48,18 +48,24 @@ namespace Lucene.Net.Store
/// Creates a new <see cref="BufferedIndexOutput"/> with the given buffer size. </summary>
/// <param name="bufferSize"> the buffer size in bytes used to buffer writes internally. </param>
/// <exception cref="ArgumentException"> if the given buffer size is less or equal to <c>0</c> </exception>
- public BufferedIndexOutput(int bufferSize)
+ public BufferedIndexOutput(int bufferSize) : this(bufferSize, new CRC32()) { }
+
+ // LUCENENET specific - added constructor overload so FSDirectory can still subclass BufferedIndexOutput, but
+ // utilize its own buffer, since FileStream is already buffered in .NET.
+ internal BufferedIndexOutput(int bufferSize, CRC32 crc)
{
if (bufferSize <= 0)
{
throw new ArgumentException("bufferSize must be greater than 0 (got " + bufferSize + ")");
}
this.bufferSize = bufferSize;
- buffer = new byte[bufferSize];
+ // LUCENENET: We lazy-load the buffer, so we don't force all subclasses to allocate it
+ this.crc = crc;
}
public override void WriteByte(byte b)
{
+ if (buffer == null) buffer = new byte[bufferSize]; // LUCENENET: Lazy-load the buffer, so we don't force all subclasses to allocate it
if (bufferPosition >= bufferSize)
{
Flush();
@@ -69,6 +75,7 @@ namespace Lucene.Net.Store
public override void WriteBytes(byte[] b, int offset, int length)
{
+ if (buffer == null) buffer = new byte[bufferSize]; // LUCENENET: Lazy-load the buffer, so we don't force all subclasses to allocate it
int bytesLeft = bufferSize - bufferPosition;
// is there enough space in the buffer?
if (bytesLeft >= length)
@@ -120,9 +127,11 @@ namespace Lucene.Net.Store
}
}
+ /// <inheritdoc/>
[MethodImpl(MethodImplOptions.NoInlining)]
public override void Flush()
{
+ if (buffer == null) return; // LUCENENET: Lazy-load the buffer, so we don't force all subclasses to allocate it
crc.Update(buffer, 0, bufferPosition);
FlushBuffer(buffer, bufferPosition);
bufferStart += bufferPosition;
@@ -147,6 +156,7 @@ namespace Lucene.Net.Store
/// <param name="len"> the number of bytes to write </param>
protected internal abstract void FlushBuffer(byte[] b, int offset, int len);
+ /// <inheritdoc/>
protected override void Dispose(bool disposing)
{
if (disposing)
@@ -171,15 +181,8 @@ namespace Lucene.Net.Store
/// <summary>
/// Returns size of the used output buffer in bytes.
- ///
/// </summary>
- public int BufferSize
- {
- get
- {
- return bufferSize;
- }
- }
+ public int BufferSize => bufferSize;
public override long Checksum
{
diff --git a/src/Lucene.Net/Store/FSDirectory.cs b/src/Lucene.Net/Store/FSDirectory.cs
index a07021e..00ddcd3 100644
--- a/src/Lucene.Net/Store/FSDirectory.cs
+++ b/src/Lucene.Net/Store/FSDirectory.cs
@@ -1,3 +1,4 @@
+using Lucene.Net.Support;
using Lucene.Net.Support.IO;
using System;
using System.Collections.Generic;
@@ -5,6 +6,7 @@ using System.Diagnostics;
using System.Globalization;
using System.IO;
using System.Linq;// Used only for WRITE_LOCK_NAME in deprecated create=true case:
+using System.Runtime.CompilerServices;
namespace Lucene.Net.Store
{
@@ -462,51 +464,81 @@ namespace Lucene.Net.Store
get { return chunkSize; }
}
- /// <summary>
+ /// <summary>
/// Writes output with <see cref="FileStream.Write(byte[], int, int)"/>
/// </summary>
+ // LUCENENET specific: Since FileStream does its own buffering, this class was refactored
+ // to do all checksum operations as well as writing to the FileStream. By doing this we eliminate
+ // the extra set of buffers that were only creating unnecessary memory allocations and copy operations.
protected class FSIndexOutput : BufferedIndexOutput
{
- // LUCENENET specific: chunk size not needed
+ private const int CHUNK_SIZE = DEFAULT_BUFFER_SIZE;
private readonly FSDirectory parent;
internal readonly string name;
private readonly FileStream file;
private volatile bool isOpen; // remember if the file is open, so that we don't try to close it more than once
+ private readonly CRC32 crc = new CRC32();
public FSIndexOutput(FSDirectory parent, string name)
- : base(/*CHUNK_SIZE*/)
+ : base(CHUNK_SIZE, null)
{
this.parent = parent;
this.name = name;
- file = new FileStream(Path.Combine(parent.m_directory.FullName, name), FileMode.OpenOrCreate, FileAccess.ReadWrite, FileShare.ReadWrite);
+ file = new FileStream(
+ path: Path.Combine(parent.m_directory.FullName, name),
+ mode: FileMode.OpenOrCreate,
+ access: FileAccess.Write,
+ share: FileShare.ReadWrite,
+ bufferSize: CHUNK_SIZE);
isOpen = true;
}
+ /// <inheritdoc/>
+ public override void WriteByte(byte b)
+ {
+ if (!isOpen)
+ throw new ObjectDisposedException(nameof(FSIndexOutput));
+
+ crc.Update(b);
+ file.WriteByte(b);
+ }
+
+ /// <inheritdoc/>
+ public override void WriteBytes(byte[] b, int offset, int length)
+ {
+ if (!isOpen)
+ throw new ObjectDisposedException(nameof(FSIndexOutput));
+
+ crc.Update(b, offset, length);
+ file.Write(b, offset, length);
+ }
+
+ /// <inheritdoc/>
protected internal override void FlushBuffer(byte[] b, int offset, int size)
{
- Debug.Assert(isOpen);
+ if (!isOpen)
+ throw new ObjectDisposedException(nameof(FSIndexOutput));
- // LUCENENET specific: FileStream is already optimized to write natively
- // if over the buffer size that is passed through its constructor. So,
- // all we need to do is Write().
+ crc.Update(b, offset, size);
file.Write(b, offset, size);
-
- //Debug.Assert(size == 0);
}
+ /// <inheritdoc/>
+ [MethodImpl(MethodImplOptions.NoInlining)]
public override void Flush()
{
- base.Flush();
- // LUCENENET specific - writing bytes into the FileStream (in FlushBuffer()) does not immediately
- // persist them on disk. We need to explicitly call FileStream.Flush() to move them there.
+ if (!isOpen)
+ throw new ObjectDisposedException(nameof(FSIndexOutput));
+
file.Flush();
}
+ /// <inheritdoc/>
protected override void Dispose(bool disposing)
{
if (disposing)
- {
+ {
parent.OnIndexOutputClosed(this);
// only close the file if it has not been closed yet
if (isOpen)
@@ -514,7 +546,6 @@ namespace Lucene.Net.Store
IOException priorE = null;
try
{
- base.Dispose(disposing); // LUCENENET NOTE: This handles Flush() for us automatically, but we need to call Flush(true) to ensure everything persists
file.Flush(flushToDisk: true);
}
catch (IOException ioe)
@@ -535,13 +566,25 @@ namespace Lucene.Net.Store
[Obsolete("(4.1) this method will be removed in Lucene 5.0")]
public override void Seek(long pos)
{
- base.Seek(pos);
+ if (!isOpen)
+ throw new ObjectDisposedException(nameof(FSIndexOutput));
+
file.Seek(pos, SeekOrigin.Begin);
}
+ /// <inheritdoc/>
public override long Length => file.Length;
// LUCENENET NOTE: FileStream doesn't have a way to set length
+
+ /// <inheritdoc/>
+ public override long Checksum => crc.Value; // LUCENENET specific - need to override, since we are buffering locally
+
+ /// <inheritdoc/>
+ public override long GetFilePointer() // LUCENENET specific - need to override, since we are buffering locally
+ {
+ return file.Position;
+ }
}
// LUCENENET specific: Fsync is pointless in .NET, since we are