You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by mi...@apache.org on 2009/07/16 20:07:36 UTC
svn commit: r794770 - in /lucene/java/trunk: ./
src/java/org/apache/lucene/index/ src/java/org/apache/lucene/store/
src/test/org/apache/lucene/store/
Author: mikemccand
Date: Thu Jul 16 18:07:35 2009
New Revision: 794770
URL: http://svn.apache.org/viewvc?rev=794770&view=rev
Log:
LUCENE-1566: do very large reads in chunks, to prevent hitting Sun JVM bug that throws invalid OOME
Modified:
lucene/java/trunk/CHANGES.txt
lucene/java/trunk/src/java/org/apache/lucene/index/SegmentReader.java
lucene/java/trunk/src/java/org/apache/lucene/store/FSDirectory.java
lucene/java/trunk/src/java/org/apache/lucene/store/NIOFSDirectory.java
lucene/java/trunk/src/java/org/apache/lucene/store/SimpleFSDirectory.java
lucene/java/trunk/src/test/org/apache/lucene/store/TestBufferedIndexInput.java
Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?rev=794770&r1=794769&r2=794770&view=diff
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Thu Jul 16 18:07:35 2009
@@ -387,6 +387,16 @@
19. LUCENE-1583: SpanOrQuery skipTo() doesn't always move forwards as Spans
documentation indicates it should. (Moti Nisenson via Mark Miller)
+20. LUCENE-1566: Sun JVM Bug
+ http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6478546 causes
+ invalid OutOfMemoryError when reading too many bytes at once from
+ a file on 32bit JVMs that have a large maximum heap size. This
+ fix adds set/getReadChunkSize to FSDirectory so that large reads
+ are broken into chunks, to work around this JVM bug. On 32bit
+ JVMs the default chunk size is 100 MB; on 64bit JVMs, which don't
+ show the bug, the default is Integer.MAX_VALUE. (Simon Willnauer
+ via Mike McCandless)
+
New features
1. LUCENE-1411: Added expert API to open an IndexWriter on a prior
Modified: lucene/java/trunk/src/java/org/apache/lucene/index/SegmentReader.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/SegmentReader.java?rev=794770&r1=794769&r2=794770&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/SegmentReader.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/SegmentReader.java Thu Jul 16 18:07:35 2009
@@ -1327,13 +1327,6 @@
return core.getTermsReader().size();
}
- /*
- // nocommit
- final TermInfosReader getTermInfosReader() {
- return terms.getTermsReader();
- }
- */
-
/**
* Lotsa tests did hacks like:<br/>
* SegmentReader reader = (SegmentReader) IndexReader.open(dir);<br/>
Modified: lucene/java/trunk/src/java/org/apache/lucene/store/FSDirectory.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/store/FSDirectory.java?rev=794770&r1=794769&r2=794770&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/store/FSDirectory.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/store/FSDirectory.java Thu Jul 16 18:07:35 2009
@@ -398,6 +398,7 @@
dir.setUseUnmap(true);
return dir;
*/
+
if (Constants.WINDOWS) {
return new SimpleFSDirectory(path, lockFactory);
} else {
@@ -728,6 +729,59 @@
return this.getClass().getName() + "@" + directory;
}
+ /**
+ * Default read chunk size. This is a conditional
+ * default: on 32bit JVMs, it defaults to 100 MB. On
+ * 64bit JVMs, it's <code>Integer.MAX_VALUE</code>.
+ * @see #setReadChunkSize
+ */
+ public static final int DEFAULT_READ_CHUNK_SIZE = Constants.JRE_IS_64BIT ? Integer.MAX_VALUE: 100 * 1024 * 1024;
+
+ // LUCENE-1566
+ private int chunkSize = DEFAULT_READ_CHUNK_SIZE;
+
+ /**
+ * Sets the maximum number of bytes read at once from the
+ * underlying file during {@link IndexInput#readBytes}.
+ * The default value is {@link #DEFAULT_READ_CHUNK_SIZE}.
+ *
+ * <p> This was introduced due to <a
+ * href="http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6478546">Sun
+ * JVM Bug 6478546</a>, which throws an incorrect
+ * OutOfMemoryError when attempting to read too many bytes
+ * at once. It only happens on 32bit JVMs with a large
+ * maximum heap size.</p>
+ *
+ * <p>Changes to this value will not impact any
+ * already-opened {@link IndexInput}s. You should call
+ * this before attempting to open an index on the
+ * directory.</p>
+ *
+ * <p> <b>NOTE</b>: This value should be as large as
+ * possible to reduce any possible performance impact. If
+ * you still encounter an incorrect OutOfMemoryError,
+ * try lowering the chunk size.</p>
+ */
+ public final void setReadChunkSize(int chunkSize) {
+ // LUCENE-1566
+ if (chunkSize <= 0) {
+ throw new IllegalArgumentException("chunkSize must be positive");
+ }
+ if (!Constants.JRE_IS_64BIT) {
+ this.chunkSize = chunkSize;
+ }
+ }
+
+ /**
+ * The maximum number of bytes to read at once from the
+ * underlying file during {@link IndexInput#readBytes}.
+ * @see #setReadChunkSize
+ */
+ public final int getReadChunkSize() {
+ // LUCENE-1566
+ return chunkSize;
+ }
+
/** @deprecated Use SimpleFSDirectory.SimpleFSIndexInput instead */
protected static class FSIndexInput extends SimpleFSDirectory.SimpleFSIndexInput {
Modified: lucene/java/trunk/src/java/org/apache/lucene/store/NIOFSDirectory.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/store/NIOFSDirectory.java?rev=794770&r1=794769&r2=794770&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/store/NIOFSDirectory.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/store/NIOFSDirectory.java Thu Jul 16 18:07:35 2009
@@ -66,7 +66,7 @@
/** Creates an IndexInput for the file with the given name. */
public IndexInput openInput(String name, int bufferSize) throws IOException {
ensureOpen();
- return new NIOFSIndexInput(new File(getFile(), name), bufferSize);
+ return new NIOFSIndexInput(new File(getFile(), name), bufferSize, getReadChunkSize());
}
/** Creates an IndexOutput for the file with the given name. */
@@ -75,7 +75,7 @@
return new SimpleFSDirectory.SimpleFSIndexOutput(new File(directory, name));
}
- private static class NIOFSIndexInput extends SimpleFSDirectory.SimpleFSIndexInput {
+ protected static class NIOFSIndexInput extends SimpleFSDirectory.SimpleFSIndexInput {
private ByteBuffer byteBuf; // wraps the buffer for NIO
@@ -84,8 +84,13 @@
final FileChannel channel;
+ /** @deprecated Please use ctor taking chunkSize */
public NIOFSIndexInput(File path, int bufferSize) throws IOException {
- super(path, bufferSize);
+ this(path, bufferSize, FSDirectory.DEFAULT_READ_CHUNK_SIZE);
+ }
+
+ public NIOFSIndexInput(File path, int bufferSize, int chunkSize) throws IOException {
+ super(path, bufferSize, chunkSize);
channel = file.getChannel();
}
@@ -129,17 +134,46 @@
otherByteBuf.clear();
otherByteBuf.limit(len);
bb = otherByteBuf;
- } else
+ } else {
// Always wrap when offset != 0
bb = ByteBuffer.wrap(b, offset, len);
+ }
}
+ int readOffset = bb.position();
+ int readLength = bb.limit() - readOffset;
+ assert readLength == len;
+
long pos = getFilePointer();
- while (bb.hasRemaining()) {
- int i = channel.read(bb, pos);
- if (i == -1)
- throw new IOException("read past EOF");
- pos += i;
+
+ try {
+ while (readLength > 0) {
+ final int limit;
+ if (readLength > chunkSize) {
+ // LUCENE-1566 - work around JVM Bug by breaking
+ // very large reads into chunks
+ limit = readOffset + chunkSize;
+ } else {
+ limit = readOffset + readLength;
+ }
+ bb.limit(limit);
+ int i = channel.read(bb, pos);
+ if (i == -1) {
+ throw new IOException("read past EOF");
+ }
+ pos += i;
+ readOffset += i;
+ readLength -= i;
+ }
+ } catch (OutOfMemoryError e) {
+ // propagate OOM up and add a hint for 32bit VM Users hitting the bug
+ // with a large chunk size in the fast path.
+ final OutOfMemoryError outOfMemoryError = new OutOfMemoryError(
+ "OutOfMemoryError likely caused by the Sun VM Bug described in "
+ + "https://issues.apache.org/jira/browse/LUCENE-1566; try calling FSDirectory.setReadChunkSize "
+ + "with a a value smaller than the current chunk size (" + chunkSize + ")");
+ outOfMemoryError.initCause(e);
+ throw outOfMemoryError;
}
}
}
Modified: lucene/java/trunk/src/java/org/apache/lucene/store/SimpleFSDirectory.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/store/SimpleFSDirectory.java?rev=794770&r1=794769&r2=794770&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/store/SimpleFSDirectory.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/store/SimpleFSDirectory.java Thu Jul 16 18:07:35 2009
@@ -61,7 +61,7 @@
/** Creates an IndexInput for the file with the given name. */
public IndexInput openInput(String name, int bufferSize) throws IOException {
ensureOpen();
- return new SimpleFSIndexInput(new File(directory, name), bufferSize);
+ return new SimpleFSIndexInput(new File(directory, name), bufferSize, getReadChunkSize());
}
protected static class SimpleFSIndexInput extends BufferedIndexInput {
@@ -89,14 +89,23 @@
protected final Descriptor file;
boolean isClone;
-
+ // LUCENE-1566 - maximum read length on a 32bit JVM to prevent incorrect OOM
+ protected final int chunkSize;
+
+ /** @deprecated Please use ctor taking chunkSize */
public SimpleFSIndexInput(File path) throws IOException {
- this(path, BufferedIndexInput.BUFFER_SIZE);
+ this(path, BufferedIndexInput.BUFFER_SIZE, SimpleFSDirectory.DEFAULT_READ_CHUNK_SIZE);
}
+ /** @deprecated Please use ctor taking chunkSize */
public SimpleFSIndexInput(File path, int bufferSize) throws IOException {
+ this(path, bufferSize, SimpleFSDirectory.DEFAULT_READ_CHUNK_SIZE);
+ }
+
+ public SimpleFSIndexInput(File path, int bufferSize, int chunkSize) throws IOException {
super(bufferSize);
file = new Descriptor(path, "r");
+ this.chunkSize = chunkSize;
}
/** IndexInput methods */
@@ -109,13 +118,33 @@
file.position = position;
}
int total = 0;
- do {
- int i = file.read(b, offset+total, len-total);
- if (i == -1)
- throw new IOException("read past EOF");
- file.position += i;
- total += i;
- } while (total < len);
+
+ try {
+ do {
+ final int readLength;
+ if (total + chunkSize > len) {
+ readLength = len - total;
+ } else {
+ // LUCENE-1566 - work around JVM Bug by breaking very large reads into chunks
+ readLength = chunkSize;
+ }
+ final int i = file.read(b, offset + total, readLength);
+ if (i == -1) {
+ throw new IOException("read past EOF");
+ }
+ file.position += i;
+ total += i;
+ } while (total < len);
+ } catch (OutOfMemoryError e) {
+ // propagate OOM up and add a hint for 32bit VM Users hitting the bug
+ // with a large chunk size in the fast path.
+ final OutOfMemoryError outOfMemoryError = new OutOfMemoryError(
+ "OutOfMemoryError likely caused by the Sun VM Bug described in "
+ + "https://issues.apache.org/jira/browse/LUCENE-1566; try calling FSDirectory.setReadChunkSize "
+ + "with a a value smaller than the current chunks size (" + chunkSize + ")");
+ outOfMemoryError.initCause(e);
+ throw outOfMemoryError;
+ }
}
}
Modified: lucene/java/trunk/src/test/org/apache/lucene/store/TestBufferedIndexInput.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/store/TestBufferedIndexInput.java?rev=794770&r1=794769&r2=794770&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/store/TestBufferedIndexInput.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/store/TestBufferedIndexInput.java Thu Jul 16 18:07:35 2009
@@ -18,7 +18,9 @@
*/
import java.io.File;
+import java.io.FileOutputStream;
import java.io.IOException;
+import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
@@ -33,59 +35,142 @@
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.store.NIOFSDirectory.NIOFSIndexInput;
+import org.apache.lucene.store.SimpleFSDirectory.SimpleFSIndexInput;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
+import org.apache.lucene.util.ArrayUtil;
public class TestBufferedIndexInput extends LuceneTestCase {
- // Call readByte() repeatedly, past the buffer boundary, and see that it
- // is working as expected.
- // Our input comes from a dynamically generated/ "file" - see
- // MyBufferedIndexInput below.
- public void testReadByte() throws Exception {
- MyBufferedIndexInput input = new MyBufferedIndexInput();
- for(int i=0; i<BufferedIndexInput.BUFFER_SIZE*10; i++){
- assertEquals(input.readByte(), byten(i));
- }
+
+ private static void writeBytes(File aFile, long size) throws IOException{
+ OutputStream stream = null;
+ try {
+ stream = new FileOutputStream(aFile);
+ for (int i = 0; i < size; i++) {
+ stream.write(byten(i));
+ }
+ stream.flush();
+ } finally {
+ if (stream != null) {
+ stream.close();
+ }
}
+ }
+
+ private static final long TEST_FILE_LENGTH = 1024*1024;
- // Call readBytes() repeatedly, with various chunk sizes (from 1 byte to
- // larger than the buffer size), and see that it returns the bytes we expect.
- // Our input comes from a dynamically generated "file" -
- // see MyBufferedIndexInput below.
- public void testReadBytes() throws Exception {
- MyBufferedIndexInput input = new MyBufferedIndexInput();
- int pos=0;
- // gradually increasing size:
- for(int size=1; size<BufferedIndexInput.BUFFER_SIZE*10; size=size+size/200+1){
- checkReadBytes(input, size, pos);
- pos+=size;
- }
- // wildly fluctuating size:
- for(long i=0; i<1000; i++){
- // The following function generates a fluctuating (but repeatable)
- // size, sometimes small (<100) but sometimes large (>10000)
- int size1 = (int)( i%7 + 7*(i%5)+ 7*5*(i%3) + 5*5*3*(i%2));
- int size2 = (int)( i%11 + 11*(i%7)+ 11*7*(i%5) + 11*7*5*(i%3) + 11*7*5*3*(i%2) );
- int size = (i%3==0)?size2*10:size1;
- checkReadBytes(input, size, pos);
- pos+=size;
- }
- // constant small size (7 bytes):
- for(int i=0; i<BufferedIndexInput.BUFFER_SIZE; i++){
- checkReadBytes(input, 7, pos);
- pos+=7;
- }
+ // Call readByte() repeatedly, past the buffer boundary, and see that it
+ // is working as expected.
+ // Our input comes from a dynamically generated "file" - see
+ // MyBufferedIndexInput below.
+ public void testReadByte() throws Exception {
+ MyBufferedIndexInput input = new MyBufferedIndexInput();
+ for (int i = 0; i < BufferedIndexInput.BUFFER_SIZE * 10; i++) {
+ assertEquals(input.readByte(), byten(i));
+ }
+ }
+
+ // Call readBytes() repeatedly, with various chunk sizes (from 1 byte to
+ // larger than the buffer size), and see that it returns the bytes we expect.
+ // Our input comes from a dynamically generated "file" -
+ // see MyBufferedIndexInput below.
+ public void testReadBytes() throws Exception {
+ final Random r = newRandom();
+
+ MyBufferedIndexInput input = new MyBufferedIndexInput();
+ runReadBytes(input, BufferedIndexInput.BUFFER_SIZE, r);
+
+ // This tests the workaround code for LUCENE-1566, where readBytesInternal
+ // works around a JVM bug that incorrectly raises an OutOfMemoryError
+ // when a large byte buffer is passed to a file read.
+ // NOTE: this only tests the chunked reads, NOT whether the bug is triggered.
+ //final int tmpFileSize = 1024 * 1024 * 5;
+ final int inputBufferSize = 128;
+ File tmpInputFile = File.createTempFile("IndexInput", "tmpFile");
+ tmpInputFile.deleteOnExit();
+ writeBytes(tmpInputFile, TEST_FILE_LENGTH);
+
+ // run test with chunk size of 10 bytes
+ runReadBytesAndClose(new SimpleFSIndexInput(tmpInputFile,
+ inputBufferSize, 10), inputBufferSize, r);
+ // run test with the default chunk size (100 MB on 32bit JVMs, Integer.MAX_VALUE on 64bit)
+ runReadBytesAndClose(new SimpleFSIndexInput(tmpInputFile,
+ inputBufferSize), inputBufferSize, r);
+ // run test with chunk size of 10 bytes
+ runReadBytesAndClose(new NIOFSIndexInput(tmpInputFile,
+ inputBufferSize, 10), inputBufferSize, r);
+ // run test with the default chunk size (100 MB on 32bit JVMs, Integer.MAX_VALUE on 64bit)
+ runReadBytesAndClose(new NIOFSIndexInput(tmpInputFile,
+ inputBufferSize), inputBufferSize, r);
+ }
+
+ private void runReadBytesAndClose(IndexInput input, int bufferSize, Random r)
+ throws IOException {
+ try {
+ runReadBytes(input, bufferSize, r);
+ } finally {
+ input.close();
}
- private void checkReadBytes(BufferedIndexInput input, int size, int pos) throws IOException{
- // Just to see that "offset" is treated properly in readBytes(), we
- // add an arbitrary offset at the beginning of the array
- int offset = size % 10; // arbitrary
- byte[] b = new byte[offset+size];
- input.readBytes(b, offset, size);
- for(int i=0; i<size; i++){
- assertEquals(b[offset+i], byten(pos+i));
- }
- }
+ }
+
+ private void runReadBytes(IndexInput input, int bufferSize, Random r)
+ throws IOException {
+
+ int pos = 0;
+ // gradually increasing size:
+ for (int size = 1; size < bufferSize * 10; size = size + size / 200 + 1) {
+ checkReadBytes(input, size, pos);
+ pos += size;
+ if (pos >= TEST_FILE_LENGTH) {
+ // wrap
+ pos = 0;
+ input.seek(0L);
+ }
+ }
+ // wildly fluctuating size:
+ for (long i = 0; i < 1000; i++) {
+ final int size = r.nextInt(10000);
+ checkReadBytes(input, 1+size, pos);
+ pos += 1+size;
+ if (pos >= TEST_FILE_LENGTH) {
+ // wrap
+ pos = 0;
+ input.seek(0L);
+ }
+ }
+ // constant small size (7 bytes):
+ for (int i = 0; i < bufferSize; i++) {
+ checkReadBytes(input, 7, pos);
+ pos += 7;
+ if (pos >= TEST_FILE_LENGTH) {
+ // wrap
+ pos = 0;
+ input.seek(0L);
+ }
+ }
+ }
+
+ private byte[] buffer = new byte[10];
+
+ private void checkReadBytes(IndexInput input, int size, int pos) throws IOException{
+ // Just to see that "offset" is treated properly in readBytes(), we
+ // add an arbitrary offset at the beginning of the array
+ int offset = size % 10; // arbitrary
+ buffer = ArrayUtil.grow(buffer, offset+size);
+ assertEquals(pos, input.getFilePointer());
+ long left = TEST_FILE_LENGTH - input.getFilePointer();
+ if (left <= 0) {
+ return;
+ } else if (left < size) {
+ size = (int) left;
+ }
+ input.readBytes(buffer, offset, size);
+ assertEquals(pos+size, input.getFilePointer());
+ for(int i=0; i<size; i++) {
+ assertEquals("pos=" + i + " filepos=" + (pos+i), byten(pos+i), buffer[offset+i]);
+ }
+ }
// This tests that attempts to readBytes() past an EOF will fail, while
// reads up to the EOF will succeed. The EOF is determined by the