You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by us...@apache.org on 2011/03/19 18:54:44 UTC
svn commit: r1083239 - in /lucene/dev/branches/lucene_solr_3_1/lucene:
CHANGES.txt src/java/org/apache/lucene/store/IndexInput.java
src/test/org/apache/lucene/index/TestIndexInput.java
Author: uschindler
Date: Sat Mar 19 17:54:44 2011
New Revision: 1083239
URL: http://svn.apache.org/viewvc?rev=1083239&view=rev
Log:
LUCENE-2975: A hotspot bug corrupts IndexInput#readVInt()/readVLong() if the underlying readByte() is inlined (which happens e.g. in MMapDirectory). The loop was unwinded which makes the hotspot bug disappear.
Modified:
lucene/dev/branches/lucene_solr_3_1/lucene/CHANGES.txt
lucene/dev/branches/lucene_solr_3_1/lucene/src/java/org/apache/lucene/store/IndexInput.java
lucene/dev/branches/lucene_solr_3_1/lucene/src/test/org/apache/lucene/index/TestIndexInput.java
Modified: lucene/dev/branches/lucene_solr_3_1/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_3_1/lucene/CHANGES.txt?rev=1083239&r1=1083238&r2=1083239&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_3_1/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/lucene_solr_3_1/lucene/CHANGES.txt Sat Mar 19 17:54:44 2011
@@ -397,6 +397,11 @@ Bug fixes
* LUCENE-2936: PhraseQuery score explanations were not correctly
identifying matches vs non-matches. (hossman)
+* LUCENE-2975: A hotspot bug corrupts IndexInput#readVInt()/readVLong() if
+ the underlying readByte() is inlined (which happens e.g. in MMapDirectory).
+ The loop was unwinded which makes the hotspot bug disappear.
+ (Uwe Schindler, Robert Muir, Mike McCandless)
+
New features
* LUCENE-2128: Parallelized fetching document frequencies during weight
Modified: lucene/dev/branches/lucene_solr_3_1/lucene/src/java/org/apache/lucene/store/IndexInput.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_3_1/lucene/src/java/org/apache/lucene/store/IndexInput.java?rev=1083239&r1=1083238&r2=1083239&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_3_1/lucene/src/java/org/apache/lucene/store/IndexInput.java (original)
+++ lucene/dev/branches/lucene_solr_3_1/lucene/src/java/org/apache/lucene/store/IndexInput.java Sat Mar 19 17:54:44 2011
@@ -78,6 +78,9 @@ public abstract class IndexInput impleme
* @see IndexOutput#writeVInt(int)
*/
public int readVInt() throws IOException {
+ /* This is the original code of this method,
+ * but a Hotspot bug (see LUCENE-2975) corrupts the for-loop if
+ * readByte() is inlined. So the loop was unwinded!
byte b = readByte();
int i = b & 0x7F;
for (int shift = 7; (b & 0x80) != 0; shift += 7) {
@@ -85,6 +88,22 @@ public abstract class IndexInput impleme
i |= (b & 0x7F) << shift;
}
return i;
+ */
+ byte b = readByte();
+ int i = b & 0x7F;
+ if ((b & 0x80) == 0) return i;
+ b = readByte();
+ i |= (b & 0x7F) << 7;
+ if ((b & 0x80) == 0) return i;
+ b = readByte();
+ i |= (b & 0x7F) << 14;
+ if ((b & 0x80) == 0) return i;
+ b = readByte();
+ i |= (b & 0x7F) << 21;
+ if ((b & 0x80) == 0) return i;
+ b = readByte();
+ assert (b & 0x80) == 0;
+ return i | ((b & 0x7F) << 28);
}
/** Reads eight bytes and returns a long.
@@ -98,6 +117,9 @@ public abstract class IndexInput impleme
* nine bytes. Smaller values take fewer bytes. Negative numbers are not
* supported. */
public long readVLong() throws IOException {
+ /* This is the original code of this method,
+ * but a Hotspot bug (see LUCENE-2975) corrupts the for-loop if
+ * readByte() is inlined. So the loop was unwinded!
byte b = readByte();
long i = b & 0x7F;
for (int shift = 7; (b & 0x80) != 0; shift += 7) {
@@ -105,6 +127,34 @@ public abstract class IndexInput impleme
i |= (b & 0x7FL) << shift;
}
return i;
+ */
+ byte b = readByte();
+ long i = b & 0x7FL;
+ if ((b & 0x80) == 0) return i;
+ b = readByte();
+ i |= (b & 0x7FL) << 7;
+ if ((b & 0x80) == 0) return i;
+ b = readByte();
+ i |= (b & 0x7FL) << 14;
+ if ((b & 0x80) == 0) return i;
+ b = readByte();
+ i |= (b & 0x7FL) << 21;
+ if ((b & 0x80) == 0) return i;
+ b = readByte();
+ i |= (b & 0x7FL) << 28;
+ if ((b & 0x80) == 0) return i;
+ b = readByte();
+ i |= (b & 0x7FL) << 35;
+ if ((b & 0x80) == 0) return i;
+ b = readByte();
+ i |= (b & 0x7FL) << 42;
+ if ((b & 0x80) == 0) return i;
+ b = readByte();
+ i |= (b & 0x7FL) << 49;
+ if ((b & 0x80) == 0) return i;
+ b = readByte();
+ assert (b & 0x80) == 0;
+ return i | ((b & 0x7FL) << 56);
}
/** Call this if readString should read characters stored
Modified: lucene/dev/branches/lucene_solr_3_1/lucene/src/test/org/apache/lucene/index/TestIndexInput.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_3_1/lucene/src/test/org/apache/lucene/index/TestIndexInput.java?rev=1083239&r1=1083238&r2=1083239&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_3_1/lucene/src/test/org/apache/lucene/index/TestIndexInput.java (original)
+++ lucene/dev/branches/lucene_solr_3_1/lucene/src/test/org/apache/lucene/index/TestIndexInput.java Sat Mar 19 17:54:44 2011
@@ -19,51 +19,60 @@ package org.apache.lucene.index;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.store.RAMDirectory;
import java.io.IOException;
public class TestIndexInput extends LuceneTestCase {
- public void testRead() throws IOException {
- IndexInput is = new MockIndexInput(new byte[] {
- (byte) 0x80, 0x01,
- (byte) 0xFF, 0x7F,
- (byte) 0x80, (byte) 0x80, 0x01,
- (byte) 0x81, (byte) 0x80, 0x01,
- 0x06, 'L', 'u', 'c', 'e', 'n', 'e',
-
- // 2-byte UTF-8 (U+00BF "INVERTED QUESTION MARK")
- 0x02, (byte) 0xC2, (byte) 0xBF,
- 0x0A, 'L', 'u', (byte) 0xC2, (byte) 0xBF,
- 'c', 'e', (byte) 0xC2, (byte) 0xBF,
- 'n', 'e',
-
- // 3-byte UTF-8 (U+2620 "SKULL AND CROSSBONES")
- 0x03, (byte) 0xE2, (byte) 0x98, (byte) 0xA0,
- 0x0C, 'L', 'u', (byte) 0xE2, (byte) 0x98, (byte) 0xA0,
- 'c', 'e', (byte) 0xE2, (byte) 0x98, (byte) 0xA0,
- 'n', 'e',
-
- // surrogate pairs
- // (U+1D11E "MUSICAL SYMBOL G CLEF")
- // (U+1D160 "MUSICAL SYMBOL EIGHTH NOTE")
- 0x04, (byte) 0xF0, (byte) 0x9D, (byte) 0x84, (byte) 0x9E,
- 0x08, (byte) 0xF0, (byte) 0x9D, (byte) 0x84, (byte) 0x9E,
- (byte) 0xF0, (byte) 0x9D, (byte) 0x85, (byte) 0xA0,
- 0x0E, 'L', 'u',
- (byte) 0xF0, (byte) 0x9D, (byte) 0x84, (byte) 0x9E,
- 'c', 'e',
- (byte) 0xF0, (byte) 0x9D, (byte) 0x85, (byte) 0xA0,
- 'n', 'e',
-
- // null bytes
- 0x01, 0x00,
- 0x08, 'L', 'u', 0x00, 'c', 'e', 0x00, 'n', 'e',
- });
-
+
+ static final byte[] READ_TEST_BYTES = new byte[] {
+ (byte) 0x80, 0x01,
+ (byte) 0xFF, 0x7F,
+ (byte) 0x80, (byte) 0x80, 0x01,
+ (byte) 0x81, (byte) 0x80, 0x01,
+ (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0x07,
+ (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0x07,
+ (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0x7F,
+ 0x06, 'L', 'u', 'c', 'e', 'n', 'e',
+
+ // 2-byte UTF-8 (U+00BF "INVERTED QUESTION MARK")
+ 0x02, (byte) 0xC2, (byte) 0xBF,
+ 0x0A, 'L', 'u', (byte) 0xC2, (byte) 0xBF,
+ 'c', 'e', (byte) 0xC2, (byte) 0xBF,
+ 'n', 'e',
+
+ // 3-byte UTF-8 (U+2620 "SKULL AND CROSSBONES")
+ 0x03, (byte) 0xE2, (byte) 0x98, (byte) 0xA0,
+ 0x0C, 'L', 'u', (byte) 0xE2, (byte) 0x98, (byte) 0xA0,
+ 'c', 'e', (byte) 0xE2, (byte) 0x98, (byte) 0xA0,
+ 'n', 'e',
+
+ // surrogate pairs
+ // (U+1D11E "MUSICAL SYMBOL G CLEF")
+ // (U+1D160 "MUSICAL SYMBOL EIGHTH NOTE")
+ 0x04, (byte) 0xF0, (byte) 0x9D, (byte) 0x84, (byte) 0x9E,
+ 0x08, (byte) 0xF0, (byte) 0x9D, (byte) 0x84, (byte) 0x9E,
+ (byte) 0xF0, (byte) 0x9D, (byte) 0x85, (byte) 0xA0,
+ 0x0E, 'L', 'u',
+ (byte) 0xF0, (byte) 0x9D, (byte) 0x84, (byte) 0x9E,
+ 'c', 'e',
+ (byte) 0xF0, (byte) 0x9D, (byte) 0x85, (byte) 0xA0,
+ 'n', 'e',
+
+ // null bytes
+ 0x01, 0x00,
+ 0x08, 'L', 'u', 0x00, 'c', 'e', 0x00, 'n', 'e',
+ };
+
+ private void checkReads(IndexInput is) throws IOException {
assertEquals(128,is.readVInt());
assertEquals(16383,is.readVInt());
assertEquals(16384,is.readVInt());
assertEquals(16385,is.readVInt());
+ assertEquals(Integer.MAX_VALUE, is.readVInt());
+ assertEquals((long) Integer.MAX_VALUE, is.readVLong());
+ assertEquals(Long.MAX_VALUE, is.readVLong());
assertEquals("Lucene",is.readString());
assertEquals("\u00BF",is.readString());
@@ -80,6 +89,25 @@ public class TestIndexInput extends Luce
assertEquals("Lu\u0000ce\u0000ne",is.readString());
}
+ // this test only checks BufferedIndexInput because MockIndexInput extends BufferedIndexInput
+ public void testBufferedIndexInputRead() throws IOException {
+ final IndexInput is = new MockIndexInput(READ_TEST_BYTES);
+ checkReads(is);
+ is.close();
+ }
+
+ // this test checks the raw IndexInput methods as it uses RAMIndexInput which extends IndexInput directly
+ public void testRawIndexInputRead() throws IOException {
+ final RAMDirectory dir = new RAMDirectory();
+ final IndexOutput os = dir.createOutput("foo");
+ os.writeBytes(READ_TEST_BYTES, READ_TEST_BYTES.length);
+ os.close();
+ final IndexInput is = dir.openInput("foo");
+ checkReads(is);
+ is.close();
+ dir.close();
+ }
+
/**
* Expert
*