You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by us...@apache.org on 2011/03/19 18:58:21 UTC

svn commit: r1083240 - in /lucene/dev/branches/branch_3x: ./ lucene/ lucene/CHANGES.txt lucene/contrib/lucli/build.xml lucene/src/java/org/apache/lucene/store/IndexInput.java lucene/src/test/org/apache/lucene/index/TestIndexInput.java solr/ solr/build.xml

Author: uschindler
Date: Sat Mar 19 17:58:20 2011
New Revision: 1083240

URL: http://svn.apache.org/viewvc?rev=1083240&view=rev
Log:
LUCENE-2975: A hotspot bug corrupts IndexInput#readVInt()/readVLong() if the underlying readByte() is inlined (which happens e.g. in MMapDirectory). The loop was unwinded which makes the hotspot bug disappear.

Modified:
    lucene/dev/branches/branch_3x/   (props changed)
    lucene/dev/branches/branch_3x/lucene/   (props changed)
    lucene/dev/branches/branch_3x/lucene/CHANGES.txt
    lucene/dev/branches/branch_3x/lucene/contrib/lucli/build.xml   (props changed)
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/store/IndexInput.java
    lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestIndexInput.java
    lucene/dev/branches/branch_3x/solr/   (props changed)
    lucene/dev/branches/branch_3x/solr/build.xml   (props changed)

Modified: lucene/dev/branches/branch_3x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/CHANGES.txt?rev=1083240&r1=1083239&r2=1083240&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_3x/lucene/CHANGES.txt Sat Mar 19 17:58:20 2011
@@ -410,6 +410,11 @@ Bug fixes
 * LUCENE-2936: PhraseQuery score explanations were not correctly 
   identifying matches vs non-matches.  (hossman)
 
+* LUCENE-2975: A hotspot bug corrupts IndexInput#readVInt()/readVLong() if
+  the underlying readByte() is inlined (which happens e.g. in MMapDirectory).
+  The loop was unwinded which makes the hotspot bug disappear.
+  (Uwe Schindler, Robert Muir, Mike McCandless)
+
 New features
 
 * LUCENE-2128: Parallelized fetching document frequencies during weight

Modified: lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/store/IndexInput.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/store/IndexInput.java?rev=1083240&r1=1083239&r2=1083240&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/store/IndexInput.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/store/IndexInput.java Sat Mar 19 17:58:20 2011
@@ -78,6 +78,9 @@ public abstract class IndexInput impleme
    * @see IndexOutput#writeVInt(int)
    */
   public int readVInt() throws IOException {
+    /* This is the original code of this method,
+     * but a Hotspot bug (see LUCENE-2975) corrupts the for-loop if
+     * readByte() is inlined. So the loop was unwinded!
     byte b = readByte();
     int i = b & 0x7F;
     for (int shift = 7; (b & 0x80) != 0; shift += 7) {
@@ -85,6 +88,22 @@ public abstract class IndexInput impleme
       i |= (b & 0x7F) << shift;
     }
     return i;
+    */
+    byte b = readByte();
+    int i = b & 0x7F;
+    if ((b & 0x80) == 0) return i;
+    b = readByte();
+    i |= (b & 0x7F) << 7;
+    if ((b & 0x80) == 0) return i;
+    b = readByte();
+    i |= (b & 0x7F) << 14;
+    if ((b & 0x80) == 0) return i;
+    b = readByte();
+    i |= (b & 0x7F) << 21;
+    if ((b & 0x80) == 0) return i;
+    b = readByte();
+    assert (b & 0x80) == 0;
+    return i | ((b & 0x7F) << 28);
   }
 
   /** Reads eight bytes and returns a long.
@@ -98,6 +117,9 @@ public abstract class IndexInput impleme
    * nine bytes.  Smaller values take fewer bytes.  Negative numbers are not
    * supported. */
   public long readVLong() throws IOException {
+    /* This is the original code of this method,
+     * but a Hotspot bug (see LUCENE-2975) corrupts the for-loop if
+     * readByte() is inlined. So the loop was unwinded!
     byte b = readByte();
     long i = b & 0x7F;
     for (int shift = 7; (b & 0x80) != 0; shift += 7) {
@@ -105,6 +127,34 @@ public abstract class IndexInput impleme
       i |= (b & 0x7FL) << shift;
     }
     return i;
+    */
+    byte b = readByte();
+    long i = b & 0x7FL;
+    if ((b & 0x80) == 0) return i;
+    b = readByte();
+    i |= (b & 0x7FL) << 7;
+    if ((b & 0x80) == 0) return i;
+    b = readByte();
+    i |= (b & 0x7FL) << 14;
+    if ((b & 0x80) == 0) return i;
+    b = readByte();
+    i |= (b & 0x7FL) << 21;
+    if ((b & 0x80) == 0) return i;
+    b = readByte();
+    i |= (b & 0x7FL) << 28;
+    if ((b & 0x80) == 0) return i;
+    b = readByte();
+    i |= (b & 0x7FL) << 35;
+    if ((b & 0x80) == 0) return i;
+    b = readByte();
+    i |= (b & 0x7FL) << 42;
+    if ((b & 0x80) == 0) return i;
+    b = readByte();
+    i |= (b & 0x7FL) << 49;
+    if ((b & 0x80) == 0) return i;
+    b = readByte();
+    assert (b & 0x80) == 0;
+    return i | ((b & 0x7FL) << 56);
   }
 
   /** Call this if readString should read characters stored

Modified: lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestIndexInput.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestIndexInput.java?rev=1083240&r1=1083239&r2=1083240&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestIndexInput.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestIndexInput.java Sat Mar 19 17:58:20 2011
@@ -19,51 +19,60 @@ package org.apache.lucene.index;
 
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.store.RAMDirectory;
 
 import java.io.IOException;
 
 public class TestIndexInput extends LuceneTestCase {
-  public void testRead() throws IOException {
-    IndexInput is = new MockIndexInput(new byte[] { 
-      (byte) 0x80, 0x01,
-      (byte) 0xFF, 0x7F,
-      (byte) 0x80, (byte) 0x80, 0x01,
-      (byte) 0x81, (byte) 0x80, 0x01,
-      0x06, 'L', 'u', 'c', 'e', 'n', 'e',
-
-      // 2-byte UTF-8 (U+00BF "INVERTED QUESTION MARK") 
-      0x02, (byte) 0xC2, (byte) 0xBF,
-      0x0A, 'L', 'u', (byte) 0xC2, (byte) 0xBF, 
-            'c', 'e', (byte) 0xC2, (byte) 0xBF, 
-            'n', 'e',
-
-      // 3-byte UTF-8 (U+2620 "SKULL AND CROSSBONES") 
-      0x03, (byte) 0xE2, (byte) 0x98, (byte) 0xA0,
-      0x0C, 'L', 'u', (byte) 0xE2, (byte) 0x98, (byte) 0xA0,
-            'c', 'e', (byte) 0xE2, (byte) 0x98, (byte) 0xA0,
-            'n', 'e',
-
-      // surrogate pairs
-      // (U+1D11E "MUSICAL SYMBOL G CLEF")
-      // (U+1D160 "MUSICAL SYMBOL EIGHTH NOTE")
-      0x04, (byte) 0xF0, (byte) 0x9D, (byte) 0x84, (byte) 0x9E,
-      0x08, (byte) 0xF0, (byte) 0x9D, (byte) 0x84, (byte) 0x9E, 
-            (byte) 0xF0, (byte) 0x9D, (byte) 0x85, (byte) 0xA0, 
-      0x0E, 'L', 'u',
-            (byte) 0xF0, (byte) 0x9D, (byte) 0x84, (byte) 0x9E,
-            'c', 'e', 
-            (byte) 0xF0, (byte) 0x9D, (byte) 0x85, (byte) 0xA0, 
-            'n', 'e',  
-
-      // null bytes
-      0x01, 0x00,
-      0x08, 'L', 'u', 0x00, 'c', 'e', 0x00, 'n', 'e',
-    });
-        
+
+  static final byte[] READ_TEST_BYTES = new byte[] { 
+    (byte) 0x80, 0x01,
+    (byte) 0xFF, 0x7F,
+    (byte) 0x80, (byte) 0x80, 0x01,
+    (byte) 0x81, (byte) 0x80, 0x01,
+    (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0x07,
+    (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0x07,
+    (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0x7F,
+    0x06, 'L', 'u', 'c', 'e', 'n', 'e',
+
+    // 2-byte UTF-8 (U+00BF "INVERTED QUESTION MARK") 
+    0x02, (byte) 0xC2, (byte) 0xBF,
+    0x0A, 'L', 'u', (byte) 0xC2, (byte) 0xBF, 
+          'c', 'e', (byte) 0xC2, (byte) 0xBF, 
+          'n', 'e',
+
+    // 3-byte UTF-8 (U+2620 "SKULL AND CROSSBONES") 
+    0x03, (byte) 0xE2, (byte) 0x98, (byte) 0xA0,
+    0x0C, 'L', 'u', (byte) 0xE2, (byte) 0x98, (byte) 0xA0,
+          'c', 'e', (byte) 0xE2, (byte) 0x98, (byte) 0xA0,
+          'n', 'e',
+
+    // surrogate pairs
+    // (U+1D11E "MUSICAL SYMBOL G CLEF")
+    // (U+1D160 "MUSICAL SYMBOL EIGHTH NOTE")
+    0x04, (byte) 0xF0, (byte) 0x9D, (byte) 0x84, (byte) 0x9E,
+    0x08, (byte) 0xF0, (byte) 0x9D, (byte) 0x84, (byte) 0x9E, 
+          (byte) 0xF0, (byte) 0x9D, (byte) 0x85, (byte) 0xA0, 
+    0x0E, 'L', 'u',
+          (byte) 0xF0, (byte) 0x9D, (byte) 0x84, (byte) 0x9E,
+          'c', 'e', 
+          (byte) 0xF0, (byte) 0x9D, (byte) 0x85, (byte) 0xA0, 
+          'n', 'e',  
+
+    // null bytes
+    0x01, 0x00,
+    0x08, 'L', 'u', 0x00, 'c', 'e', 0x00, 'n', 'e',
+  };
+  
+  private void checkReads(IndexInput is) throws IOException {
     assertEquals(128,is.readVInt());
     assertEquals(16383,is.readVInt());
     assertEquals(16384,is.readVInt());
     assertEquals(16385,is.readVInt());
+    assertEquals(Integer.MAX_VALUE, is.readVInt());
+    assertEquals((long) Integer.MAX_VALUE, is.readVLong());
+    assertEquals(Long.MAX_VALUE, is.readVLong());
     assertEquals("Lucene",is.readString());
 
     assertEquals("\u00BF",is.readString());
@@ -80,6 +89,25 @@ public class TestIndexInput extends Luce
     assertEquals("Lu\u0000ce\u0000ne",is.readString());
   }
 
+  // this test only checks BufferedIndexInput because MockIndexInput extends BufferedIndexInput
+  public void testBufferedIndexInputRead() throws IOException {
+    final IndexInput is = new MockIndexInput(READ_TEST_BYTES);
+    checkReads(is);
+    is.close();
+  }
+
+  // this test checks the raw IndexInput methods as it uses RAMIndexInput which extends IndexInput directly
+  public void testRawIndexInputRead() throws IOException {
+    final RAMDirectory dir = new RAMDirectory();
+    final IndexOutput os = dir.createOutput("foo");
+    os.writeBytes(READ_TEST_BYTES, READ_TEST_BYTES.length);
+    os.close();
+    final IndexInput is = dir.openInput("foo");
+    checkReads(is);
+    is.close();
+    dir.close();
+  }
+
   /**
    * Expert
    *