You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by yo...@apache.org on 2011/02/23 22:12:08 UTC
svn commit: r1073950 - in /lucene/dev/branches/branch_3x: ./ lucene/ lucene/CHANGES.txt lucene/src/java/org/apache/lucene/util/SmallFloat.java lucene/src/test/org/apache/lucene/util/TestSmallFloat.java solr/

Author: yonik
Date: Wed Feb 23 21:12:07 2011
New Revision: 1073950

URL: http://svn.apache.org/viewvc?rev=1073950&view=rev
Log:
LUCENE-2937: fix floatToByte underflow detection

Modified:
    lucene/dev/branches/branch_3x/   (props changed)
    lucene/dev/branches/branch_3x/lucene/   (props changed)
    lucene/dev/branches/branch_3x/lucene/CHANGES.txt
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/util/SmallFloat.java
    lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/util/TestSmallFloat.java
    lucene/dev/branches/branch_3x/solr/   (props changed)

Modified: lucene/dev/branches/branch_3x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/CHANGES.txt?rev=1073950&r1=1073949&r2=1073950&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_3x/lucene/CHANGES.txt Wed Feb 23 21:12:07 2011
@@ -387,6 +387,13 @@ Bug fixes
 * LUCENE-2891: IndexWriterConfig did not accept -1 in setReaderTermIndexDivisor,
   which can be used to prevent loading the terms index into memory. (Shai Erera)
 
+* LUCENE-2937: Encoding a float into a byte (e.g. encoding field norms during
+  indexing) had an underflow detection bug that caused floatToByte(f)==0 where
+  f was greater than 0, but slightly less than byteToFloat(1).  This meant that
+  certain very small field norms (index_boost * length_norm) could have
+  been rounded down to 0 instead of being rounded up to the smallest
+  positive number.  (yonik)
+
 New features
 
 * LUCENE-2128: Parallelized fetching document frequencies during weight

Modified: lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/util/SmallFloat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/util/SmallFloat.java?rev=1073950&r1=1073949&r2=1073950&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/util/SmallFloat.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/util/SmallFloat.java Wed Feb 23 21:12:07 2011
@@ -39,7 +39,7 @@ public class SmallFloat {
     int fzero = (63-zeroExp)<<numMantissaBits;
     int bits = Float.floatToRawIntBits(f);
     int smallfloat = bits >> (24-numMantissaBits);
-    if (smallfloat < fzero) {
+    if (smallfloat <= fzero) {
       return (bits<=0) ?
         (byte)0   // negative numbers and zero both map to 0 byte
        :(byte)1;  // underflow is mapped to smallest non-zero number.
@@ -75,7 +75,7 @@ public class SmallFloat {
   public static byte floatToByte315(float f) {
     int bits = Float.floatToRawIntBits(f);
     int smallfloat = bits >> (24-3);
-    if (smallfloat < (63-15)<<3) {
+    if (smallfloat <= ((63-15)<<3)) {
       return (bits<=0) ? (byte)0 : (byte)1;
     }
     if (smallfloat >= ((63-15)<<3) + 0x100) {
@@ -103,7 +103,7 @@ public class SmallFloat {
   public static byte floatToByte52(float f) {
     int bits = Float.floatToRawIntBits(f);
     int smallfloat = bits >> (24-5);
-    if (smallfloat < (63-2)<<5) {
+    if (smallfloat <= (63-2)<<5) {
       return (bits<=0) ? (byte)0 : (byte)1;
     }
     if (smallfloat >= ((63-2)<<5) + 0x100) {

Modified: lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/util/TestSmallFloat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/util/TestSmallFloat.java?rev=1073950&r1=1073949&r2=1073950&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/util/TestSmallFloat.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/util/TestSmallFloat.java Wed Feb 23 21:12:07 2011
@@ -28,8 +28,8 @@ public class TestSmallFloat extends Luce
     return Float.intBitsToFloat(bits);
   }
 
-  // original lucene floatToByte
-  static byte orig_floatToByte(float f) {
+  // original lucene floatToByte (since lucene 1.3)
+  static byte orig_floatToByte_v13(float f) {
     if (f < 0.0f)                                 // round negatives up to zero
       f = 0.0f;
 
@@ -53,7 +53,40 @@ public class TestSmallFloat extends Luce
     return (byte)((exponent << 3) | mantissa);    // pack into a byte
   }
 
+  // This is the original lucene floatToByte (from v1.3)
+  // except with the underflow detection bug fixed for values like 5.8123817E-10f
+  static byte orig_floatToByte(float f) {
+    if (f < 0.0f)                                 // round negatives up to zero
+      f = 0.0f;
+
+    if (f == 0.0f)                                // zero is a special case
+      return 0;
+
+    int bits = Float.floatToIntBits(f);           // parse float into parts
+    int mantissa = (bits & 0xffffff) >> 21;
+    int exponent = (((bits >> 24) & 0x7f) - 63) + 15;
+
+    if (exponent > 31) {                          // overflow: use max value
+      exponent = 31;
+      mantissa = 7;
+    }
+
+    if (exponent < 0 || exponent == 0 && mantissa == 0) { // underflow: use min value
+      exponent = 0;
+      mantissa = 1;
+    }
+
+    return (byte)((exponent << 3) | mantissa);    // pack into a byte
+  }
+
+
   public void testByteToFloat() {
+    assertEquals(0, orig_floatToByte_v13(5.8123817E-10f));       // verify the old bug (see LUCENE-2937)
+    assertEquals(1, orig_floatToByte(5.8123817E-10f));           // verify it's fixed in this test code
+    assertEquals(1, SmallFloat.floatToByte315(5.8123817E-10f));  // verify it's fixed
+
+    assertEquals(1, orig_floatToByte(Float.MIN_VALUE));
+
     for (int i=0; i<256; i++) {
       float f1 = orig_byteToFloat((byte)i);
       float f2 = SmallFloat.byteToFloat((byte)i, 3,15);
@@ -95,8 +128,8 @@ public class TestSmallFloat extends Luce
       if (f==f) { // skip non-numbers
         byte b1 = orig_floatToByte(f);
         byte b2 = SmallFloat.floatToByte315(f);
-        if (b1!=b2) {
-          TestCase.fail("Failed floatToByte315 for float " + f);
+        if (b1!=b2 || b2==0 && f>0) {
+          fail("Failed floatToByte315 for float " + f + " source bits="+Integer.toHexString(i) + " float raw bits=" + Integer.toHexString(Float.floatToRawIntBits(i)));
         }
       }
       if (i==Integer.MAX_VALUE) break;