You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by yo...@apache.org on 2011/02/23 22:12:08 UTC
svn commit: r1073950 - in /lucene/dev/branches/branch_3x: ./ lucene/
lucene/CHANGES.txt lucene/src/java/org/apache/lucene/util/SmallFloat.java
lucene/src/test/org/apache/lucene/util/TestSmallFloat.java solr/
Author: yonik
Date: Wed Feb 23 21:12:07 2011
New Revision: 1073950
URL: http://svn.apache.org/viewvc?rev=1073950&view=rev
Log:
LUCENE-2937: fix floatToByte underflow detection
Modified:
lucene/dev/branches/branch_3x/ (props changed)
lucene/dev/branches/branch_3x/lucene/ (props changed)
lucene/dev/branches/branch_3x/lucene/CHANGES.txt
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/util/SmallFloat.java
lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/util/TestSmallFloat.java
lucene/dev/branches/branch_3x/solr/ (props changed)
Modified: lucene/dev/branches/branch_3x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/CHANGES.txt?rev=1073950&r1=1073949&r2=1073950&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_3x/lucene/CHANGES.txt Wed Feb 23 21:12:07 2011
@@ -387,6 +387,13 @@ Bug fixes
* LUCENE-2891: IndexWriterConfig did not accept -1 in setReaderTermIndexDivisor,
which can be used to prevent loading the terms index into memory. (Shai Erera)
+* LUCENE-2937: Encoding a float into a byte (e.g. encoding field norms during
+ indexing) had an underflow detection bug that caused floatToByte(f)==0 where
+ f was greater than 0, but slightly less than byteToFloat(1). This meant that
+ certain very small field norms (index_boost * length_norm) could have
+ been rounded down to 0 instead of being rounded up to the smallest
+ positive number. (yonik)
+
New features
* LUCENE-2128: Parallelized fetching document frequencies during weight
Modified: lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/util/SmallFloat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/util/SmallFloat.java?rev=1073950&r1=1073949&r2=1073950&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/util/SmallFloat.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/util/SmallFloat.java Wed Feb 23 21:12:07 2011
@@ -39,7 +39,7 @@ public class SmallFloat {
int fzero = (63-zeroExp)<<numMantissaBits;
int bits = Float.floatToRawIntBits(f);
int smallfloat = bits >> (24-numMantissaBits);
- if (smallfloat < fzero) {
+ if (smallfloat <= fzero) {
return (bits<=0) ?
(byte)0 // negative numbers and zero both map to 0 byte
:(byte)1; // underflow is mapped to smallest non-zero number.
@@ -75,7 +75,7 @@ public class SmallFloat {
public static byte floatToByte315(float f) {
int bits = Float.floatToRawIntBits(f);
int smallfloat = bits >> (24-3);
- if (smallfloat < (63-15)<<3) {
+ if (smallfloat <= ((63-15)<<3)) {
return (bits<=0) ? (byte)0 : (byte)1;
}
if (smallfloat >= ((63-15)<<3) + 0x100) {
@@ -103,7 +103,7 @@ public class SmallFloat {
public static byte floatToByte52(float f) {
int bits = Float.floatToRawIntBits(f);
int smallfloat = bits >> (24-5);
- if (smallfloat < (63-2)<<5) {
+ if (smallfloat <= (63-2)<<5) {
return (bits<=0) ? (byte)0 : (byte)1;
}
if (smallfloat >= ((63-2)<<5) + 0x100) {
Modified: lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/util/TestSmallFloat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/util/TestSmallFloat.java?rev=1073950&r1=1073949&r2=1073950&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/util/TestSmallFloat.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/util/TestSmallFloat.java Wed Feb 23 21:12:07 2011
@@ -28,8 +28,8 @@ public class TestSmallFloat extends Luce
return Float.intBitsToFloat(bits);
}
- // original lucene floatToByte
- static byte orig_floatToByte(float f) {
+ // original lucene floatToByte (since lucene 1.3)
+ static byte orig_floatToByte_v13(float f) {
if (f < 0.0f) // round negatives up to zero
f = 0.0f;
@@ -53,7 +53,40 @@ public class TestSmallFloat extends Luce
return (byte)((exponent << 3) | mantissa); // pack into a byte
}
+ // This is the original lucene floatToByte (from v1.3)
+ // except with the underflow detection bug fixed for values like 5.8123817E-10f
+ static byte orig_floatToByte(float f) {
+ if (f < 0.0f) // round negatives up to zero
+ f = 0.0f;
+
+ if (f == 0.0f) // zero is a special case
+ return 0;
+
+ int bits = Float.floatToIntBits(f); // parse float into parts
+ int mantissa = (bits & 0xffffff) >> 21;
+ int exponent = (((bits >> 24) & 0x7f) - 63) + 15;
+
+ if (exponent > 31) { // overflow: use max value
+ exponent = 31;
+ mantissa = 7;
+ }
+
+ if (exponent < 0 || exponent == 0 && mantissa == 0) { // underflow: use min value
+ exponent = 0;
+ mantissa = 1;
+ }
+
+ return (byte)((exponent << 3) | mantissa); // pack into a byte
+ }
+
+
public void testByteToFloat() {
+ assertEquals(0, orig_floatToByte_v13(5.8123817E-10f)); // verify the old bug (see LUCENE-2937)
+ assertEquals(1, orig_floatToByte(5.8123817E-10f)); // verify it's fixed in this test code
+ assertEquals(1, SmallFloat.floatToByte315(5.8123817E-10f)); // verify it's fixed
+
+ assertEquals(1, orig_floatToByte(Float.MIN_VALUE));
+
for (int i=0; i<256; i++) {
float f1 = orig_byteToFloat((byte)i);
float f2 = SmallFloat.byteToFloat((byte)i, 3,15);
@@ -95,8 +128,8 @@ public class TestSmallFloat extends Luce
if (f==f) { // skip non-numbers
byte b1 = orig_floatToByte(f);
byte b2 = SmallFloat.floatToByte315(f);
- if (b1!=b2) {
- TestCase.fail("Failed floatToByte315 for float " + f);
+ if (b1!=b2 || b2==0 && f>0) {
+ fail("Failed floatToByte315 for float " + f + " source bits="+Integer.toHexString(i) + " float raw bits=" + Integer.toHexString(Float.floatToRawIntBits(i)));
}
}
if (i==Integer.MAX_VALUE) break;