You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2019/02/22 04:27:38 UTC
[spark] branch branch-2.4 updated: [SPARK-26950][SQL][TEST] Make
RandomDataGenerator use Float.NaN or Double.NaN for all NaN values
This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch branch-2.4
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-2.4 by this push:
new ef67be3 [SPARK-26950][SQL][TEST] Make RandomDataGenerator use Float.NaN or Double.NaN for all NaN values
ef67be3 is described below
commit ef67be363be6d6b6954b55ef1c243a0672b84abb
Author: Dongjoon Hyun <do...@apache.org>
AuthorDate: Fri Feb 22 12:25:26 2019 +0800
[SPARK-26950][SQL][TEST] Make RandomDataGenerator use Float.NaN or Double.NaN for all NaN values
## What changes were proposed in this pull request?
Apache Spark uses the predefined `Float.NaN` and `Double.NaN` for NaN values, but there exist more NaN values with different binary representations.
```scala
scala> java.nio.ByteBuffer.allocate(4).putFloat(Float.NaN).array
res1: Array[Byte] = Array(127, -64, 0, 0)
scala> val x = java.lang.Float.intBitsToFloat(-6966608)
x: Float = NaN
scala> java.nio.ByteBuffer.allocate(4).putFloat(x).array
res2: Array[Byte] = Array(-1, -107, -78, -80)
```
Since users can have these values, `RandomDataGenerator` generates these NaN values. However, this causes `checkEvaluationWithUnsafeProjection` failures due to the difference between `UnsafeRow` binary representations. The following is the UT failure instance. This PR aims to fix this UT flakiness.
- https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/102528/testReport/
## How was this patch tested?
Pass the Jenkins with the newly added test cases.
Closes #23851 from dongjoon-hyun/SPARK-26950.
Authored-by: Dongjoon Hyun <do...@apache.org>
Signed-off-by: Wenchen Fan <we...@databricks.com>
(cherry picked from commit ffef3d40741b0be321421aa52a6e17a26d89f541)
Signed-off-by: Wenchen Fan <we...@databricks.com>
---
.../org/apache/spark/sql/RandomDataGenerator.scala | 24 +++++++++++++++--
.../spark/sql/RandomDataGeneratorSuite.scala | 31 ++++++++++++++++++++++
2 files changed, 53 insertions(+), 2 deletions(-)
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala
index 8ae3ff5..d361e62 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala
@@ -17,8 +17,6 @@
package org.apache.spark.sql
-import java.lang.Double.longBitsToDouble
-import java.lang.Float.intBitsToFloat
import java.math.MathContext
import scala.collection.mutable
@@ -70,6 +68,28 @@ object RandomDataGenerator {
}
/**
+ * A wrapper of Float.intBitsToFloat to use a unique NaN value for all NaN values.
+ * This prevents `checkEvaluationWithUnsafeProjection` from failing due to
+ * the difference between `UnsafeRow` binary presentation for NaN.
+ * This is visible for testing.
+ */
+ def intBitsToFloat(bits: Int): Float = {
+ val value = java.lang.Float.intBitsToFloat(bits)
+ if (value.isNaN) Float.NaN else value
+ }
+
+ /**
+ * A wrapper of Double.longBitsToDouble to use a unique NaN value for all NaN values.
+ * This prevents `checkEvaluationWithUnsafeProjection` from failing due to
+ * the difference between `UnsafeRow` binary presentation for NaN.
+ * This is visible for testing.
+ */
+ def longBitsToDouble(bits: Long): Double = {
+ val value = java.lang.Double.longBitsToDouble(bits)
+ if (value.isNaN) Double.NaN else value
+ }
+
+ /**
* Returns a randomly generated schema, based on the given accepted types.
*
* @param numFields the number of fields in this schema
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGeneratorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGeneratorSuite.scala
index 3c2f8a2..3e62ca0 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGeneratorSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGeneratorSuite.scala
@@ -17,6 +17,9 @@
package org.apache.spark.sql
+import java.nio.ByteBuffer
+import java.util.Arrays
+
import scala.util.Random
import org.apache.spark.SparkFunSuite
@@ -106,4 +109,32 @@ class RandomDataGeneratorSuite extends SparkFunSuite {
assert(deviation.toDouble / expectedTotalElements < 2e-1)
}
}
+
+ test("Use Float.NaN for all NaN values") {
+ val bits = -6966608
+ val nan1 = java.lang.Float.intBitsToFloat(bits)
+ val nan2 = RandomDataGenerator.intBitsToFloat(bits)
+ assert(nan1.isNaN)
+ assert(nan2.isNaN)
+
+ val arrayExpected = ByteBuffer.allocate(4).putFloat(Float.NaN).array
+ val array1 = ByteBuffer.allocate(4).putFloat(nan1).array
+ val array2 = ByteBuffer.allocate(4).putFloat(nan2).array
+ assert(!Arrays.equals(array1, arrayExpected))
+ assert(Arrays.equals(array2, arrayExpected))
+ }
+
+ test("Use Double.NaN for all NaN values") {
+ val bits = -6966608
+ val nan1 = java.lang.Double.longBitsToDouble(bits)
+ val nan2 = RandomDataGenerator.longBitsToDouble(bits)
+ assert(nan1.isNaN)
+ assert(nan2.isNaN)
+
+ val arrayExpected = ByteBuffer.allocate(8).putDouble(Double.NaN).array
+ val array1 = ByteBuffer.allocate(8).putDouble(nan1).array
+ val array2 = ByteBuffer.allocate(8).putDouble(nan2).array
+ assert(!Arrays.equals(array1, arrayExpected))
+ assert(Arrays.equals(array2, arrayExpected))
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org