You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by li...@apache.org on 2017/07/15 03:16:08 UTC
spark git commit: [SPARK-21344][SQL] BinaryType comparison does
signed byte array comparison
Repository: spark
Updated Branches:
refs/heads/master 2d968a07d -> ac5d5d795
[SPARK-21344][SQL] BinaryType comparison does signed byte array comparison
## What changes were proposed in this pull request?
This PR fixes a wrong comparison for `BinaryType`. This PR enables unsigned comparison and unsigned prefix generation for an array for `BinaryType`. Previous implementations uses signed operations.
## How was this patch tested?
Added a test suite in `OrderingSuite`.
Author: Kazuaki Ishizaki <is...@jp.ibm.com>
Closes #18571 from kiszk/SPARK-21344.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ac5d5d79
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ac5d5d79
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ac5d5d79
Branch: refs/heads/master
Commit: ac5d5d795909061a17e056696cf0ef87d9e65dd1
Parents: 2d968a0
Author: Kazuaki Ishizaki <is...@jp.ibm.com>
Authored: Fri Jul 14 20:16:04 2017 -0700
Committer: gatorsmile <ga...@gmail.com>
Committed: Fri Jul 14 20:16:04 2017 -0700
----------------------------------------------------------------------
.../org/apache/spark/unsafe/types/ByteArray.java | 2 +-
.../unsafe/sort/PrefixComparatorsSuite.scala | 17 ++++++++++++++++-
.../spark/sql/catalyst/util/TypeUtils.scala | 4 +++-
.../sql/catalyst/expressions/OrderingSuite.scala | 19 +++++++++++++++++++
.../resources/sql-tests/inputs/comparator.sql | 3 +++
.../sql-tests/results/comparator.sql.out | 18 ++++++++++++++++++
6 files changed, 60 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/ac5d5d79/common/unsafe/src/main/java/org/apache/spark/unsafe/types/ByteArray.java
----------------------------------------------------------------------
diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/ByteArray.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/ByteArray.java
index 3ced209..7ced13d 100644
--- a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/ByteArray.java
+++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/ByteArray.java
@@ -44,7 +44,7 @@ public final class ByteArray {
final int minLen = Math.min(bytes.length, 8);
long p = 0;
for (int i = 0; i < minLen; ++i) {
- p |= (128L + Platform.getByte(bytes, Platform.BYTE_ARRAY_OFFSET + i))
+ p |= ((long) Platform.getByte(bytes, Platform.BYTE_ARRAY_OFFSET + i) & 0xff)
<< (56 - 8 * i);
}
return p;
http://git-wip-us.apache.org/repos/asf/spark/blob/ac5d5d79/core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/PrefixComparatorsSuite.scala
----------------------------------------------------------------------
diff --git a/core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/PrefixComparatorsSuite.scala b/core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/PrefixComparatorsSuite.scala
index 5180c58..73546ef 100644
--- a/core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/PrefixComparatorsSuite.scala
+++ b/core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/PrefixComparatorsSuite.scala
@@ -63,7 +63,9 @@ class PrefixComparatorsSuite extends SparkFunSuite with PropertyChecks {
def compareBinary(x: Array[Byte], y: Array[Byte]): Int = {
for (i <- 0 until x.length; if i < y.length) {
- val res = x(i).compare(y(i))
+ val v1 = x(i) & 0xff
+ val v2 = y(i) & 0xff
+ val res = v1 - v2
if (res != 0) return res
}
x.length - y.length
@@ -80,6 +82,19 @@ class PrefixComparatorsSuite extends SparkFunSuite with PropertyChecks {
(prefixComparisonResult > 0 && compareBinary(x, y) > 0))
}
+ val binaryRegressionTests = Seq(
+ (Array[Byte](1), Array[Byte](-1)),
+ (Array[Byte](1, 1, 1, 1, 1), Array[Byte](1, 1, 1, 1, -1)),
+ (Array[Byte](1, 1, 1, 1, 1, 1, 1, 1, 1), Array[Byte](1, 1, 1, 1, 1, 1, 1, 1, -1)),
+ (Array[Byte](1), Array[Byte](1, 1, 1, 1)),
+ (Array[Byte](1, 1, 1, 1, 1), Array[Byte](1, 1, 1, 1, 1, 1, 1, 1, 1)),
+ (Array[Byte](-1), Array[Byte](-1, -1, -1, -1)),
+ (Array[Byte](-1, -1, -1, -1, -1), Array[Byte](-1, -1, -1, -1, -1, -1, -1, -1, -1))
+ )
+ binaryRegressionTests.foreach { case (b1, b2) =>
+ testPrefixComparison(b1, b2)
+ }
+
// scalastyle:off
val regressionTests = Table(
("s1", "s2"),
http://git-wip-us.apache.org/repos/asf/spark/blob/ac5d5d79/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TypeUtils.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TypeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TypeUtils.scala
index 7101ca5..4522577 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TypeUtils.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TypeUtils.scala
@@ -70,7 +70,9 @@ object TypeUtils {
def compareBinary(x: Array[Byte], y: Array[Byte]): Int = {
for (i <- 0 until x.length; if i < y.length) {
- val res = x(i).compareTo(y(i))
+ val v1 = x(i) & 0xff
+ val v2 = y(i) & 0xff
+ val res = v1 - v2
if (res != 0) return res
}
x.length - y.length
http://git-wip-us.apache.org/repos/asf/spark/blob/ac5d5d79/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala
index 190fab5..aa61ba2 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala
@@ -137,4 +137,23 @@ class OrderingSuite extends SparkFunSuite with ExpressionEvalHelper {
// verify that we can support up to 5000 ordering comparisons, which should be sufficient
GenerateOrdering.generate(Array.fill(5000)(sortOrder))
}
+
+ test("SPARK-21344: BinaryType comparison does signed byte array comparison") {
+ val data = Seq(
+ (Array[Byte](1), Array[Byte](-1)),
+ (Array[Byte](1, 1, 1, 1, 1), Array[Byte](1, 1, 1, 1, -1)),
+ (Array[Byte](1, 1, 1, 1, 1, 1, 1, 1, 1), Array[Byte](1, 1, 1, 1, 1, 1, 1, 1, -1))
+ )
+ data.foreach { case (b1, b2) =>
+ val rowOrdering = InterpretedOrdering.forSchema(Seq(BinaryType))
+ val genOrdering = GenerateOrdering.generate(
+ BoundReference(0, BinaryType, nullable = true).asc :: Nil)
+ val rowType = StructType(StructField("b", BinaryType, nullable = true) :: Nil)
+ val toCatalyst = CatalystTypeConverters.createToCatalystConverter(rowType)
+ val rowB1 = toCatalyst(Row(b1)).asInstanceOf[InternalRow]
+ val rowB2 = toCatalyst(Row(b2)).asInstanceOf[InternalRow]
+ assert(rowOrdering.compare(rowB1, rowB2) < 0)
+ assert(genOrdering.compare(rowB1, rowB2) < 0)
+ }
+ }
}
http://git-wip-us.apache.org/repos/asf/spark/blob/ac5d5d79/sql/core/src/test/resources/sql-tests/inputs/comparator.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/inputs/comparator.sql b/sql/core/src/test/resources/sql-tests/inputs/comparator.sql
new file mode 100644
index 0000000..3e24477
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/inputs/comparator.sql
@@ -0,0 +1,3 @@
+-- binary type
+select x'00' < x'0f';
+select x'00' < x'ff';
http://git-wip-us.apache.org/repos/asf/spark/blob/ac5d5d79/sql/core/src/test/resources/sql-tests/results/comparator.sql.out
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/results/comparator.sql.out b/sql/core/src/test/resources/sql-tests/results/comparator.sql.out
new file mode 100644
index 0000000..afc7b54
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/results/comparator.sql.out
@@ -0,0 +1,18 @@
+-- Automatically generated by SQLQueryTestSuite
+-- Number of queries: 2
+
+
+-- !query 0
+select x'00' < x'0f'
+-- !query 0 schema
+struct<(X'00' < X'0F'):boolean>
+-- !query 0 output
+true
+
+
+-- !query 1
+select x'00' < x'ff'
+-- !query 1 schema
+struct<(X'00' < X'FF'):boolean>
+-- !query 1 output
+true
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org