You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by do...@apache.org on 2020/01/27 18:34:55 UTC
[spark] branch branch-2.4 updated: [SPARK-30633][SQL] Append L to seed when type is LongType
This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch branch-2.4
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-2.4 by this push:
new ad9f578 [SPARK-30633][SQL] Append L to seed when type is LongType
ad9f578 is described below
commit ad9f578ef5726c2b48362b147c2809cb36ead245
Author: Patrick Cording <pa...@datarobot.com>
AuthorDate: Mon Jan 27 10:32:15 2020 -0800
[SPARK-30633][SQL] Append L to seed when type is LongType
### What changes were proposed in this pull request?
Allow for using longs as seed for xxHash.
### Why are the changes needed?
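Codegen fails when passing a seed to xxHash that does not fit in a 32-bit int (for example, a value > 2^31 - 1), because the seed is emitted into the generated Java code as a literal without the `L` suffix.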
### Does this PR introduce any user-facing change?
No.
### How was this patch tested?
Existing tests pass. Should more be added?
Closes #27354 from patrickcording/fix_xxhash_seed_bug.
Authored-by: Patrick Cording <pa...@datarobot.com>
Signed-off-by: Dongjoon Hyun <dh...@apple.com>
(cherry picked from commit c5c580ba0d253a04a3df5bbfd5acf6b5d23cdc1c)
Signed-off-by: Dongjoon Hyun <dh...@apple.com>
---
.../spark/sql/catalyst/expressions/hash.scala | 3 +-
.../expressions/HashExpressionsSuite.scala | 39 ++++++++++++++++++++++
2 files changed, 41 insertions(+), 1 deletion(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/hash.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/hash.scala
index 742a4f8..163cf61 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/hash.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/hash.scala
@@ -280,6 +280,7 @@ abstract class HashExpression[E] extends Expression {
}
val hashResultType = CodeGenerator.javaType(dataType)
+ val typedSeed = if (dataType.sameType(LongType)) s"${seed}L" else s"$seed"
val codes = ctx.splitExpressionsWithCurrentInputs(
expressions = childrenHash,
funcName = "computeHash",
@@ -294,7 +295,7 @@ abstract class HashExpression[E] extends Expression {
ev.copy(code =
code"""
- |$hashResultType ${ev.value} = $seed;
+ |$hashResultType ${ev.value} = $typedSeed;
|$codes
""".stripMargin)
}
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HashExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HashExpressionsSuite.scala
index 4281c89..b136681 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HashExpressionsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HashExpressionsSuite.scala
@@ -676,6 +676,33 @@ class HashExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
assert(murmur3HashPlan(wideRow).getInt(0) == murmursHashEval)
}
+ test("SPARK-30633: xxHash with different type seeds") {
+ val literal = Literal.create(42L, LongType)
+
+ val longSeeds = Seq(
+ Long.MinValue,
+ Integer.MIN_VALUE.toLong - 1L,
+ 0L,
+ Integer.MAX_VALUE.toLong + 1L,
+ Long.MaxValue
+ )
+ for (seed <- longSeeds) {
+ checkEvaluation(XxHash64(Seq(literal), seed), XxHash64(Seq(literal), seed).eval())
+ }
+
+ val intSeeds = Seq(
+ Integer.MIN_VALUE,
+ 0,
+ Integer.MAX_VALUE
+ )
+ for (seed <- intSeeds) {
+ checkEvaluation(XxHash64(Seq(literal), seed), XxHash64(Seq(literal), seed).eval())
+ }
+
+ checkEvaluation(XxHash64(Seq(literal), 100), XxHash64(Seq(literal), 100L).eval())
+ checkEvaluation(XxHash64(Seq(literal), 100L), XxHash64(Seq(literal), 100).eval())
+ }
+
private def testHash(inputSchema: StructType): Unit = {
val inputGenerator = RandomDataGenerator.forType(inputSchema, nullable = false).get
val encoder = RowEncoder(inputSchema)
@@ -692,5 +719,17 @@ class HashExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
checkEvaluation(HiveHash(literals), HiveHash(literals).eval())
}
}
+
+ val longSeed = Math.abs(seed).toLong + Integer.MAX_VALUE.toLong
+ test(s"SPARK-30633: xxHash64 with long seed: ${inputSchema.simpleString}") {
+ for (_ <- 1 to 10) {
+ val input = encoder.toRow(inputGenerator.apply().asInstanceOf[Row]).asInstanceOf[UnsafeRow]
+ val literals = input.toSeq(inputSchema).zip(inputSchema.map(_.dataType)).map {
+ case (value, dt) => Literal.create(value, dt)
+ }
+ // Only test the interpreted version has same result with codegen version.
+ checkEvaluation(XxHash64(literals, longSeed), XxHash64(literals, longSeed).eval())
+ }
+ }
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org