You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2022/08/29 07:26:00 UTC
[spark] branch master updated: [SPARK-40247][SQL] Fix BitSet equality check
This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 527ddece8fd [SPARK-40247][SQL] Fix BitSet equality check
527ddece8fd is described below
commit 527ddece8fdbe703dcd239401c97ddb2c6122182
Author: Peter Toth <pt...@cloudera.com>
AuthorDate: Mon Aug 29 15:25:39 2022 +0800
[SPARK-40247][SQL] Fix BitSet equality check
### What changes were proposed in this pull request?
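Spark's `BitSet` doesn't implement `equals()` and `hashCode()`, but it is used in `FileSourceScanExec` for bucket pruning. This PR adds both methods to `BitSet`, implemented over its underlying `words` array via `Arrays.equals` and `Arrays.hashCode`.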
### Why are the changes needed?
Without a proper equality check, reuse issues can occur.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Added a new unit test (`SPARK-40247: Fix BitSet equals` in `SQLQuerySuite`).
Closes #37696 from peter-toth/SPARK-40247-fix-bitset-equals.
Authored-by: Peter Toth <pt...@cloudera.com>
Signed-off-by: Wenchen Fan <we...@databricks.com>
---
.../org/apache/spark/util/collection/BitSet.scala | 9 ++++++++
.../scala/org/apache/spark/sql/SQLQuerySuite.scala | 25 ++++++++++++++++++++++
2 files changed, 34 insertions(+)
diff --git a/core/src/main/scala/org/apache/spark/util/collection/BitSet.scala b/core/src/main/scala/org/apache/spark/util/collection/BitSet.scala
index 61386114997..6bb5058f5ed 100644
--- a/core/src/main/scala/org/apache/spark/util/collection/BitSet.scala
+++ b/core/src/main/scala/org/apache/spark/util/collection/BitSet.scala
@@ -250,4 +250,13 @@ class BitSet(numBits: Int) extends Serializable {
/** Return the number of longs it would take to hold numBits. */
private def bit2words(numBits: Int) = ((numBits - 1) >> 6) + 1
+
+ override def equals(other: Any): Boolean = other match {
+ case otherSet: BitSet => Arrays.equals(words, otherSet.words)
+ case _ => false
+ }
+
+ override def hashCode(): Int = {
+ Arrays.hashCode(words)
+ }
}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index cbd65ede054..0fa2c7195db 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -4003,6 +4003,31 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
}
}
+ test("SPARK-40247: Fix BitSet equals") {
+ withTable("td") {
+ testData
+ .withColumn("bucket", $"key" % 3)
+ .write
+ .mode(SaveMode.Overwrite)
+ .bucketBy(2, "bucket")
+ .format("parquet")
+ .saveAsTable("td")
+ val df = sql(
+ """
+ |SELECT t1.key, t2.key, t3.key
+ |FROM td AS t1
+ |JOIN td AS t2 ON t2.key = t1.key
+ |JOIN td AS t3 ON t3.key = t2.key
+ |WHERE t1.bucket = 1 AND t2.bucket = 1 AND t3.bucket = 1
+ |""".stripMargin)
+ df.collect()
+ val reusedExchanges = collect(df.queryExecution.executedPlan) {
+ case r: ReusedExchangeExec => r
+ }
+ assert(reusedExchanges.size == 1)
+ }
+ }
+
test("SPARK-35331: Fix resolving original expression in RepartitionByExpression after aliased") {
Seq("CLUSTER", "DISTRIBUTE").foreach { keyword =>
Seq("a", "substr(a, 0, 3)").foreach { expr =>
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org