You are viewing a plain text version of this content. The canonical link for it is here.
Posted to reviews@spark.apache.org by "beliefer (via GitHub)" <gi...@apache.org> on 2023/08/25 06:18:39 UTC

[GitHub] [spark] beliefer commented on a diff in pull request #41860: [SPARK-44307][SQL] Add Bloom filter for left outer join even if the left side table is smaller than broadcast threshold.

beliefer commented on code in PR #41860:
URL: https://github.com/apache/spark/pull/41860#discussion_r1305217940


##########
sql/core/src/test/scala/org/apache/spark/sql/InjectRuntimeFilterSuite.scala:
##########
@@ -644,4 +644,76 @@ class InjectRuntimeFilterSuite extends QueryTest with SQLTestUtils with SharedSp
         "Missing or unexpected reused ReusedSubqueryExec in the plan")
     }
   }
+
+  test("Runtime bloom filter join: should add bf for left outer join even if left side is" +
+    " smaller than broadcast threshold") {
+    withSQLConf(SQLConf.RUNTIME_BLOOM_FILTER_APPLICATION_SIDE_SCAN_SIZE_THRESHOLD.key -> "600",
+      SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "300",
+      SQLConf.RUNTIME_BLOOM_FILTER_CREATION_SIDE_THRESHOLD.key -> "4000",
+      SQLConf.CBO_ENABLED.key -> "true") {
+      assertRewroteWithBloomFilter(
+        "select * from bf2 left outer join bf1 on bf1.c1 = bf2.c2 where bf2.a2 = 62"
+      )
+    }
+  }
+
+  test("Runtime bloom filter join: should not add bf for left outer join if right side is" +
+    " smaller than broadcast threshold") {
+    withSQLConf(SQLConf.RUNTIME_BLOOM_FILTER_APPLICATION_SIDE_SCAN_SIZE_THRESHOLD.key -> "600",
+      SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "600",
+      SQLConf.RUNTIME_BLOOM_FILTER_CREATION_SIDE_THRESHOLD.key -> "4000",
+      SQLConf.CBO_ENABLED.key -> "true") {
+      assertDidNotRewriteWithBloomFilter(
+        "select * from bf2 left outer join bf1 on bf1.c1 = bf2.c2 where bf2.a2 = 62"
+      )
+    }
+  }
+
+  test ("Runtime bloom filter join: should add bf for right outer join even if right side is" +

Review Comment:
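   There is an extra blank (space) between `test` and `(` here; please remove it so it reads `test(` like the other tests.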



##########
sql/core/src/test/scala/org/apache/spark/sql/InjectRuntimeFilterSuite.scala:
##########
@@ -644,4 +644,76 @@ class InjectRuntimeFilterSuite extends QueryTest with SQLTestUtils with SharedSp
         "Missing or unexpected reused ReusedSubqueryExec in the plan")
     }
   }
+
+  test("Runtime bloom filter join: should add bf for left outer join even if left side is" +
+    " smaller than broadcast threshold") {
+    withSQLConf(SQLConf.RUNTIME_BLOOM_FILTER_APPLICATION_SIDE_SCAN_SIZE_THRESHOLD.key -> "600",
+      SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "300",
+      SQLConf.RUNTIME_BLOOM_FILTER_CREATION_SIDE_THRESHOLD.key -> "4000",
+      SQLConf.CBO_ENABLED.key -> "true") {
+      assertRewroteWithBloomFilter(
+        "select * from bf2 left outer join bf1 on bf1.c1 = bf2.c2 where bf2.a2 = 62"
+      )
+    }
+  }
+
+  test("Runtime bloom filter join: should not add bf for left outer join if right side is" +
+    " smaller than broadcast threshold") {
+    withSQLConf(SQLConf.RUNTIME_BLOOM_FILTER_APPLICATION_SIDE_SCAN_SIZE_THRESHOLD.key -> "600",
+      SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "600",
+      SQLConf.RUNTIME_BLOOM_FILTER_CREATION_SIDE_THRESHOLD.key -> "4000",
+      SQLConf.CBO_ENABLED.key -> "true") {
+      assertDidNotRewriteWithBloomFilter(
+        "select * from bf2 left outer join bf1 on bf1.c1 = bf2.c2 where bf2.a2 = 62"
+      )
+    }
+  }
+
+  test ("Runtime bloom filter join: should add bf for right outer join even if right side is" +
+    " smaller than broadcast threshold") {
+    withSQLConf(SQLConf.RUNTIME_BLOOM_FILTER_APPLICATION_SIDE_SCAN_SIZE_THRESHOLD.key -> "600",
+      SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "300",
+      SQLConf.RUNTIME_BLOOM_FILTER_CREATION_SIDE_THRESHOLD.key -> "4000",
+      SQLConf.CBO_ENABLED.key -> "true") {
+      assertRewroteWithBloomFilter(
+        "select * from bf1 right outer join bf2 on bf1.c1 = bf2.c2 where bf2.a2 = 62"
+      )
+    }
+  }
+
+  test("Runtime bloom filter join: should not add bf for right outer join if left side is" +
+    " smaller than broadcast threshold") {
+    withSQLConf(SQLConf.RUNTIME_BLOOM_FILTER_APPLICATION_SIDE_SCAN_SIZE_THRESHOLD.key -> "600",
+      SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "600",
+      SQLConf.RUNTIME_BLOOM_FILTER_CREATION_SIDE_THRESHOLD.key -> "4000",
+      SQLConf.CBO_ENABLED.key -> "true") {
+      assertDidNotRewriteWithBloomFilter(
+        "select * from bf1 right outer join bf2 on bf1.c1 = bf2.c2 where bf2.a2 = 62"
+      )
+    }
+  }
+
+  test ("Runtime bloom filter join: should add bf for left semi join even if left side is" +

Review Comment:
   Ditto: this line also has an extra space between `test` and `(`; please remove it so it reads `test(`.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org