You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by li...@apache.org on 2017/07/10 16:26:47 UTC
spark git commit: [SPARK-21272] SortMergeJoin LeftAnti does not update numOutputRows
Repository: spark
Updated Branches:
refs/heads/master 6a06c4b03 -> 18b3b00ec
[SPARK-21272] SortMergeJoin LeftAnti does not update numOutputRows
## What changes were proposed in this pull request?
The update of the numOutputRows metric was missing from one return path of the LeftAnti SortMergeJoin.
## How was this patch tested?
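Manually verified that the metrics now show a non-zero number of output rows; a regression test, "SortMergeJoin(left-anti) metrics", is also added to SQLMetricsSuite.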
Author: Juliusz Sompolski <ju...@databricks.com>
Closes #18494 from juliuszsompolski/SPARK-21272.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/18b3b00e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/18b3b00e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/18b3b00e
Branch: refs/heads/master
Commit: 18b3b00ecfde6c694fb6fee4f4d07d04e3d08ccf
Parents: 6a06c4b
Author: Juliusz Sompolski <ju...@databricks.com>
Authored: Mon Jul 10 09:26:42 2017 -0700
Committer: gatorsmile <ga...@gmail.com>
Committed: Mon Jul 10 09:26:42 2017 -0700
----------------------------------------------------------------------
.../spark/sql/execution/joins/SortMergeJoinExec.scala | 1 +
.../spark/sql/execution/metric/SQLMetricsSuite.scala | 12 ++++++++++++
2 files changed, 13 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/18b3b00e/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala
index 8445c26..639b8e0 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala
@@ -290,6 +290,7 @@ case class SortMergeJoinExec(
currentLeftRow = smjScanner.getStreamedRow
val currentRightMatches = smjScanner.getBufferedMatches
if (currentRightMatches == null || currentRightMatches.length == 0) {
+ numOutputRows += 1
return true
}
var found = false
http://git-wip-us.apache.org/repos/asf/spark/blob/18b3b00e/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala
index cb3405b..2911cbb 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala
@@ -483,6 +483,18 @@ class SQLMetricsSuite extends SparkFunSuite with SharedSQLContext {
}
}
+ test("SortMergeJoin(left-anti) metrics") {
+ val anti = testData2.filter("a > 2")
+ withTempView("antiData") {
+ anti.createOrReplaceTempView("antiData")
+ val df = spark.sql(
+ "SELECT * FROM testData2 ANTI JOIN antiData ON testData2.a = antiData.a")
+ testSparkPlanMetrics(df, 1, Map(
+ 0L -> ("SortMergeJoin", Map("number of output rows" -> 4L)))
+ )
+ }
+ }
+
test("save metrics") {
withTempPath { file =>
// person creates a temporary view. get the DF before listing previous execution IDs
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org