Posted to commits@spark.apache.org by ya...@apache.org on 2020/03/28 23:51:23 UTC
[spark] branch branch-3.0 updated: [SPARK-31292][CORE][SQL] Replace toSet.toSeq with distinct for readability
This is an automated email from the ASF dual-hosted git repository.
yamamuro pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new 71dcf66 [SPARK-31292][CORE][SQL] Replace toSet.toSeq with distinct for readability
71dcf66 is described below
commit 71dcf6691a48dd622b83e128aa9be30f757b45ec
Author: Kengo Seki <se...@apache.org>
AuthorDate: Sun Mar 29 08:48:08 2020 +0900
[SPARK-31292][CORE][SQL] Replace toSet.toSeq with distinct for readability
### What changes were proposed in this pull request?
This PR replaces calls to `toSet.toSeq` with `distinct`.
### Why are the changes needed?
`toSet.toSeq` is intended to make the elements of a sequence unique, but it is a bit verbose. Using `distinct` instead is easier to understand and improves readability.
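For illustration, a minimal sketch (not part of the patch; the sample data is hypothetical) contrasting the two spellings. One behavioral note: `Seq.distinct` keeps the first occurrence of each element in encounter order, whereas the iteration order of the intermediate `Set` in `toSet.toSeq` is unspecified, so `distinct` is also more deterministic:
```scala
object DistinctVsToSetToSeq {
  def main(args: Array[String]): Unit = {
    val hosts = Seq("host1", "host2", "host1", "host3", "host2")

    // Old spelling: round-trip through a Set to drop duplicates.
    val viaSet: Seq[String] = hosts.toSet.toSeq

    // New spelling: one call that states the intent directly.
    val viaDistinct: Seq[String] = hosts.distinct

    // Both keep exactly one copy of each element...
    assert(viaSet.sorted == viaDistinct.sorted)
    // ...but only `distinct` guarantees first-occurrence order.
    assert(viaDistinct == Seq("host1", "host2", "host3"))
  }
}
```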
### Does this PR introduce any user-facing change?
No
### How was this patch tested?
Ran the existing unit tests and found no problems.
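Beyond the existing suites, the equivalence this refactoring relies on can be stated as a property. The sketch below is illustrative only, is not part of this patch, and assumes ScalaCheck on the classpath:
```scala
import org.scalacheck.Prop.forAll
import org.scalacheck.Properties

// Illustrative property test: `distinct` keeps the same set of
// elements as the old `toSet.toSeq` round-trip.
object DistinctEquivalence extends Properties("distinct") {
  property("same elements as toSet.toSeq") = forAll { (xs: List[Int]) =>
    xs.distinct.toSet == xs.toSet
  }
  property("keeps one copy of each element") = forAll { (xs: List[Int]) =>
    xs.distinct.size == xs.toSet.size
  }
}
```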
Closes #28062 from sekikn/SPARK-31292.
Authored-by: Kengo Seki <se...@apache.org>
Signed-off-by: Takeshi Yamamuro <ya...@apache.org>
(cherry picked from commit 0b237bd615da4b2c2b781e72af4ad3a4f2951444)
Signed-off-by: Takeshi Yamamuro <ya...@apache.org>
---
core/src/main/scala/org/apache/spark/resource/ResourceUtils.scala | 2 +-
core/src/main/scala/org/apache/spark/scheduler/ResultTask.scala | 2 +-
core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala | 2 +-
core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala | 2 +-
.../test/scala/org/apache/spark/scheduler/TaskSchedulerImplSuite.scala | 2 +-
sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala | 2 +-
6 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/core/src/main/scala/org/apache/spark/resource/ResourceUtils.scala b/core/src/main/scala/org/apache/spark/resource/ResourceUtils.scala
index 7dd7fc1..994b363 100644
--- a/core/src/main/scala/org/apache/spark/resource/ResourceUtils.scala
+++ b/core/src/main/scala/org/apache/spark/resource/ResourceUtils.scala
@@ -149,7 +149,7 @@ private[spark] object ResourceUtils extends Logging {
def listResourceIds(sparkConf: SparkConf, componentName: String): Seq[ResourceID] = {
sparkConf.getAllWithPrefix(s"$componentName.$RESOURCE_PREFIX.").map { case (key, _) =>
key.substring(0, key.indexOf('.'))
- }.toSet.toSeq.map(name => new ResourceID(componentName, name))
+ }.distinct.map(name => new ResourceID(componentName, name))
}
def parseAllResourceRequests(
diff --git a/core/src/main/scala/org/apache/spark/scheduler/ResultTask.scala b/core/src/main/scala/org/apache/spark/scheduler/ResultTask.scala
index 857c89d..15f2161 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/ResultTask.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/ResultTask.scala
@@ -69,7 +69,7 @@ private[spark] class ResultTask[T, U](
with Serializable {
@transient private[this] val preferredLocs: Seq[TaskLocation] = {
- if (locs == null) Nil else locs.toSet.toSeq
+ if (locs == null) Nil else locs.distinct
}
override def runTask(context: TaskContext): U = {
diff --git a/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala b/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala
index 4c0c30a..a0ba920 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala
@@ -71,7 +71,7 @@ private[spark] class ShuffleMapTask(
}
@transient private val preferredLocs: Seq[TaskLocation] = {
- if (locs == null) Nil else locs.toSet.toSeq
+ if (locs == null) Nil else locs.distinct
}
override def runTask(context: TaskContext): MapStatus = {
diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala
index 6a1d460..ed30473 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala
@@ -408,7 +408,7 @@ private[spark] class TaskSchedulerImpl(
newExecAvail = true
}
}
- val hosts = offers.map(_.host).toSet.toSeq
+ val hosts = offers.map(_.host).distinct
for ((host, Some(rack)) <- hosts.zip(getRacksForHosts(hosts))) {
hostsByRack.getOrElseUpdate(rack, new HashSet[String]()) += host
}
diff --git a/core/src/test/scala/org/apache/spark/scheduler/TaskSchedulerImplSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/TaskSchedulerImplSuite.scala
index e7ecf84..a083cdb 100644
--- a/core/src/test/scala/org/apache/spark/scheduler/TaskSchedulerImplSuite.scala
+++ b/core/src/test/scala/org/apache/spark/scheduler/TaskSchedulerImplSuite.scala
@@ -758,7 +758,7 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B
// that are explicitly blacklisted, plus those that have *any* executors blacklisted.
val nodesForBlacklistedExecutors = offers.filter { offer =>
execBlacklist.contains(offer.executorId)
- }.map(_.host).toSet.toSeq
+ }.map(_.host).distinct
val nodesWithAnyBlacklisting = (nodeBlacklist ++ nodesForBlacklistedExecutors).toSet
// Similarly, figure out which executors have any blacklisting. This means all executors
// that are explicitly blacklisted, plus all executors on nodes that are blacklisted.
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
index d85e23b..b910136 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -2455,7 +2455,7 @@ class Dataset[T] private[sql](
def dropDuplicates(colNames: Seq[String]): Dataset[T] = withTypedPlan {
val resolver = sparkSession.sessionState.analyzer.resolver
val allColumns = queryExecution.analyzed.output
- val groupCols = colNames.toSet.toSeq.flatMap { (colName: String) =>
+ val groupCols = colNames.distinct.flatMap { (colName: String) =>
// It is possible that more than one column has the same name,
// so we call filter instead of find.
val cols = allColumns.filter(col => resolver(col.name, colName))
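As a usage note on the `Dataset` hunk above: `dropDuplicates` de-duplicates its column-name argument (now via `distinct`) before resolving the grouping columns, so repeating a name is harmless. A hypothetical, self-contained example (object name and data are illustrative):
```scala
import org.apache.spark.sql.SparkSession

object DropDuplicatesDemo {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[*]")
      .appName("dropDuplicates demo")
      .getOrCreate()
    import spark.implicits._

    val df = Seq(("a", 1), ("a", 2), ("b", 3)).toDF("key", "value")

    // The repeated "key" is collapsed before the grouping columns are
    // resolved, so this behaves exactly like dropDuplicates(Seq("key")):
    // one row is kept per distinct key.
    df.dropDuplicates(Seq("key", "key")).show()

    spark.stop()
  }
}
```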