Posted to commits@spark.apache.org by gu...@apache.org on 2023/03/22 01:20:43 UTC

[spark] branch master updated: [MINOR][DOCS] Fix typos

This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 7f45285c821 [MINOR][DOCS] Fix typos
7f45285c821 is described below

commit 7f45285c8217429cced2854cdf98512bb3208de4
Author: sudoliyang <su...@gmail.com>
AuthorDate: Wed Mar 22 10:20:13 2023 +0900

    [MINOR][DOCS] Fix typos
    
    ### What changes were proposed in this pull request?
    
    Fix typos in the repo.
    
    ### Why are the changes needed?
    
    Improve readability.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No
    
    ### How was this patch tested?
    
    No tests are needed.
    
    Closes #40494 from sudoliyang/fix_typos.
    
    Authored-by: sudoliyang <su...@gmail.com>
    Signed-off-by: Hyukjin Kwon <gu...@apache.org>
---
 R/pkg/R/functions.R                                                 | 2 +-
 .../src/main/java/org/apache/spark/network/util/NettyUtils.java     | 2 +-
 core/src/main/scala/org/apache/spark/ui/UIUtils.scala               | 2 +-
 docs/sql-data-sources-jdbc.md                                       | 2 +-
 docs/sql-migration-guide.md                                         | 2 +-
 docs/sql-ref-syntax-aux-conf-mgmt-reset.md                          | 2 +-
 .../main/scala/org/apache/spark/ml/classification/LinearSVC.scala   | 2 +-
 .../org/apache/spark/ml/regression/AFTSurvivalRegression.scala      | 2 +-
 .../scala/org/apache/spark/ml/regression/LinearRegression.scala     | 2 +-
 mllib/core/src/main/scala/org/apache/spark/ml/stat/Summarizer.scala | 4 ++--
 python/pyspark/broadcast.py                                         | 2 +-
 python/pyspark/ml/functions.py                                      | 2 +-
 python/pyspark/ml/linalg/__init__.py                                | 2 +-
 python/pyspark/ml/stat.py                                           | 4 ++--
 python/pyspark/mllib/linalg/__init__.py                             | 2 +-
 python/pyspark/rdd.py                                               | 2 +-
 python/pyspark/sql/functions.py                                     | 6 +++---
 python/pyspark/worker.py                                            | 2 +-
 .../main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala   | 2 +-
 .../spark/sql/execution/datasources/orc/OrcColumnStatistics.java    | 2 +-
 .../org/apache/spark/sql/execution/streaming/AsyncCommitLog.scala   | 2 +-
 .../apache/spark/sql/execution/streaming/AsyncOffsetSeqLog.scala    | 2 +-
 22 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index 00ce630bd18..8fc5be17fde 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -259,7 +259,7 @@ NULL
 #' @param finish an unary \code{function} \code{(Column) -> Column} used to
 #'          apply final transformation on the accumulated data in \code{array_aggregate}.
 #' @param comparator an optional binary (\code{(Column, Column) -> Column}) \code{function}
-#'          which is used to compare the elemnts of the array.
+#'          which is used to compare the elements of the array.
 #'          The comparator will take two
 #'          arguments representing two elements of the array. It returns a negative integer,
 #'          0, or a positive integer as the first element is less than, equal to,
diff --git a/common/network-common/src/main/java/org/apache/spark/network/util/NettyUtils.java b/common/network-common/src/main/java/org/apache/spark/network/util/NettyUtils.java
index cc4657efe39..d8f720e98e3 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/util/NettyUtils.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/util/NettyUtils.java
@@ -181,7 +181,7 @@ public class NettyUtils {
   }
 
   /**
-   * ByteBuf allocator prefers to allocate direct ByteBuf iif both Spark allows to create direct
+   * ByteBuf allocator prefers to allocate direct ByteBuf if both Spark allows to create direct
    * ByteBuf and Netty enables directBufferPreferred.
    */
   public static boolean preferDirectBufs(TransportConf conf) {
diff --git a/core/src/main/scala/org/apache/spark/ui/UIUtils.scala b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala
index 4140de09f95..0ce647d12c5 100644
--- a/core/src/main/scala/org/apache/spark/ui/UIUtils.scala
+++ b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala
@@ -141,7 +141,7 @@ private[spark] object UIUtils extends Logging {
    *
    * @param batchTime the batch time to be formatted
    * @param batchInterval the batch interval
-   * @param showYYYYMMSS if showing the `yyyy/MM/dd` part. If it's false, the return value wll be
+   * @param showYYYYMMSS if showing the `yyyy/MM/dd` part. If it's false, the return value will be
    *                     only `HH:mm:ss` or `HH:mm:ss.SSS` depending on `batchInterval`
    * @param timezone only for test
    */
diff --git a/docs/sql-data-sources-jdbc.md b/docs/sql-data-sources-jdbc.md
index b74c5bcaaf7..f96776514c6 100644
--- a/docs/sql-data-sources-jdbc.md
+++ b/docs/sql-data-sources-jdbc.md
@@ -340,7 +340,7 @@ logging into the data sources.
       This option controls whether the kerberos configuration is to be refreshed or not for the JDBC client before
       establishing a new connection. Set to true if you want to refresh the configuration, otherwise set to false.
       The default value is false. Note that if you set this option to true and try to establish multiple connections,
-      a race condition can occur. One possble situation would be like as follows.
+      a race condition can occur. One possible situation would be like as follows.
       <ol>
         <li>refreshKrb5Config flag is set with security context 1</li>
         <li>A JDBC connection provider is used for the corresponding DBMS</li>
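[Annotation, not part of this commit: a minimal PySpark sketch of how `refreshKrb5Config` is passed together with the other JDBC kerberos options described in this doc; the URL, table, keytab path, and principal below are placeholders.]

```python
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()
# All connection details below are placeholders, not values from the docs.
df = (spark.read.format("jdbc")
      .option("url", "jdbc:postgresql://db1.example.com/testdb")
      .option("dbtable", "public.accounts")
      .option("keytab", "/path/to/client.keytab")
      .option("principal", "client@EXAMPLE.COM")
      .option("refreshKrb5Config", "true")  # refresh the krb5 config before connecting
      .load())
```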
diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md
index 04ee3f6ea8f..fe218fb2d37 100644
--- a/docs/sql-migration-guide.md
+++ b/docs/sql-migration-guide.md
@@ -104,7 +104,7 @@ license: |
 
   - In Spark 3.2, the following meta-characters are escaped in the `show()` action. In Spark 3.1 or earlier, the following metacharacters are output as it is.
     * `\n` (new line)
-    * `\r` (carrige ret)
+    * `\r` (carriage ret)
     * `\t` (horizontal tab)
     * `\f` (form feed)
     * `\b` (backspace)
diff --git a/docs/sql-ref-syntax-aux-conf-mgmt-reset.md b/docs/sql-ref-syntax-aux-conf-mgmt-reset.md
index 68df0913b29..d91c767b251 100644
--- a/docs/sql-ref-syntax-aux-conf-mgmt-reset.md
+++ b/docs/sql-ref-syntax-aux-conf-mgmt-reset.md
@@ -47,7 +47,7 @@ RESET configuration_key;
 -- Reset any runtime configurations specific to the current session which were set via the SET command to their default values.
 RESET;
 
--- If you start your application with --conf spark.foo=bar and set spark.foo=foobar in runtime, the example below will restore it to 'bar'. If spark.foo is not specified during starting, the example bellow will remove this config from the SQLConf. It will ignore nonexistent keys.
+-- If you start your application with --conf spark.foo=bar and set spark.foo=foobar in runtime, the example below will restore it to 'bar'. If spark.foo is not specified during starting, the example below will remove this config from the SQLConf. It will ignore nonexistent keys.
 RESET spark.abc;
 ```
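[Annotation, not part of this commit: a PySpark sketch of the behavior described above, using `SparkSession.builder.config` as a stand-in for starting the application with `--conf spark.foo=bar`.]

```python
from pyspark.sql import SparkSession

# stand-in for: spark-submit --conf spark.foo=bar ...
spark = SparkSession.builder.config("spark.foo", "bar").getOrCreate()

spark.conf.set("spark.foo", "foobar")   # override at runtime
spark.sql("RESET spark.foo")            # restore the value given at startup
print(spark.conf.get("spark.foo"))      # -> 'bar'
```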
 
diff --git a/mllib/core/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala b/mllib/core/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala
index 4381b8c05c2..54f3c1e5719 100644
--- a/mllib/core/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala
+++ b/mllib/core/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala
@@ -312,7 +312,7 @@ class LinearSVC @Since("2.2.0") (
 
     val initialSolution = Array.ofDim[Double](numFeaturesPlusIntercept)
     if ($(fitIntercept)) {
-      // orginal `initialSolution` is for problem:
+      // original `initialSolution` is for problem:
       // y = f(w1 * x1 / std_x1, w2 * x2 / std_x2, ..., intercept)
       // we should adjust it to the initial solution for problem:
       // y = f(w1 * (x1 - avg_x1) / std_x1, w2 * (x2 - avg_x2) / std_x2, ..., intercept)
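[Annotation, not part of this commit: the adjustment these comments describe follows from equating the two forms. A numeric sketch of the algebra, under the assumption that only the intercept shifts, by `w * avg / std` per feature, while the coefficients stay unchanged.]

```python
# f(w*x/std + b) == f(w*(x - avg)/std + b_adj)  holds when  b_adj = b + w*avg/std
w, b, avg, std, x = 2.0, 1.0, 3.0, 0.5, 4.0
b_adj = b + w * avg / std
assert abs((w * x / std + b) - (w * (x - avg) / std + b_adj)) < 1e-12
```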
diff --git a/mllib/core/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala b/mllib/core/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala
index 5ac58431f17..3b988fb3cfe 100644
--- a/mllib/core/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala
+++ b/mllib/core/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala
@@ -312,7 +312,7 @@ class AFTSurvivalRegression @Since("1.6.0") (@Since("1.6.0") override val uid: S
     val costFun = new RDDLossFunction(blocks, getAggregatorFunc, None, $(aggregationDepth))
 
     if ($(fitIntercept)) {
-      // orginal `initialSolution` is for problem:
+      // original `initialSolution` is for problem:
       // y = f(w1 * x1 / std_x1, w2 * x2 / std_x2, ..., intercept)
       // we should adjust it to the initial solution for problem:
       // y = f(w1 * (x1 - avg_x1) / std_x1, w2 * (x2 - avg_x2) / std_x2, ..., intercept)
diff --git a/mllib/core/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala b/mllib/core/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
index 09425fe60fe..7295ce6f320 100644
--- a/mllib/core/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
+++ b/mllib/core/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
@@ -559,7 +559,7 @@ class LinearRegression @Since("1.3.0") (@Since("1.3.0") override val uid: String
       .setName(s"$uid: training blocks (blockSizeInMB=$actualBlockSizeInMB)")
 
     if ($(fitIntercept) && $(loss) == Huber) {
-      // orginal `initialSolution` is for problem:
+      // original `initialSolution` is for problem:
       // y = f(w1 * x1 / std_x1, w2 * x2 / std_x2, ..., intercept)
       // we should adjust it to the initial solution for problem:
       // y = f(w1 * (x1 - avg_x1) / std_x1, w2 * (x2 - avg_x2) / std_x2, ..., intercept)
diff --git a/mllib/core/src/main/scala/org/apache/spark/ml/stat/Summarizer.scala b/mllib/core/src/main/scala/org/apache/spark/ml/stat/Summarizer.scala
index 8a124ae4f4c..4697bfbe4b0 100644
--- a/mllib/core/src/main/scala/org/apache/spark/ml/stat/Summarizer.scala
+++ b/mllib/core/src/main/scala/org/apache/spark/ml/stat/Summarizer.scala
@@ -93,8 +93,8 @@ object Summarizer extends Logging {
    * The following metrics are accepted (case sensitive):
    *  - mean: a vector that contains the coefficient-wise mean.
    *  - sum: a vector that contains the coefficient-wise sum.
-   *  - variance: a vector tha contains the coefficient-wise variance.
-   *  - std: a vector tha contains the coefficient-wise standard deviation.
+   *  - variance: a vector that contains the coefficient-wise variance.
+   *  - std: a vector that contains the coefficient-wise standard deviation.
    *  - count: the count of all vectors seen.
    *  - numNonzeros: a vector with the number of non-zeros for each coefficients
    *  - max: the maximum for each coefficient.
diff --git a/python/pyspark/broadcast.py b/python/pyspark/broadcast.py
index c163ad2eb77..b1a9b790af5 100644
--- a/python/pyspark/broadcast.py
+++ b/python/pyspark/broadcast.py
@@ -207,7 +207,7 @@ class Broadcast(Generic[T]):
         >>> b = spark.sparkContext.broadcast([1, 2, 3, 4, 5])
         >>> c = spark.sparkContext.broadcast(1)
 
-        Read the pickled representation of value fron temp file.
+        Read the pickled representation of value from temp file.
 
         >>> with tempfile.TemporaryDirectory() as d:
         ...     path = os.path.join(d, "test.txt")
diff --git a/python/pyspark/ml/functions.py b/python/pyspark/ml/functions.py
index c335934be8d..4ad239cb5f0 100644
--- a/python/pyspark/ml/functions.py
+++ b/python/pyspark/ml/functions.py
@@ -530,7 +530,7 @@ def predict_batch_udf(
         only showing top 5 rows
 
     * Multiple scalar columns
-        Input DataFrame has muliple columns of scalar values.  If the user-provided `predict`
+        Input DataFrame has multiple columns of scalar values.  If the user-provided `predict`
         function expects a single input, then the user must combine the multiple columns into a
         single tensor using `pyspark.sql.functions.array`.
 
diff --git a/python/pyspark/ml/linalg/__init__.py b/python/pyspark/ml/linalg/__init__.py
index d3d2cbdaa0a..a5a849e4aa3 100644
--- a/python/pyspark/ml/linalg/__init__.py
+++ b/python/pyspark/ml/linalg/__init__.py
@@ -871,7 +871,7 @@ class Vectors:
     Notes
     -----
     Dense vectors are simply represented as NumPy array objects,
-    so there is no need to covert them for use in MLlib. For sparse vectors,
+    so there is no need to convert them for use in MLlib. For sparse vectors,
     the factory methods in this class create an MLlib-compatible type, or users
     can pass in SciPy's `scipy.sparse` column vectors.
     """
diff --git a/python/pyspark/ml/stat.py b/python/pyspark/ml/stat.py
index 704d2dc9baa..e0c4d05a32a 100644
--- a/python/pyspark/ml/stat.py
+++ b/python/pyspark/ml/stat.py
@@ -401,8 +401,8 @@ class Summarizer:
         The following metrics are accepted (case sensitive):
          - mean: a vector that contains the coefficient-wise mean.
          - sum: a vector that contains the coefficient-wise sum.
-         - variance: a vector tha contains the coefficient-wise variance.
-         - std: a vector tha contains the coefficient-wise standard deviation.
+         - variance: a vector that contains the coefficient-wise variance.
+         - std: a vector that contains the coefficient-wise standard deviation.
          - count: the count of all vectors seen.
          - numNonzeros: a vector with the number of non-zeros for each coefficients
          - max: the maximum for each coefficient.
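[Annotation, not part of this commit: a short usage sketch of the builder these docs describe.]

```python
from pyspark.sql import SparkSession
from pyspark.ml.linalg import Vectors
from pyspark.ml.stat import Summarizer

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame(
    [(Vectors.dense(1.0, 2.0),), (Vectors.dense(3.0, 6.0),)], ["features"])

summarizer = Summarizer.metrics("mean", "variance", "std", "count")
df.select(summarizer.summary(df.features)).show(truncate=False)
# individual metrics are also exposed directly, e.g. Summarizer.mean(df.features)
```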
diff --git a/python/pyspark/mllib/linalg/__init__.py b/python/pyspark/mllib/linalg/__init__.py
index 20566f569bd..4dcd0c97d89 100644
--- a/python/pyspark/mllib/linalg/__init__.py
+++ b/python/pyspark/mllib/linalg/__init__.py
@@ -991,7 +991,7 @@ class Vectors:
     Notes
     -----
     Dense vectors are simply represented as NumPy array objects,
-    so there is no need to covert them for use in MLlib. For sparse vectors,
+    so there is no need to convert them for use in MLlib. For sparse vectors,
     the factory methods in this class create an MLlib-compatible type, or users
     can pass in SciPy's `scipy.sparse` column vectors.
     """
diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py
index 053727263ac..06e4facc962 100644
--- a/python/pyspark/rdd.py
+++ b/python/pyspark/rdd.py
@@ -1725,7 +1725,7 @@ class RDD(Generic[T_co]):
         Parameters
         ----------
         f : function
-            a function applyed to each element
+            a function applied to each element
 
         See Also
         --------
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index 1f02be3ad21..abe92c4adc0 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -6391,7 +6391,7 @@ def instr(str: "ColumnOrName", substr: str) -> Column:
     Returns
     -------
     :class:`~pyspark.sql.Column`
-        location of the first occurence of the substring as integer.
+        location of the first occurrence of the substring as integer.
 
     Examples
     --------
@@ -6575,7 +6575,7 @@ def substring_index(str: "ColumnOrName", delim: str, count: int) -> Column:
     delim : str
         delimiter of values.
     count : int
-        number of occurences.
+        number of occurrences.
 
     Returns
     -------
@@ -7882,7 +7882,7 @@ def array_compact(col: "ColumnOrName") -> Column:
     Returns
     -------
     :class:`~pyspark.sql.Column`
-        an array by exluding the null values.
+        an array by excluding the null values.
 
     Examples
     --------
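[Annotation, not part of this commit and not the docstring's own example section: a combined sketch of the three functions touched in the hunks above, assuming Spark 3.4+ for `array_compact`.]

```python
from pyspark.sql import SparkSession
from pyspark.sql import functions as F

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([("a.b.c.d", [1, None, 2])], ["s", "arr"])
df.select(
    F.instr("s", "b").alias("pos"),                  # 1-based position of the first occurrence -> 3
    F.substring_index("s", ".", 2).alias("prefix"),  # text before the 2nd '.' -> 'a.b'
    F.array_compact("arr").alias("no_nulls"),        # null values excluded -> [1, 2]
).show()
```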
diff --git a/python/pyspark/worker.py b/python/pyspark/worker.py
index 7d6af5310a9..c9d9ea866a8 100644
--- a/python/pyspark/worker.py
+++ b/python/pyspark/worker.py
@@ -904,7 +904,7 @@ if __name__ == "__main__":
     java_port = int(os.environ["PYTHON_WORKER_FACTORY_PORT"])
     auth_secret = os.environ["PYTHON_WORKER_FACTORY_SECRET"]
     (sock_file, _) = local_connect_and_auth(java_port, auth_secret)
-    # TODO: Remove thw following two lines and use `Process.pid()` when we drop JDK 8.
+    # TODO: Remove the following two lines and use `Process.pid()` when we drop JDK 8.
     write_int(os.getpid(), sock_file)
     sock_file.flush()
     main(sock_file, sock_file)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala
index 7468d895cff..75802de1a66 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala
@@ -885,7 +885,7 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] extends Product with Tre
   private lazy val allChildren: Set[TreeNode[_]] = (children ++ innerChildren).toSet[TreeNode[_]]
 
   private def redactMapString[K, V](map: Map[K, V], maxFields: Int): List[String] = {
-    // For security reason, redact the map value if the key is in centain patterns
+    // For security reason, redact the map value if the key is in certain patterns
     val redactedMap = SQLConf.get.redactOptions(map.toMap)
     // construct the redacted map as strings of the format "key=value"
     val keyValuePairs = redactedMap.toSeq.map { item =>
diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/orc/OrcColumnStatistics.java b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/orc/OrcColumnStatistics.java
index 8adb9e8ca20..683503cf8f9 100644
--- a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/orc/OrcColumnStatistics.java
+++ b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/orc/OrcColumnStatistics.java
@@ -26,7 +26,7 @@ import java.util.List;
  * Columns statistics interface wrapping ORC {@link ColumnStatistics}s.
  *
  * Because ORC {@link ColumnStatistics}s are stored as an flatten array in ORC file footer,
- * this class is used to covert ORC {@link ColumnStatistics}s from array to nested tree structure,
+ * this class is used to convert ORC {@link ColumnStatistics}s from array to nested tree structure,
  * according to data types. The flatten array stores all data types (including nested types) in
  * tree pre-ordering. This is used for aggregate push down in ORC.
  *
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/AsyncCommitLog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/AsyncCommitLog.scala
index 0f9c13244a4..e9ad8bed27c 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/AsyncCommitLog.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/AsyncCommitLog.scala
@@ -94,7 +94,7 @@ class AsyncCommitLog(sparkSession: SparkSession, path: String, executorService:
    * @param batchId id of batch to write
    * @param fn serialization function
    * @return CompletableFuture that contains a boolean do
-   *         indicate whether the write was successfuly or not.
+   *         indicate whether the write was successfully or not.
    *         Future can also be completed exceptionally to indicate write errors.
    */
   private def addNewBatchByStreamAsync(batchId: Long)(
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/AsyncOffsetSeqLog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/AsyncOffsetSeqLog.scala
index dfab8ec8b3b..4dd49951436 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/AsyncOffsetSeqLog.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/AsyncOffsetSeqLog.scala
@@ -128,7 +128,7 @@ class AsyncOffsetSeqLog(
    * @param batchId id of batch to write
    * @param fn serialization function
    * @return CompletableFuture that contains a boolean do
-   *         indicate whether the write was successfuly or not.
+   *         indicate whether the write was successfully or not.
    *         Future can also be completed exceptionally to indicate write errors.
    */
   private def addNewBatchByStreamAsync(batchId: Long)(

