Posted to reviews@spark.apache.org by GitBox <gi...@apache.org> on 2018/12/17 08:52:24 UTC

[GitHub] zhangzg187 closed pull request #23335: Revert 5 master

zhangzg187 closed pull request #23335: Revert 5 master
URL: https://github.com/apache/spark/pull/23335

As this is a foreign pull request (from a fork), the diff is reproduced below for the sake of provenance, since GitHub does not display it otherwise:

diff --git a/.gitignore b/.gitignore
index e4c44d0590d59..39085904e324c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -76,7 +76,6 @@ streaming-tests.log
 target/
 unit-tests.log
 work/
-docs/.jekyll-metadata
 
 # For Hive
 TempStatsStore/
diff --git a/LICENSE b/LICENSE
index 820f14dbdeed0..c2b0d72663b55 100644
--- a/LICENSE
+++ b/LICENSE
@@ -263,7 +263,7 @@ The text of each license is also included at licenses/LICENSE-[project].txt.
      (New BSD license) Protocol Buffer Java API (org.spark-project.protobuf:protobuf-java:2.4.1-shaded - http://code.google.com/p/protobuf)
      (The BSD License) Fortran to Java ARPACK (net.sourceforge.f2j:arpack_combined_all:0.1 - http://f2j.sourceforge.net)
      (The BSD License) xmlenc Library (xmlenc:xmlenc:0.52 - http://xmlenc.sourceforge.net)
-     (The New BSD License) Py4J (net.sf.py4j:py4j:0.10.7 - http://py4j.sourceforge.net/)
+     (The New BSD License) Py4J (net.sf.py4j:py4j:0.10.6 - http://py4j.sourceforge.net/)
      (Two-clause BSD-style license) JUnit-Interface (com.novocode:junit-interface:0.10 - http://github.com/szeiger/junit-interface/)
      (BSD licence) sbt and sbt-launch-lib.bash
      (BSD 3 Clause) d3.min.js (https://github.com/mbostock/d3/blob/master/LICENSE)
diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION
index f52d785e05cdd..855eb5bf77f16 100644
--- a/R/pkg/DESCRIPTION
+++ b/R/pkg/DESCRIPTION
@@ -13,7 +13,6 @@ Authors@R: c(person("Shivaram", "Venkataraman", role = c("aut", "cre"),
 License: Apache License (== 2.0)
 URL: http://www.apache.org/ http://spark.apache.org/
 BugReports: http://spark.apache.org/contributing.html
-SystemRequirements: Java (== 8)
 Depends:
     R (>= 3.0),
     methods
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index 73a33af4dd48b..c51eb0f39c4b1 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -151,7 +151,6 @@ exportMethods("arrange",
               "registerTempTable",
               "rename",
               "repartition",
-              "repartitionByRange",
               "rollup",
               "sample",
               "sample_frac",
@@ -201,12 +200,6 @@ exportMethods("%<=>%",
               "approxCountDistinct",
               "approxQuantile",
               "array_contains",
-              "array_max",
-              "array_min",
-              "array_position",
-              "array_repeat",
-              "array_sort",
-              "arrays_overlap",
               "asc",
               "ascii",
               "asin",
@@ -251,7 +244,6 @@ exportMethods("%<=>%",
               "decode",
               "dense_rank",
               "desc",
-              "element_at",
               "encode",
               "endsWith",
               "exp",
@@ -261,7 +253,6 @@ exportMethods("%<=>%",
               "expr",
               "factorial",
               "first",
-              "flatten",
               "floor",
               "format_number",
               "format_string",
@@ -304,7 +295,6 @@ exportMethods("%<=>%",
               "lower",
               "lpad",
               "ltrim",
-              "map_entries",
               "map_keys",
               "map_values",
               "max",
@@ -355,7 +345,6 @@ exportMethods("%<=>%",
               "sinh",
               "size",
               "skewness",
-              "slice",
               "sort_array",
               "soundex",
               "spark_partition_id",
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 70eb7a874b75c..41c3c3a89fa72 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -36,6 +36,7 @@ setOldClass("structType")
 #' @slot sdf A Java object reference to the backing Scala DataFrame
 #' @seealso \link{createDataFrame}, \link{read.json}, \link{table}
 #' @seealso \url{https://spark.apache.org/docs/latest/sparkr.html#sparkr-dataframes}
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -76,6 +77,7 @@ setWriteMode <- function(write, mode) {
   write
 }
 
+#' @export
 #' @param sdf A Java object reference to the backing Scala DataFrame
 #' @param isCached TRUE if the SparkDataFrame is cached
 #' @noRd
@@ -95,6 +97,7 @@ dataFrame <- function(sdf, isCached = FALSE) {
 #' @rdname printSchema
 #' @name printSchema
 #' @aliases printSchema,SparkDataFrame-method
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -120,6 +123,7 @@ setMethod("printSchema",
 #' @rdname schema
 #' @name schema
 #' @aliases schema,SparkDataFrame-method
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -142,6 +146,7 @@ setMethod("schema",
 #' @aliases explain,SparkDataFrame-method
 #' @rdname explain
 #' @name explain
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -173,6 +178,7 @@ setMethod("explain",
 #' @rdname isLocal
 #' @name isLocal
 #' @aliases isLocal,SparkDataFrame-method
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -203,6 +209,7 @@ setMethod("isLocal",
 #' @aliases showDF,SparkDataFrame-method
 #' @rdname showDF
 #' @name showDF
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -234,6 +241,7 @@ setMethod("showDF",
 #' @rdname show
 #' @aliases show,SparkDataFrame-method
 #' @name show
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -261,6 +269,7 @@ setMethod("show", "SparkDataFrame",
 #' @rdname dtypes
 #' @name dtypes
 #' @aliases dtypes,SparkDataFrame-method
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -287,6 +296,7 @@ setMethod("dtypes",
 #' @rdname columns
 #' @name columns
 #' @aliases columns,SparkDataFrame-method
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -378,6 +388,7 @@ setMethod("colnames<-",
 #' @aliases coltypes,SparkDataFrame-method
 #' @name coltypes
 #' @family SparkDataFrame functions
+#' @export
 #' @examples
 #'\dontrun{
 #' irisDF <- createDataFrame(iris)
@@ -434,6 +445,7 @@ setMethod("coltypes",
 #' @rdname coltypes
 #' @name coltypes<-
 #' @aliases coltypes<-,SparkDataFrame,character-method
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -482,6 +494,7 @@ setMethod("coltypes<-",
 #' @rdname createOrReplaceTempView
 #' @name createOrReplaceTempView
 #' @aliases createOrReplaceTempView,SparkDataFrame,character-method
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -508,6 +521,7 @@ setMethod("createOrReplaceTempView",
 #' @rdname registerTempTable-deprecated
 #' @name registerTempTable
 #' @aliases registerTempTable,SparkDataFrame,character-method
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -538,6 +552,7 @@ setMethod("registerTempTable",
 #' @rdname insertInto
 #' @name insertInto
 #' @aliases insertInto,SparkDataFrame,character-method
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -565,6 +580,7 @@ setMethod("insertInto",
 #' @aliases cache,SparkDataFrame-method
 #' @rdname cache
 #' @name cache
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -595,6 +611,7 @@ setMethod("cache",
 #' @rdname persist
 #' @name persist
 #' @aliases persist,SparkDataFrame,character-method
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -624,6 +641,7 @@ setMethod("persist",
 #' @rdname unpersist
 #' @aliases unpersist,SparkDataFrame-method
 #' @name unpersist
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -651,6 +669,7 @@ setMethod("unpersist",
 #' @rdname storageLevel
 #' @aliases storageLevel,SparkDataFrame-method
 #' @name storageLevel
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -687,7 +706,8 @@ setMethod("storageLevel",
 #' @rdname coalesce
 #' @name coalesce
 #' @aliases coalesce,SparkDataFrame-method
-#' @seealso \link{repartition}, \link{repartitionByRange}
+#' @seealso \link{repartition}
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -723,7 +743,8 @@ setMethod("coalesce",
 #' @rdname repartition
 #' @name repartition
 #' @aliases repartition,SparkDataFrame-method
-#' @seealso \link{coalesce}, \link{repartitionByRange}
+#' @seealso \link{coalesce}
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -759,67 +780,6 @@ setMethod("repartition",
             dataFrame(sdf)
           })
 
-
-#' Repartition by range
-#'
-#' The following options for repartition by range are possible:
-#' \itemize{
-#'  \item{1.} {Return a new SparkDataFrame range partitioned by
-#'                      the given columns into \code{numPartitions}.}
-#'  \item{2.} {Return a new SparkDataFrame range partitioned by the given column(s),
-#'                      using \code{spark.sql.shuffle.partitions} as number of partitions.}
-#'}
-#'
-#' @param x a SparkDataFrame.
-#' @param numPartitions the number of partitions to use.
-#' @param col the column by which the range partitioning will be performed.
-#' @param ... additional column(s) to be used in the range partitioning.
-#'
-#' @family SparkDataFrame functions
-#' @rdname repartitionByRange
-#' @name repartitionByRange
-#' @aliases repartitionByRange,SparkDataFrame-method
-#' @seealso \link{repartition}, \link{coalesce}
-#' @examples
-#'\dontrun{
-#' sparkR.session()
-#' path <- "path/to/file.json"
-#' df <- read.json(path)
-#' newDF <- repartitionByRange(df, col = df$col1, df$col2)
-#' newDF <- repartitionByRange(df, 3L, col = df$col1, df$col2)
-#'}
-#' @note repartitionByRange since 2.4.0
-setMethod("repartitionByRange",
-          signature(x = "SparkDataFrame"),
-          function(x, numPartitions = NULL, col = NULL, ...) {
-            if (!is.null(numPartitions) && !is.null(col)) {
-              # number of partitions and columns both are specified
-              if (is.numeric(numPartitions) && class(col) == "Column") {
-                cols <- list(col, ...)
-                jcol <- lapply(cols, function(c) { c@jc })
-                sdf <- callJMethod(x@sdf, "repartitionByRange", numToInt(numPartitions), jcol)
-              } else {
-                stop(paste("numPartitions and col must be numeric and Column; however, got",
-                           class(numPartitions), "and", class(col)))
-              }
-            } else if (!is.null(col))  {
-              # only columns are specified
-              if (class(col) == "Column") {
-                cols <- list(col, ...)
-                jcol <- lapply(cols, function(c) { c@jc })
-                sdf <- callJMethod(x@sdf, "repartitionByRange", jcol)
-              } else {
-                stop(paste("col must be Column; however, got", class(col)))
-              }
-            } else if (!is.null(numPartitions)) {
-              # only numPartitions is specified
-              stop("At least one partition-by column must be specified.")
-            } else {
-              stop("Please, specify a column(s) or the number of partitions with a column(s)")
-            }
-            dataFrame(sdf)
-          })
-
 #' toJSON
 #'
 #' Converts a SparkDataFrame into a SparkDataFrame of JSON string.
@@ -833,6 +793,7 @@ setMethod("repartitionByRange",
 #' @rdname toJSON
 #' @name toJSON
 #' @aliases toJSON,SparkDataFrame-method
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -865,6 +826,7 @@ setMethod("toJSON",
 #' @rdname write.json
 #' @name write.json
 #' @aliases write.json,SparkDataFrame,character-method
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -896,6 +858,7 @@ setMethod("write.json",
 #' @aliases write.orc,SparkDataFrame,character-method
 #' @rdname write.orc
 #' @name write.orc
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -927,6 +890,7 @@ setMethod("write.orc",
 #' @rdname write.parquet
 #' @name write.parquet
 #' @aliases write.parquet,SparkDataFrame,character-method
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -947,6 +911,7 @@ setMethod("write.parquet",
 #' @rdname write.parquet
 #' @name saveAsParquetFile
 #' @aliases saveAsParquetFile,SparkDataFrame,character-method
+#' @export
 #' @note saveAsParquetFile since 1.4.0
 setMethod("saveAsParquetFile",
           signature(x = "SparkDataFrame", path = "character"),
@@ -971,6 +936,7 @@ setMethod("saveAsParquetFile",
 #' @aliases write.text,SparkDataFrame,character-method
 #' @rdname write.text
 #' @name write.text
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -997,6 +963,7 @@ setMethod("write.text",
 #' @aliases distinct,SparkDataFrame-method
 #' @rdname distinct
 #' @name distinct
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -1037,6 +1004,7 @@ setMethod("unique",
 #' @aliases sample,SparkDataFrame-method
 #' @rdname sample
 #' @name sample
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -1093,6 +1061,7 @@ setMethod("sample_frac",
 #' @rdname nrow
 #' @name nrow
 #' @aliases count,SparkDataFrame-method
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -1125,6 +1094,7 @@ setMethod("nrow",
 #' @rdname ncol
 #' @name ncol
 #' @aliases ncol,SparkDataFrame-method
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -1148,6 +1118,7 @@ setMethod("ncol",
 #' @rdname dim
 #' @aliases dim,SparkDataFrame-method
 #' @name dim
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -1173,6 +1144,7 @@ setMethod("dim",
 #' @rdname collect
 #' @aliases collect,SparkDataFrame-method
 #' @name collect
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -1257,6 +1229,7 @@ setMethod("collect",
 #' @rdname limit
 #' @name limit
 #' @aliases limit,SparkDataFrame,numeric-method
+#' @export
 #' @examples
 #' \dontrun{
 #' sparkR.session()
@@ -1280,6 +1253,7 @@ setMethod("limit",
 #' @rdname take
 #' @name take
 #' @aliases take,SparkDataFrame,numeric-method
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -1308,6 +1282,7 @@ setMethod("take",
 #' @aliases head,SparkDataFrame-method
 #' @rdname head
 #' @name head
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -1332,6 +1307,7 @@ setMethod("head",
 #' @aliases first,SparkDataFrame-method
 #' @rdname first
 #' @name first
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -1383,6 +1359,7 @@ setMethod("toRDD",
 #' @aliases groupBy,SparkDataFrame-method
 #' @rdname groupBy
 #' @name groupBy
+#' @export
 #' @examples
 #' \dontrun{
 #'   # Compute the average for all numeric columns grouped by department.
@@ -1424,6 +1401,7 @@ setMethod("group_by",
 #' @aliases agg,SparkDataFrame-method
 #' @rdname summarize
 #' @name agg
+#' @export
 #' @note agg since 1.4.0
 setMethod("agg",
           signature(x = "SparkDataFrame"),
@@ -1482,6 +1460,7 @@ setClassUnion("characterOrstructType", c("character", "structType"))
 #' @aliases dapply,SparkDataFrame,function,characterOrstructType-method
 #' @name dapply
 #' @seealso \link{dapplyCollect}
+#' @export
 #' @examples
 #' \dontrun{
 #'   df <- createDataFrame(iris)
@@ -1540,6 +1519,7 @@ setMethod("dapply",
 #' @aliases dapplyCollect,SparkDataFrame,function-method
 #' @name dapplyCollect
 #' @seealso \link{dapply}
+#' @export
 #' @examples
 #' \dontrun{
 #'   df <- createDataFrame(iris)
@@ -1596,6 +1576,7 @@ setMethod("dapplyCollect",
 #' @rdname gapply
 #' @name gapply
 #' @seealso \link{gapplyCollect}
+#' @export
 #' @examples
 #'
 #' \dontrun{
@@ -1692,6 +1673,7 @@ setMethod("gapply",
 #' @rdname gapplyCollect
 #' @name gapplyCollect
 #' @seealso \link{gapply}
+#' @export
 #' @examples
 #'
 #' \dontrun{
@@ -1965,6 +1947,7 @@ setMethod("[", signature(x = "SparkDataFrame"),
 #' @param ... currently not used.
 #' @return A new SparkDataFrame containing only the rows that meet the condition with selected
 #'         columns.
+#' @export
 #' @family SparkDataFrame functions
 #' @aliases subset,SparkDataFrame-method
 #' @seealso \link{withColumn}
@@ -2009,6 +1992,7 @@ setMethod("subset", signature(x = "SparkDataFrame"),
 #'            If more than one column is assigned in \code{col}, \code{...}
 #'            should be left empty.
 #' @return A new SparkDataFrame with selected columns.
+#' @export
 #' @family SparkDataFrame functions
 #' @rdname select
 #' @aliases select,SparkDataFrame,character-method
@@ -2040,6 +2024,7 @@ setMethod("select", signature(x = "SparkDataFrame", col = "character"),
           })
 
 #' @rdname select
+#' @export
 #' @aliases select,SparkDataFrame,Column-method
 #' @note select(SparkDataFrame, Column) since 1.4.0
 setMethod("select", signature(x = "SparkDataFrame", col = "Column"),
@@ -2052,6 +2037,7 @@ setMethod("select", signature(x = "SparkDataFrame", col = "Column"),
           })
 
 #' @rdname select
+#' @export
 #' @aliases select,SparkDataFrame,list-method
 #' @note select(SparkDataFrame, list) since 1.4.0
 setMethod("select",
@@ -2080,6 +2066,7 @@ setMethod("select",
 #' @aliases selectExpr,SparkDataFrame,character-method
 #' @rdname selectExpr
 #' @name selectExpr
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -2111,6 +2098,7 @@ setMethod("selectExpr",
 #' @rdname withColumn
 #' @name withColumn
 #' @seealso \link{rename} \link{mutate} \link{subset}
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -2149,6 +2137,7 @@ setMethod("withColumn",
 #' @rdname mutate
 #' @name mutate
 #' @seealso \link{rename} \link{withColumn}
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -2219,6 +2208,7 @@ setMethod("mutate",
           })
 
 #' @param _data a SparkDataFrame.
+#' @export
 #' @rdname mutate
 #' @aliases transform,SparkDataFrame-method
 #' @name transform
@@ -2242,6 +2232,7 @@ setMethod("transform",
 #' @name withColumnRenamed
 #' @aliases withColumnRenamed,SparkDataFrame,character,character-method
 #' @seealso \link{mutate}
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -2267,6 +2258,7 @@ setMethod("withColumnRenamed",
 #' @rdname rename
 #' @name rename
 #' @aliases rename,SparkDataFrame-method
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -2297,8 +2289,6 @@ setMethod("rename",
 
 setClassUnion("characterOrColumn", c("character", "Column"))
 
-setClassUnion("numericOrColumn", c("numeric", "Column"))
-
 #' Arrange Rows by Variables
 #'
 #' Sort a SparkDataFrame by the specified column(s).
@@ -2314,6 +2304,7 @@ setClassUnion("numericOrColumn", c("numeric", "Column"))
 #' @aliases arrange,SparkDataFrame,Column-method
 #' @rdname arrange
 #' @name arrange
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -2344,6 +2335,7 @@ setMethod("arrange",
 #' @rdname arrange
 #' @name arrange
 #' @aliases arrange,SparkDataFrame,character-method
+#' @export
 #' @note arrange(SparkDataFrame, character) since 1.4.0
 setMethod("arrange",
           signature(x = "SparkDataFrame", col = "character"),
@@ -2376,6 +2368,7 @@ setMethod("arrange",
 
 #' @rdname arrange
 #' @aliases orderBy,SparkDataFrame,characterOrColumn-method
+#' @export
 #' @note orderBy(SparkDataFrame, characterOrColumn) since 1.4.0
 setMethod("orderBy",
           signature(x = "SparkDataFrame", col = "characterOrColumn"),
@@ -2396,6 +2389,7 @@ setMethod("orderBy",
 #' @rdname filter
 #' @name filter
 #' @family subsetting functions
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -2438,6 +2432,7 @@ setMethod("where",
 #' @aliases dropDuplicates,SparkDataFrame-method
 #' @rdname dropDuplicates
 #' @name dropDuplicates
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -2486,6 +2481,7 @@ setMethod("dropDuplicates",
 #' @rdname join
 #' @name join
 #' @seealso \link{merge} \link{crossJoin}
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -2537,6 +2533,7 @@ setMethod("join",
 #' @rdname crossJoin
 #' @name crossJoin
 #' @seealso \link{merge} \link{join}
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -2584,6 +2581,7 @@ setMethod("crossJoin",
 #' @aliases merge,SparkDataFrame,SparkDataFrame-method
 #' @rdname merge
 #' @seealso \link{join} \link{crossJoin}
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -2723,6 +2721,7 @@ genAliasesForIntersectedCols <- function(x, intersectedColNames, suffix) {
 #' @name union
 #' @aliases union,SparkDataFrame,SparkDataFrame-method
 #' @seealso \link{rbind} \link{unionByName}
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -2743,6 +2742,7 @@ setMethod("union",
 #' @rdname union
 #' @name unionAll
 #' @aliases unionAll,SparkDataFrame,SparkDataFrame-method
+#' @export
 #' @note unionAll since 1.4.0
 setMethod("unionAll",
           signature(x = "SparkDataFrame", y = "SparkDataFrame"),
@@ -2769,6 +2769,7 @@ setMethod("unionAll",
 #' @name unionByName
 #' @aliases unionByName,SparkDataFrame,SparkDataFrame-method
 #' @seealso \link{rbind} \link{union}
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -2801,6 +2802,7 @@ setMethod("unionByName",
 #' @rdname rbind
 #' @name rbind
 #' @seealso \link{union} \link{unionByName}
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -2833,6 +2835,7 @@ setMethod("rbind",
 #' @aliases intersect,SparkDataFrame,SparkDataFrame-method
 #' @rdname intersect
 #' @name intersect
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -2860,6 +2863,7 @@ setMethod("intersect",
 #' @aliases except,SparkDataFrame,SparkDataFrame-method
 #' @rdname except
 #' @name except
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -2868,6 +2872,7 @@ setMethod("intersect",
 #' exceptDF <- except(df, df2)
 #' }
 #' @rdname except
+#' @export
 #' @note except since 1.4.0
 setMethod("except",
           signature(x = "SparkDataFrame", y = "SparkDataFrame"),
@@ -2904,6 +2909,7 @@ setMethod("except",
 #' @aliases write.df,SparkDataFrame-method
 #' @rdname write.df
 #' @name write.df
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -2938,6 +2944,7 @@ setMethod("write.df",
 #' @rdname write.df
 #' @name saveDF
 #' @aliases saveDF,SparkDataFrame,character-method
+#' @export
 #' @note saveDF since 1.4.0
 setMethod("saveDF",
           signature(df = "SparkDataFrame", path = "character"),
@@ -2971,6 +2978,7 @@ setMethod("saveDF",
 #' @aliases saveAsTable,SparkDataFrame,character-method
 #' @rdname saveAsTable
 #' @name saveAsTable
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -3007,6 +3015,7 @@ setMethod("saveAsTable",
 #' @aliases describe,SparkDataFrame,character-method describe,SparkDataFrame,ANY-method
 #' @rdname describe
 #' @name describe
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -3062,6 +3071,7 @@ setMethod("describe",
 #' @rdname summary
 #' @name summary
 #' @aliases summary,SparkDataFrame-method
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -3107,6 +3117,7 @@ setMethod("summary",
 #' @rdname nafunctions
 #' @aliases dropna,SparkDataFrame-method
 #' @name dropna
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -3137,6 +3148,7 @@ setMethod("dropna",
 #' @rdname nafunctions
 #' @name na.omit
 #' @aliases na.omit,SparkDataFrame-method
+#' @export
 #' @note na.omit since 1.5.0
 setMethod("na.omit",
           signature(object = "SparkDataFrame"),
@@ -3156,6 +3168,7 @@ setMethod("na.omit",
 #' @rdname nafunctions
 #' @name fillna
 #' @aliases fillna,SparkDataFrame-method
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -3386,6 +3399,7 @@ setMethod("str",
 #' @rdname drop
 #' @name drop
 #' @aliases drop,SparkDataFrame-method
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -3413,6 +3427,7 @@ setMethod("drop",
 #' @name drop
 #' @rdname drop
 #' @aliases drop,ANY-method
+#' @export
 setMethod("drop",
           signature(x = "ANY"),
           function(x) {
@@ -3431,6 +3446,7 @@ setMethod("drop",
 #' @rdname histogram
 #' @aliases histogram,SparkDataFrame,characterOrColumn-method
 #' @family SparkDataFrame functions
+#' @export
 #' @examples
 #' \dontrun{
 #'
@@ -3566,6 +3582,7 @@ setMethod("histogram",
 #' @rdname write.jdbc
 #' @name write.jdbc
 #' @aliases write.jdbc,SparkDataFrame,character,character-method
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -3594,6 +3611,7 @@ setMethod("write.jdbc",
 #' @aliases randomSplit,SparkDataFrame,numeric-method
 #' @rdname randomSplit
 #' @name randomSplit
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -3627,6 +3645,7 @@ setMethod("randomSplit",
 #' @aliases getNumPartitions,SparkDataFrame-method
 #' @rdname getNumPartitions
 #' @name getNumPartitions
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -3653,6 +3672,7 @@ setMethod("getNumPartitions",
 #' @rdname isStreaming
 #' @name isStreaming
 #' @seealso \link{read.stream} \link{write.stream}
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -3706,6 +3726,7 @@ setMethod("isStreaming",
 #' @aliases write.stream,SparkDataFrame-method
 #' @rdname write.stream
 #' @name write.stream
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -3798,6 +3819,7 @@ setMethod("write.stream",
 #' @rdname checkpoint
 #' @name checkpoint
 #' @seealso \link{setCheckpointDir}
+#' @export
 #' @examples
 #'\dontrun{
 #' setCheckpointDir("/checkpoint")
@@ -3825,6 +3847,7 @@ setMethod("checkpoint",
 #' @aliases localCheckpoint,SparkDataFrame-method
 #' @rdname localCheckpoint
 #' @name localCheckpoint
+#' @export
 #' @examples
 #'\dontrun{
 #' df <- localCheckpoint(df)
@@ -3851,6 +3874,7 @@ setMethod("localCheckpoint",
 #' @aliases cube,SparkDataFrame-method
 #' @rdname cube
 #' @name cube
+#' @export
 #' @examples
 #' \dontrun{
 #' df <- createDataFrame(mtcars)
@@ -3885,6 +3909,7 @@ setMethod("cube",
 #' @aliases rollup,SparkDataFrame-method
 #' @rdname rollup
 #' @name rollup
+#' @export
 #' @examples
 #'\dontrun{
 #' df <- createDataFrame(mtcars)
@@ -3917,6 +3942,7 @@ setMethod("rollup",
 #' @aliases hint,SparkDataFrame,character-method
 #' @rdname hint
 #' @name hint
+#' @export
 #' @examples
 #' \dontrun{
 #' df <- createDataFrame(mtcars)
@@ -3940,6 +3966,7 @@ setMethod("hint",
 #' @family SparkDataFrame functions
 #' @rdname alias
 #' @name alias
+#' @export
 #' @examples
 #' \dontrun{
 #' df <- alias(createDataFrame(mtcars), "mtcars")
@@ -3970,6 +3997,7 @@ setMethod("alias",
 #' @family SparkDataFrame functions
 #' @rdname broadcast
 #' @name broadcast
+#' @export
 #' @examples
 #' \dontrun{
 #' df <- createDataFrame(mtcars)
@@ -4013,6 +4041,7 @@ setMethod("broadcast",
 #' @family SparkDataFrame functions
 #' @rdname withWatermark
 #' @name withWatermark
+#' @export
 #' @examples
 #' \dontrun{
 #' sparkR.session()
diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R
index 429dd5d565492..9d0a2d5e074e4 100644
--- a/R/pkg/R/SQLContext.R
+++ b/R/pkg/R/SQLContext.R
@@ -123,6 +123,7 @@ infer_type <- function(x) {
 #' @return a list of config values with keys as their names
 #' @rdname sparkR.conf
 #' @name sparkR.conf
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -162,6 +163,7 @@ sparkR.conf <- function(key, defaultValue) {
 #' @return a character string of the Spark version
 #' @rdname sparkR.version
 #' @name sparkR.version
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -189,6 +191,7 @@ getDefaultSqlSource <- function() {
 #'        limited by length of the list or number of rows of the data.frame
 #' @return A SparkDataFrame.
 #' @rdname createDataFrame
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -291,6 +294,7 @@ createDataFrame <- function(x, ...) {
 
 #' @rdname createDataFrame
 #' @aliases createDataFrame
+#' @export
 #' @method as.DataFrame default
 #' @note as.DataFrame since 1.6.0
 as.DataFrame.default <- function(data, schema = NULL, samplingRatio = 1.0, numPartitions = NULL) {
@@ -300,6 +304,7 @@ as.DataFrame.default <- function(data, schema = NULL, samplingRatio = 1.0, numPa
 #' @param ... additional argument(s).
 #' @rdname createDataFrame
 #' @aliases as.DataFrame
+#' @export
 as.DataFrame <- function(data, ...) {
   dispatchFunc("as.DataFrame(data, schema = NULL)", data, ...)
 }
@@ -337,6 +342,7 @@ setMethod("toDF", signature(x = "RDD"),
 #' @param ... additional external data source specific named properties.
 #' @return SparkDataFrame
 #' @rdname read.json
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -365,6 +371,7 @@ read.json <- function(x, ...) {
 
 #' @rdname read.json
 #' @name jsonFile
+#' @export
 #' @method jsonFile default
 #' @note jsonFile since 1.4.0
 jsonFile.default <- function(path) {
@@ -416,6 +423,7 @@ jsonRDD <- function(sqlContext, rdd, schema = NULL, samplingRatio = 1.0) {
 #' @param ... additional external data source specific named properties.
 #' @return SparkDataFrame
 #' @rdname read.orc
+#' @export
 #' @name read.orc
 #' @note read.orc since 2.0.0
 read.orc <- function(path, ...) {
@@ -436,6 +444,7 @@ read.orc <- function(path, ...) {
 #' @param path path of file to read. A vector of multiple paths is allowed.
 #' @return SparkDataFrame
 #' @rdname read.parquet
+#' @export
 #' @name read.parquet
 #' @method read.parquet default
 #' @note read.parquet since 1.6.0
@@ -457,6 +466,7 @@ read.parquet <- function(x, ...) {
 #' @param ... argument(s) passed to the method.
 #' @rdname read.parquet
 #' @name parquetFile
+#' @export
 #' @method parquetFile default
 #' @note parquetFile since 1.4.0
 parquetFile.default <- function(...) {
@@ -480,6 +490,7 @@ parquetFile <- function(x, ...) {
 #' @param ... additional external data source specific named properties.
 #' @return SparkDataFrame
 #' @rdname read.text
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -511,6 +522,7 @@ read.text <- function(x, ...) {
 #' @param sqlQuery A character vector containing the SQL query
 #' @return SparkDataFrame
 #' @rdname sql
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -544,6 +556,7 @@ sql <- function(x, ...) {
 #' @return SparkDataFrame
 #' @rdname tableToDF
 #' @name tableToDF
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -578,6 +591,7 @@ tableToDF <- function(tableName) {
 #' @rdname read.df
 #' @name read.df
 #' @seealso \link{read.json}
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -667,6 +681,7 @@ loadDF <- function(x = NULL, ...) {
 #' @return SparkDataFrame
 #' @rdname read.jdbc
 #' @name read.jdbc
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -712,13 +727,14 @@ read.jdbc <- function(url, tableName,
 #' @param schema The data schema defined in structType or a DDL-formatted string, this is
 #'               required for file-based streaming data source
 #' @param ... additional external data source specific named options, for instance \code{path} for
-#'        file-based streaming data source. \code{timeZone} to indicate a timezone to be used to
+#'        file-based streaming data source. \code{timeZone} to indicate a timezone to be used to 
 #'        parse timestamps in the JSON/CSV data sources or partition values; If it isn't set, it
 #'        uses the default value, session local timezone.
 #' @return SparkDataFrame
 #' @rdname read.stream
 #' @name read.stream
 #' @seealso \link{write.stream}
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
diff --git a/R/pkg/R/WindowSpec.R b/R/pkg/R/WindowSpec.R
index ee7f4adf726e6..debc7cbde55e7 100644
--- a/R/pkg/R/WindowSpec.R
+++ b/R/pkg/R/WindowSpec.R
@@ -28,6 +28,7 @@ NULL
 #' @seealso \link{windowPartitionBy}, \link{windowOrderBy}
 #'
 #' @param sws A Java object reference to the backing Scala WindowSpec
+#' @export
 #' @note WindowSpec since 2.0.0
 setClass("WindowSpec",
          slots = list(sws = "jobj"))
@@ -43,6 +44,7 @@ windowSpec <- function(sws) {
 }
 
 #' @rdname show
+#' @export
 #' @note show(WindowSpec) since 2.0.0
 setMethod("show", "WindowSpec",
           function(object) {
@@ -61,6 +63,7 @@ setMethod("show", "WindowSpec",
 #' @name partitionBy
 #' @aliases partitionBy,WindowSpec-method
 #' @family windowspec_method
+#' @export
 #' @examples
 #' \dontrun{
 #'   partitionBy(ws, "col1", "col2")
@@ -94,6 +97,7 @@ setMethod("partitionBy",
 #' @aliases orderBy,WindowSpec,character-method
 #' @family windowspec_method
 #' @seealso See \link{arrange} for use in sorting a SparkDataFrame
+#' @export
 #' @examples
 #' \dontrun{
 #'   orderBy(ws, "col1", "col2")
@@ -109,6 +113,7 @@ setMethod("orderBy",
 #' @rdname orderBy
 #' @name orderBy
 #' @aliases orderBy,WindowSpec,Column-method
+#' @export
 #' @note orderBy(WindowSpec, Column) since 2.0.0
 setMethod("orderBy",
           signature(x = "WindowSpec", col = "Column"),
@@ -137,6 +142,7 @@ setMethod("orderBy",
 #' @aliases rowsBetween,WindowSpec,numeric,numeric-method
 #' @name rowsBetween
 #' @family windowspec_method
+#' @export
 #' @examples
 #' \dontrun{
 #'   rowsBetween(ws, 0, 3)
@@ -168,6 +174,7 @@ setMethod("rowsBetween",
 #' @aliases rangeBetween,WindowSpec,numeric,numeric-method
 #' @name rangeBetween
 #' @family windowspec_method
+#' @export
 #' @examples
 #' \dontrun{
 #'   rangeBetween(ws, 0, 3)
@@ -195,6 +202,7 @@ setMethod("rangeBetween",
 #' @name over
 #' @aliases over,Column,WindowSpec-method
 #' @family colum_func
+#' @export
 #' @examples
 #' \dontrun{
 #'   df <- createDataFrame(mtcars)
diff --git a/R/pkg/R/broadcast.R b/R/pkg/R/broadcast.R
index 282f8a6857738..398dffc4ab1b4 100644
--- a/R/pkg/R/broadcast.R
+++ b/R/pkg/R/broadcast.R
@@ -32,12 +32,14 @@
 # @seealso broadcast
 #
 # @param id Id of the backing Spark broadcast variable
+# @export
 setClass("Broadcast", slots = list(id = "character"))
 
 # @rdname broadcast-class
 # @param value Value of the broadcast variable
 # @param jBroadcastRef reference to the backing Java broadcast object
 # @param objName name of broadcasted object
+# @export
 Broadcast <- function(id, value, jBroadcastRef, objName) {
   .broadcastValues[[id]] <- value
   .broadcastNames[[as.character(objName)]] <- jBroadcastRef
@@ -71,6 +73,7 @@ setMethod("value",
 
 # @param bcastId The id of broadcast variable to set
 # @param value The value to be set
+# @export
 setBroadcastValue <- function(bcastId, value) {
   bcastIdStr <- as.character(bcastId)
   .broadcastValues[[bcastIdStr]] <- value
diff --git a/R/pkg/R/catalog.R b/R/pkg/R/catalog.R
index baf4d861fcf86..e59a7024333ac 100644
--- a/R/pkg/R/catalog.R
+++ b/R/pkg/R/catalog.R
@@ -34,6 +34,7 @@
 #' @return A SparkDataFrame.
 #' @rdname createExternalTable-deprecated
 #' @seealso \link{createTable}
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -70,6 +71,7 @@ createExternalTable <- function(x, ...) {
 #' @return A SparkDataFrame.
 #' @rdname createTable
 #' @seealso \link{createExternalTable}
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -108,6 +110,7 @@ createTable <- function(tableName, path = NULL, source = NULL, schema = NULL, ..
 #'                  identifier is provided, it refers to a table in the current database.
 #' @return SparkDataFrame
 #' @rdname cacheTable
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -137,6 +140,7 @@ cacheTable <- function(x, ...) {
 #'                  identifier is provided, it refers to a table in the current database.
 #' @return SparkDataFrame
 #' @rdname uncacheTable
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -163,6 +167,7 @@ uncacheTable <- function(x, ...) {
 #' Removes all cached tables from the in-memory cache.
 #'
 #' @rdname clearCache
+#' @export
 #' @examples
 #' \dontrun{
 #' clearCache()
@@ -188,6 +193,7 @@ clearCache <- function() {
 #' @param tableName The name of the SparkSQL table to be dropped.
 #' @seealso \link{dropTempView}
 #' @rdname dropTempTable-deprecated
+#' @export
 #' @examples
 #' \dontrun{
 #' sparkR.session()
@@ -219,6 +225,7 @@ dropTempTable <- function(x, ...) {
 #' @return TRUE if the view is dropped successfully, FALSE otherwise.
 #' @rdname dropTempView
 #' @name dropTempView
+#' @export
 #' @examples
 #' \dontrun{
 #' sparkR.session()
@@ -244,6 +251,7 @@ dropTempView <- function(viewName) {
 #' @return a SparkDataFrame
 #' @rdname tables
 #' @seealso \link{listTables}
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -268,6 +276,7 @@ tables <- function(x, ...) {
 #' @param databaseName (optional) name of the database
 #' @return a list of table names
 #' @rdname tableNames
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -295,6 +304,7 @@ tableNames <- function(x, ...) {
 #' @return name of the current default database.
 #' @rdname currentDatabase
 #' @name currentDatabase
+#' @export
 #' @examples
 #' \dontrun{
 #' sparkR.session()
@@ -314,6 +324,7 @@ currentDatabase <- function() {
 #' @param databaseName name of the database
 #' @rdname setCurrentDatabase
 #' @name setCurrentDatabase
+#' @export
 #' @examples
 #' \dontrun{
 #' sparkR.session()
@@ -336,6 +347,7 @@ setCurrentDatabase <- function(databaseName) {
 #' @return a SparkDataFrame of the list of databases.
 #' @rdname listDatabases
 #' @name listDatabases
+#' @export
 #' @examples
 #' \dontrun{
 #' sparkR.session()
@@ -358,6 +370,7 @@ listDatabases <- function() {
 #' @rdname listTables
 #' @name listTables
 #' @seealso \link{tables}
+#' @export
 #' @examples
 #' \dontrun{
 #' sparkR.session()
@@ -390,6 +403,7 @@ listTables <- function(databaseName = NULL) {
 #' @return a SparkDataFrame of the list of column descriptions.
 #' @rdname listColumns
 #' @name listColumns
+#' @export
 #' @examples
 #' \dontrun{
 #' sparkR.session()
@@ -419,6 +433,7 @@ listColumns <- function(tableName, databaseName = NULL) {
 #' @return a SparkDataFrame of the list of function descriptions.
 #' @rdname listFunctions
 #' @name listFunctions
+#' @export
 #' @examples
 #' \dontrun{
 #' sparkR.session()
@@ -448,6 +463,7 @@ listFunctions <- function(databaseName = NULL) {
 #'                  identifier is provided, it refers to a table in the current database.
 #' @rdname recoverPartitions
 #' @name recoverPartitions
+#' @export
 #' @examples
 #' \dontrun{
 #' sparkR.session()
@@ -474,6 +490,7 @@ recoverPartitions <- function(tableName) {
 #'                  identifier is provided, it refers to a table in the current database.
 #' @rdname refreshTable
 #' @name refreshTable
+#' @export
 #' @examples
 #' \dontrun{
 #' sparkR.session()
@@ -495,6 +512,7 @@ refreshTable <- function(tableName) {
 #' @param path the path of the data source.
 #' @rdname refreshByPath
 #' @name refreshByPath
+#' @export
 #' @examples
 #' \dontrun{
 #' sparkR.session()
diff --git a/R/pkg/R/client.R b/R/pkg/R/client.R
index 4c87f64e7f0e1..9d82814211bc5 100644
--- a/R/pkg/R/client.R
+++ b/R/pkg/R/client.R
@@ -19,7 +19,7 @@
 
 # Creates a SparkR client connection object
 # if one doesn't already exist
-connectBackend <- function(hostname, port, timeout, authSecret) {
+connectBackend <- function(hostname, port, timeout) {
   if (exists(".sparkRcon", envir = .sparkREnv)) {
     if (isOpen(.sparkREnv[[".sparkRCon"]])) {
       cat("SparkRBackend client connection already exists\n")
@@ -29,7 +29,7 @@ connectBackend <- function(hostname, port, timeout, authSecret) {
 
   con <- socketConnection(host = hostname, port = port, server = FALSE,
                           blocking = TRUE, open = "wb", timeout = timeout)
-  doServerAuth(con, authSecret)
+
   assign(".sparkRCon", con, envir = .sparkREnv)
   con
 }
@@ -60,41 +60,6 @@ generateSparkSubmitArgs <- function(args, sparkHome, jars, sparkSubmitOpts, pack
   combinedArgs
 }
 
-checkJavaVersion <- function() {
-  javaBin <- "java"
-  javaHome <- Sys.getenv("JAVA_HOME")
-  javaReqs <- utils::packageDescription(utils::packageName(), fields = c("SystemRequirements"))
-  sparkJavaVersion <- as.numeric(tail(strsplit(javaReqs, "[(=)]")[[1]], n = 1L))
-  if (javaHome != "") {
-    javaBin <- file.path(javaHome, "bin", javaBin)
-  }
-
-  # If java is missing from PATH, we get an error in Unix and a warning in Windows
-  javaVersionOut <- tryCatch(
-      launchScript(javaBin, "-version", wait = TRUE, stdout = TRUE, stderr = TRUE),
-                   error = function(e) {
-                     stop("Java version check failed. Please make sure Java is installed",
-                          " and set JAVA_HOME to point to the installation directory.", e)
-                   },
-                   warning = function(w) {
-                     stop("Java version check failed. Please make sure Java is installed",
-                          " and set JAVA_HOME to point to the installation directory.", w)
-                   })
-  javaVersionFilter <- Filter(
-      function(x) {
-        grepl(" version", x)
-      }, javaVersionOut)
-
-  javaVersionStr <- strsplit(javaVersionFilter[[1]], "[\"]")[[1L]][2]
-  # javaVersionStr is of the form 1.8.0_92.
-  # Extract 8 from it to compare to sparkJavaVersion
-  javaVersionNum <- as.integer(strsplit(javaVersionStr, "[.]")[[1L]][2])
-  if (javaVersionNum != sparkJavaVersion) {
-    stop(paste("Java version", sparkJavaVersion, "is required for this package; found version:",
-               javaVersionStr))
-  }
-}
-
 launchBackend <- function(args, sparkHome, jars, sparkSubmitOpts, packages) {
   sparkSubmitBinName <- determineSparkSubmitBin()
   if (sparkHome != "") {
@@ -102,7 +67,6 @@ launchBackend <- function(args, sparkHome, jars, sparkSubmitOpts, packages) {
   } else {
     sparkSubmitBin <- sparkSubmitBinName
   }
-
   combinedArgs <- generateSparkSubmitArgs(args, sparkHome, jars, sparkSubmitOpts, packages)
   cat("Launching java with spark-submit command", sparkSubmitBin, combinedArgs, "\n")
   invisible(launchScript(sparkSubmitBin, combinedArgs))
diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R
index 7926a9a2467ee..3095adb918b67 100644
--- a/R/pkg/R/column.R
+++ b/R/pkg/R/column.R
@@ -29,6 +29,7 @@ setOldClass("jobj")
 #' @rdname column
 #'
 #' @slot jc reference to JVM SparkDataFrame column
+#' @export
 #' @note Column since 1.4.0
 setClass("Column",
          slots = list(jc = "jobj"))
@@ -55,6 +56,7 @@ setMethod("column",
 #' @rdname show
 #' @name show
 #' @aliases show,Column-method
+#' @export
 #' @note show(Column) since 1.4.0
 setMethod("show", "Column",
           function(object) {
@@ -132,6 +134,7 @@ createMethods()
 #' @name alias
 #' @aliases alias,Column-method
 #' @family colum_func
+#' @export
 #' @examples
 #' \dontrun{
 #' df <- createDataFrame(iris)
@@ -161,18 +164,12 @@ setMethod("alias",
 #' @aliases substr,Column-method
 #'
 #' @param x a Column.
-#' @param start starting position. It should be 1-base.
+#' @param start starting position.
 #' @param stop ending position.
-#' @examples
-#' \dontrun{
-#' df <- createDataFrame(list(list(a="abcdef")))
-#' collect(select(df, substr(df$a, 1, 4))) # the result is `abcd`.
-#' collect(select(df, substr(df$a, 2, 4))) # the result is `bcd`.
-#' }
 #' @note substr since 1.4.0
 setMethod("substr", signature(x = "Column"),
           function(x, start, stop) {
-            jc <- callJMethod(x@jc, "substr", as.integer(start), as.integer(stop - start + 1))
+            jc <- callJMethod(x@jc, "substr", as.integer(start - 1), as.integer(stop - start + 1))
             column(jc)
           })
 
@@ -273,6 +270,7 @@ setMethod("cast",
 #' @name %in%
 #' @aliases %in%,Column-method
 #' @return A matched values as a result of comparing with given values.
+#' @export
 #' @examples
 #' \dontrun{
 #' filter(df, "age in (10, 30)")
@@ -298,6 +296,7 @@ setMethod("%in%",
 #' @name otherwise
 #' @family colum_func
 #' @aliases otherwise,Column-method
+#' @export
 #' @note otherwise since 1.5.0
 setMethod("otherwise",
           signature(x = "Column", value = "ANY"),
@@ -319,6 +318,7 @@ setMethod("otherwise",
 #' @rdname eq_null_safe
 #' @name %<=>%
 #' @aliases %<=>%,Column-method
+#' @export
 #' @examples
 #' \dontrun{
 #' df1 <- createDataFrame(data.frame(
@@ -348,6 +348,7 @@ setMethod("%<=>%",
 #' @rdname not
 #' @name not
 #' @aliases !,Column-method
+#' @export
 #' @examples
 #' \dontrun{
 #' df <- createDataFrame(data.frame(x = c(-1, 0, 1)))
diff --git a/R/pkg/R/context.R b/R/pkg/R/context.R
index 8ec727dd042bc..443c2ff8f9ace 100644
--- a/R/pkg/R/context.R
+++ b/R/pkg/R/context.R
@@ -308,6 +308,7 @@ setCheckpointDirSC <- function(sc, dirName) {
 #' @rdname spark.addFile
 #' @param path The path of the file to be added
 #' @param recursive Whether to add files recursively from the path. Default is FALSE.
+#' @export
 #' @examples
 #'\dontrun{
 #' spark.addFile("~/myfile")
@@ -322,6 +323,7 @@ spark.addFile <- function(path, recursive = FALSE) {
 #'
 #' @rdname spark.getSparkFilesRootDirectory
 #' @return the root directory that contains files added through spark.addFile
+#' @export
 #' @examples
 #'\dontrun{
 #' spark.getSparkFilesRootDirectory()
@@ -342,6 +344,7 @@ spark.getSparkFilesRootDirectory <- function() { # nolint
 #' @rdname spark.getSparkFiles
 #' @param fileName The name of the file added through spark.addFile
 #' @return the absolute path of a file added through spark.addFile.
+#' @export
 #' @examples
 #'\dontrun{
 #' spark.getSparkFiles("myfile")
@@ -388,6 +391,7 @@ spark.getSparkFiles <- function(fileName) {
 #' @param list the list of elements
 #' @param func a function that takes one argument.
 #' @return a list of results (the exact type being determined by the function)
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -408,6 +412,7 @@ spark.lapply <- function(list, func) {
 #'
 #' @rdname setLogLevel
 #' @param level New log level
+#' @export
 #' @examples
 #'\dontrun{
 #' setLogLevel("ERROR")
@@ -426,6 +431,7 @@ setLogLevel <- function(level) {
 #' @rdname setCheckpointDir
 #' @param directory Directory path to checkpoint to
 #' @seealso \link{checkpoint}
+#' @export
 #' @examples
 #'\dontrun{
 #' setCheckpointDir("/checkpoint")
diff --git a/R/pkg/R/deserialize.R b/R/pkg/R/deserialize.R
index cb03f1667629f..a90f7d381026b 100644
--- a/R/pkg/R/deserialize.R
+++ b/R/pkg/R/deserialize.R
@@ -60,18 +60,14 @@ readTypedObject <- function(con, type) {
     stop(paste("Unsupported type for deserialization", type)))
 }
 
-readStringData <- function(con, len) {
-  raw <- readBin(con, raw(), len, endian = "big")
+readString <- function(con) {
+  stringLen <- readInt(con)
+  raw <- readBin(con, raw(), stringLen, endian = "big")
   string <- rawToChar(raw)
   Encoding(string) <- "UTF-8"
   string
 }
 
-readString <- function(con) {
-  stringLen <- readInt(con)
-  readStringData(con, stringLen)
-}
-
 readInt <- function(con) {
   readBin(con, integer(), n = 1, endian = "big")
 }
diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index abc91aeeb4825..9f7c6317cd924 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -189,12 +189,6 @@ NULL
 #'              the map or array of maps.
 #'          \item \code{from_json}: it is the column containing the JSON string.
 #'          }
-#' @param y Column to compute on.
-#' @param value A value to compute on.
-#'          \itemize{
-#'          \item \code{array_contains}: a value to be checked if contained in the column.
-#'          \item \code{array_position}: a value to locate in the given array.
-#'          }
 #' @param ... additional argument(s). In \code{to_json} and \code{from_json}, this contains
 #'            additional named properties to control how it is converted, accepts the same
 #'            options as the JSON data source.
@@ -207,21 +201,14 @@ NULL
 #' df <- createDataFrame(cbind(model = rownames(mtcars), mtcars))
 #' tmp <- mutate(df, v1 = create_array(df$mpg, df$cyl, df$hp))
 #' head(select(tmp, array_contains(tmp$v1, 21), size(tmp$v1)))
-#' head(select(tmp, array_max(tmp$v1), array_min(tmp$v1)))
-#' head(select(tmp, array_position(tmp$v1, 21), array_repeat(df$mpg, 3), array_sort(tmp$v1)))
-#' head(select(tmp, flatten(tmp$v1), reverse(tmp$v1)))
 #' tmp2 <- mutate(tmp, v2 = explode(tmp$v1))
 #' head(tmp2)
 #' head(select(tmp, posexplode(tmp$v1)))
-#' head(select(tmp, slice(tmp$v1, 2L, 2L)))
 #' head(select(tmp, sort_array(tmp$v1)))
 #' head(select(tmp, sort_array(tmp$v1, asc = FALSE)))
 #' tmp3 <- mutate(df, v3 = create_map(df$model, df$cyl))
-#' head(select(tmp3, map_entries(tmp3$v3), map_keys(tmp3$v3), map_values(tmp3$v3)))
-#' head(select(tmp3, element_at(tmp3$v3, "Valiant")))
-#' tmp4 <- mutate(df, v4 = create_array(df$mpg, df$cyl), v5 = create_array(df$cyl, df$hp))
-#' head(select(tmp4, concat(tmp4$v4, tmp4$v5), arrays_overlap(tmp4$v4, tmp4$v5)))
-#' head(select(tmp, concat(df$mpg, df$cyl, df$hp)))}
+#' head(select(tmp3, map_keys(tmp3$v3)))
+#' head(select(tmp3, map_values(tmp3$v3)))}
 NULL
 
 #' Window functions for Column operations
@@ -257,6 +244,7 @@ NULL
 #' If the parameter is a Column, it is returned unchanged.
 #'
 #' @rdname column_nonaggregate_functions
+#' @export
 #' @aliases lit lit,ANY-method
 #' @examples
 #'
@@ -279,6 +267,7 @@ setMethod("lit", signature("ANY"),
 #' \code{abs}: Computes the absolute value.
 #'
 #' @rdname column_math_functions
+#' @export
 #' @aliases abs abs,Column-method
 #' @note abs since 1.5.0
 setMethod("abs",
@@ -289,10 +278,11 @@ setMethod("abs",
           })
 
 #' @details
-#' \code{acos}: Returns the inverse cosine of the given value,
-#' as if computed by \code{java.lang.Math.acos()}
+#' \code{acos}: Computes the cosine inverse of the given value; the returned angle is in
+#' the range 0.0 through pi.
 #'
 #' @rdname column_math_functions
+#' @export
 #' @aliases acos acos,Column-method
 #' @note acos since 1.5.0
 setMethod("acos",
@@ -306,6 +296,7 @@ setMethod("acos",
 #' \code{approxCountDistinct}: Returns the approximate number of distinct items in a group.
 #'
 #' @rdname column_aggregate_functions
+#' @export
 #' @aliases approxCountDistinct approxCountDistinct,Column-method
 #' @examples
 #'
@@ -328,6 +319,7 @@ setMethod("approxCountDistinct",
 #' and returns the result as an int column.
 #'
 #' @rdname column_string_functions
+#' @export
 #' @aliases ascii ascii,Column-method
 #' @examples
 #'
@@ -342,10 +334,11 @@ setMethod("ascii",
           })
 
 #' @details
-#' \code{asin}: Returns the inverse sine of the given value,
-#' as if computed by \code{java.lang.Math.asin()}
+#' \code{asin}: Computes the sine inverse of the given value; the returned angle is in
+#' the range -pi/2 through pi/2.
 #'
 #' @rdname column_math_functions
+#' @export
 #' @aliases asin asin,Column-method
 #' @note asin since 1.5.0
 setMethod("asin",
@@ -356,10 +349,11 @@ setMethod("asin",
           })
 
 #' @details
-#' \code{atan}: Returns the inverse tangent of the given value,
-#' as if computed by \code{java.lang.Math.atan()}
+#' \code{atan}: Computes the tangent inverse of the given value; the returned angle is in the range
+#' -pi/2 through pi/2.
 #'
 #' @rdname column_math_functions
+#' @export
 #' @aliases atan atan,Column-method
 #' @note atan since 1.5.0
 setMethod("atan",
@@ -376,6 +370,7 @@ setMethod("atan",
 #' @rdname avg
 #' @name avg
 #' @family aggregate functions
+#' @export
 #' @aliases avg,Column-method
 #' @examples \dontrun{avg(df$c)}
 #' @note avg since 1.4.0
@@ -391,6 +386,7 @@ setMethod("avg",
 #' a string column. This is the reverse of unbase64.
 #'
 #' @rdname column_string_functions
+#' @export
 #' @aliases base64 base64,Column-method
 #' @examples
 #'
@@ -414,6 +410,7 @@ setMethod("base64",
 #' of the given long column. For example, bin("12") returns "1100".
 #'
 #' @rdname column_math_functions
+#' @export
 #' @aliases bin bin,Column-method
 #' @note bin since 1.5.0
 setMethod("bin",
@@ -427,6 +424,7 @@ setMethod("bin",
 #' \code{bitwiseNOT}: Computes bitwise NOT.
 #'
 #' @rdname column_nonaggregate_functions
+#' @export
 #' @aliases bitwiseNOT bitwiseNOT,Column-method
 #' @examples
 #'
@@ -444,6 +442,7 @@ setMethod("bitwiseNOT",
 #' \code{cbrt}: Computes the cube-root of the given value.
 #'
 #' @rdname column_math_functions
+#' @export
 #' @aliases cbrt cbrt,Column-method
 #' @note cbrt since 1.4.0
 setMethod("cbrt",
@@ -457,6 +456,7 @@ setMethod("cbrt",
 #' \code{ceil}: Computes the ceiling of the given value.
 #'
 #' @rdname column_math_functions
+#' @export
 #' @aliases ceil ceil,Column-method
 #' @note ceil since 1.5.0
 setMethod("ceil",
@@ -471,6 +471,7 @@ setMethod("ceil",
 #'
 #' @rdname column_math_functions
 #' @aliases ceiling ceiling,Column-method
+#' @export
 #' @note ceiling since 1.5.0
 setMethod("ceiling",
           signature(x = "Column"),
@@ -482,6 +483,7 @@ setMethod("ceiling",
 #' \code{coalesce}: Returns the first column that is not NA, or NA if all inputs are.
 #'
 #' @rdname column_nonaggregate_functions
+#' @export
 #' @aliases coalesce,Column-method
 #' @note coalesce(Column) since 2.1.1
 setMethod("coalesce",
@@ -512,6 +514,7 @@ col <- function(x) {
 #' @rdname column
 #' @name column
 #' @family non-aggregate functions
+#' @export
 #' @aliases column,character-method
 #' @examples \dontrun{column("name")}
 #' @note column since 1.6.0
@@ -530,6 +533,7 @@ setMethod("column",
 #' @rdname corr
 #' @name corr
 #' @family aggregate functions
+#' @export
 #' @aliases corr,Column-method
 #' @examples
 #' \dontrun{
@@ -553,6 +557,7 @@ setMethod("corr", signature(x = "Column"),
 #' @rdname cov
 #' @name cov
 #' @family aggregate functions
+#' @export
 #' @aliases cov,characterOrColumn-method
 #' @examples
 #' \dontrun{
@@ -593,6 +598,7 @@ setMethod("covar_samp", signature(col1 = "characterOrColumn", col2 = "characterO
 #'
 #' @rdname cov
 #' @name covar_pop
+#' @export
 #' @aliases covar_pop,characterOrColumn,characterOrColumn-method
 #' @note covar_pop since 2.0.0
 setMethod("covar_pop", signature(col1 = "characterOrColumn", col2 = "characterOrColumn"),
@@ -607,11 +613,11 @@ setMethod("covar_pop", signature(col1 = "characterOrColumn", col2 = "characterOr
           })
 
 #' @details
-#' \code{cos}: Returns the cosine of the given value,
-#' as if computed by \code{java.lang.Math.cos()}. Units in radians.
+#' \code{cos}: Computes the cosine of the given value. Units in radians.
 #'
 #' @rdname column_math_functions
 #' @aliases cos cos,Column-method
+#' @export
 #' @note cos since 1.5.0
 setMethod("cos",
           signature(x = "Column"),
@@ -621,11 +627,11 @@ setMethod("cos",
           })
 
 #' @details
-#' \code{cosh}: Returns the hyperbolic cosine of the given value,
-#' as if computed by \code{java.lang.Math.cosh()}.
+#' \code{cosh}: Computes the hyperbolic cosine of the given value.
 #'
 #' @rdname column_math_functions
 #' @aliases cosh cosh,Column-method
+#' @export
 #' @note cosh since 1.5.0
 setMethod("cosh",
           signature(x = "Column"),
@@ -643,6 +649,7 @@ setMethod("cosh",
 #' @name count
 #' @family aggregate functions
 #' @aliases count,Column-method
+#' @export
 #' @examples \dontrun{count(df$c)}
 #' @note count since 1.4.0
 setMethod("count",
@@ -658,6 +665,7 @@ setMethod("count",
 #'
 #' @rdname column_misc_functions
 #' @aliases crc32 crc32,Column-method
+#' @export
 #' @note crc32 since 1.5.0
 setMethod("crc32",
           signature(x = "Column"),
@@ -672,6 +680,7 @@ setMethod("crc32",
 #'
 #' @rdname column_misc_functions
 #' @aliases hash hash,Column-method
+#' @export
 #' @note hash since 2.0.0
 setMethod("hash",
           signature(x = "Column"),
@@ -690,6 +699,7 @@ setMethod("hash",
 #'
 #' @rdname column_datetime_functions
 #' @aliases dayofmonth dayofmonth,Column-method
+#' @export
 #' @examples
 #'
 #' \dontrun{
@@ -711,6 +721,7 @@ setMethod("dayofmonth",
 #'
 #' @rdname column_datetime_functions
 #' @aliases dayofweek dayofweek,Column-method
+#' @export
 #' @note dayofweek since 2.3.0
 setMethod("dayofweek",
           signature(x = "Column"),
@@ -725,6 +736,7 @@ setMethod("dayofweek",
 #'
 #' @rdname column_datetime_functions
 #' @aliases dayofyear dayofyear,Column-method
+#' @export
 #' @note dayofyear since 1.5.0
 setMethod("dayofyear",
           signature(x = "Column"),
@@ -742,6 +754,7 @@ setMethod("dayofyear",
 #'
 #' @rdname column_string_functions
 #' @aliases decode decode,Column,character-method
+#' @export
 #' @note decode since 1.6.0
 setMethod("decode",
           signature(x = "Column", charset = "character"),
@@ -756,6 +769,7 @@ setMethod("decode",
 #'
 #' @rdname column_string_functions
 #' @aliases encode encode,Column,character-method
+#' @export
 #' @note encode since 1.6.0
 setMethod("encode",
           signature(x = "Column", charset = "character"),
@@ -769,6 +783,7 @@ setMethod("encode",
 #'
 #' @rdname column_math_functions
 #' @aliases exp exp,Column-method
+#' @export
 #' @note exp since 1.5.0
 setMethod("exp",
           signature(x = "Column"),
@@ -782,6 +797,7 @@ setMethod("exp",
 #'
 #' @rdname column_math_functions
 #' @aliases expm1 expm1,Column-method
+#' @export
 #' @note expm1 since 1.5.0
 setMethod("expm1",
           signature(x = "Column"),
@@ -795,6 +811,7 @@ setMethod("expm1",
 #'
 #' @rdname column_math_functions
 #' @aliases factorial factorial,Column-method
+#' @export
 #' @note factorial since 1.5.0
 setMethod("factorial",
           signature(x = "Column"),
@@ -809,8 +826,6 @@ setMethod("factorial",
 #'
 #' The function by default returns the first values it sees. It will return the first non-missing
 #' value it sees when na.rm is set to true. If all values are missing, then NA is returned.
-#' Note: the function is non-deterministic because its result depends on the order of rows,
-#' which may be non-deterministic after a shuffle.
 #'
 #' @param na.rm a logical value indicating whether NA values should be stripped
 #'        before the computation proceeds.
@@ -819,6 +834,7 @@ setMethod("factorial",
 #' @name first
 #' @aliases first,characterOrColumn-method
 #' @family aggregate functions
+#' @export
 #' @examples
 #' \dontrun{
 #' first(df$c)
@@ -842,6 +858,7 @@ setMethod("first",
 #'
 #' @rdname column_math_functions
 #' @aliases floor floor,Column-method
+#' @export
 #' @note floor since 1.5.0
 setMethod("floor",
           signature(x = "Column"),
@@ -855,6 +872,7 @@ setMethod("floor",
 #'
 #' @rdname column_math_functions
 #' @aliases hex hex,Column-method
+#' @export
 #' @note hex since 1.5.0
 setMethod("hex",
           signature(x = "Column"),
@@ -868,6 +886,7 @@ setMethod("hex",
 #'
 #' @rdname column_datetime_functions
 #' @aliases hour hour,Column-method
+#' @export
 #' @examples
 #'
 #' \dontrun{
@@ -890,6 +909,7 @@ setMethod("hour",
 #'
 #' @rdname column_string_functions
 #' @aliases initcap initcap,Column-method
+#' @export
 #' @examples
 #'
 #' \dontrun{
@@ -924,6 +944,7 @@ setMethod("isnan",
 #'
 #' @rdname column_nonaggregate_functions
 #' @aliases is.nan is.nan,Column-method
+#' @export
 #' @note is.nan since 2.0.0
 setMethod("is.nan",
           signature(x = "Column"),
@@ -936,6 +957,7 @@ setMethod("is.nan",
 #'
 #' @rdname column_aggregate_functions
 #' @aliases kurtosis kurtosis,Column-method
+#' @export
 #' @examples
 #'
 #' \dontrun{
@@ -954,8 +976,6 @@ setMethod("kurtosis",
 #'
 #' The function by default returns the last values it sees. It will return the last non-missing
 #' value it sees when na.rm is set to true. If all values are missing, then NA is returned.
-#' Note: the function is non-deterministic because its result depends on the order of rows,
-#' which may be non-deterministic after a shuffle.
 #'
 #' @param x column to compute on.
 #' @param na.rm a logical value indicating whether NA values should be stripped
@@ -966,6 +986,7 @@ setMethod("kurtosis",
 #' @name last
 #' @aliases last,characterOrColumn-method
 #' @family aggregate functions
+#' @export
 #' @examples
 #' \dontrun{
 #' last(df$c)
@@ -991,6 +1012,7 @@ setMethod("last",
 #'
 #' @rdname column_datetime_functions
 #' @aliases last_day last_day,Column-method
+#' @export
 #' @examples
 #'
 #' \dontrun{
@@ -1010,6 +1032,7 @@ setMethod("last_day",
 #'
 #' @rdname column_string_functions
 #' @aliases length length,Column-method
+#' @export
 #' @note length since 1.5.0
 setMethod("length",
           signature(x = "Column"),
@@ -1023,6 +1046,7 @@ setMethod("length",
 #'
 #' @rdname column_math_functions
 #' @aliases log log,Column-method
+#' @export
 #' @note log since 1.5.0
 setMethod("log",
           signature(x = "Column"),
@@ -1036,6 +1060,7 @@ setMethod("log",
 #'
 #' @rdname column_math_functions
 #' @aliases log10 log10,Column-method
+#' @export
 #' @note log10 since 1.5.0
 setMethod("log10",
           signature(x = "Column"),
@@ -1049,6 +1074,7 @@ setMethod("log10",
 #'
 #' @rdname column_math_functions
 #' @aliases log1p log1p,Column-method
+#' @export
 #' @note log1p since 1.5.0
 setMethod("log1p",
           signature(x = "Column"),
@@ -1062,6 +1088,7 @@ setMethod("log1p",
 #'
 #' @rdname column_math_functions
 #' @aliases log2 log2,Column-method
+#' @export
 #' @note log2 since 1.5.0
 setMethod("log2",
           signature(x = "Column"),
@@ -1075,6 +1102,7 @@ setMethod("log2",
 #'
 #' @rdname column_string_functions
 #' @aliases lower lower,Column-method
+#' @export
 #' @note lower since 1.4.0
 setMethod("lower",
           signature(x = "Column"),
@@ -1089,6 +1117,7 @@ setMethod("lower",
 #'
 #' @rdname column_string_functions
 #' @aliases ltrim ltrim,Column,missing-method
+#' @export
 #' @examples
 #'
 #' \dontrun{
@@ -1112,6 +1141,7 @@ setMethod("ltrim",
 #' @param trimString a character string to trim with
 #' @rdname column_string_functions
 #' @aliases ltrim,Column,character-method
+#' @export
 #' @note ltrim(Column, character) since 2.3.0
 setMethod("ltrim",
           signature(x = "Column", trimString = "character"),
@@ -1139,6 +1169,7 @@ setMethod("max",
 #'
 #' @rdname column_misc_functions
 #' @aliases md5 md5,Column-method
+#' @export
 #' @note md5 since 1.5.0
 setMethod("md5",
           signature(x = "Column"),
@@ -1152,6 +1183,7 @@ setMethod("md5",
 #'
 #' @rdname column_aggregate_functions
 #' @aliases mean mean,Column-method
+#' @export
 #' @examples
 #'
 #' \dontrun{
@@ -1177,6 +1209,7 @@ setMethod("mean",
 #'
 #' @rdname column_aggregate_functions
 #' @aliases min min,Column-method
+#' @export
 #' @note min since 1.5.0
 setMethod("min",
           signature(x = "Column"),
@@ -1190,6 +1223,7 @@ setMethod("min",
 #'
 #' @rdname column_datetime_functions
 #' @aliases minute minute,Column-method
+#' @export
 #' @note minute since 1.5.0
 setMethod("minute",
           signature(x = "Column"),
@@ -1209,10 +1243,10 @@ setMethod("minute",
 #' 0, 1, 2, 8589934592 (1L << 33), 8589934593, 8589934594.
 #' This is equivalent to the MONOTONICALLY_INCREASING_ID function in SQL.
 #' The method should be used with no argument.
-#' Note: the function is non-deterministic because its result depends on partition IDs.
 #'
 #' @rdname column_nonaggregate_functions
 #' @aliases monotonically_increasing_id monotonically_increasing_id,missing-method
+#' @export
 #' @examples
 #'
 #' \dontrun{head(select(df, monotonically_increasing_id()))}
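
For illustration only: a minimal sketch of how the generated IDs behave, assuming an active Spark session and a small SparkDataFrame created just for this example.

    # Attach a 64-bit ID column; the values are increasing and unique per row,
    # but not consecutive across partitions.
    idDf <- createDataFrame(data.frame(v = c("a", "b", "c")))
    idDf <- mutate(idDf, id = monotonically_increasing_id())
    head(idDf)   # e.g. 0, 1, 8589934592 depending on partitioning
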
@@ -1228,6 +1262,7 @@ setMethod("monotonically_increasing_id",
 #'
 #' @rdname column_datetime_functions
 #' @aliases month month,Column-method
+#' @export
 #' @note month since 1.5.0
 setMethod("month",
           signature(x = "Column"),
@@ -1241,6 +1276,7 @@ setMethod("month",
 #'
 #' @rdname column_nonaggregate_functions
 #' @aliases negate negate,Column-method
+#' @export
 #' @note negate since 1.5.0
 setMethod("negate",
           signature(x = "Column"),
@@ -1254,6 +1290,7 @@ setMethod("negate",
 #'
 #' @rdname column_datetime_functions
 #' @aliases quarter quarter,Column-method
+#' @export
 #' @note quarter since 1.5.0
 setMethod("quarter",
           signature(x = "Column"),
@@ -1263,10 +1300,11 @@ setMethod("quarter",
           })
 
 #' @details
-#' \code{reverse}: Returns a reversed string or an array with reverse order of elements.
+#' \code{reverse}: Reverses the string column and returns it as a new string column.
 #'
-#' @rdname column_collection_functions
+#' @rdname column_string_functions
 #' @aliases reverse reverse,Column-method
+#' @export
 #' @note reverse since 1.5.0
 setMethod("reverse",
           signature(x = "Column"),
@@ -1281,6 +1319,7 @@ setMethod("reverse",
 #'
 #' @rdname column_math_functions
 #' @aliases rint rint,Column-method
+#' @export
 #' @note rint since 1.5.0
 setMethod("rint",
           signature(x = "Column"),
@@ -1295,6 +1334,7 @@ setMethod("rint",
 #'
 #' @rdname column_math_functions
 #' @aliases round round,Column-method
+#' @export
 #' @note round since 1.5.0
 setMethod("round",
           signature(x = "Column"),
@@ -1314,6 +1354,7 @@ setMethod("round",
 #'        to the left of the decimal point when \code{scale} < 0.
 #' @rdname column_math_functions
 #' @aliases bround bround,Column-method
+#' @export
 #' @note bround since 2.0.0
 setMethod("bround",
           signature(x = "Column"),
@@ -1328,6 +1369,7 @@ setMethod("bround",
 #'
 #' @rdname column_string_functions
 #' @aliases rtrim rtrim,Column,missing-method
+#' @export
 #' @note rtrim since 1.5.0
 setMethod("rtrim",
           signature(x = "Column", trimString = "missing"),
@@ -1338,6 +1380,7 @@ setMethod("rtrim",
 
 #' @rdname column_string_functions
 #' @aliases rtrim,Column,character-method
+#' @export
 #' @note rtrim(Column, character) since 2.3.0
 setMethod("rtrim",
           signature(x = "Column", trimString = "character"),
@@ -1351,6 +1394,7 @@ setMethod("rtrim",
 #'
 #' @rdname column_aggregate_functions
 #' @aliases sd sd,Column-method
+#' @export
 #' @examples
 #'
 #' \dontrun{
@@ -1368,6 +1412,7 @@ setMethod("sd",
 #'
 #' @rdname column_datetime_functions
 #' @aliases second second,Column-method
+#' @export
 #' @note second since 1.5.0
 setMethod("second",
           signature(x = "Column"),
@@ -1382,6 +1427,7 @@ setMethod("second",
 #'
 #' @rdname column_misc_functions
 #' @aliases sha1 sha1,Column-method
+#' @export
 #' @note sha1 since 1.5.0
 setMethod("sha1",
           signature(x = "Column"),
@@ -1395,6 +1441,7 @@ setMethod("sha1",
 #'
 #' @rdname column_math_functions
 #' @aliases signum signum,Column-method
+#' @export
 #' @note signum since 1.5.0
 setMethod("signum",
           signature(x = "Column"),
@@ -1408,6 +1455,7 @@ setMethod("signum",
 #'
 #' @rdname column_math_functions
 #' @aliases sign sign,Column-method
+#' @export
 #' @note sign since 1.5.0
 setMethod("sign", signature(x = "Column"),
           function(x) {
@@ -1415,11 +1463,11 @@ setMethod("sign", signature(x = "Column"),
           })
 
 #' @details
-#' \code{sin}: Returns the sine of the given value,
-#' as if computed by \code{java.lang.Math.sin()}. Units in radians.
+#' \code{sin}: Computes the sine of the given value. Units in radians.
 #'
 #' @rdname column_math_functions
 #' @aliases sin sin,Column-method
+#' @export
 #' @note sin since 1.5.0
 setMethod("sin",
           signature(x = "Column"),
@@ -1429,11 +1477,11 @@ setMethod("sin",
           })
 
 #' @details
-#' \code{sinh}: Returns the hyperbolic sine of the given value,
-#' as if computed by \code{java.lang.Math.sinh()}.
+#' \code{sinh}: Computes the hyperbolic sine of the given value.
 #'
 #' @rdname column_math_functions
 #' @aliases sinh sinh,Column-method
+#' @export
 #' @note sinh since 1.5.0
 setMethod("sinh",
           signature(x = "Column"),
@@ -1447,6 +1495,7 @@ setMethod("sinh",
 #'
 #' @rdname column_aggregate_functions
 #' @aliases skewness skewness,Column-method
+#' @export
 #' @note skewness since 1.6.0
 setMethod("skewness",
           signature(x = "Column"),
@@ -1460,6 +1509,7 @@ setMethod("skewness",
 #'
 #' @rdname column_string_functions
 #' @aliases soundex soundex,Column-method
+#' @export
 #' @note soundex since 1.5.0
 setMethod("soundex",
           signature(x = "Column"),
@@ -1476,6 +1526,7 @@ setMethod("soundex",
 #'
 #' @rdname column_nonaggregate_functions
 #' @aliases spark_partition_id spark_partition_id,missing-method
+#' @export
 #' @examples
 #'
 #' \dontrun{head(select(df, spark_partition_id()))}
@@ -1505,6 +1556,7 @@ setMethod("stddev",
 #'
 #' @rdname column_aggregate_functions
 #' @aliases stddev_pop stddev_pop,Column-method
+#' @export
 #' @note stddev_pop since 1.6.0
 setMethod("stddev_pop",
           signature(x = "Column"),
@@ -1518,6 +1570,7 @@ setMethod("stddev_pop",
 #'
 #' @rdname column_aggregate_functions
 #' @aliases stddev_samp stddev_samp,Column-method
+#' @export
 #' @note stddev_samp since 1.6.0
 setMethod("stddev_samp",
           signature(x = "Column"),
@@ -1531,6 +1584,7 @@ setMethod("stddev_samp",
 #'
 #' @rdname column_nonaggregate_functions
 #' @aliases struct struct,characterOrColumn-method
+#' @export
 #' @examples
 #'
 #' \dontrun{
@@ -1556,6 +1610,7 @@ setMethod("struct",
 #'
 #' @rdname column_math_functions
 #' @aliases sqrt sqrt,Column-method
+#' @export
 #' @note sqrt since 1.5.0
 setMethod("sqrt",
           signature(x = "Column"),
@@ -1569,6 +1624,7 @@ setMethod("sqrt",
 #'
 #' @rdname column_aggregate_functions
 #' @aliases sum sum,Column-method
+#' @export
 #' @note sum since 1.5.0
 setMethod("sum",
           signature(x = "Column"),
@@ -1582,6 +1638,7 @@ setMethod("sum",
 #'
 #' @rdname column_aggregate_functions
 #' @aliases sumDistinct sumDistinct,Column-method
+#' @export
 #' @examples
 #'
 #' \dontrun{
@@ -1596,12 +1653,11 @@ setMethod("sumDistinct",
           })
 
 #' @details
-#' \code{tan}: Returns the tangent of the given value,
-#' as if computed by \code{java.lang.Math.tan()}.
-#' Units in radians.
+#' \code{tan}: Computes the tangent of the given value. Units in radians.
 #'
 #' @rdname column_math_functions
 #' @aliases tan tan,Column-method
+#' @export
 #' @note tan since 1.5.0
 setMethod("tan",
           signature(x = "Column"),
@@ -1611,11 +1667,11 @@ setMethod("tan",
           })
 
 #' @details
-#' \code{tanh}: Returns the hyperbolic tangent of the given value,
-#' as if computed by \code{java.lang.Math.tanh()}.
+#' \code{tanh}: Computes the hyperbolic tangent of the given value.
 #'
 #' @rdname column_math_functions
 #' @aliases tanh tanh,Column-method
+#' @export
 #' @note tanh since 1.5.0
 setMethod("tanh",
           signature(x = "Column"),
@@ -1630,6 +1686,7 @@ setMethod("tanh",
 #'
 #' @rdname column_math_functions
 #' @aliases toDegrees toDegrees,Column-method
+#' @export
 #' @note toDegrees since 1.4.0
 setMethod("toDegrees",
           signature(x = "Column"),
@@ -1644,6 +1701,7 @@ setMethod("toDegrees",
 #'
 #' @rdname column_math_functions
 #' @aliases toRadians toRadians,Column-method
+#' @export
 #' @note toRadians since 1.4.0
 setMethod("toRadians",
           signature(x = "Column"),
@@ -1663,6 +1721,7 @@ setMethod("toRadians",
 #'
 #' @rdname column_datetime_functions
 #' @aliases to_date to_date,Column,missing-method
+#' @export
 #' @examples
 #'
 #' \dontrun{
@@ -1683,6 +1742,7 @@ setMethod("to_date",
 
 #' @rdname column_datetime_functions
 #' @aliases to_date,Column,character-method
+#' @export
 #' @note to_date(Column, character) since 2.2.0
 setMethod("to_date",
           signature(x = "Column", format = "character"),
@@ -1698,6 +1758,7 @@ setMethod("to_date",
 #'
 #' @rdname column_collection_functions
 #' @aliases to_json to_json,Column-method
+#' @export
 #' @examples
 #'
 #' \dontrun{
@@ -1735,6 +1796,7 @@ setMethod("to_json", signature(x = "Column"),
 #'
 #' @rdname column_datetime_functions
 #' @aliases to_timestamp to_timestamp,Column,missing-method
+#' @export
 #' @note to_timestamp(Column) since 2.2.0
 setMethod("to_timestamp",
           signature(x = "Column", format = "missing"),
@@ -1745,6 +1807,7 @@ setMethod("to_timestamp",
 
 #' @rdname column_datetime_functions
 #' @aliases to_timestamp,Column,character-method
+#' @export
 #' @note to_timestamp(Column, character) since 2.2.0
 setMethod("to_timestamp",
           signature(x = "Column", format = "character"),
@@ -1759,6 +1822,7 @@ setMethod("to_timestamp",
 #'
 #' @rdname column_string_functions
 #' @aliases trim trim,Column,missing-method
+#' @export
 #' @note trim since 1.5.0
 setMethod("trim",
           signature(x = "Column", trimString = "missing"),
@@ -1769,6 +1833,7 @@ setMethod("trim",
 
 #' @rdname column_string_functions
 #' @aliases trim,Column,character-method
+#' @export
 #' @note trim(Column, character) since 2.3.0
 setMethod("trim",
           signature(x = "Column", trimString = "character"),
@@ -1783,6 +1848,7 @@ setMethod("trim",
 #'
 #' @rdname column_string_functions
 #' @aliases unbase64 unbase64,Column-method
+#' @export
 #' @note unbase64 since 1.5.0
 setMethod("unbase64",
           signature(x = "Column"),
@@ -1797,6 +1863,7 @@ setMethod("unbase64",
 #'
 #' @rdname column_math_functions
 #' @aliases unhex unhex,Column-method
+#' @export
 #' @note unhex since 1.5.0
 setMethod("unhex",
           signature(x = "Column"),
@@ -1810,6 +1877,7 @@ setMethod("unhex",
 #'
 #' @rdname column_string_functions
 #' @aliases upper upper,Column-method
+#' @export
 #' @note upper since 1.4.0
 setMethod("upper",
           signature(x = "Column"),
@@ -1823,6 +1891,7 @@ setMethod("upper",
 #'
 #' @rdname column_aggregate_functions
 #' @aliases var var,Column-method
+#' @export
 #' @examples
 #'
 #'\dontrun{
@@ -1837,6 +1906,7 @@ setMethod("var",
 
 #' @rdname column_aggregate_functions
 #' @aliases variance variance,Column-method
+#' @export
 #' @note variance since 1.6.0
 setMethod("variance",
           signature(x = "Column"),
@@ -1850,6 +1920,7 @@ setMethod("variance",
 #'
 #' @rdname column_aggregate_functions
 #' @aliases var_pop var_pop,Column-method
+#' @export
 #' @note var_pop since 1.5.0
 setMethod("var_pop",
           signature(x = "Column"),
@@ -1863,6 +1934,7 @@ setMethod("var_pop",
 #'
 #' @rdname column_aggregate_functions
 #' @aliases var_samp var_samp,Column-method
+#' @export
 #' @note var_samp since 1.6.0
 setMethod("var_samp",
           signature(x = "Column"),
@@ -1876,6 +1948,7 @@ setMethod("var_samp",
 #'
 #' @rdname column_datetime_functions
 #' @aliases weekofyear weekofyear,Column-method
+#' @export
 #' @note weekofyear since 1.5.0
 setMethod("weekofyear",
           signature(x = "Column"),
@@ -1889,6 +1962,7 @@ setMethod("weekofyear",
 #'
 #' @rdname column_datetime_functions
 #' @aliases year year,Column-method
+#' @export
 #' @note year since 1.5.0
 setMethod("year",
           signature(x = "Column"),
@@ -1899,11 +1973,11 @@ setMethod("year",
 
 #' @details
 #' \code{atan2}: Returns the angle theta from the conversion of rectangular coordinates
-#' (x, y) to polar coordinates (r, theta),
-#' as if computed by \code{java.lang.Math.atan2()}. Units in radians.
+#' (x, y) to polar coordinates (r, theta). Units in radians.
 #'
 #' @rdname column_math_functions
 #' @aliases atan2 atan2,Column-method
+#' @export
 #' @note atan2 since 1.5.0
 setMethod("atan2", signature(y = "Column"),
           function(y, x) {
@@ -1916,10 +1990,10 @@ setMethod("atan2", signature(y = "Column"),
 
 #' @details
 #' \code{datediff}: Returns the number of days from \code{y} to \code{x}.
-#' If \code{y} is later than \code{x} then the result is positive.
 #'
 #' @rdname column_datetime_diff_functions
 #' @aliases datediff datediff,Column-method
+#' @export
 #' @examples
 #'
 #' \dontrun{
@@ -1943,6 +2017,7 @@ setMethod("datediff", signature(y = "Column"),
 #'
 #' @rdname column_math_functions
 #' @aliases hypot hypot,Column-method
+#' @export
 #' @note hypot since 1.4.0
 setMethod("hypot", signature(y = "Column"),
           function(y, x) {
@@ -1958,6 +2033,7 @@ setMethod("hypot", signature(y = "Column"),
 #'
 #' @rdname column_string_functions
 #' @aliases levenshtein levenshtein,Column-method
+#' @export
 #' @examples
 #'
 #' \dontrun{
@@ -1976,13 +2052,11 @@ setMethod("levenshtein", signature(y = "Column"),
           })
 
 #' @details
-#' \code{months_between}: Returns the number of months between dates \code{y} and \code{x}.
-#' If \code{y} is later than \code{x}, then the result is positive. If \code{y} and \code{x}
-#' are on the same day of the month, or both are the last day of the month, the time of day is
-#' ignored. Otherwise, the difference is calculated based on 31 days per month and rounded to 8 digits.
+#' \code{months_between}: Returns the number of months between dates \code{y} and \code{x}.
 #'
 #' @rdname column_datetime_diff_functions
 #' @aliases months_between months_between,Column-method
+#' @export
 #' @note months_between since 1.5.0
 setMethod("months_between", signature(y = "Column"),
           function(y, x) {
@@ -2000,6 +2074,7 @@ setMethod("months_between", signature(y = "Column"),
 #'
 #' @rdname column_nonaggregate_functions
 #' @aliases nanvl nanvl,Column-method
+#' @export
 #' @note nanvl since 1.5.0
 setMethod("nanvl", signature(y = "Column"),
           function(y, x) {
@@ -2016,6 +2091,7 @@ setMethod("nanvl", signature(y = "Column"),
 #'
 #' @rdname column_math_functions
 #' @aliases pmod pmod,Column-method
+#' @export
 #' @note pmod since 1.5.0
 setMethod("pmod", signature(y = "Column"),
           function(y, x) {
@@ -2030,6 +2106,7 @@ setMethod("pmod", signature(y = "Column"),
 #'
 #' @rdname column_aggregate_functions
 #' @aliases approxCountDistinct,Column-method
+#' @export
 #' @note approxCountDistinct(Column, numeric) since 1.4.0
 setMethod("approxCountDistinct",
           signature(x = "Column"),
@@ -2043,6 +2120,7 @@ setMethod("approxCountDistinct",
 #'
 #' @rdname column_aggregate_functions
 #' @aliases countDistinct countDistinct,Column-method
+#' @export
 #' @note countDistinct since 1.4.0
 setMethod("countDistinct",
           signature(x = "Column"),
@@ -2058,10 +2136,21 @@ setMethod("countDistinct",
 
 #' @details
 #' \code{concat}: Concatenates multiple input columns together into a single column.
-#' The function works with strings, binary and compatible array columns.
+#' If all inputs are binary, concat returns the output as binary. Otherwise, it returns it as a string.
 #'
-#' @rdname column_collection_functions
+#' @rdname column_string_functions
 #' @aliases concat concat,Column-method
+#' @export
+#' @examples
+#'
+#' \dontrun{
+#' # concatenate strings
+#' tmp <- mutate(df, s1 = concat(df$Class, df$Sex),
+#'                   s2 = concat(df$Class, df$Sex, df$Age),
+#'                   s3 = concat(df$Class, df$Sex, df$Age, df$Class),
+#'                   s4 = concat_ws("_", df$Class, df$Sex),
+#'                   s5 = concat_ws("+", df$Class, df$Sex, df$Age, df$Survived))
+#' head(tmp)}
 #' @note concat since 1.5.0
 setMethod("concat",
           signature(x = "Column"),
@@ -2080,6 +2169,7 @@ setMethod("concat",
 #'
 #' @rdname column_nonaggregate_functions
 #' @aliases greatest greatest,Column-method
+#' @export
 #' @note greatest since 1.5.0
 setMethod("greatest",
           signature(x = "Column"),
@@ -2099,6 +2189,7 @@ setMethod("greatest",
 #'
 #' @rdname column_nonaggregate_functions
 #' @aliases least least,Column-method
+#' @export
 #' @note least since 1.5.0
 setMethod("least",
           signature(x = "Column"),
@@ -2117,6 +2208,7 @@ setMethod("least",
 #'
 #' @rdname column_aggregate_functions
 #' @aliases n_distinct n_distinct,Column-method
+#' @export
 #' @note n_distinct since 1.4.0
 setMethod("n_distinct", signature(x = "Column"),
           function(x, ...) {
@@ -2126,6 +2218,7 @@ setMethod("n_distinct", signature(x = "Column"),
 #' @rdname count
 #' @name n
 #' @aliases n,Column-method
+#' @export
 #' @examples \dontrun{n(df$c)}
 #' @note n since 1.4.0
 setMethod("n", signature(x = "Column"),
@@ -2144,6 +2237,7 @@ setMethod("n", signature(x = "Column"),
 #' @rdname column_datetime_diff_functions
 #'
 #' @aliases date_format date_format,Column,character-method
+#' @export
 #' @note date_format since 1.5.0
 setMethod("date_format", signature(y = "Column", x = "character"),
           function(y, x) {
@@ -2161,6 +2255,7 @@ setMethod("date_format", signature(y = "Column", x = "character"),
 #'               Since Spark 2.3, the DDL-formatted string is also supported for the schema.
 #' @param as.json.array indicating if input string is JSON array of objects or a single object.
 #' @aliases from_json from_json,Column,characterOrstructType-method
+#' @export
 #' @examples
 #'
 #' \dontrun{
@@ -2203,6 +2298,7 @@ setMethod("from_json", signature(x = "Column", schema = "characterOrstructType")
 #' @rdname column_datetime_diff_functions
 #'
 #' @aliases from_utc_timestamp from_utc_timestamp,Column,character-method
+#' @export
 #' @examples
 #'
 #' \dontrun{
@@ -2224,6 +2320,7 @@ setMethod("from_utc_timestamp", signature(y = "Column", x = "character"),
 #'
 #' @rdname column_string_functions
 #' @aliases instr instr,Column,character-method
+#' @export
 #' @examples
 #'
 #' \dontrun{
@@ -2246,6 +2343,7 @@ setMethod("instr", signature(y = "Column", x = "character"),
 #'
 #' @rdname column_datetime_diff_functions
 #' @aliases next_day next_day,Column,character-method
+#' @export
 #' @note next_day since 1.5.0
 setMethod("next_day", signature(y = "Column", x = "character"),
           function(y, x) {
@@ -2260,6 +2358,7 @@ setMethod("next_day", signature(y = "Column", x = "character"),
 #'
 #' @rdname column_datetime_diff_functions
 #' @aliases to_utc_timestamp to_utc_timestamp,Column,character-method
+#' @export
 #' @note to_utc_timestamp since 1.5.0
 setMethod("to_utc_timestamp", signature(y = "Column", x = "character"),
           function(y, x) {
@@ -2272,6 +2371,7 @@ setMethod("to_utc_timestamp", signature(y = "Column", x = "character"),
 #'
 #' @rdname column_datetime_diff_functions
 #' @aliases add_months add_months,Column,numeric-method
+#' @export
 #' @examples
 #'
 #' \dontrun{
@@ -2292,6 +2392,7 @@ setMethod("add_months", signature(y = "Column", x = "numeric"),
 #'
 #' @rdname column_datetime_diff_functions
 #' @aliases date_add date_add,Column,numeric-method
+#' @export
 #' @note date_add since 1.5.0
 setMethod("date_add", signature(y = "Column", x = "numeric"),
           function(y, x) {
@@ -2305,6 +2406,7 @@ setMethod("date_add", signature(y = "Column", x = "numeric"),
 #' @rdname column_datetime_diff_functions
 #'
 #' @aliases date_sub date_sub,Column,numeric-method
+#' @export
 #' @note date_sub since 1.5.0
 setMethod("date_sub", signature(y = "Column", x = "numeric"),
           function(y, x) {
@@ -2321,6 +2423,7 @@ setMethod("date_sub", signature(y = "Column", x = "numeric"),
 #'
 #' @rdname column_string_functions
 #' @aliases format_number format_number,Column,numeric-method
+#' @export
 #' @examples
 #'
 #' \dontrun{
@@ -2343,6 +2446,7 @@ setMethod("format_number", signature(y = "Column", x = "numeric"),
 #'
 #' @rdname column_misc_functions
 #' @aliases sha2 sha2,Column,numeric-method
+#' @export
 #' @note sha2 since 1.5.0
 setMethod("sha2", signature(y = "Column", x = "numeric"),
           function(y, x) {
@@ -2356,6 +2460,7 @@ setMethod("sha2", signature(y = "Column", x = "numeric"),
 #'
 #' @rdname column_math_functions
 #' @aliases shiftLeft shiftLeft,Column,numeric-method
+#' @export
 #' @note shiftLeft since 1.5.0
 setMethod("shiftLeft", signature(y = "Column", x = "numeric"),
           function(y, x) {
@@ -2371,6 +2476,7 @@ setMethod("shiftLeft", signature(y = "Column", x = "numeric"),
 #'
 #' @rdname column_math_functions
 #' @aliases shiftRight shiftRight,Column,numeric-method
+#' @export
 #' @note shiftRight since 1.5.0
 setMethod("shiftRight", signature(y = "Column", x = "numeric"),
           function(y, x) {
@@ -2386,6 +2492,7 @@ setMethod("shiftRight", signature(y = "Column", x = "numeric"),
 #'
 #' @rdname column_math_functions
 #' @aliases shiftRightUnsigned shiftRightUnsigned,Column,numeric-method
+#' @export
 #' @note shiftRightUnsigned since 1.5.0
 setMethod("shiftRightUnsigned", signature(y = "Column", x = "numeric"),
           function(y, x) {
@@ -2402,13 +2509,7 @@ setMethod("shiftRightUnsigned", signature(y = "Column", x = "numeric"),
 #' @param sep separator to use.
 #' @rdname column_string_functions
 #' @aliases concat_ws concat_ws,character,Column-method
-#' @examples
-#'
-#' \dontrun{
-#' # concatenate strings
-#' tmp <- mutate(df, s1 = concat_ws("_", df$Class, df$Sex),
-#'                   s2 = concat_ws("+", df$Class, df$Sex, df$Age, df$Survived))
-#' head(tmp)}
+#' @export
 #' @note concat_ws since 1.5.0
 setMethod("concat_ws", signature(sep = "character", x = "Column"),
           function(sep, x, ...) {
@@ -2424,6 +2525,7 @@ setMethod("concat_ws", signature(sep = "character", x = "Column"),
 #' @param toBase base to convert to.
 #' @rdname column_math_functions
 #' @aliases conv conv,Column,numeric,numeric-method
+#' @export
 #' @note conv since 1.5.0
 setMethod("conv", signature(x = "Column", fromBase = "numeric", toBase = "numeric"),
           function(x, fromBase, toBase) {
@@ -2441,6 +2543,7 @@ setMethod("conv", signature(x = "Column", fromBase = "numeric", toBase = "numeri
 #'
 #' @rdname column_nonaggregate_functions
 #' @aliases expr expr,character-method
+#' @export
 #' @note expr since 1.5.0
 setMethod("expr", signature(x = "character"),
           function(x) {
@@ -2455,6 +2558,7 @@ setMethod("expr", signature(x = "character"),
 #' @param format a character object of format strings.
 #' @rdname column_string_functions
 #' @aliases format_string format_string,character,Column-method
+#' @export
 #' @note format_string since 1.5.0
 setMethod("format_string", signature(format = "character", x = "Column"),
           function(format, x, ...) {
@@ -2475,6 +2579,7 @@ setMethod("format_string", signature(format = "character", x = "Column"),
 #' @rdname column_datetime_functions
 #'
 #' @aliases from_unixtime from_unixtime,Column-method
+#' @export
 #' @examples
 #'
 #' \dontrun{
@@ -2516,6 +2621,7 @@ setMethod("from_unixtime", signature(x = "Column"),
 #'                  \code{startTime} as \code{"15 minutes"}.
 #' @rdname column_datetime_functions
 #' @aliases window window,Column-method
+#' @export
 #' @examples
 #'
 #' \dontrun{
@@ -2566,6 +2672,7 @@ setMethod("window", signature(x = "Column"),
 #' @param pos start position of search.
 #' @rdname column_string_functions
 #' @aliases locate locate,character,Column-method
+#' @export
 #' @note locate since 1.5.0
 setMethod("locate", signature(substr = "character", str = "Column"),
           function(substr, str, pos = 1) {
@@ -2582,6 +2689,7 @@ setMethod("locate", signature(substr = "character", str = "Column"),
 #' @param pad a character string to be padded with.
 #' @rdname column_string_functions
 #' @aliases lpad lpad,Column,numeric,character-method
+#' @export
 #' @note lpad since 1.5.0
 setMethod("lpad", signature(x = "Column", len = "numeric", pad = "character"),
           function(x, len, pad) {
@@ -2594,11 +2702,11 @@ setMethod("lpad", signature(x = "Column", len = "numeric", pad = "character"),
 #' @details
 #' \code{rand}: Generates a random column with independent and identically distributed (i.i.d.)
 #' samples from U[0.0, 1.0].
-#' Note: the function is non-deterministic in general case.
 #'
 #' @rdname column_nonaggregate_functions
 #' @param seed a random seed. Can be missing.
 #' @aliases rand rand,missing-method
+#' @export
 #' @examples
 #'
 #' \dontrun{
@@ -2613,6 +2721,7 @@ setMethod("rand", signature(seed = "missing"),
 
 #' @rdname column_nonaggregate_functions
 #' @aliases rand,numeric-method
+#' @export
 #' @note rand(numeric) since 1.5.0
 setMethod("rand", signature(seed = "numeric"),
           function(seed) {
@@ -2623,10 +2732,10 @@ setMethod("rand", signature(seed = "numeric"),
 #' @details
 #' \code{randn}: Generates a column with independent and identically distributed (i.i.d.) samples
 #' from the standard normal distribution.
-#' Note: the function is non-deterministic in general case.
 #'
 #' @rdname column_nonaggregate_functions
 #' @aliases randn randn,missing-method
+#' @export
 #' @note randn since 1.5.0
 setMethod("randn", signature(seed = "missing"),
           function(seed) {
@@ -2636,6 +2745,7 @@ setMethod("randn", signature(seed = "missing"),
 
 #' @rdname column_nonaggregate_functions
 #' @aliases randn,numeric-method
+#' @export
 #' @note randn(numeric) since 1.5.0
 setMethod("randn", signature(seed = "numeric"),
           function(seed) {
@@ -2652,6 +2762,7 @@ setMethod("randn", signature(seed = "numeric"),
 #' @param idx a group index.
 #' @rdname column_string_functions
 #' @aliases regexp_extract regexp_extract,Column,character,numeric-method
+#' @export
 #' @examples
 #'
 #' \dontrun{
@@ -2680,6 +2791,7 @@ setMethod("regexp_extract",
 #' @param replacement a character string that a matched \code{pattern} is replaced with.
 #' @rdname column_string_functions
 #' @aliases regexp_replace regexp_replace,Column,character,character-method
+#' @export
 #' @note regexp_replace since 1.5.0
 setMethod("regexp_replace",
           signature(x = "Column", pattern = "character", replacement = "character"),
@@ -2695,6 +2807,7 @@ setMethod("regexp_replace",
 #'
 #' @rdname column_string_functions
 #' @aliases rpad rpad,Column,numeric,character-method
+#' @export
 #' @note rpad since 1.5.0
 setMethod("rpad", signature(x = "Column", len = "numeric", pad = "character"),
           function(x, len, pad) {
@@ -2717,6 +2830,7 @@ setMethod("rpad", signature(x = "Column", len = "numeric", pad = "character"),
 #'              counting from the right.
 #' @rdname column_string_functions
 #' @aliases substring_index substring_index,Column,character,numeric-method
+#' @export
 #' @note substring_index since 1.5.0
 setMethod("substring_index",
           signature(x = "Column", delim = "character", count = "numeric"),
@@ -2739,6 +2853,7 @@ setMethod("substring_index",
 #'                      at the same location, if any.
 #' @rdname column_string_functions
 #' @aliases translate translate,Column,character,character-method
+#' @export
 #' @note translate since 1.5.0
 setMethod("translate",
           signature(x = "Column", matchingString = "character", replaceString = "character"),
@@ -2753,6 +2868,7 @@ setMethod("translate",
 #'
 #' @rdname column_datetime_functions
 #' @aliases unix_timestamp unix_timestamp,missing,missing-method
+#' @export
 #' @note unix_timestamp since 1.5.0
 setMethod("unix_timestamp", signature(x = "missing", format = "missing"),
           function(x, format) {
@@ -2762,6 +2878,7 @@ setMethod("unix_timestamp", signature(x = "missing", format = "missing"),
 
 #' @rdname column_datetime_functions
 #' @aliases unix_timestamp,Column,missing-method
+#' @export
 #' @note unix_timestamp(Column) since 1.5.0
 setMethod("unix_timestamp", signature(x = "Column", format = "missing"),
           function(x, format) {
@@ -2771,6 +2888,7 @@ setMethod("unix_timestamp", signature(x = "Column", format = "missing"),
 
 #' @rdname column_datetime_functions
 #' @aliases unix_timestamp,Column,character-method
+#' @export
 #' @note unix_timestamp(Column, character) since 1.5.0
 setMethod("unix_timestamp", signature(x = "Column", format = "character"),
           function(x, format = "yyyy-MM-dd HH:mm:ss") {
@@ -2786,6 +2904,7 @@ setMethod("unix_timestamp", signature(x = "Column", format = "character"),
 #' @param condition the condition to test on. Must be a Column expression.
 #' @param value result expression.
 #' @aliases when when,Column-method
+#' @export
 #' @examples
 #'
 #' \dontrun{
@@ -2814,6 +2933,7 @@ setMethod("when", signature(condition = "Column", value = "ANY"),
 #' @param yes return values for \code{TRUE} elements of test.
 #' @param no return values for \code{FALSE} elements of test.
 #' @aliases ifelse ifelse,Column-method
+#' @export
 #' @note ifelse since 1.5.0
 setMethod("ifelse",
           signature(test = "Column", yes = "ANY", no = "ANY"),
@@ -2839,6 +2959,7 @@ setMethod("ifelse",
 #'
 #' @rdname column_window_functions
 #' @aliases cume_dist cume_dist,missing-method
+#' @export
 #' @note cume_dist since 1.6.0
 setMethod("cume_dist",
           signature("missing"),
@@ -2859,6 +2980,7 @@ setMethod("cume_dist",
 #'
 #' @rdname column_window_functions
 #' @aliases dense_rank dense_rank,missing-method
+#' @export
 #' @note dense_rank since 1.6.0
 setMethod("dense_rank",
           signature("missing"),
@@ -2875,6 +2997,7 @@ setMethod("dense_rank",
 #'
 #' @rdname column_window_functions
 #' @aliases lag lag,characterOrColumn-method
+#' @export
 #' @note lag since 1.6.0
 setMethod("lag",
           signature(x = "characterOrColumn"),
@@ -2899,6 +3022,7 @@ setMethod("lag",
 #'
 #' @rdname column_window_functions
 #' @aliases lead lead,characterOrColumn,numeric-method
+#' @export
 #' @note lead since 1.6.0
 setMethod("lead",
           signature(x = "characterOrColumn", offset = "numeric", defaultValue = "ANY"),
@@ -2922,6 +3046,7 @@ setMethod("lead",
 #'
 #' @rdname column_window_functions
 #' @aliases ntile ntile,numeric-method
+#' @export
 #' @note ntile since 1.6.0
 setMethod("ntile",
           signature(x = "numeric"),
@@ -2939,6 +3064,7 @@ setMethod("ntile",
 #'
 #' @rdname column_window_functions
 #' @aliases percent_rank percent_rank,missing-method
+#' @export
 #' @note percent_rank since 1.6.0
 setMethod("percent_rank",
           signature("missing"),
@@ -2959,6 +3085,7 @@ setMethod("percent_rank",
 #'
 #' @rdname column_window_functions
 #' @aliases rank rank,missing-method
+#' @export
 #' @note rank since 1.6.0
 setMethod("rank",
           signature(x = "missing"),
@@ -2969,6 +3096,7 @@ setMethod("rank",
 
 #' @rdname column_window_functions
 #' @aliases rank,ANY-method
+#' @export
 setMethod("rank",
           signature(x = "ANY"),
           function(x, ...) {
@@ -2982,6 +3110,7 @@ setMethod("rank",
 #'
 #' @rdname column_window_functions
 #' @aliases row_number row_number,missing-method
+#' @export
 #' @note row_number since 1.6.0
 setMethod("row_number",
           signature("missing"),
@@ -2996,8 +3125,10 @@ setMethod("row_number",
 #' \code{array_contains}: Returns null if the array is null, true if the array contains
 #' the value, and false otherwise.
 #'
+#' @param value a value to check for in the column.
 #' @rdname column_collection_functions
 #' @aliases array_contains array_contains,Column-method
+#' @export
 #' @note array_contains since 1.6.0
 setMethod("array_contains",
           signature(x = "Column", value = "ANY"),
@@ -3006,129 +3137,12 @@ setMethod("array_contains",
             column(jc)
           })
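
For illustration only: a minimal sketch of array_contains on an array column built with create_array, assuming an active Spark session; the data and column names are made up for the example.

    # Build an array column from two numeric columns, then test membership.
    adf <- createDataFrame(data.frame(a = c(1, 4), b = c(2, 5)))
    adf <- mutate(adf, arr = create_array(adf$a, adf$b))
    head(select(adf, array_contains(adf$arr, 1)))   # TRUE for row 1, FALSE for row 2
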
 
-#' @details
-#' \code{array_max}: Returns the maximum value of the array.
-#'
-#' @rdname column_collection_functions
-#' @aliases array_max array_max,Column-method
-#' @note array_max since 2.4.0
-setMethod("array_max",
-          signature(x = "Column"),
-          function(x) {
-            jc <- callJStatic("org.apache.spark.sql.functions", "array_max", x@jc)
-            column(jc)
-          })
-
-#' @details
-#' \code{array_min}: Returns the minimum value of the array.
-#'
-#' @rdname column_collection_functions
-#' @aliases array_min array_min,Column-method
-#' @note array_min since 2.4.0
-setMethod("array_min",
-          signature(x = "Column"),
-          function(x) {
-            jc <- callJStatic("org.apache.spark.sql.functions", "array_min", x@jc)
-            column(jc)
-          })
-
-#' @details
-#' \code{array_position}: Locates the position of the first occurrence of the given value
-#' in the given array. Returns NA if either of the arguments is NA.
-#' Note: the position is 1-based, not zero-based. Returns 0 if the given
-#' value cannot be found in the array.
-#'
-#' @rdname column_collection_functions
-#' @aliases array_position array_position,Column-method
-#' @note array_position since 2.4.0
-setMethod("array_position",
-          signature(x = "Column", value = "ANY"),
-          function(x, value) {
-            jc <- callJStatic("org.apache.spark.sql.functions", "array_position", x@jc, value)
-            column(jc)
-          })
-
-#' @details
-#' \code{array_repeat}: Creates an array containing \code{x} repeated the number of times
-#' given by \code{count}.
-#'
-#' @param count a Column or constant determining the number of repetitions.
-#' @rdname column_collection_functions
-#' @aliases array_repeat array_repeat,Column,numericOrColumn-method
-#' @note array_repeat since 2.4.0
-setMethod("array_repeat",
-          signature(x = "Column", count = "numericOrColumn"),
-          function(x, count) {
-            if (class(count) == "Column") {
-              count <- count@jc
-            } else {
-              count <- as.integer(count)
-            }
-            jc <- callJStatic("org.apache.spark.sql.functions", "array_repeat", x@jc, count)
-            column(jc)
-          })
-
-#' @details
-#' \code{array_sort}: Sorts the input array in ascending order. The elements of the input array
-#' must be orderable. NA elements will be placed at the end of the returned array.
-#'
-#' @rdname column_collection_functions
-#' @aliases array_sort array_sort,Column-method
-#' @note array_sort since 2.4.0
-setMethod("array_sort",
-          signature(x = "Column"),
-          function(x) {
-            jc <- callJStatic("org.apache.spark.sql.functions", "array_sort", x@jc)
-            column(jc)
-          })
-
-#' @details
-#' \code{arrays_overlap}: Returns true if the input arrays have at least one non-null element in
-#' common. If not and both arrays are non-empty and any of them contains a null, it returns null.
-#' It returns false otherwise.
-#'
-#' @rdname column_collection_functions
-#' @aliases arrays_overlap arrays_overlap,Column-method
-#' @note arrays_overlap since 2.4.0
-setMethod("arrays_overlap",
-          signature(x = "Column", y = "Column"),
-          function(x, y) {
-            jc <- callJStatic("org.apache.spark.sql.functions", "arrays_overlap", x@jc, y@jc)
-            column(jc)
-          })
-
-#' @details
-#' \code{flatten}: Creates a single array from an array of arrays.
-#' If a structure of nested arrays is deeper than two levels, only one level of nesting is removed.
-#'
-#' @rdname column_collection_functions
-#' @aliases flatten flatten,Column-method
-#' @note flatten since 2.4.0
-setMethod("flatten",
-          signature(x = "Column"),
-          function(x) {
-            jc <- callJStatic("org.apache.spark.sql.functions", "flatten", x@jc)
-            column(jc)
-          })
-
-#' @details
-#' \code{map_entries}: Returns an unordered array of all entries in the given map.
-#'
-#' @rdname column_collection_functions
-#' @aliases map_entries map_entries,Column-method
-#' @note map_entries since 2.4.0
-setMethod("map_entries",
-          signature(x = "Column"),
-          function(x) {
-            jc <- callJStatic("org.apache.spark.sql.functions", "map_entries", x@jc)
-            column(jc)
-         })
-
 #' @details
 #' \code{map_keys}: Returns an unordered array containing the keys of the map.
 #'
 #' @rdname column_collection_functions
 #' @aliases map_keys map_keys,Column-method
+#' @export
 #' @note map_keys since 2.3.0
 setMethod("map_keys",
           signature(x = "Column"),
@@ -3142,6 +3156,7 @@ setMethod("map_keys",
 #'
 #' @rdname column_collection_functions
 #' @aliases map_values map_values,Column-method
+#' @export
 #' @note map_values since 2.3.0
 setMethod("map_values",
           signature(x = "Column"),
@@ -3150,27 +3165,12 @@ setMethod("map_values",
             column(jc)
           })
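
For illustration only: a sketch of building a map column with create_map and unpacking it with map_keys and map_values, assuming an active Spark session and made-up data.

    # create_map pairs up key and value columns; map_keys / map_values unpack them again.
    mdf <- createDataFrame(data.frame(k = c("x", "y"), v = c(1, 2)))
    mdf <- mutate(mdf, m = create_map(mdf$k, mdf$v))
    head(select(mdf, map_keys(mdf$m), map_values(mdf$m)))
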
 
-#' @details
-#' \code{element_at}: Returns the element of the array at the given index in \code{extraction} if
-#' \code{x} is an array, or the value for the given key in \code{extraction} if \code{x} is a map.
-#' Note: the position is 1-based, not zero-based.
-#'
-#' @param extraction index to check for in array or key to check for in map
-#' @rdname column_collection_functions
-#' @aliases element_at element_at,Column-method
-#' @note element_at since 2.4.0
-setMethod("element_at",
-          signature(x = "Column", extraction = "ANY"),
-          function(x, extraction) {
-            jc <- callJStatic("org.apache.spark.sql.functions", "element_at", x@jc, extraction)
-            column(jc)
-          })
-
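
For reference only, since element_at is removed by this revert: a usage sketch of the 1-based lookup described in the removed block above, valid only on builds that still ship element_at (2.4.0-era API); the data is made up.

    # element_at uses 1-based positions for arrays (and plain key lookup for maps).
    edf <- createDataFrame(data.frame(a = c(10, 30), b = c(20, 40)))
    edf <- mutate(edf, arr = create_array(edf$a, edf$b))
    head(select(edf, element_at(edf$arr, 1)))   # first element of each array: 10, 30
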
 #' @details
 #' \code{explode}: Creates a new row for each element in the given array or map column.
 #'
 #' @rdname column_collection_functions
 #' @aliases explode explode,Column-method
+#' @export
 #' @note explode since 1.5.0
 setMethod("explode",
           signature(x = "Column"),
@@ -3184,6 +3184,7 @@ setMethod("explode",
 #'
 #' @rdname column_collection_functions
 #' @aliases size size,Column-method
+#' @export
 #' @note size since 1.5.0
 setMethod("size",
           signature(x = "Column"),
@@ -3193,31 +3194,15 @@ setMethod("size",
           })
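
For illustration only: explode and size on an array column, assuming an active Spark session and made-up data.

    # size reports the number of elements; explode turns each element into its own row.
    xdf <- createDataFrame(data.frame(a = c(1, 3), b = c(2, 4)))
    xdf <- mutate(xdf, arr = create_array(xdf$a, xdf$b))
    head(select(xdf, size(xdf$arr)))       # 2 for every row
    head(select(xdf, explode(xdf$arr)))    # one row per array element
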
 
 #' @details
-#' \code{slice}: Returns an array containing all the elements in x from the index start
-#' (or starting from the end if start is negative) with the specified length.
-#'
-#' @rdname column_collection_functions
-#' @param start an index indicating the first element occurring in the result.
-#' @param length the number of consecutive elements chosen for the result.
-#' @aliases slice slice,Column-method
-#' @note slice since 2.4.0
-setMethod("slice",
-          signature(x = "Column"),
-          function(x, start, length) {
-            jc <- callJStatic("org.apache.spark.sql.functions", "slice", x@jc, start, length)
-            column(jc)
-          })
-
-#' @details
-#' \code{sort_array}: Sorts the input array in ascending or descending order according to
-#' the natural ordering of the array elements. NA elements will be placed at the beginning of
-#' the returned array in ascending order or at the end of the returned array in descending order.
+#' \code{sort_array}: Sorts the input array in ascending or descending order according
+#' to the natural ordering of the array elements.
 #'
 #' @rdname column_collection_functions
 #' @param asc a logical flag indicating the sorting order.
 #'            TRUE, sorting is in ascending order.
 #'            FALSE, sorting is in descending order.
 #' @aliases sort_array sort_array,Column-method
+#' @export
 #' @note sort_array since 1.6.0
 setMethod("sort_array",
           signature(x = "Column"),
@@ -3232,6 +3217,7 @@ setMethod("sort_array",
 #'
 #' @rdname column_collection_functions
 #' @aliases posexplode posexplode,Column-method
+#' @export
 #' @note posexplode since 2.1.0
 setMethod("posexplode",
           signature(x = "Column"),
@@ -3246,6 +3232,7 @@ setMethod("posexplode",
 #'
 #' @rdname column_nonaggregate_functions
 #' @aliases create_array create_array,Column-method
+#' @export
 #' @note create_array since 2.3.0
 setMethod("create_array",
           signature(x = "Column"),
@@ -3266,6 +3253,7 @@ setMethod("create_array",
 #'
 #' @rdname column_nonaggregate_functions
 #' @aliases create_map create_map,Column-method
+#' @export
 #' @note create_map since 2.3.0
 setMethod("create_map",
           signature(x = "Column"),
@@ -3280,11 +3268,10 @@ setMethod("create_map",
 
 #' @details
 #' \code{collect_list}: Creates a list of objects with duplicates.
-#' Note: the function is non-deterministic because the order of collected results depends
-#' on the order of rows, which may be non-deterministic after a shuffle.
 #'
 #' @rdname column_aggregate_functions
 #' @aliases collect_list collect_list,Column-method
+#' @export
 #' @examples
 #'
 #' \dontrun{
@@ -3301,11 +3288,10 @@ setMethod("collect_list",
 
 #' @details
 #' \code{collect_set}: Creates a list of objects with duplicate elements eliminated.
-#' Note: the function is non-deterministic because the order of collected results depends
-#' on the order of rows, which may be non-deterministic after a shuffle.
 #'
 #' @rdname column_aggregate_functions
 #' @aliases collect_set collect_set,Column-method
+#' @export
 #' @note collect_set since 2.3.0
 setMethod("collect_set",
           signature(x = "Column"),
@@ -3320,6 +3306,7 @@ setMethod("collect_set",
 #'
 #' @rdname column_string_functions
 #' @aliases split_string split_string,Column-method
+#' @export
 #' @examples
 #'
 #' \dontrun{
@@ -3342,6 +3329,7 @@ setMethod("split_string",
 #' @param n number of repetitions.
 #' @rdname column_string_functions
 #' @aliases repeat_string repeat_string,Column-method
+#' @export
 #' @examples
 #'
 #' \dontrun{
@@ -3364,6 +3352,7 @@ setMethod("repeat_string",
 #'
 #' @rdname column_collection_functions
 #' @aliases explode_outer explode_outer,Column-method
+#' @export
 #' @examples
 #'
 #' \dontrun{
@@ -3388,6 +3377,7 @@ setMethod("explode_outer",
 #'
 #' @rdname column_collection_functions
 #' @aliases posexplode_outer posexplode_outer,Column-method
+#' @export
 #' @note posexplode_outer since 2.3.0
 setMethod("posexplode_outer",
           signature(x = "Column"),
@@ -3408,6 +3398,7 @@ setMethod("posexplode_outer",
 #' @name not
 #' @aliases not,Column-method
 #' @family non-aggregate functions
+#' @export
 #' @examples
 #' \dontrun{
 #' df <- createDataFrame(data.frame(
@@ -3435,6 +3426,7 @@ setMethod("not",
 #'
 #' @rdname column_aggregate_functions
 #' @aliases grouping_bit grouping_bit,Column-method
+#' @export
 #' @examples
 #'
 #' \dontrun{
@@ -3467,6 +3459,7 @@ setMethod("grouping_bit",
 #'
 #' @rdname column_aggregate_functions
 #' @aliases grouping_id grouping_id,Column-method
+#' @export
 #' @examples
 #'
 #' \dontrun{
@@ -3501,6 +3494,7 @@ setMethod("grouping_id",
 #'
 #' @rdname column_nonaggregate_functions
 #' @aliases input_file_name input_file_name,missing-method
+#' @export
 #' @examples
 #'
 #' \dontrun{
@@ -3518,6 +3512,7 @@ setMethod("input_file_name", signature("missing"),
 #'
 #' @rdname column_datetime_functions
 #' @aliases trunc trunc,Column-method
+#' @export
 #' @examples
 #'
 #' \dontrun{
@@ -3537,6 +3532,7 @@ setMethod("trunc",
 #'
 #' @rdname column_datetime_functions
 #' @aliases date_trunc date_trunc,character,Column-method
+#' @export
 #' @examples
 #'
 #' \dontrun{
@@ -3555,6 +3551,7 @@ setMethod("date_trunc",
 #'
 #' @rdname column_datetime_functions
 #' @aliases current_date current_date,missing-method
+#' @export
 #' @examples
 #' \dontrun{
 #' head(select(df, current_date(), current_timestamp()))}
@@ -3571,6 +3568,7 @@ setMethod("current_date",
 #'
 #' @rdname column_datetime_functions
 #' @aliases current_timestamp current_timestamp,missing-method
+#' @export
 #' @note current_timestamp since 2.3.0
 setMethod("current_timestamp",
           signature("missing"),
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 8894cb1c5b92f..e0dde3339fabc 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -19,6 +19,7 @@
 
 # @rdname aggregateRDD
 # @seealso reduce
+# @export
 setGeneric("aggregateRDD",
            function(x, zeroValue, seqOp, combOp) { standardGeneric("aggregateRDD") })
 
@@ -26,17 +27,21 @@ setGeneric("cacheRDD", function(x) { standardGeneric("cacheRDD") })
 
 # @rdname coalesce
 # @seealso repartition
+# @export
 setGeneric("coalesceRDD", function(x, numPartitions, ...) { standardGeneric("coalesceRDD") })
 
 # @rdname checkpoint-methods
+# @export
 setGeneric("checkpointRDD", function(x) { standardGeneric("checkpointRDD") })
 
 setGeneric("collectRDD", function(x, ...) { standardGeneric("collectRDD") })
 
 # @rdname collect-methods
+# @export
 setGeneric("collectAsMap", function(x) { standardGeneric("collectAsMap") })
 
 # @rdname collect-methods
+# @export
 setGeneric("collectPartition",
            function(x, partitionId) {
              standardGeneric("collectPartition")
@@ -47,15 +52,19 @@ setGeneric("countRDD", function(x) { standardGeneric("countRDD") })
 setGeneric("lengthRDD", function(x) { standardGeneric("lengthRDD") })
 
 # @rdname countByValue
+# @export
 setGeneric("countByValue", function(x) { standardGeneric("countByValue") })
 
 # @rdname crosstab
+# @export
 setGeneric("crosstab", function(x, col1, col2) { standardGeneric("crosstab") })
 
 # @rdname freqItems
+# @export
 setGeneric("freqItems", function(x, cols, support = 0.01) { standardGeneric("freqItems") })
 
 # @rdname approxQuantile
+# @export
 setGeneric("approxQuantile",
            function(x, cols, probabilities, relativeError) {
              standardGeneric("approxQuantile")
@@ -64,15 +73,18 @@ setGeneric("approxQuantile",
 setGeneric("distinctRDD", function(x, numPartitions = 1) { standardGeneric("distinctRDD") })
 
 # @rdname filterRDD
+# @export
 setGeneric("filterRDD", function(x, f) { standardGeneric("filterRDD") })
 
 setGeneric("firstRDD", function(x, ...) { standardGeneric("firstRDD") })
 
 # @rdname flatMap
+# @export
 setGeneric("flatMap", function(X, FUN) { standardGeneric("flatMap") })
 
 # @rdname fold
 # @seealso reduce
+# @export
 setGeneric("fold", function(x, zeroValue, op) { standardGeneric("fold") })
 
 setGeneric("foreach", function(x, func) { standardGeneric("foreach") })
@@ -83,14 +95,17 @@ setGeneric("foreachPartition", function(x, func) { standardGeneric("foreachParti
 setGeneric("getJRDD", function(rdd, ...) { standardGeneric("getJRDD") })
 
 # @rdname glom
+# @export
 setGeneric("glom", function(x) { standardGeneric("glom") })
 
 # @rdname histogram
+# @export
 setGeneric("histogram", function(df, col, nbins=10) { standardGeneric("histogram") })
 
 setGeneric("joinRDD", function(x, y, ...) { standardGeneric("joinRDD") })
 
 # @rdname keyBy
+# @export
 setGeneric("keyBy", function(x, func) { standardGeneric("keyBy") })
 
 setGeneric("lapplyPartition", function(X, FUN) { standardGeneric("lapplyPartition") })
@@ -108,37 +123,47 @@ setGeneric("mapPartitionsWithIndex",
            function(X, FUN) { standardGeneric("mapPartitionsWithIndex") })
 
 # @rdname maximum
+# @export
 setGeneric("maximum", function(x) { standardGeneric("maximum") })
 
 # @rdname minimum
+# @export
 setGeneric("minimum", function(x) { standardGeneric("minimum") })
 
 # @rdname sumRDD
+# @export
 setGeneric("sumRDD", function(x) { standardGeneric("sumRDD") })
 
 # @rdname name
+# @export
 setGeneric("name", function(x) { standardGeneric("name") })
 
 # @rdname getNumPartitionsRDD
+# @export
 setGeneric("getNumPartitionsRDD", function(x) { standardGeneric("getNumPartitionsRDD") })
 
 # @rdname getNumPartitions
+# @export
 setGeneric("numPartitions", function(x) { standardGeneric("numPartitions") })
 
 setGeneric("persistRDD", function(x, newLevel) { standardGeneric("persistRDD") })
 
 # @rdname pipeRDD
+# @export
 setGeneric("pipeRDD", function(x, command, env = list()) { standardGeneric("pipeRDD")})
 
 # @rdname pivot
+# @export
 setGeneric("pivot", function(x, colname, values = list()) { standardGeneric("pivot") })
 
 # @rdname reduce
+# @export
 setGeneric("reduce", function(x, func) { standardGeneric("reduce") })
 
 setGeneric("repartitionRDD", function(x, ...) { standardGeneric("repartitionRDD") })
 
 # @rdname sampleRDD
+# @export
 setGeneric("sampleRDD",
            function(x, withReplacement, fraction, seed) {
              standardGeneric("sampleRDD")
@@ -146,17 +171,21 @@ setGeneric("sampleRDD",
 
 # @rdname saveAsObjectFile
 # @seealso objectFile
+# @export
 setGeneric("saveAsObjectFile", function(x, path) { standardGeneric("saveAsObjectFile") })
 
 # @rdname saveAsTextFile
+# @export
 setGeneric("saveAsTextFile", function(x, path) { standardGeneric("saveAsTextFile") })
 
 # @rdname setName
+# @export
 setGeneric("setName", function(x, name) { standardGeneric("setName") })
 
 setGeneric("showRDD", function(object, ...) { standardGeneric("showRDD") })
 
 # @rdname sortBy
+# @export
 setGeneric("sortBy",
            function(x, func, ascending = TRUE, numPartitions = 1) {
              standardGeneric("sortBy")
@@ -165,71 +194,88 @@ setGeneric("sortBy",
 setGeneric("takeRDD", function(x, num) { standardGeneric("takeRDD") })
 
 # @rdname takeOrdered
+# @export
 setGeneric("takeOrdered", function(x, num) { standardGeneric("takeOrdered") })
 
 # @rdname takeSample
+# @export
 setGeneric("takeSample",
            function(x, withReplacement, num, seed) {
              standardGeneric("takeSample")
            })
 
 # @rdname top
+# @export
 setGeneric("top", function(x, num) { standardGeneric("top") })
 
 # @rdname unionRDD
+# @export
 setGeneric("unionRDD", function(x, y) { standardGeneric("unionRDD") })
 
 setGeneric("unpersistRDD", function(x, ...) { standardGeneric("unpersistRDD") })
 
 # @rdname zipRDD
+# @export
 setGeneric("zipRDD", function(x, other) { standardGeneric("zipRDD") })
 
 # @rdname zipRDD
+# @export
 setGeneric("zipPartitions", function(..., func) { standardGeneric("zipPartitions") },
            signature = "...")
 
 # @rdname zipWithIndex
 # @seealso zipWithUniqueId
+# @export
 setGeneric("zipWithIndex", function(x) { standardGeneric("zipWithIndex") })
 
 # @rdname zipWithUniqueId
 # @seealso zipWithIndex
+# @export
 setGeneric("zipWithUniqueId", function(x) { standardGeneric("zipWithUniqueId") })
 
 
 ############ Binary Functions #############
 
 # @rdname cartesian
+# @export
 setGeneric("cartesian", function(x, other) { standardGeneric("cartesian") })
 
 # @rdname countByKey
+# @export
 setGeneric("countByKey", function(x) { standardGeneric("countByKey") })
 
 # @rdname flatMapValues
+# @export
 setGeneric("flatMapValues", function(X, FUN) { standardGeneric("flatMapValues") })
 
 # @rdname intersection
+# @export
 setGeneric("intersection",
            function(x, other, numPartitions = 1) {
              standardGeneric("intersection")
            })
 
 # @rdname keys
+# @export
 setGeneric("keys", function(x) { standardGeneric("keys") })
 
 # @rdname lookup
+# @export
 setGeneric("lookup", function(x, key) { standardGeneric("lookup") })
 
 # @rdname mapValues
+# @export
 setGeneric("mapValues", function(X, FUN) { standardGeneric("mapValues") })
 
 # @rdname sampleByKey
+# @export
 setGeneric("sampleByKey",
            function(x, withReplacement, fractions, seed) {
              standardGeneric("sampleByKey")
            })
 
 # @rdname values
+# @export
 setGeneric("values", function(x) { standardGeneric("values") })
 
 
@@ -237,12 +283,14 @@ setGeneric("values", function(x) { standardGeneric("values") })
 
 # @rdname aggregateByKey
 # @seealso foldByKey, combineByKey
+# @export
 setGeneric("aggregateByKey",
            function(x, zeroValue, seqOp, combOp, numPartitions) {
              standardGeneric("aggregateByKey")
            })
 
 # @rdname cogroup
+# @export
 setGeneric("cogroup",
            function(..., numPartitions) {
              standardGeneric("cogroup")
@@ -251,6 +299,7 @@ setGeneric("cogroup",
 
 # @rdname combineByKey
 # @seealso groupByKey, reduceByKey
+# @export
 setGeneric("combineByKey",
            function(x, createCombiner, mergeValue, mergeCombiners, numPartitions) {
              standardGeneric("combineByKey")
@@ -258,53 +307,64 @@ setGeneric("combineByKey",
 
 # @rdname foldByKey
 # @seealso aggregateByKey, combineByKey
+# @export
 setGeneric("foldByKey",
            function(x, zeroValue, func, numPartitions) {
              standardGeneric("foldByKey")
            })
 
 # @rdname join-methods
+# @export
 setGeneric("fullOuterJoin", function(x, y, numPartitions) { standardGeneric("fullOuterJoin") })
 
 # @rdname groupByKey
 # @seealso reduceByKey
+# @export
 setGeneric("groupByKey", function(x, numPartitions) { standardGeneric("groupByKey") })
 
 # @rdname join-methods
+# @export
 setGeneric("join", function(x, y, ...) { standardGeneric("join") })
 
 # @rdname join-methods
+# @export
 setGeneric("leftOuterJoin", function(x, y, numPartitions) { standardGeneric("leftOuterJoin") })
 
 setGeneric("partitionByRDD", function(x, ...) { standardGeneric("partitionByRDD") })
 
 # @rdname reduceByKey
 # @seealso groupByKey
+# @export
 setGeneric("reduceByKey", function(x, combineFunc, numPartitions) { standardGeneric("reduceByKey")})
 
 # @rdname reduceByKeyLocally
 # @seealso reduceByKey
+# @export
 setGeneric("reduceByKeyLocally",
            function(x, combineFunc) {
              standardGeneric("reduceByKeyLocally")
            })
 
 # @rdname join-methods
+# @export
 setGeneric("rightOuterJoin", function(x, y, numPartitions) { standardGeneric("rightOuterJoin") })
 
 # @rdname sortByKey
+# @export
 setGeneric("sortByKey",
            function(x, ascending = TRUE, numPartitions = 1) {
              standardGeneric("sortByKey")
            })
 
 # @rdname subtract
+# @export
 setGeneric("subtract",
            function(x, other, numPartitions = 1) {
              standardGeneric("subtract")
            })
 
 # @rdname subtractByKey
+# @export
 setGeneric("subtractByKey",
            function(x, other, numPartitions = 1) {
              standardGeneric("subtractByKey")
@@ -314,6 +374,7 @@ setGeneric("subtractByKey",
 ################### Broadcast Variable Methods #################
 
 # @rdname broadcast
+# @export
 setGeneric("value", function(bcast) { standardGeneric("value") })
 
 
@@ -323,6 +384,7 @@ setGeneric("value", function(bcast) { standardGeneric("value") })
 #' @param ... further arguments to be passed to or from other methods.
 #' @return A SparkDataFrame.
 #' @rdname summarize
+#' @export
 setGeneric("agg", function(x, ...) { standardGeneric("agg") })
 
 #' alias
@@ -337,9 +399,11 @@ setGeneric("agg", function(x, ...) { standardGeneric("agg") })
 NULL
 
 #' @rdname arrange
+#' @export
 setGeneric("arrange", function(x, col, ...) { standardGeneric("arrange") })
 
 #' @rdname as.data.frame
+#' @export
 setGeneric("as.data.frame",
            function(x, row.names = NULL, optional = FALSE, ...) {
              standardGeneric("as.data.frame")
@@ -347,41 +411,52 @@ setGeneric("as.data.frame",
 
 # Do not document the generic because of signature changes across R versions
 #' @noRd
+#' @export
 setGeneric("attach")
 
 #' @rdname cache
+#' @export
 setGeneric("cache", function(x) { standardGeneric("cache") })
 
 #' @rdname checkpoint
+#' @export
 setGeneric("checkpoint", function(x, eager = TRUE) { standardGeneric("checkpoint") })
 
 #' @rdname coalesce
 #' @param x a SparkDataFrame.
 #' @param ... additional argument(s).
+#' @export
 setGeneric("coalesce", function(x, ...) { standardGeneric("coalesce") })
 
 #' @rdname collect
+#' @export
 setGeneric("collect", function(x, ...) { standardGeneric("collect") })
 
 #' @param do.NULL currently not used.
 #' @param prefix currently not used.
 #' @rdname columns
+#' @export
 setGeneric("colnames", function(x, do.NULL = TRUE, prefix = "col") { standardGeneric("colnames") })
 
 #' @rdname columns
+#' @export
 setGeneric("colnames<-", function(x, value) { standardGeneric("colnames<-") })
 
 #' @rdname coltypes
+#' @export
 setGeneric("coltypes", function(x) { standardGeneric("coltypes") })
 
 #' @rdname coltypes
+#' @export
 setGeneric("coltypes<-", function(x, value) { standardGeneric("coltypes<-") })
 
 #' @rdname columns
+#' @export
 setGeneric("columns", function(x) {standardGeneric("columns") })
 
 #' @param x a GroupedData or Column.
 #' @rdname count
+#' @export
 setGeneric("count", function(x) { standardGeneric("count") })
 
 #' @rdname cov
@@ -389,6 +464,7 @@ setGeneric("count", function(x) { standardGeneric("count") })
 #' @param ... additional argument(s). If \code{x} is a Column, a Column
 #'        should be provided. If \code{x} is a SparkDataFrame, two column names should
 #'        be provided.
+#' @export
 setGeneric("cov", function(x, ...) {standardGeneric("cov") })
 
 #' @rdname corr
@@ -396,933 +472,1125 @@ setGeneric("cov", function(x, ...) {standardGeneric("cov") })
 #' @param ... additional argument(s). If \code{x} is a Column, a Column
 #'        should be provided. If \code{x} is a SparkDataFrame, two column names should
 #'        be provided.
+#' @export
 setGeneric("corr", function(x, ...) {standardGeneric("corr") })
 
 #' @rdname cov
+#' @export
 setGeneric("covar_samp", function(col1, col2) {standardGeneric("covar_samp") })
 
 #' @rdname cov
+#' @export
 setGeneric("covar_pop", function(col1, col2) {standardGeneric("covar_pop") })
 
 #' @rdname createOrReplaceTempView
+#' @export
 setGeneric("createOrReplaceTempView",
            function(x, viewName) {
              standardGeneric("createOrReplaceTempView")
            })
 
 # @rdname crossJoin
+# @export
 setGeneric("crossJoin", function(x, y) { standardGeneric("crossJoin") })
 
 #' @rdname cube
+#' @export
 setGeneric("cube", function(x, ...) { standardGeneric("cube") })
 
 #' @rdname dapply
+#' @export
 setGeneric("dapply", function(x, func, schema) { standardGeneric("dapply") })
 
 #' @rdname dapplyCollect
+#' @export
 setGeneric("dapplyCollect", function(x, func) { standardGeneric("dapplyCollect") })
 
 #' @param x a SparkDataFrame or GroupedData.
 #' @param ... additional argument(s) passed to the method.
 #' @rdname gapply
+#' @export
 setGeneric("gapply", function(x, ...) { standardGeneric("gapply") })
 
 #' @param x a SparkDataFrame or GroupedData.
 #' @param ... additional argument(s) passed to the method.
 #' @rdname gapplyCollect
+#' @export
 setGeneric("gapplyCollect", function(x, ...) { standardGeneric("gapplyCollect") })
 
 # @rdname getNumPartitions
+# @export
 setGeneric("getNumPartitions", function(x) { standardGeneric("getNumPartitions") })
 
 #' @rdname describe
+#' @export
 setGeneric("describe", function(x, col, ...) { standardGeneric("describe") })
 
 #' @rdname distinct
+#' @export
 setGeneric("distinct", function(x) { standardGeneric("distinct") })
 
 #' @rdname drop
+#' @export
 setGeneric("drop", function(x, ...) { standardGeneric("drop") })
 
 #' @rdname dropDuplicates
+#' @export
 setGeneric("dropDuplicates", function(x, ...) { standardGeneric("dropDuplicates") })
 
 #' @rdname nafunctions
+#' @export
 setGeneric("dropna",
            function(x, how = c("any", "all"), minNonNulls = NULL, cols = NULL) {
              standardGeneric("dropna")
            })
 
 #' @rdname nafunctions
+#' @export
 setGeneric("na.omit",
            function(object, ...) {
              standardGeneric("na.omit")
            })
 
 #' @rdname dtypes
+#' @export
 setGeneric("dtypes", function(x) { standardGeneric("dtypes") })
 
 #' @rdname explain
+#' @export
 #' @param x a SparkDataFrame or a StreamingQuery.
 #' @param extended Logical. If extended is FALSE, prints only the physical plan.
 #' @param ... further arguments to be passed to or from other methods.
 setGeneric("explain", function(x, ...) { standardGeneric("explain") })
 
 #' @rdname except
+#' @export
 setGeneric("except", function(x, y) { standardGeneric("except") })
 
 #' @rdname nafunctions
+#' @export
 setGeneric("fillna", function(x, value, cols = NULL) { standardGeneric("fillna") })
 
 #' @rdname filter
+#' @export
 setGeneric("filter", function(x, condition) { standardGeneric("filter") })
 
 #' @rdname first
+#' @export
 setGeneric("first", function(x, ...) { standardGeneric("first") })
 
 #' @rdname groupBy
+#' @export
 setGeneric("group_by", function(x, ...) { standardGeneric("group_by") })
 
 #' @rdname groupBy
+#' @export
 setGeneric("groupBy", function(x, ...) { standardGeneric("groupBy") })
 
 #' @rdname hint
+#' @export
 setGeneric("hint", function(x, name, ...) { standardGeneric("hint") })
 
 #' @rdname insertInto
+#' @export
 setGeneric("insertInto", function(x, tableName, ...) { standardGeneric("insertInto") })
 
 #' @rdname intersect
+#' @export
 setGeneric("intersect", function(x, y) { standardGeneric("intersect") })
 
 #' @rdname isLocal
+#' @export
 setGeneric("isLocal", function(x) { standardGeneric("isLocal") })
 
 #' @rdname isStreaming
+#' @export
 setGeneric("isStreaming", function(x) { standardGeneric("isStreaming") })
 
 #' @rdname limit
+#' @export
 setGeneric("limit", function(x, num) {standardGeneric("limit") })
 
 #' @rdname localCheckpoint
+#' @export
 setGeneric("localCheckpoint", function(x, eager = TRUE) { standardGeneric("localCheckpoint") })
 
 #' @rdname merge
+#' @export
 setGeneric("merge")
 
 #' @rdname mutate
+#' @export
 setGeneric("mutate", function(.data, ...) {standardGeneric("mutate") })
 
 #' @rdname orderBy
+#' @export
 setGeneric("orderBy", function(x, col, ...) { standardGeneric("orderBy") })
 
 #' @rdname persist
+#' @export
 setGeneric("persist", function(x, newLevel) { standardGeneric("persist") })
 
 #' @rdname printSchema
+#' @export
 setGeneric("printSchema", function(x) { standardGeneric("printSchema") })
 
 #' @rdname registerTempTable-deprecated
+#' @export
 setGeneric("registerTempTable", function(x, tableName) { standardGeneric("registerTempTable") })
 
 #' @rdname rename
+#' @export
 setGeneric("rename", function(x, ...) { standardGeneric("rename") })
 
 #' @rdname repartition
+#' @export
 setGeneric("repartition", function(x, ...) { standardGeneric("repartition") })
 
-#' @rdname repartitionByRange
-setGeneric("repartitionByRange", function(x, ...) { standardGeneric("repartitionByRange") })
-
 #' @rdname sample
+#' @export
 setGeneric("sample",
            function(x, withReplacement = FALSE, fraction, seed) {
              standardGeneric("sample")
            })
 
 #' @rdname rollup
+#' @export
 setGeneric("rollup", function(x, ...) { standardGeneric("rollup") })
 
 #' @rdname sample
+#' @export
 setGeneric("sample_frac",
            function(x, withReplacement = FALSE, fraction, seed) { standardGeneric("sample_frac") })
 
 #' @rdname sampleBy
+#' @export
 setGeneric("sampleBy", function(x, col, fractions, seed) { standardGeneric("sampleBy") })
 
 #' @rdname saveAsTable
+#' @export
 setGeneric("saveAsTable", function(df, tableName, source = NULL, mode = "error", ...) {
   standardGeneric("saveAsTable")
 })
 
+#' @export
 setGeneric("str")
 
 #' @rdname take
+#' @export
 setGeneric("take", function(x, num) { standardGeneric("take") })
 
 #' @rdname mutate
+#' @export
 setGeneric("transform", function(`_data`, ...) {standardGeneric("transform") })
 
 #' @rdname write.df
+#' @export
 setGeneric("write.df", function(df, path = NULL, source = NULL, mode = "error", ...) {
   standardGeneric("write.df")
 })
 
 #' @rdname write.df
+#' @export
 setGeneric("saveDF", function(df, path, source = NULL, mode = "error", ...) {
   standardGeneric("saveDF")
 })
 
 #' @rdname write.jdbc
+#' @export
 setGeneric("write.jdbc", function(x, url, tableName, mode = "error", ...) {
   standardGeneric("write.jdbc")
 })
 
 #' @rdname write.json
+#' @export
 setGeneric("write.json", function(x, path, ...) { standardGeneric("write.json") })
 
 #' @rdname write.orc
+#' @export
 setGeneric("write.orc", function(x, path, ...) { standardGeneric("write.orc") })
 
 #' @rdname write.parquet
+#' @export
 setGeneric("write.parquet", function(x, path, ...) {
   standardGeneric("write.parquet")
 })
 
 #' @rdname write.parquet
+#' @export
 setGeneric("saveAsParquetFile", function(x, path) { standardGeneric("saveAsParquetFile") })
 
 #' @rdname write.stream
+#' @export
 setGeneric("write.stream", function(df, source = NULL, outputMode = NULL, ...) {
   standardGeneric("write.stream")
 })
 
 #' @rdname write.text
+#' @export
 setGeneric("write.text", function(x, path, ...) { standardGeneric("write.text") })
 
 #' @rdname schema
+#' @export
 setGeneric("schema", function(x) { standardGeneric("schema") })
 
 #' @rdname select
+#' @export
 setGeneric("select", function(x, col, ...) { standardGeneric("select") })
 
 #' @rdname selectExpr
+#' @export
 setGeneric("selectExpr", function(x, expr, ...) { standardGeneric("selectExpr") })
 
 #' @rdname showDF
+#' @export
 setGeneric("showDF", function(x, ...) { standardGeneric("showDF") })
 
 # @rdname storageLevel
+# @export
 setGeneric("storageLevel", function(x) { standardGeneric("storageLevel") })
 
 #' @rdname subset
+#' @export
 setGeneric("subset", function(x, ...) { standardGeneric("subset") })
 
 #' @rdname summarize
+#' @export
 setGeneric("summarize", function(x, ...) { standardGeneric("summarize") })
 
 #' @rdname summary
+#' @export
 setGeneric("summary", function(object, ...) { standardGeneric("summary") })
 
-setGeneric("toJSON", function(x, ...) { standardGeneric("toJSON") })
+setGeneric("toJSON", function(x) { standardGeneric("toJSON") })
 
 setGeneric("toRDD", function(x) { standardGeneric("toRDD") })
 
 #' @rdname union
+#' @export
 setGeneric("union", function(x, y) { standardGeneric("union") })
 
 #' @rdname union
+#' @export
 setGeneric("unionAll", function(x, y) { standardGeneric("unionAll") })
 
 #' @rdname unionByName
+#' @export
 setGeneric("unionByName", function(x, y) { standardGeneric("unionByName") })
 
 #' @rdname unpersist
+#' @export
 setGeneric("unpersist", function(x, ...) { standardGeneric("unpersist") })
 
 #' @rdname filter
+#' @export
 setGeneric("where", function(x, condition) { standardGeneric("where") })
 
 #' @rdname with
+#' @export
 setGeneric("with")
 
 #' @rdname withColumn
+#' @export
 setGeneric("withColumn", function(x, colName, col) { standardGeneric("withColumn") })
 
 #' @rdname rename
+#' @export
 setGeneric("withColumnRenamed",
            function(x, existingCol, newCol) { standardGeneric("withColumnRenamed") })
 
 #' @rdname withWatermark
+#' @export
 setGeneric("withWatermark", function(x, eventTime, delayThreshold) {
   standardGeneric("withWatermark")
 })
 
 #' @rdname write.df
+#' @export
 setGeneric("write.df", function(df, path = NULL, ...) { standardGeneric("write.df") })
 
 #' @rdname randomSplit
+#' @export
 setGeneric("randomSplit", function(x, weights, seed) { standardGeneric("randomSplit") })
 
 #' @rdname broadcast
+#' @export
 setGeneric("broadcast", function(x) { standardGeneric("broadcast") })
 
 ###################### Column Methods ##########################
 
 #' @rdname columnfunctions
+#' @export
 setGeneric("asc", function(x) { standardGeneric("asc") })
 
 #' @rdname between
+#' @export
 setGeneric("between", function(x, bounds) { standardGeneric("between") })
 
 #' @rdname cast
+#' @export
 setGeneric("cast", function(x, dataType) { standardGeneric("cast") })
 
 #' @rdname columnfunctions
 #' @param x a Column object.
 #' @param ... additional argument(s).
+#' @export
 setGeneric("contains", function(x, ...) { standardGeneric("contains") })
 
 #' @rdname columnfunctions
+#' @export
 setGeneric("desc", function(x) { standardGeneric("desc") })
 
 #' @rdname endsWith
+#' @export
 setGeneric("endsWith", function(x, suffix) { standardGeneric("endsWith") })
 
 #' @rdname columnfunctions
+#' @export
 setGeneric("getField", function(x, ...) { standardGeneric("getField") })
 
 #' @rdname columnfunctions
+#' @export
 setGeneric("getItem", function(x, ...) { standardGeneric("getItem") })
 
 #' @rdname columnfunctions
+#' @export
 setGeneric("isNaN", function(x) { standardGeneric("isNaN") })
 
 #' @rdname columnfunctions
+#' @export
 setGeneric("isNull", function(x) { standardGeneric("isNull") })
 
 #' @rdname columnfunctions
+#' @export
 setGeneric("isNotNull", function(x) { standardGeneric("isNotNull") })
 
 #' @rdname columnfunctions
+#' @export
 setGeneric("like", function(x, ...) { standardGeneric("like") })
 
 #' @rdname columnfunctions
+#' @export
 setGeneric("rlike", function(x, ...) { standardGeneric("rlike") })
 
 #' @rdname startsWith
+#' @export
 setGeneric("startsWith", function(x, prefix) { standardGeneric("startsWith") })
 
 #' @rdname column_nonaggregate_functions
+#' @export
 #' @name NULL
 setGeneric("when", function(condition, value) { standardGeneric("when") })
 
 #' @rdname otherwise
+#' @export
 setGeneric("otherwise", function(x, value) { standardGeneric("otherwise") })
 
 #' @rdname over
+#' @export
 setGeneric("over", function(x, window) { standardGeneric("over") })
 
 #' @rdname eq_null_safe
+#' @export
 setGeneric("%<=>%", function(x, value) { standardGeneric("%<=>%") })
 
 ###################### WindowSpec Methods ##########################
 
 #' @rdname partitionBy
+#' @export
 setGeneric("partitionBy", function(x, ...) { standardGeneric("partitionBy") })
 
 #' @rdname rowsBetween
+#' @export
 setGeneric("rowsBetween", function(x, start, end) { standardGeneric("rowsBetween") })
 
 #' @rdname rangeBetween
+#' @export
 setGeneric("rangeBetween", function(x, start, end) { standardGeneric("rangeBetween") })
 
 #' @rdname windowPartitionBy
+#' @export
 setGeneric("windowPartitionBy", function(col, ...) { standardGeneric("windowPartitionBy") })
 
 #' @rdname windowOrderBy
+#' @export
 setGeneric("windowOrderBy", function(col, ...) { standardGeneric("windowOrderBy") })
 
 ###################### Expression Function Methods ##########################
 
 #' @rdname column_datetime_diff_functions
+#' @export
 #' @name NULL
 setGeneric("add_months", function(y, x) { standardGeneric("add_months") })
 
 #' @rdname column_aggregate_functions
+#' @export
 #' @name NULL
 setGeneric("approxCountDistinct", function(x, ...) { standardGeneric("approxCountDistinct") })
 
 #' @rdname column_collection_functions
+#' @export
 #' @name NULL
 setGeneric("array_contains", function(x, value) { standardGeneric("array_contains") })
 
-#' @rdname column_collection_functions
-#' @name NULL
-setGeneric("array_max", function(x) { standardGeneric("array_max") })
-
-#' @rdname column_collection_functions
-#' @name NULL
-setGeneric("array_min", function(x) { standardGeneric("array_min") })
-
-#' @rdname column_collection_functions
-#' @name NULL
-setGeneric("array_position", function(x, value) { standardGeneric("array_position") })
-
-#' @rdname column_collection_functions
-#' @name NULL
-setGeneric("array_repeat", function(x, count) { standardGeneric("array_repeat") })
-
-#' @rdname column_collection_functions
-#' @name NULL
-setGeneric("array_sort", function(x) { standardGeneric("array_sort") })
-
-#' @rdname column_collection_functions
-#' @name NULL
-setGeneric("arrays_overlap", function(x, y) { standardGeneric("arrays_overlap") })
-
 #' @rdname column_string_functions
+#' @export
 #' @name NULL
 setGeneric("ascii", function(x) { standardGeneric("ascii") })
 
 #' @param x Column to compute on or a GroupedData object.
 #' @param ... additional argument(s) when \code{x} is a GroupedData object.
 #' @rdname avg
+#' @export
 setGeneric("avg", function(x, ...) { standardGeneric("avg") })
 
 #' @rdname column_string_functions
+#' @export
 #' @name NULL
 setGeneric("base64", function(x) { standardGeneric("base64") })
 
 #' @rdname column_math_functions
+#' @export
 #' @name NULL
 setGeneric("bin", function(x) { standardGeneric("bin") })
 
 #' @rdname column_nonaggregate_functions
+#' @export
 #' @name NULL
 setGeneric("bitwiseNOT", function(x) { standardGeneric("bitwiseNOT") })
 
 #' @rdname column_math_functions
+#' @export
 #' @name NULL
 setGeneric("bround", function(x, ...) { standardGeneric("bround") })
 
 #' @rdname column_math_functions
+#' @export
 #' @name NULL
 setGeneric("cbrt", function(x) { standardGeneric("cbrt") })
 
 #' @rdname column_math_functions
+#' @export
 #' @name NULL
 setGeneric("ceil", function(x) { standardGeneric("ceil") })
 
 #' @rdname column_aggregate_functions
+#' @export
 #' @name NULL
 setGeneric("collect_list", function(x) { standardGeneric("collect_list") })
 
 #' @rdname column_aggregate_functions
+#' @export
 #' @name NULL
 setGeneric("collect_set", function(x) { standardGeneric("collect_set") })
 
 #' @rdname column
+#' @export
 setGeneric("column", function(x) { standardGeneric("column") })
 
-#' @rdname column_collection_functions
+#' @rdname column_string_functions
+#' @export
 #' @name NULL
 setGeneric("concat", function(x, ...) { standardGeneric("concat") })
 
 #' @rdname column_string_functions
+#' @export
 #' @name NULL
 setGeneric("concat_ws", function(sep, x, ...) { standardGeneric("concat_ws") })
 
 #' @rdname column_math_functions
+#' @export
 #' @name NULL
 setGeneric("conv", function(x, fromBase, toBase) { standardGeneric("conv") })
 
 #' @rdname column_aggregate_functions
+#' @export
 #' @name NULL
 setGeneric("countDistinct", function(x, ...) { standardGeneric("countDistinct") })
 
 #' @rdname column_misc_functions
+#' @export
 #' @name NULL
 setGeneric("crc32", function(x) { standardGeneric("crc32") })
 
 #' @rdname column_nonaggregate_functions
+#' @export
 #' @name NULL
 setGeneric("create_array", function(x, ...) { standardGeneric("create_array") })
 
 #' @rdname column_nonaggregate_functions
+#' @export
 #' @name NULL
 setGeneric("create_map", function(x, ...) { standardGeneric("create_map") })
 
 #' @rdname column_misc_functions
+#' @export
 #' @name NULL
 setGeneric("hash", function(x, ...) { standardGeneric("hash") })
 
 #' @rdname column_window_functions
+#' @export
 #' @name NULL
 setGeneric("cume_dist", function(x = "missing") { standardGeneric("cume_dist") })
 
 #' @rdname column_datetime_functions
+#' @export
 #' @name NULL
 setGeneric("current_date", function(x = "missing") { standardGeneric("current_date") })
 
 #' @rdname column_datetime_functions
+#' @export
 #' @name NULL
 setGeneric("current_timestamp", function(x = "missing") { standardGeneric("current_timestamp") })
 
 
 #' @rdname column_datetime_diff_functions
+#' @export
 #' @name NULL
 setGeneric("datediff", function(y, x) { standardGeneric("datediff") })
 
 #' @rdname column_datetime_diff_functions
+#' @export
 #' @name NULL
 setGeneric("date_add", function(y, x) { standardGeneric("date_add") })
 
 #' @rdname column_datetime_diff_functions
+#' @export
 #' @name NULL
 setGeneric("date_format", function(y, x) { standardGeneric("date_format") })
 
 #' @rdname column_datetime_diff_functions
+#' @export
 #' @name NULL
 setGeneric("date_sub", function(y, x) { standardGeneric("date_sub") })
 
 #' @rdname column_datetime_functions
+#' @export
 #' @name NULL
 setGeneric("date_trunc", function(format, x) { standardGeneric("date_trunc") })
 
 #' @rdname column_datetime_functions
+#' @export
 #' @name NULL
 setGeneric("dayofmonth", function(x) { standardGeneric("dayofmonth") })
 
 #' @rdname column_datetime_functions
+#' @export
 #' @name NULL
 setGeneric("dayofweek", function(x) { standardGeneric("dayofweek") })
 
 #' @rdname column_datetime_functions
+#' @export
 #' @name NULL
 setGeneric("dayofyear", function(x) { standardGeneric("dayofyear") })
 
 #' @rdname column_string_functions
+#' @export
 #' @name NULL
 setGeneric("decode", function(x, charset) { standardGeneric("decode") })
 
 #' @rdname column_window_functions
+#' @export
 #' @name NULL
 setGeneric("dense_rank", function(x = "missing") { standardGeneric("dense_rank") })
 
-#' @rdname column_collection_functions
-#' @name NULL
-setGeneric("element_at", function(x, extraction) { standardGeneric("element_at") })
-
 #' @rdname column_string_functions
+#' @export
 #' @name NULL
 setGeneric("encode", function(x, charset) { standardGeneric("encode") })
 
 #' @rdname column_collection_functions
+#' @export
 #' @name NULL
 setGeneric("explode", function(x) { standardGeneric("explode") })
 
 #' @rdname column_collection_functions
+#' @export
 #' @name NULL
 setGeneric("explode_outer", function(x) { standardGeneric("explode_outer") })
 
 #' @rdname column_nonaggregate_functions
+#' @export
 #' @name NULL
 setGeneric("expr", function(x) { standardGeneric("expr") })
 
-#' @rdname column_collection_functions
-#' @name NULL
-setGeneric("flatten", function(x) { standardGeneric("flatten") })
-
 #' @rdname column_datetime_diff_functions
+#' @export
 #' @name NULL
 setGeneric("from_utc_timestamp", function(y, x) { standardGeneric("from_utc_timestamp") })
 
 #' @rdname column_string_functions
+#' @export
 #' @name NULL
 setGeneric("format_number", function(y, x) { standardGeneric("format_number") })
 
 #' @rdname column_string_functions
+#' @export
 #' @name NULL
 setGeneric("format_string", function(format, x, ...) { standardGeneric("format_string") })
 
 #' @rdname column_collection_functions
+#' @export
 #' @name NULL
 setGeneric("from_json", function(x, schema, ...) { standardGeneric("from_json") })
 
 #' @rdname column_datetime_functions
+#' @export
 #' @name NULL
 setGeneric("from_unixtime", function(x, ...) { standardGeneric("from_unixtime") })
 
 #' @rdname column_nonaggregate_functions
+#' @export
 #' @name NULL
 setGeneric("greatest", function(x, ...) { standardGeneric("greatest") })
 
 #' @rdname column_aggregate_functions
+#' @export
 #' @name NULL
 setGeneric("grouping_bit", function(x) { standardGeneric("grouping_bit") })
 
 #' @rdname column_aggregate_functions
+#' @export
 #' @name NULL
 setGeneric("grouping_id", function(x, ...) { standardGeneric("grouping_id") })
 
 #' @rdname column_math_functions
+#' @export
 #' @name NULL
 setGeneric("hex", function(x) { standardGeneric("hex") })
 
 #' @rdname column_datetime_functions
+#' @export
 #' @name NULL
 setGeneric("hour", function(x) { standardGeneric("hour") })
 
 #' @rdname column_math_functions
+#' @export
 #' @name NULL
 setGeneric("hypot", function(y, x) { standardGeneric("hypot") })
 
 #' @rdname column_string_functions
+#' @export
 #' @name NULL
 setGeneric("initcap", function(x) { standardGeneric("initcap") })
 
 #' @rdname column_nonaggregate_functions
+#' @export
 #' @name NULL
 setGeneric("input_file_name",
            function(x = "missing") { standardGeneric("input_file_name") })
 
 #' @rdname column_string_functions
+#' @export
 #' @name NULL
 setGeneric("instr", function(y, x) { standardGeneric("instr") })
 
 #' @rdname column_nonaggregate_functions
+#' @export
 #' @name NULL
 setGeneric("isnan", function(x) { standardGeneric("isnan") })
 
 #' @rdname column_aggregate_functions
+#' @export
 #' @name NULL
 setGeneric("kurtosis", function(x) { standardGeneric("kurtosis") })
 
 #' @rdname column_window_functions
+#' @export
 #' @name NULL
 setGeneric("lag", function(x, ...) { standardGeneric("lag") })
 
 #' @rdname last
+#' @export
 setGeneric("last", function(x, ...) { standardGeneric("last") })
 
 #' @rdname column_datetime_functions
+#' @export
 #' @name NULL
 setGeneric("last_day", function(x) { standardGeneric("last_day") })
 
 #' @rdname column_window_functions
+#' @export
 #' @name NULL
 setGeneric("lead", function(x, offset, defaultValue = NULL) { standardGeneric("lead") })
 
 #' @rdname column_nonaggregate_functions
+#' @export
 #' @name NULL
 setGeneric("least", function(x, ...) { standardGeneric("least") })
 
 #' @rdname column_string_functions
+#' @export
 #' @name NULL
 setGeneric("levenshtein", function(y, x) { standardGeneric("levenshtein") })
 
 #' @rdname column_nonaggregate_functions
+#' @export
 #' @name NULL
 setGeneric("lit", function(x) { standardGeneric("lit") })
 
 #' @rdname column_string_functions
+#' @export
 #' @name NULL
 setGeneric("locate", function(substr, str, ...) { standardGeneric("locate") })
 
 #' @rdname column_string_functions
+#' @export
 #' @name NULL
 setGeneric("lower", function(x) { standardGeneric("lower") })
 
 #' @rdname column_string_functions
+#' @export
 #' @name NULL
 setGeneric("lpad", function(x, len, pad) { standardGeneric("lpad") })
 
 #' @rdname column_string_functions
+#' @export
 #' @name NULL
 setGeneric("ltrim", function(x, trimString) { standardGeneric("ltrim") })
 
 #' @rdname column_collection_functions
-#' @name NULL
-setGeneric("map_entries", function(x) { standardGeneric("map_entries") })
-
-#' @rdname column_collection_functions
+#' @export
 #' @name NULL
 setGeneric("map_keys", function(x) { standardGeneric("map_keys") })
 
 #' @rdname column_collection_functions
+#' @export
 #' @name NULL
 setGeneric("map_values", function(x) { standardGeneric("map_values") })
 
 #' @rdname column_misc_functions
+#' @export
 #' @name NULL
 setGeneric("md5", function(x) { standardGeneric("md5") })
 
 #' @rdname column_datetime_functions
+#' @export
 #' @name NULL
 setGeneric("minute", function(x) { standardGeneric("minute") })
 
 #' @rdname column_nonaggregate_functions
+#' @export
 #' @name NULL
 setGeneric("monotonically_increasing_id",
            function(x = "missing") { standardGeneric("monotonically_increasing_id") })
 
 #' @rdname column_datetime_functions
+#' @export
 #' @name NULL
 setGeneric("month", function(x) { standardGeneric("month") })
 
 #' @rdname column_datetime_diff_functions
+#' @export
 #' @name NULL
 setGeneric("months_between", function(y, x) { standardGeneric("months_between") })
 
 #' @rdname count
+#' @export
 setGeneric("n", function(x) { standardGeneric("n") })
 
 #' @rdname column_nonaggregate_functions
+#' @export
 #' @name NULL
 setGeneric("nanvl", function(y, x) { standardGeneric("nanvl") })
 
 #' @rdname column_nonaggregate_functions
+#' @export
 #' @name NULL
 setGeneric("negate", function(x) { standardGeneric("negate") })
 
 #' @rdname not
+#' @export
 setGeneric("not", function(x) { standardGeneric("not") })
 
 #' @rdname column_datetime_diff_functions
+#' @export
 #' @name NULL
 setGeneric("next_day", function(y, x) { standardGeneric("next_day") })
 
 #' @rdname column_window_functions
+#' @export
 #' @name NULL
 setGeneric("ntile", function(x) { standardGeneric("ntile") })
 
 #' @rdname column_aggregate_functions
+#' @export
 #' @name NULL
 setGeneric("n_distinct", function(x, ...) { standardGeneric("n_distinct") })
 
 #' @rdname column_window_functions
+#' @export
 #' @name NULL
 setGeneric("percent_rank", function(x = "missing") { standardGeneric("percent_rank") })
 
 #' @rdname column_math_functions
+#' @export
 #' @name NULL
 setGeneric("pmod", function(y, x) { standardGeneric("pmod") })
 
 #' @rdname column_collection_functions
+#' @export
 #' @name NULL
 setGeneric("posexplode", function(x) { standardGeneric("posexplode") })
 
 #' @rdname column_collection_functions
+#' @export
 #' @name NULL
 setGeneric("posexplode_outer", function(x) { standardGeneric("posexplode_outer") })
 
 #' @rdname column_datetime_functions
+#' @export
 #' @name NULL
 setGeneric("quarter", function(x) { standardGeneric("quarter") })
 
 #' @rdname column_nonaggregate_functions
+#' @export
 #' @name NULL
 setGeneric("rand", function(seed) { standardGeneric("rand") })
 
 #' @rdname column_nonaggregate_functions
+#' @export
 #' @name NULL
 setGeneric("randn", function(seed) { standardGeneric("randn") })
 
 #' @rdname column_window_functions
+#' @export
 #' @name NULL
 setGeneric("rank", function(x, ...) { standardGeneric("rank") })
 
 #' @rdname column_string_functions
+#' @export
 #' @name NULL
 setGeneric("regexp_extract", function(x, pattern, idx) { standardGeneric("regexp_extract") })
 
 #' @rdname column_string_functions
+#' @export
 #' @name NULL
 setGeneric("regexp_replace",
            function(x, pattern, replacement) { standardGeneric("regexp_replace") })
 
 #' @rdname column_string_functions
+#' @export
 #' @name NULL
 setGeneric("repeat_string", function(x, n) { standardGeneric("repeat_string") })
 
-#' @rdname column_collection_functions
+#' @rdname column_string_functions
+#' @export
 #' @name NULL
 setGeneric("reverse", function(x) { standardGeneric("reverse") })
 
 #' @rdname column_math_functions
+#' @export
 #' @name NULL
 setGeneric("rint", function(x) { standardGeneric("rint") })
 
 #' @rdname column_window_functions
+#' @export
 #' @name NULL
 setGeneric("row_number", function(x = "missing") { standardGeneric("row_number") })
 
 #' @rdname column_string_functions
+#' @export
 #' @name NULL
 setGeneric("rpad", function(x, len, pad) { standardGeneric("rpad") })
 
 #' @rdname column_string_functions
+#' @export
 #' @name NULL
 setGeneric("rtrim", function(x, trimString) { standardGeneric("rtrim") })
 
 #' @rdname column_aggregate_functions
+#' @export
 #' @name NULL
 setGeneric("sd", function(x, na.rm = FALSE) { standardGeneric("sd") })
 
 #' @rdname column_datetime_functions
+#' @export
 #' @name NULL
 setGeneric("second", function(x) { standardGeneric("second") })
 
 #' @rdname column_misc_functions
+#' @export
 #' @name NULL
 setGeneric("sha1", function(x) { standardGeneric("sha1") })
 
 #' @rdname column_misc_functions
+#' @export
 #' @name NULL
 setGeneric("sha2", function(y, x) { standardGeneric("sha2") })
 
 #' @rdname column_math_functions
+#' @export
 #' @name NULL
 setGeneric("shiftLeft", function(y, x) { standardGeneric("shiftLeft") })
 
 #' @rdname column_math_functions
+#' @export
 #' @name NULL
 setGeneric("shiftRight", function(y, x) { standardGeneric("shiftRight") })
 
 #' @rdname column_math_functions
+#' @export
 #' @name NULL
 setGeneric("shiftRightUnsigned", function(y, x) { standardGeneric("shiftRightUnsigned") })
 
 #' @rdname column_math_functions
+#' @export
 #' @name NULL
 setGeneric("signum", function(x) { standardGeneric("signum") })
 
 #' @rdname column_collection_functions
+#' @export
 #' @name NULL
 setGeneric("size", function(x) { standardGeneric("size") })
 
 #' @rdname column_aggregate_functions
+#' @export
 #' @name NULL
 setGeneric("skewness", function(x) { standardGeneric("skewness") })
 
 #' @rdname column_collection_functions
-#' @name NULL
-setGeneric("slice", function(x, start, length) { standardGeneric("slice") })
-
-#' @rdname column_collection_functions
+#' @export
 #' @name NULL
 setGeneric("sort_array", function(x, asc = TRUE) { standardGeneric("sort_array") })
 
 #' @rdname column_string_functions
+#' @export
 #' @name NULL
 setGeneric("split_string", function(x, pattern) { standardGeneric("split_string") })
 
 #' @rdname column_string_functions
+#' @export
 #' @name NULL
 setGeneric("soundex", function(x) { standardGeneric("soundex") })
 
 #' @rdname column_nonaggregate_functions
+#' @export
 #' @name NULL
 setGeneric("spark_partition_id", function(x = "missing") { standardGeneric("spark_partition_id") })
 
 #' @rdname column_aggregate_functions
+#' @export
 #' @name NULL
 setGeneric("stddev", function(x) { standardGeneric("stddev") })
 
 #' @rdname column_aggregate_functions
+#' @export
 #' @name NULL
 setGeneric("stddev_pop", function(x) { standardGeneric("stddev_pop") })
 
 #' @rdname column_aggregate_functions
+#' @export
 #' @name NULL
 setGeneric("stddev_samp", function(x) { standardGeneric("stddev_samp") })
 
 #' @rdname column_nonaggregate_functions
+#' @export
 #' @name NULL
 setGeneric("struct", function(x, ...) { standardGeneric("struct") })
 
 #' @rdname column_string_functions
+#' @export
 #' @name NULL
 setGeneric("substring_index", function(x, delim, count) { standardGeneric("substring_index") })
 
 #' @rdname column_aggregate_functions
+#' @export
 #' @name NULL
 setGeneric("sumDistinct", function(x) { standardGeneric("sumDistinct") })
 
 #' @rdname column_math_functions
+#' @export
 #' @name NULL
 setGeneric("toDegrees", function(x) { standardGeneric("toDegrees") })
 
 #' @rdname column_math_functions
+#' @export
 #' @name NULL
 setGeneric("toRadians", function(x) { standardGeneric("toRadians") })
 
 #' @rdname column_datetime_functions
+#' @export
 #' @name NULL
 setGeneric("to_date", function(x, format) { standardGeneric("to_date") })
 
 #' @rdname column_collection_functions
+#' @export
 #' @name NULL
 setGeneric("to_json", function(x, ...) { standardGeneric("to_json") })
 
 #' @rdname column_datetime_functions
+#' @export
 #' @name NULL
 setGeneric("to_timestamp", function(x, format) { standardGeneric("to_timestamp") })
 
 #' @rdname column_datetime_diff_functions
+#' @export
 #' @name NULL
 setGeneric("to_utc_timestamp", function(y, x) { standardGeneric("to_utc_timestamp") })
 
 #' @rdname column_string_functions
+#' @export
 #' @name NULL
 setGeneric("translate", function(x, matchingString, replaceString) { standardGeneric("translate") })
 
 #' @rdname column_string_functions
+#' @export
 #' @name NULL
 setGeneric("trim", function(x, trimString) { standardGeneric("trim") })
 
 #' @rdname column_string_functions
+#' @export
 #' @name NULL
 setGeneric("unbase64", function(x) { standardGeneric("unbase64") })
 
 #' @rdname column_math_functions
+#' @export
 #' @name NULL
 setGeneric("unhex", function(x) { standardGeneric("unhex") })
 
 #' @rdname column_datetime_functions
+#' @export
 #' @name NULL
 setGeneric("unix_timestamp", function(x, format) { standardGeneric("unix_timestamp") })
 
 #' @rdname column_string_functions
+#' @export
 #' @name NULL
 setGeneric("upper", function(x) { standardGeneric("upper") })
 
 #' @rdname column_aggregate_functions
+#' @export
 #' @name NULL
 setGeneric("var", function(x, y = NULL, na.rm = FALSE, use) { standardGeneric("var") })
 
 #' @rdname column_aggregate_functions
+#' @export
 #' @name NULL
 setGeneric("variance", function(x) { standardGeneric("variance") })
 
 #' @rdname column_aggregate_functions
+#' @export
 #' @name NULL
 setGeneric("var_pop", function(x) { standardGeneric("var_pop") })
 
 #' @rdname column_aggregate_functions
+#' @export
 #' @name NULL
 setGeneric("var_samp", function(x) { standardGeneric("var_samp") })
 
 #' @rdname column_datetime_functions
+#' @export
 #' @name NULL
 setGeneric("weekofyear", function(x) { standardGeneric("weekofyear") })
 
 #' @rdname column_datetime_functions
+#' @export
 #' @name NULL
 setGeneric("window", function(x, ...) { standardGeneric("window") })
 
 #' @rdname column_datetime_functions
+#' @export
 #' @name NULL
 setGeneric("year", function(x) { standardGeneric("year") })
 
@@ -1330,110 +1598,142 @@ setGeneric("year", function(x) { standardGeneric("year") })
 ###################### Spark.ML Methods ##########################
 
 #' @rdname fitted
+#' @export
 setGeneric("fitted")
 
 # Do not carry stats::glm usage and param here, and do not document the generic
+#' @export
 #' @noRd
 setGeneric("glm")
 
 #' @param object a fitted ML model object.
 #' @param ... additional argument(s) passed to the method.
 #' @rdname predict
+#' @export
 setGeneric("predict", function(object, ...) { standardGeneric("predict") })
 
 #' @rdname rbind
+#' @export
 setGeneric("rbind", signature = "...")
 
 #' @rdname spark.als
+#' @export
 setGeneric("spark.als", function(data, ...) { standardGeneric("spark.als") })
 
 #' @rdname spark.bisectingKmeans
+#' @export
 setGeneric("spark.bisectingKmeans",
            function(data, formula, ...) { standardGeneric("spark.bisectingKmeans") })
 
 #' @rdname spark.gaussianMixture
+#' @export
 setGeneric("spark.gaussianMixture",
            function(data, formula, ...) { standardGeneric("spark.gaussianMixture") })
 
 #' @rdname spark.gbt
+#' @export
 setGeneric("spark.gbt", function(data, formula, ...) { standardGeneric("spark.gbt") })
 
 #' @rdname spark.glm
+#' @export
 setGeneric("spark.glm", function(data, formula, ...) { standardGeneric("spark.glm") })
 
 #' @rdname spark.isoreg
+#' @export
 setGeneric("spark.isoreg", function(data, formula, ...) { standardGeneric("spark.isoreg") })
 
 #' @rdname spark.kmeans
+#' @export
 setGeneric("spark.kmeans", function(data, formula, ...) { standardGeneric("spark.kmeans") })
 
 #' @rdname spark.kstest
+#' @export
 setGeneric("spark.kstest", function(data, ...) { standardGeneric("spark.kstest") })
 
 #' @rdname spark.lda
+#' @export
 setGeneric("spark.lda", function(data, ...) { standardGeneric("spark.lda") })
 
 #' @rdname spark.logit
+#' @export
 setGeneric("spark.logit", function(data, formula, ...) { standardGeneric("spark.logit") })
 
 #' @rdname spark.mlp
+#' @export
 setGeneric("spark.mlp", function(data, formula, ...) { standardGeneric("spark.mlp") })
 
 #' @rdname spark.naiveBayes
+#' @export
 setGeneric("spark.naiveBayes", function(data, formula, ...) { standardGeneric("spark.naiveBayes") })
 
 #' @rdname spark.decisionTree
+#' @export
 setGeneric("spark.decisionTree",
            function(data, formula, ...) { standardGeneric("spark.decisionTree") })
 
 #' @rdname spark.randomForest
+#' @export
 setGeneric("spark.randomForest",
            function(data, formula, ...) { standardGeneric("spark.randomForest") })
 
 #' @rdname spark.survreg
+#' @export
 setGeneric("spark.survreg", function(data, formula, ...) { standardGeneric("spark.survreg") })
 
 #' @rdname spark.svmLinear
+#' @export
 setGeneric("spark.svmLinear", function(data, formula, ...) { standardGeneric("spark.svmLinear") })
 
 #' @rdname spark.lda
+#' @export
 setGeneric("spark.posterior", function(object, newData) { standardGeneric("spark.posterior") })
 
 #' @rdname spark.lda
+#' @export
 setGeneric("spark.perplexity", function(object, data) { standardGeneric("spark.perplexity") })
 
 #' @rdname spark.fpGrowth
+#' @export
 setGeneric("spark.fpGrowth", function(data, ...) { standardGeneric("spark.fpGrowth") })
 
 #' @rdname spark.fpGrowth
+#' @export
 setGeneric("spark.freqItemsets", function(object) { standardGeneric("spark.freqItemsets") })
 
 #' @rdname spark.fpGrowth
+#' @export
 setGeneric("spark.associationRules", function(object) { standardGeneric("spark.associationRules") })
 
 #' @param object a fitted ML model object.
 #' @param path the directory where the model is saved.
 #' @param ... additional argument(s) passed to the method.
 #' @rdname write.ml
+#' @export
 setGeneric("write.ml", function(object, path, ...) { standardGeneric("write.ml") })
 
 
 ###################### Streaming Methods ##########################
 
 #' @rdname awaitTermination
+#' @export
 setGeneric("awaitTermination", function(x, timeout = NULL) { standardGeneric("awaitTermination") })
 
 #' @rdname isActive
+#' @export
 setGeneric("isActive", function(x) { standardGeneric("isActive") })
 
 #' @rdname lastProgress
+#' @export
 setGeneric("lastProgress", function(x) { standardGeneric("lastProgress") })
 
 #' @rdname queryName
+#' @export
 setGeneric("queryName", function(x) { standardGeneric("queryName") })
 
 #' @rdname status
+#' @export
 setGeneric("status", function(x) { standardGeneric("status") })
 
 #' @rdname stopQuery
+#' @export
 setGeneric("stopQuery", function(x) { standardGeneric("stopQuery") })
diff --git a/R/pkg/R/group.R b/R/pkg/R/group.R
index f751b952f3915..54ef9f07d6fae 100644
--- a/R/pkg/R/group.R
+++ b/R/pkg/R/group.R
@@ -30,6 +30,7 @@ setOldClass("jobj")
 #' @seealso groupBy
 #'
 #' @param sgd A Java object reference to the backing Scala GroupedData
+#' @export
 #' @note GroupedData since 1.4.0
 setClass("GroupedData",
          slots = list(sgd = "jobj"))
@@ -47,6 +48,7 @@ groupedData <- function(sgd) {
 
 #' @rdname show
 #' @aliases show,GroupedData-method
+#' @export
 #' @note show(GroupedData) since 1.4.0
 setMethod("show", "GroupedData",
           function(object) {
@@ -61,6 +63,7 @@ setMethod("show", "GroupedData",
 #' @return A SparkDataFrame.
 #' @rdname count
 #' @aliases count,GroupedData-method
+#' @export
 #' @examples
 #' \dontrun{
 #'   count(groupBy(df, "name"))
@@ -84,6 +87,7 @@ setMethod("count",
 #' @aliases agg,GroupedData-method
 #' @name agg
 #' @family agg_funcs
+#' @export
 #' @examples
 #' \dontrun{
 #'  df2 <- agg(df, age = "sum")  # new column name will be created as 'SUM(age#0)'
@@ -146,6 +150,7 @@ methods <- c("avg", "max", "mean", "min", "sum")
 #' @rdname pivot
 #' @aliases pivot,GroupedData,character-method
 #' @name pivot
+#' @export
 #' @examples
 #' \dontrun{
 #' df <- createDataFrame(data.frame(
@@ -197,6 +202,7 @@ createMethods()
 #' @rdname gapply
 #' @aliases gapply,GroupedData-method
 #' @name gapply
+#' @export
 #' @note gapply(GroupedData) since 2.0.0
 setMethod("gapply",
           signature(x = "GroupedData"),
@@ -210,6 +216,7 @@ setMethod("gapply",
 #' @rdname gapplyCollect
 #' @aliases gapplyCollect,GroupedData-method
 #' @name gapplyCollect
+#' @export
 #' @note gapplyCollect(GroupedData) since 2.0.0
 setMethod("gapplyCollect",
           signature(x = "GroupedData"),
diff --git a/R/pkg/R/install.R b/R/pkg/R/install.R
index 6d1edf6b6f3cf..04dc7562e5346 100644
--- a/R/pkg/R/install.R
+++ b/R/pkg/R/install.R
@@ -58,6 +58,7 @@
 #' @rdname install.spark
 #' @name install.spark
 #' @aliases install.spark
+#' @export
 #' @examples
 #'\dontrun{
 #' install.spark()
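
install.spark() downloads and caches a local Spark distribution for running SparkR outside an existing Spark installation. A hedged sketch with its optional arguments (hadoopVersion and overwrite are existing parameters; the values here are only illustrative):

    # pick a Hadoop build and keep any previously cached copy
    install.spark(hadoopVersion = "2.7", overwrite = FALSE)
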
diff --git a/R/pkg/R/jvm.R b/R/pkg/R/jvm.R
index 9a1b26b0fa3c5..bb5c77544a3da 100644
--- a/R/pkg/R/jvm.R
+++ b/R/pkg/R/jvm.R
@@ -35,6 +35,7 @@
 #' @param ... parameters to pass to the Java method.
 #' @return the return value of the Java method. Either returned as a R object
 #'  if it can be deserialized or returned as a "jobj". See details section for more.
+#' @export
 #' @seealso \link{sparkR.callJStatic}, \link{sparkR.newJObject}
 #' @rdname sparkR.callJMethod
 #' @examples
@@ -68,6 +69,7 @@ sparkR.callJMethod <- function(x, methodName, ...) {
 #' @param ... parameters to pass to the Java method.
 #' @return the return value of the Java method. Either returned as a R object
 #'  if it can be deserialized or returned as a "jobj". See details section for more.
+#' @export
 #' @seealso \link{sparkR.callJMethod}, \link{sparkR.newJObject}
 #' @rdname sparkR.callJStatic
 #' @examples
@@ -98,6 +100,7 @@ sparkR.callJStatic <- function(x, methodName, ...) {
 #' @param ... arguments to be passed to the constructor.
 #' @return the object created. Either returned as a R object
 #'   if it can be deserialized or returned as a "jobj". See details section for more.
+#' @export
 #' @seealso \link{sparkR.callJMethod}, \link{sparkR.callJStatic}
 #' @rdname sparkR.newJObject
 #' @examples
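
The three functions documented in jvm.R form SparkR's low-level JVM bridge. A minimal sketch, assuming an active Spark session:

    sparkR.session()
    sparkR.callJStatic("java.lang.Math", "min", 10L, 1L)  # static method call
    obj <- sparkR.newJObject("java.lang.Integer", 1L)     # construct a JVM object
    sparkR.callJMethod(obj, "toString")                   # instance method call
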
diff --git a/R/pkg/R/mllib_classification.R b/R/pkg/R/mllib_classification.R
index 2964fdeff0957..f6e9b1357561b 100644
--- a/R/pkg/R/mllib_classification.R
+++ b/R/pkg/R/mllib_classification.R
@@ -21,24 +21,28 @@
 #' S4 class that represents a LinearSVCModel
 #'
 #' @param jobj a Java object reference to the backing Scala LinearSVCModel
+#' @export
 #' @note LinearSVCModel since 2.2.0
 setClass("LinearSVCModel", representation(jobj = "jobj"))
 
 #' S4 class that represents a LogisticRegressionModel
 #'
 #' @param jobj a Java object reference to the backing Scala LogisticRegressionModel
+#' @export
 #' @note LogisticRegressionModel since 2.1.0
 setClass("LogisticRegressionModel", representation(jobj = "jobj"))
 
 #' S4 class that represents a MultilayerPerceptronClassificationModel
 #'
 #' @param jobj a Java object reference to the backing Scala MultilayerPerceptronClassifierWrapper
+#' @export
 #' @note MultilayerPerceptronClassificationModel since 2.1.0
 setClass("MultilayerPerceptronClassificationModel", representation(jobj = "jobj"))
 
 #' S4 class that represents a NaiveBayesModel
 #'
 #' @param jobj a Java object reference to the backing Scala NaiveBayesWrapper
+#' @export
 #' @note NaiveBayesModel since 2.0.0
 setClass("NaiveBayesModel", representation(jobj = "jobj"))
 
@@ -78,6 +82,7 @@ setClass("NaiveBayesModel", representation(jobj = "jobj"))
 #' @rdname spark.svmLinear
 #' @aliases spark.svmLinear,SparkDataFrame,formula-method
 #' @name spark.svmLinear
+#' @export
 #' @examples
 #' \dontrun{
 #' sparkR.session()
@@ -126,6 +131,7 @@ setMethod("spark.svmLinear", signature(data = "SparkDataFrame", formula = "formu
 #' @return \code{predict} returns the predicted values based on a LinearSVCModel.
 #' @rdname spark.svmLinear
 #' @aliases predict,LinearSVCModel,SparkDataFrame-method
+#' @export
 #' @note predict(LinearSVCModel) since 2.2.0
 setMethod("predict", signature(object = "LinearSVCModel"),
           function(object, newData) {
@@ -140,6 +146,7 @@ setMethod("predict", signature(object = "LinearSVCModel"),
 #'         \code{numClasses} (number of classes), \code{numFeatures} (number of features).
 #' @rdname spark.svmLinear
 #' @aliases summary,LinearSVCModel-method
+#' @export
 #' @note summary(LinearSVCModel) since 2.2.0
 setMethod("summary", signature(object = "LinearSVCModel"),
           function(object) {
@@ -162,6 +169,7 @@ setMethod("summary", signature(object = "LinearSVCModel"),
 #'
 #' @rdname spark.svmLinear
 #' @aliases write.ml,LinearSVCModel,character-method
+#' @export
 #' @note write.ml(LogisticRegression, character) since 2.2.0
 setMethod("write.ml", signature(object = "LinearSVCModel", path = "character"),
 function(object, path, overwrite = FALSE) {
@@ -249,6 +257,7 @@ function(object, path, overwrite = FALSE) {
 #' @rdname spark.logit
 #' @aliases spark.logit,SparkDataFrame,formula-method
 #' @name spark.logit
+#' @export
 #' @examples
 #' \dontrun{
 #' sparkR.session()
@@ -365,6 +374,7 @@ setMethod("spark.logit", signature(data = "SparkDataFrame", formula = "formula")
 #'         The list includes \code{coefficients} (coefficients matrix of the fitted model).
 #' @rdname spark.logit
 #' @aliases summary,LogisticRegressionModel-method
+#' @export
 #' @note summary(LogisticRegressionModel) since 2.1.0
 setMethod("summary", signature(object = "LogisticRegressionModel"),
           function(object) {
@@ -392,6 +402,7 @@ setMethod("summary", signature(object = "LogisticRegressionModel"),
 #' @return \code{predict} returns the predicted values based on an LogisticRegressionModel.
 #' @rdname spark.logit
 #' @aliases predict,LogisticRegressionModel,SparkDataFrame-method
+#' @export
 #' @note predict(LogisticRegressionModel) since 2.1.0
 setMethod("predict", signature(object = "LogisticRegressionModel"),
           function(object, newData) {
@@ -406,6 +417,7 @@ setMethod("predict", signature(object = "LogisticRegressionModel"),
 #'
 #' @rdname spark.logit
 #' @aliases write.ml,LogisticRegressionModel,character-method
+#' @export
 #' @note write.ml(LogisticRegression, character) since 2.1.0
 setMethod("write.ml", signature(object = "LogisticRegressionModel", path = "character"),
           function(object, path, overwrite = FALSE) {
@@ -446,6 +458,7 @@ setMethod("write.ml", signature(object = "LogisticRegressionModel", path = "char
 #' @aliases spark.mlp,SparkDataFrame,formula-method
 #' @name spark.mlp
 #' @seealso \link{read.ml}
+#' @export
 #' @examples
 #' \dontrun{
 #' df <- read.df("data/mllib/sample_multiclass_classification_data.txt", source = "libsvm")
@@ -504,6 +517,7 @@ setMethod("spark.mlp", signature(data = "SparkDataFrame", formula = "formula"),
 #'         For \code{weights}, it is a numeric vector with length equal to the expected
 #'         given the architecture (i.e., for 8-10-2 network, 112 connection weights).
 #' @rdname spark.mlp
+#' @export
 #' @aliases summary,MultilayerPerceptronClassificationModel-method
 #' @note summary(MultilayerPerceptronClassificationModel) since 2.1.0
 setMethod("summary", signature(object = "MultilayerPerceptronClassificationModel"),
@@ -524,6 +538,7 @@ setMethod("summary", signature(object = "MultilayerPerceptronClassificationModel
 #' "prediction".
 #' @rdname spark.mlp
 #' @aliases predict,MultilayerPerceptronClassificationModel-method
+#' @export
 #' @note predict(MultilayerPerceptronClassificationModel) since 2.1.0
 setMethod("predict", signature(object = "MultilayerPerceptronClassificationModel"),
           function(object, newData) {
@@ -538,6 +553,7 @@ setMethod("predict", signature(object = "MultilayerPerceptronClassificationModel
 #'
 #' @rdname spark.mlp
 #' @aliases write.ml,MultilayerPerceptronClassificationModel,character-method
+#' @export
 #' @seealso \link{write.ml}
 #' @note write.ml(MultilayerPerceptronClassificationModel, character) since 2.1.0
 setMethod("write.ml", signature(object = "MultilayerPerceptronClassificationModel",
@@ -569,6 +585,7 @@ setMethod("write.ml", signature(object = "MultilayerPerceptronClassificationMode
 #' @aliases spark.naiveBayes,SparkDataFrame,formula-method
 #' @name spark.naiveBayes
 #' @seealso e1071: \url{https://cran.r-project.org/package=e1071}
+#' @export
 #' @examples
 #' \dontrun{
 #' data <- as.data.frame(UCBAdmissions)
@@ -607,6 +624,7 @@ setMethod("spark.naiveBayes", signature(data = "SparkDataFrame", formula = "form
 #'         The list includes \code{apriori} (the label distribution) and
 #'         \code{tables} (conditional probabilities given the target label).
 #' @rdname spark.naiveBayes
+#' @export
 #' @note summary(NaiveBayesModel) since 2.0.0
 setMethod("summary", signature(object = "NaiveBayesModel"),
           function(object) {
@@ -630,6 +648,7 @@ setMethod("summary", signature(object = "NaiveBayesModel"),
 #' @return \code{predict} returns a SparkDataFrame containing predicted labels in a column named
 #' "prediction".
 #' @rdname spark.naiveBayes
+#' @export
 #' @note predict(NaiveBayesModel) since 2.0.0
 setMethod("predict", signature(object = "NaiveBayesModel"),
           function(object, newData) {
@@ -643,6 +662,7 @@ setMethod("predict", signature(object = "NaiveBayesModel"),
 #'                  which means throw exception if the output path exists.
 #'
 #' @rdname spark.naiveBayes
+#' @export
 #' @seealso \link{write.ml}
 #' @note write.ml(NaiveBayesModel, character) since 2.0.0
 setMethod("write.ml", signature(object = "NaiveBayesModel", path = "character"),
diff --git a/R/pkg/R/mllib_clustering.R b/R/pkg/R/mllib_clustering.R
index 900be685824da..a25bf81c6d977 100644
--- a/R/pkg/R/mllib_clustering.R
+++ b/R/pkg/R/mllib_clustering.R
@@ -20,24 +20,28 @@
 #' S4 class that represents a BisectingKMeansModel
 #'
 #' @param jobj a Java object reference to the backing Scala BisectingKMeansModel
+#' @export
 #' @note BisectingKMeansModel since 2.2.0
 setClass("BisectingKMeansModel", representation(jobj = "jobj"))
 
 #' S4 class that represents a GaussianMixtureModel
 #'
 #' @param jobj a Java object reference to the backing Scala GaussianMixtureModel
+#' @export
 #' @note GaussianMixtureModel since 2.1.0
 setClass("GaussianMixtureModel", representation(jobj = "jobj"))
 
 #' S4 class that represents a KMeansModel
 #'
 #' @param jobj a Java object reference to the backing Scala KMeansModel
+#' @export
 #' @note KMeansModel since 2.0.0
 setClass("KMeansModel", representation(jobj = "jobj"))
 
 #' S4 class that represents an LDAModel
 #'
 #' @param jobj a Java object reference to the backing Scala LDAWrapper
+#' @export
 #' @note LDAModel since 2.1.0
 setClass("LDAModel", representation(jobj = "jobj"))
 
@@ -64,6 +68,7 @@ setClass("LDAModel", representation(jobj = "jobj"))
 #' @rdname spark.bisectingKmeans
 #' @aliases spark.bisectingKmeans,SparkDataFrame,formula-method
 #' @name spark.bisectingKmeans
+#' @export
 #' @examples
 #' \dontrun{
 #' sparkR.session()
@@ -112,6 +117,7 @@ setMethod("spark.bisectingKmeans", signature(data = "SparkDataFrame", formula =
 #'         (cluster centers of the transformed data; cluster is NULL if is.loaded is TRUE),
 #'         and \code{is.loaded} (whether the model is loaded from a saved file).
 #' @rdname spark.bisectingKmeans
+#' @export
 #' @note summary(BisectingKMeansModel) since 2.2.0
 setMethod("summary", signature(object = "BisectingKMeansModel"),
           function(object) {
@@ -138,6 +144,7 @@ setMethod("summary", signature(object = "BisectingKMeansModel"),
 #' @param newData a SparkDataFrame for testing.
 #' @return \code{predict} returns the predicted values based on a bisecting k-means model.
 #' @rdname spark.bisectingKmeans
+#' @export
 #' @note predict(BisectingKMeansModel) since 2.2.0
 setMethod("predict", signature(object = "BisectingKMeansModel"),
           function(object, newData) {
@@ -153,6 +160,7 @@ setMethod("predict", signature(object = "BisectingKMeansModel"),
 #'        or \code{"classes"} for assigned classes.
 #' @return \code{fitted} returns a SparkDataFrame containing fitted values.
 #' @rdname spark.bisectingKmeans
+#' @export
 #' @note fitted since 2.2.0
 setMethod("fitted", signature(object = "BisectingKMeansModel"),
           function(object, method = c("centers", "classes")) {
@@ -173,6 +181,7 @@ setMethod("fitted", signature(object = "BisectingKMeansModel"),
 #'                  which means throw exception if the output path exists.
 #'
 #' @rdname spark.bisectingKmeans
+#' @export
 #' @note write.ml(BisectingKMeansModel, character) since 2.2.0
 setMethod("write.ml", signature(object = "BisectingKMeansModel", path = "character"),
           function(object, path, overwrite = FALSE) {
@@ -199,6 +208,7 @@ setMethod("write.ml", signature(object = "BisectingKMeansModel", path = "charact
 #' @rdname spark.gaussianMixture
 #' @name spark.gaussianMixture
 #' @seealso mixtools: \url{https://cran.r-project.org/package=mixtools}
+#' @export
 #' @examples
 #' \dontrun{
 #' sparkR.session()
@@ -241,6 +251,7 @@ setMethod("spark.gaussianMixture", signature(data = "SparkDataFrame", formula =
 #'         \code{sigma} (sigma), \code{loglik} (loglik), and \code{posterior} (posterior).
 #' @aliases spark.gaussianMixture,SparkDataFrame,formula-method
 #' @rdname spark.gaussianMixture
+#' @export
 #' @note summary(GaussianMixtureModel) since 2.1.0
 setMethod("summary", signature(object = "GaussianMixtureModel"),
           function(object) {
@@ -280,6 +291,7 @@ setMethod("summary", signature(object = "GaussianMixtureModel"),
 #'         "prediction".
 #' @aliases predict,GaussianMixtureModel,SparkDataFrame-method
 #' @rdname spark.gaussianMixture
+#' @export
 #' @note predict(GaussianMixtureModel) since 2.1.0
 setMethod("predict", signature(object = "GaussianMixtureModel"),
           function(object, newData) {
@@ -294,6 +306,7 @@ setMethod("predict", signature(object = "GaussianMixtureModel"),
 #'
 #' @aliases write.ml,GaussianMixtureModel,character-method
 #' @rdname spark.gaussianMixture
+#' @export
 #' @note write.ml(GaussianMixtureModel, character) since 2.1.0
 setMethod("write.ml", signature(object = "GaussianMixtureModel", path = "character"),
           function(object, path, overwrite = FALSE) {
@@ -323,6 +336,7 @@ setMethod("write.ml", signature(object = "GaussianMixtureModel", path = "charact
 #' @rdname spark.kmeans
 #' @aliases spark.kmeans,SparkDataFrame,formula-method
 #' @name spark.kmeans
+#' @export
 #' @examples
 #' \dontrun{
 #' sparkR.session()
@@ -371,6 +385,7 @@ setMethod("spark.kmeans", signature(data = "SparkDataFrame", formula = "formula"
 #'         (the actual number of cluster centers. When using initMode = "random",
 #'         \code{clusterSize} may not equal \code{k}).
 #' @rdname spark.kmeans
+#' @export
 #' @note summary(KMeansModel) since 2.0.0
 setMethod("summary", signature(object = "KMeansModel"),
           function(object) {
@@ -398,6 +413,7 @@ setMethod("summary", signature(object = "KMeansModel"),
 #' @param newData a SparkDataFrame for testing.
 #' @return \code{predict} returns the predicted values based on a k-means model.
 #' @rdname spark.kmeans
+#' @export
 #' @note predict(KMeansModel) since 2.0.0
 setMethod("predict", signature(object = "KMeansModel"),
           function(object, newData) {
@@ -415,6 +431,7 @@ setMethod("predict", signature(object = "KMeansModel"),
 #' @param ... additional argument(s) passed to the method.
 #' @return \code{fitted} returns a SparkDataFrame containing fitted values.
 #' @rdname fitted
+#' @export
 #' @examples
 #' \dontrun{
 #' model <- spark.kmeans(trainingData, ~ ., 2)
@@ -441,6 +458,7 @@ setMethod("fitted", signature(object = "KMeansModel"),
 #'                  which means throw exception if the output path exists.
 #'
 #' @rdname spark.kmeans
+#' @export
 #' @note write.ml(KMeansModel, character) since 2.0.0
 setMethod("write.ml", signature(object = "KMeansModel", path = "character"),
           function(object, path, overwrite = FALSE) {
@@ -478,6 +496,7 @@ setMethod("write.ml", signature(object = "KMeansModel", path = "character"),
 #' @rdname spark.lda
 #' @aliases spark.lda,SparkDataFrame-method
 #' @seealso topicmodels: \url{https://cran.r-project.org/package=topicmodels}
+#' @export
 #' @examples
 #' \dontrun{
 #' text <- read.df("data/mllib/sample_lda_libsvm_data.txt", source = "libsvm")
@@ -539,6 +558,7 @@ setMethod("spark.lda", signature(data = "SparkDataFrame"),
 #'               It is only for distributed LDA model (i.e., optimizer = "em")}
 #' @rdname spark.lda
 #' @aliases summary,LDAModel-method
+#' @export
 #' @note summary(LDAModel) since 2.1.0
 setMethod("summary", signature(object = "LDAModel"),
           function(object, maxTermsPerTopic) {
@@ -576,6 +596,7 @@ setMethod("summary", signature(object = "LDAModel"),
 #'         perplexity of the training data if missing argument "data".
 #' @rdname spark.lda
 #' @aliases spark.perplexity,LDAModel-method
+#' @export
 #' @note spark.perplexity(LDAModel) since 2.1.0
 setMethod("spark.perplexity", signature(object = "LDAModel", data = "SparkDataFrame"),
           function(object, data) {
@@ -590,6 +611,7 @@ setMethod("spark.perplexity", signature(object = "LDAModel", data = "SparkDataFr
 #'         vectors named "topicDistribution".
 #' @rdname spark.lda
 #' @aliases spark.posterior,LDAModel,SparkDataFrame-method
+#' @export
 #' @note spark.posterior(LDAModel) since 2.1.0
 setMethod("spark.posterior", signature(object = "LDAModel", newData = "SparkDataFrame"),
           function(object, newData) {
@@ -604,6 +626,7 @@ setMethod("spark.posterior", signature(object = "LDAModel", newData = "SparkData
 #'
 #' @rdname spark.lda
 #' @aliases write.ml,LDAModel,character-method
+#' @export
 #' @seealso \link{read.ml}
 #' @note write.ml(LDAModel, character) since 2.1.0
 setMethod("write.ml", signature(object = "LDAModel", path = "character"),
diff --git a/R/pkg/R/mllib_fpm.R b/R/pkg/R/mllib_fpm.R
index e2394906d8012..dfcb45a1b66c9 100644
--- a/R/pkg/R/mllib_fpm.R
+++ b/R/pkg/R/mllib_fpm.R
@@ -20,6 +20,7 @@
 #' S4 class that represents a FPGrowthModel
 #'
 #' @param jobj a Java object reference to the backing Scala FPGrowthModel
+#' @export
 #' @note FPGrowthModel since 2.2.0
 setClass("FPGrowthModel", slots = list(jobj = "jobj"))
 
@@ -44,6 +45,7 @@ setClass("FPGrowthModel", slots = list(jobj = "jobj"))
 #' @rdname spark.fpGrowth
 #' @name spark.fpGrowth
 #' @aliases spark.fpGrowth,SparkDataFrame-method
+#' @export
 #' @examples
 #' \dontrun{
 #' raw_data <- read.df(
@@ -107,6 +109,7 @@ setMethod("spark.fpGrowth", signature(data = "SparkDataFrame"),
 #'         and \code{freq} (frequency of the itemset).
 #' @rdname spark.fpGrowth
 #' @aliases freqItemsets,FPGrowthModel-method
+#' @export
 #' @note spark.freqItemsets(FPGrowthModel) since 2.2.0
 setMethod("spark.freqItemsets", signature(object = "FPGrowthModel"),
           function(object) {
@@ -122,6 +125,7 @@ setMethod("spark.freqItemsets", signature(object = "FPGrowthModel"),
 #'         and \code{confidence} (confidence).
 #' @rdname spark.fpGrowth
 #' @aliases associationRules,FPGrowthModel-method
+#' @export
 #' @note spark.associationRules(FPGrowthModel) since 2.2.0
 setMethod("spark.associationRules", signature(object = "FPGrowthModel"),
           function(object) {
@@ -134,6 +138,7 @@ setMethod("spark.associationRules", signature(object = "FPGrowthModel"),
 #' @return \code{predict} returns a SparkDataFrame containing predicted values.
 #' @rdname spark.fpGrowth
 #' @aliases predict,FPGrowthModel-method
+#' @export
 #' @note predict(FPGrowthModel) since 2.2.0
 setMethod("predict", signature(object = "FPGrowthModel"),
           function(object, newData) {
@@ -148,6 +153,7 @@ setMethod("predict", signature(object = "FPGrowthModel"),
 #'                  if the output path exists.
 #' @rdname spark.fpGrowth
 #' @aliases write.ml,FPGrowthModel,character-method
+#' @export
 #' @seealso \link{read.ml}
 #' @note write.ml(FPGrowthModel, character) since 2.2.0
 setMethod("write.ml", signature(object = "FPGrowthModel", path = "character"),
diff --git a/R/pkg/R/mllib_recommendation.R b/R/pkg/R/mllib_recommendation.R
index 9a77b07462585..5441c4a4022a9 100644
--- a/R/pkg/R/mllib_recommendation.R
+++ b/R/pkg/R/mllib_recommendation.R
@@ -20,6 +20,7 @@
 #' S4 class that represents an ALSModel
 #'
 #' @param jobj a Java object reference to the backing Scala ALSWrapper
+#' @export
 #' @note ALSModel since 2.1.0
 setClass("ALSModel", representation(jobj = "jobj"))
 
@@ -54,6 +55,7 @@ setClass("ALSModel", representation(jobj = "jobj"))
 #' @rdname spark.als
 #' @aliases spark.als,SparkDataFrame-method
 #' @name spark.als
+#' @export
 #' @examples
 #' \dontrun{
 #' ratings <- list(list(0, 0, 4.0), list(0, 1, 2.0), list(1, 1, 3.0), list(1, 2, 4.0),
@@ -116,6 +118,7 @@ setMethod("spark.als", signature(data = "SparkDataFrame"),
 #'         and \code{rank} (rank of the matrix factorization model).
 #' @rdname spark.als
 #' @aliases summary,ALSModel-method
+#' @export
 #' @note summary(ALSModel) since 2.1.0
 setMethod("summary", signature(object = "ALSModel"),
           function(object) {
@@ -136,6 +139,7 @@ setMethod("summary", signature(object = "ALSModel"),
 #' @return \code{predict} returns a SparkDataFrame containing predicted values.
 #' @rdname spark.als
 #' @aliases predict,ALSModel-method
+#' @export
 #' @note predict(ALSModel) since 2.1.0
 setMethod("predict", signature(object = "ALSModel"),
           function(object, newData) {
@@ -151,6 +155,7 @@ setMethod("predict", signature(object = "ALSModel"),
 #'
 #' @rdname spark.als
 #' @aliases write.ml,ALSModel,character-method
+#' @export
 #' @seealso \link{read.ml}
 #' @note write.ml(ALSModel, character) since 2.1.0
 setMethod("write.ml", signature(object = "ALSModel", path = "character"),
diff --git a/R/pkg/R/mllib_regression.R b/R/pkg/R/mllib_regression.R
index 95c1a29905197..545be5e1d89f0 100644
--- a/R/pkg/R/mllib_regression.R
+++ b/R/pkg/R/mllib_regression.R
@@ -21,18 +21,21 @@
 #' S4 class that represents an AFTSurvivalRegressionModel
 #'
 #' @param jobj a Java object reference to the backing Scala AFTSurvivalRegressionWrapper
+#' @export
 #' @note AFTSurvivalRegressionModel since 2.0.0
 setClass("AFTSurvivalRegressionModel", representation(jobj = "jobj"))
 
 #' S4 class that represents a generalized linear model
 #'
 #' @param jobj a Java object reference to the backing Scala GeneralizedLinearRegressionWrapper
+#' @export
 #' @note GeneralizedLinearRegressionModel since 2.0.0
 setClass("GeneralizedLinearRegressionModel", representation(jobj = "jobj"))
 
 #' S4 class that represents an IsotonicRegressionModel
 #'
 #' @param jobj a Java object reference to the backing Scala IsotonicRegressionModel
+#' @export
 #' @note IsotonicRegressionModel since 2.1.0
 setClass("IsotonicRegressionModel", representation(jobj = "jobj"))
 
@@ -82,6 +85,7 @@ setClass("IsotonicRegressionModel", representation(jobj = "jobj"))
 #' @return \code{spark.glm} returns a fitted generalized linear model.
 #' @rdname spark.glm
 #' @name spark.glm
+#' @export
 #' @examples
 #' \dontrun{
 #' sparkR.session()
@@ -207,6 +211,7 @@ setMethod("spark.glm", signature(data = "SparkDataFrame", formula = "formula"),
 #' @return \code{glm} returns a fitted generalized linear model.
 #' @rdname glm
 #' @aliases glm
+#' @export
 #' @examples
 #' \dontrun{
 #' sparkR.session()
@@ -239,6 +244,7 @@ setMethod("glm", signature(formula = "formula", family = "ANY", data = "SparkDat
 #'         and \code{iter} (number of iterations IRLS takes). If there are collinear columns in
 #'         the data, the coefficients matrix only provides coefficients.
 #' @rdname spark.glm
+#' @export
 #' @note summary(GeneralizedLinearRegressionModel) since 2.0.0
 setMethod("summary", signature(object = "GeneralizedLinearRegressionModel"),
           function(object) {
@@ -284,6 +290,7 @@ setMethod("summary", signature(object = "GeneralizedLinearRegressionModel"),
 
 #' @rdname spark.glm
 #' @param x summary object of fitted generalized linear model returned by \code{summary} function.
+#' @export
 #' @note print.summary.GeneralizedLinearRegressionModel since 2.0.0
 print.summary.GeneralizedLinearRegressionModel <- function(x, ...) {
   if (x$is.loaded) {
@@ -317,6 +324,7 @@ print.summary.GeneralizedLinearRegressionModel <- function(x, ...) {
 #' @return \code{predict} returns a SparkDataFrame containing predicted labels in a column named
 #'         "prediction".
 #' @rdname spark.glm
+#' @export
 #' @note predict(GeneralizedLinearRegressionModel) since 1.5.0
 setMethod("predict", signature(object = "GeneralizedLinearRegressionModel"),
           function(object, newData) {
@@ -330,6 +338,7 @@ setMethod("predict", signature(object = "GeneralizedLinearRegressionModel"),
 #'                  which means throw exception if the output path exists.
 #'
 #' @rdname spark.glm
+#' @export
 #' @note write.ml(GeneralizedLinearRegressionModel, character) since 2.0.0
 setMethod("write.ml", signature(object = "GeneralizedLinearRegressionModel", path = "character"),
           function(object, path, overwrite = FALSE) {
@@ -354,6 +363,7 @@ setMethod("write.ml", signature(object = "GeneralizedLinearRegressionModel", pat
 #' @rdname spark.isoreg
 #' @aliases spark.isoreg,SparkDataFrame,formula-method
 #' @name spark.isoreg
+#' @export
 #' @examples
 #' \dontrun{
 #' sparkR.session()
@@ -402,6 +412,7 @@ setMethod("spark.isoreg", signature(data = "SparkDataFrame", formula = "formula"
 #'         and \code{predictions} (predictions associated with the boundaries at the same index).
 #' @rdname spark.isoreg
 #' @aliases summary,IsotonicRegressionModel-method
+#' @export
 #' @note summary(IsotonicRegressionModel) since 2.1.0
 setMethod("summary", signature(object = "IsotonicRegressionModel"),
           function(object) {
@@ -418,6 +429,7 @@ setMethod("summary", signature(object = "IsotonicRegressionModel"),
 #' @return \code{predict} returns a SparkDataFrame containing predicted values.
 #' @rdname spark.isoreg
 #' @aliases predict,IsotonicRegressionModel,SparkDataFrame-method
+#' @export
 #' @note predict(IsotonicRegressionModel) since 2.1.0
 setMethod("predict", signature(object = "IsotonicRegressionModel"),
           function(object, newData) {
@@ -432,6 +444,7 @@ setMethod("predict", signature(object = "IsotonicRegressionModel"),
 #'
 #' @rdname spark.isoreg
 #' @aliases write.ml,IsotonicRegressionModel,character-method
+#' @export
 #' @note write.ml(IsotonicRegression, character) since 2.1.0
 setMethod("write.ml", signature(object = "IsotonicRegressionModel", path = "character"),
           function(object, path, overwrite = FALSE) {
@@ -464,6 +477,7 @@ setMethod("write.ml", signature(object = "IsotonicRegressionModel", path = "char
 #' @return \code{spark.survreg} returns a fitted AFT survival regression model.
 #' @rdname spark.survreg
 #' @seealso survival: \url{https://cran.r-project.org/package=survival}
+#' @export
 #' @examples
 #' \dontrun{
 #' df <- createDataFrame(ovarian)
@@ -503,6 +517,7 @@ setMethod("spark.survreg", signature(data = "SparkDataFrame", formula = "formula
 #'         The list includes the model's \code{coefficients} (features, coefficients,
 #'         intercept and log(scale)).
 #' @rdname spark.survreg
+#' @export
 #' @note summary(AFTSurvivalRegressionModel) since 2.0.0
 setMethod("summary", signature(object = "AFTSurvivalRegressionModel"),
           function(object) {
@@ -522,6 +537,7 @@ setMethod("summary", signature(object = "AFTSurvivalRegressionModel"),
 #' @return \code{predict} returns a SparkDataFrame containing predicted values
 #'         on the original scale of the data (mean predicted value at scale = 1.0).
 #' @rdname spark.survreg
+#' @export
 #' @note predict(AFTSurvivalRegressionModel) since 2.0.0
 setMethod("predict", signature(object = "AFTSurvivalRegressionModel"),
           function(object, newData) {
@@ -534,6 +550,7 @@ setMethod("predict", signature(object = "AFTSurvivalRegressionModel"),
 #' @param overwrite overwrites or not if the output path already exists. Default is FALSE
 #'                  which means throw exception if the output path exists.
 #' @rdname spark.survreg
+#' @export
 #' @note write.ml(AFTSurvivalRegressionModel, character) since 2.0.0
 #' @seealso \link{write.ml}
 setMethod("write.ml", signature(object = "AFTSurvivalRegressionModel", path = "character"),
diff --git a/R/pkg/R/mllib_stat.R b/R/pkg/R/mllib_stat.R
index f8c3329359961..3e013f1d45e38 100644
--- a/R/pkg/R/mllib_stat.R
+++ b/R/pkg/R/mllib_stat.R
@@ -20,6 +20,7 @@
 #' S4 class that represents a KSTest
 #'
 #' @param jobj a Java object reference to the backing Scala KSTestWrapper
+#' @export
 #' @note KSTest since 2.1.0
 setClass("KSTest", representation(jobj = "jobj"))
 
@@ -51,6 +52,7 @@ setClass("KSTest", representation(jobj = "jobj"))
 #' @name spark.kstest
 #' @seealso \href{http://spark.apache.org/docs/latest/mllib-statistics.html#hypothesis-testing}{
 #'          MLlib: Hypothesis Testing}
+#' @export
 #' @examples
 #' \dontrun{
 #' data <- data.frame(test = c(0.1, 0.15, 0.2, 0.3, 0.25))
@@ -92,6 +94,7 @@ setMethod("spark.kstest", signature(data = "SparkDataFrame"),
 #'         parameters tested against) and \code{degreesOfFreedom} (degrees of freedom of the test).
 #' @rdname spark.kstest
 #' @aliases summary,KSTest-method
+#' @export
 #' @note summary(KSTest) since 2.1.0
 setMethod("summary", signature(object = "KSTest"),
           function(object) {
@@ -114,6 +117,7 @@ setMethod("summary", signature(object = "KSTest"),
 
 #' @rdname spark.kstest
 #' @param x summary object of KSTest returned by \code{summary}.
+#' @export
 #' @note print.summary.KSTest since 2.1.0
 print.summary.KSTest <- function(x, ...) {
   jobj <- x$jobj
diff --git a/R/pkg/R/mllib_tree.R b/R/pkg/R/mllib_tree.R
index 6769be038efa9..4e5ddf22ee16d 100644
--- a/R/pkg/R/mllib_tree.R
+++ b/R/pkg/R/mllib_tree.R
@@ -20,36 +20,42 @@
 #' S4 class that represents a GBTRegressionModel
 #'
 #' @param jobj a Java object reference to the backing Scala GBTRegressionModel
+#' @export
 #' @note GBTRegressionModel since 2.1.0
 setClass("GBTRegressionModel", representation(jobj = "jobj"))
 
 #' S4 class that represents a GBTClassificationModel
 #'
 #' @param jobj a Java object reference to the backing Scala GBTClassificationModel
+#' @export
 #' @note GBTClassificationModel since 2.1.0
 setClass("GBTClassificationModel", representation(jobj = "jobj"))
 
 #' S4 class that represents a RandomForestRegressionModel
 #'
 #' @param jobj a Java object reference to the backing Scala RandomForestRegressionModel
+#' @export
 #' @note RandomForestRegressionModel since 2.1.0
 setClass("RandomForestRegressionModel", representation(jobj = "jobj"))
 
 #' S4 class that represents a RandomForestClassificationModel
 #'
 #' @param jobj a Java object reference to the backing Scala RandomForestClassificationModel
+#' @export
 #' @note RandomForestClassificationModel since 2.1.0
 setClass("RandomForestClassificationModel", representation(jobj = "jobj"))
 
 #' S4 class that represents a DecisionTreeRegressionModel
 #'
 #' @param jobj a Java object reference to the backing Scala DecisionTreeRegressionModel
+#' @export
 #' @note DecisionTreeRegressionModel since 2.3.0
 setClass("DecisionTreeRegressionModel", representation(jobj = "jobj"))
 
 #' S4 class that represents a DecisionTreeClassificationModel
 #'
 #' @param jobj a Java object reference to the backing Scala DecisionTreeClassificationModel
+#' @export
 #' @note DecisionTreeClassificationModel since 2.3.0
 setClass("DecisionTreeClassificationModel", representation(jobj = "jobj"))
 
@@ -173,6 +179,7 @@ print.summary.decisionTree <- function(x) {
 #' @return \code{spark.gbt} returns a fitted Gradient Boosted Tree model.
 #' @rdname spark.gbt
 #' @name spark.gbt
+#' @export
 #' @examples
 #' \dontrun{
 #' # fit a Gradient Boosted Tree Regression Model
@@ -254,6 +261,7 @@ setMethod("spark.gbt", signature(data = "SparkDataFrame", formula = "formula"),
 #'         \code{numTrees} (number of trees), and \code{treeWeights} (tree weights).
 #' @rdname spark.gbt
 #' @aliases summary,GBTRegressionModel-method
+#' @export
 #' @note summary(GBTRegressionModel) since 2.1.0
 setMethod("summary", signature(object = "GBTRegressionModel"),
           function(object) {
@@ -267,6 +275,7 @@ setMethod("summary", signature(object = "GBTRegressionModel"),
 #' @param x summary object of Gradient Boosted Tree regression model or classification model
 #'          returned by \code{summary}.
 #' @rdname spark.gbt
+#' @export
 #' @note print.summary.GBTRegressionModel since 2.1.0
 print.summary.GBTRegressionModel <- function(x, ...) {
   print.summary.treeEnsemble(x)
@@ -276,6 +285,7 @@ print.summary.GBTRegressionModel <- function(x, ...) {
 
 #' @rdname spark.gbt
 #' @aliases summary,GBTClassificationModel-method
+#' @export
 #' @note summary(GBTClassificationModel) since 2.1.0
 setMethod("summary", signature(object = "GBTClassificationModel"),
           function(object) {
@@ -287,6 +297,7 @@ setMethod("summary", signature(object = "GBTClassificationModel"),
 #  Prints the summary of Gradient Boosted Tree Classification Model
 
 #' @rdname spark.gbt
+#' @export
 #' @note print.summary.GBTClassificationModel since 2.1.0
 print.summary.GBTClassificationModel <- function(x, ...) {
   print.summary.treeEnsemble(x)
@@ -299,6 +310,7 @@ print.summary.GBTClassificationModel <- function(x, ...) {
 #'         "prediction".
 #' @rdname spark.gbt
 #' @aliases predict,GBTRegressionModel-method
+#' @export
 #' @note predict(GBTRegressionModel) since 2.1.0
 setMethod("predict", signature(object = "GBTRegressionModel"),
           function(object, newData) {
@@ -307,6 +319,7 @@ setMethod("predict", signature(object = "GBTRegressionModel"),
 
 #' @rdname spark.gbt
 #' @aliases predict,GBTClassificationModel-method
+#' @export
 #' @note predict(GBTClassificationModel) since 2.1.0
 setMethod("predict", signature(object = "GBTClassificationModel"),
           function(object, newData) {
@@ -321,6 +334,7 @@ setMethod("predict", signature(object = "GBTClassificationModel"),
 #'                  which means throw exception if the output path exists.
 #' @aliases write.ml,GBTRegressionModel,character-method
 #' @rdname spark.gbt
+#' @export
 #' @note write.ml(GBTRegressionModel, character) since 2.1.0
 setMethod("write.ml", signature(object = "GBTRegressionModel", path = "character"),
           function(object, path, overwrite = FALSE) {
@@ -329,6 +343,7 @@ setMethod("write.ml", signature(object = "GBTRegressionModel", path = "character
 
 #' @aliases write.ml,GBTClassificationModel,character-method
 #' @rdname spark.gbt
+#' @export
 #' @note write.ml(GBTClassificationModel, character) since 2.1.0
 setMethod("write.ml", signature(object = "GBTClassificationModel", path = "character"),
           function(object, path, overwrite = FALSE) {
@@ -387,6 +402,7 @@ setMethod("write.ml", signature(object = "GBTClassificationModel", path = "chara
 #' @return \code{spark.randomForest} returns a fitted Random Forest model.
 #' @rdname spark.randomForest
 #' @name spark.randomForest
+#' @export
 #' @examples
 #' \dontrun{
 #' # fit a Random Forest Regression Model
@@ -464,6 +480,7 @@ setMethod("spark.randomForest", signature(data = "SparkDataFrame", formula = "fo
 #'         \code{numTrees} (number of trees), and \code{treeWeights} (tree weights).
 #' @rdname spark.randomForest
 #' @aliases summary,RandomForestRegressionModel-method
+#' @export
 #' @note summary(RandomForestRegressionModel) since 2.1.0
 setMethod("summary", signature(object = "RandomForestRegressionModel"),
           function(object) {
@@ -477,6 +494,7 @@ setMethod("summary", signature(object = "RandomForestRegressionModel"),
 #' @param x summary object of Random Forest regression model or classification model
 #'          returned by \code{summary}.
 #' @rdname spark.randomForest
+#' @export
 #' @note print.summary.RandomForestRegressionModel since 2.1.0
 print.summary.RandomForestRegressionModel <- function(x, ...) {
   print.summary.treeEnsemble(x)
@@ -486,6 +504,7 @@ print.summary.RandomForestRegressionModel <- function(x, ...) {
 
 #' @rdname spark.randomForest
 #' @aliases summary,RandomForestClassificationModel-method
+#' @export
 #' @note summary(RandomForestClassificationModel) since 2.1.0
 setMethod("summary", signature(object = "RandomForestClassificationModel"),
           function(object) {
@@ -497,6 +516,7 @@ setMethod("summary", signature(object = "RandomForestClassificationModel"),
 #  Prints the summary of Random Forest Classification Model
 
 #' @rdname spark.randomForest
+#' @export
 #' @note print.summary.RandomForestClassificationModel since 2.1.0
 print.summary.RandomForestClassificationModel <- function(x, ...) {
   print.summary.treeEnsemble(x)
@@ -509,6 +529,7 @@ print.summary.RandomForestClassificationModel <- function(x, ...) {
 #'         "prediction".
 #' @rdname spark.randomForest
 #' @aliases predict,RandomForestRegressionModel-method
+#' @export
 #' @note predict(RandomForestRegressionModel) since 2.1.0
 setMethod("predict", signature(object = "RandomForestRegressionModel"),
           function(object, newData) {
@@ -517,6 +538,7 @@ setMethod("predict", signature(object = "RandomForestRegressionModel"),
 
 #' @rdname spark.randomForest
 #' @aliases predict,RandomForestClassificationModel-method
+#' @export
 #' @note predict(RandomForestClassificationModel) since 2.1.0
 setMethod("predict", signature(object = "RandomForestClassificationModel"),
           function(object, newData) {
@@ -532,6 +554,7 @@ setMethod("predict", signature(object = "RandomForestClassificationModel"),
 #'
 #' @aliases write.ml,RandomForestRegressionModel,character-method
 #' @rdname spark.randomForest
+#' @export
 #' @note write.ml(RandomForestRegressionModel, character) since 2.1.0
 setMethod("write.ml", signature(object = "RandomForestRegressionModel", path = "character"),
           function(object, path, overwrite = FALSE) {
@@ -540,6 +563,7 @@ setMethod("write.ml", signature(object = "RandomForestRegressionModel", path = "
 
 #' @aliases write.ml,RandomForestClassificationModel,character-method
 #' @rdname spark.randomForest
+#' @export
 #' @note write.ml(RandomForestClassificationModel, character) since 2.1.0
 setMethod("write.ml", signature(object = "RandomForestClassificationModel", path = "character"),
           function(object, path, overwrite = FALSE) {
@@ -593,6 +617,7 @@ setMethod("write.ml", signature(object = "RandomForestClassificationModel", path
 #' @return \code{spark.decisionTree} returns a fitted Decision Tree model.
 #' @rdname spark.decisionTree
 #' @name spark.decisionTree
+#' @export
 #' @examples
 #' \dontrun{
 #' # fit a Decision Tree Regression Model
@@ -665,6 +690,7 @@ setMethod("spark.decisionTree", signature(data = "SparkDataFrame", formula = "fo
 #'         trees).
 #' @rdname spark.decisionTree
 #' @aliases summary,DecisionTreeRegressionModel-method
+#' @export
 #' @note summary(DecisionTreeRegressionModel) since 2.3.0
 setMethod("summary", signature(object = "DecisionTreeRegressionModel"),
           function(object) {
@@ -678,6 +704,7 @@ setMethod("summary", signature(object = "DecisionTreeRegressionModel"),
 #' @param x summary object of Decision Tree regression model or classification model
 #'          returned by \code{summary}.
 #' @rdname spark.decisionTree
+#' @export
 #' @note print.summary.DecisionTreeRegressionModel since 2.3.0
 print.summary.DecisionTreeRegressionModel <- function(x, ...) {
   print.summary.decisionTree(x)
@@ -687,6 +714,7 @@ print.summary.DecisionTreeRegressionModel <- function(x, ...) {
 
 #' @rdname spark.decisionTree
 #' @aliases summary,DecisionTreeClassificationModel-method
+#' @export
 #' @note summary(DecisionTreeClassificationModel) since 2.3.0
 setMethod("summary", signature(object = "DecisionTreeClassificationModel"),
           function(object) {
@@ -698,6 +726,7 @@ setMethod("summary", signature(object = "DecisionTreeClassificationModel"),
 #  Prints the summary of Decision Tree Classification Model
 
 #' @rdname spark.decisionTree
+#' @export
 #' @note print.summary.DecisionTreeClassificationModel since 2.3.0
 print.summary.DecisionTreeClassificationModel <- function(x, ...) {
   print.summary.decisionTree(x)
@@ -710,6 +739,7 @@ print.summary.DecisionTreeClassificationModel <- function(x, ...) {
 #'         "prediction".
 #' @rdname spark.decisionTree
 #' @aliases predict,DecisionTreeRegressionModel-method
+#' @export
 #' @note predict(DecisionTreeRegressionModel) since 2.3.0
 setMethod("predict", signature(object = "DecisionTreeRegressionModel"),
           function(object, newData) {
@@ -718,6 +748,7 @@ setMethod("predict", signature(object = "DecisionTreeRegressionModel"),
 
 #' @rdname spark.decisionTree
 #' @aliases predict,DecisionTreeClassificationModel-method
+#' @export
 #' @note predict(DecisionTreeClassificationModel) since 2.3.0
 setMethod("predict", signature(object = "DecisionTreeClassificationModel"),
           function(object, newData) {
@@ -733,6 +764,7 @@ setMethod("predict", signature(object = "DecisionTreeClassificationModel"),
 #'
 #' @aliases write.ml,DecisionTreeRegressionModel,character-method
 #' @rdname spark.decisionTree
+#' @export
 #' @note write.ml(DecisionTreeRegressionModel, character) since 2.3.0
 setMethod("write.ml", signature(object = "DecisionTreeRegressionModel", path = "character"),
           function(object, path, overwrite = FALSE) {
@@ -741,6 +773,7 @@ setMethod("write.ml", signature(object = "DecisionTreeRegressionModel", path = "
 
 #' @aliases write.ml,DecisionTreeClassificationModel,character-method
 #' @rdname spark.decisionTree
+#' @export
 #' @note write.ml(DecisionTreeClassificationModel, character) since 2.3.0
 setMethod("write.ml", signature(object = "DecisionTreeClassificationModel", path = "character"),
           function(object, path, overwrite = FALSE) {
diff --git a/R/pkg/R/mllib_utils.R b/R/pkg/R/mllib_utils.R
index 7d04bffcba3a4..a53c92c2c4815 100644
--- a/R/pkg/R/mllib_utils.R
+++ b/R/pkg/R/mllib_utils.R
@@ -31,6 +31,7 @@
 #' MLlib model below.
 #' @rdname write.ml
 #' @name write.ml
+#' @export
 #' @seealso \link{spark.als}, \link{spark.bisectingKmeans}, \link{spark.decisionTree},
 #' @seealso \link{spark.gaussianMixture}, \link{spark.gbt},
 #' @seealso \link{spark.glm}, \link{glm}, \link{spark.isoreg},
@@ -47,6 +48,7 @@ NULL
 #' MLlib model below.
 #' @rdname predict
 #' @name predict
+#' @export
 #' @seealso \link{spark.als}, \link{spark.bisectingKmeans}, \link{spark.decisionTree},
 #' @seealso \link{spark.gaussianMixture}, \link{spark.gbt},
 #' @seealso \link{spark.glm}, \link{glm}, \link{spark.isoreg},
@@ -73,6 +75,7 @@ predict_internal <- function(object, newData) {
 #' @return A fitted MLlib model.
 #' @rdname read.ml
 #' @name read.ml
+#' @export
 #' @seealso \link{write.ml}
 #' @examples
 #' \dontrun{
diff --git a/R/pkg/R/schema.R b/R/pkg/R/schema.R
index 9831fc3cc6d01..65f418740c643 100644
--- a/R/pkg/R/schema.R
+++ b/R/pkg/R/schema.R
@@ -29,6 +29,7 @@
 #' @param ... additional structField objects
 #' @return a structType object
 #' @rdname structType
+#' @export
 #' @examples
 #'\dontrun{
 #' schema <- structType(structField("a", "integer"), structField("c", "string"),
@@ -48,6 +49,7 @@ structType <- function(x, ...) {
 
 #' @rdname structType
 #' @method structType jobj
+#' @export
 structType.jobj <- function(x, ...) {
   obj <- structure(list(), class = "structType")
   obj$jobj <- x
@@ -57,6 +59,7 @@ structType.jobj <- function(x, ...) {
 
 #' @rdname structType
 #' @method structType structField
+#' @export
 structType.structField <- function(x, ...) {
   fields <- list(x, ...)
   if (!all(sapply(fields, inherits, "structField"))) {
@@ -73,6 +76,7 @@ structType.structField <- function(x, ...) {
 
 #' @rdname structType
 #' @method structType character
+#' @export
 structType.character <- function(x, ...) {
   if (!is.character(x)) {
     stop("schema must be a DDL-formatted string.")
@@ -115,6 +119,7 @@ print.structType <- function(x, ...) {
 #' @param ... additional argument(s) passed to the method.
 #' @return A structField object.
 #' @rdname structField
+#' @export
 #' @examples
 #'\dontrun{
 #' field1 <- structField("a", "integer")
@@ -132,6 +137,7 @@ structField <- function(x, ...) {
 
 #' @rdname structField
 #' @method structField jobj
+#' @export
 structField.jobj <- function(x, ...) {
   obj <- structure(list(), class = "structField")
   obj$jobj <- x
@@ -206,6 +212,7 @@ checkType <- function(type) {
 #' @param type The data type of the field
 #' @param nullable A logical vector indicating whether or not the field is nullable
 #' @rdname structField
+#' @export
 structField.character <- function(x, type, nullable = TRUE, ...) {
   if (class(x) != "character") {
     stop("Field name must be a string.")
diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R
index f7c1663d32c96..965471f3b07a0 100644
--- a/R/pkg/R/sparkR.R
+++ b/R/pkg/R/sparkR.R
@@ -35,6 +35,7 @@ connExists <- function(env) {
 #' Also terminates the backend this R session is connected to.
 #' @rdname sparkR.session.stop
 #' @name sparkR.session.stop
+#' @export
 #' @note sparkR.session.stop since 2.0.0
 sparkR.session.stop <- function() {
   env <- .sparkREnv
@@ -83,6 +84,7 @@ sparkR.session.stop <- function() {
 
 #' @rdname sparkR.session.stop
 #' @name sparkR.stop
+#' @export
 #' @note sparkR.stop since 1.4.0
 sparkR.stop <- function() {
   sparkR.session.stop()
@@ -101,6 +103,7 @@ sparkR.stop <- function() {
 #' @param sparkPackages Character vector of package coordinates
 #' @seealso \link{sparkR.session}
 #' @rdname sparkR.init-deprecated
+#' @export
 #' @examples
 #'\dontrun{
 #' sc <- sparkR.init("local[2]", "SparkR", "/home/spark")
@@ -158,16 +161,11 @@ sparkR.sparkContext <- function(
                     " please use the --packages commandline instead", sep = ","))
     }
     backendPort <- existingPort
-    authSecret <- Sys.getenv("SPARKR_BACKEND_AUTH_SECRET")
-    if (nchar(authSecret) == 0) {
-      stop("Auth secret not provided in environment.")
-    }
   } else {
     path <- tempfile(pattern = "backend_port")
     submitOps <- getClientModeSparkSubmitOpts(
         Sys.getenv("SPARKR_SUBMIT_ARGS", "sparkr-shell"),
         sparkEnvirMap)
-    checkJavaVersion()
     launchBackend(
         args = path,
         sparkHome = sparkHome,
@@ -191,27 +189,16 @@ sparkR.sparkContext <- function(
     monitorPort <- readInt(f)
     rLibPath <- readString(f)
     connectionTimeout <- readInt(f)
-
-    # Don't use readString() so that we can provide a useful
-    # error message if the R and Java versions are mismatched.
-    authSecretLen <- readInt(f)
-    if (length(authSecretLen) == 0 || authSecretLen == 0) {
-      stop("Unexpected EOF in JVM connection data. Mismatched versions?")
-    }
-    authSecret <- readStringData(f, authSecretLen)
     close(f)
     file.remove(path)
     if (length(backendPort) == 0 || backendPort == 0 ||
         length(monitorPort) == 0 || monitorPort == 0 ||
-        length(rLibPath) != 1 || length(authSecret) == 0) {
+        length(rLibPath) != 1) {
       stop("JVM failed to launch")
     }
-
-    monitorConn <- socketConnection(port = monitorPort, blocking = TRUE,
-                                    timeout = connectionTimeout, open = "wb")
-    doServerAuth(monitorConn, authSecret)
-
-    assign(".monitorConn", monitorConn, envir = .sparkREnv)
+    assign(".monitorConn",
+           socketConnection(port = monitorPort, timeout = connectionTimeout),
+           envir = .sparkREnv)
     assign(".backendLaunched", 1, envir = .sparkREnv)
     if (rLibPath != "") {
       assign(".libPath", rLibPath, envir = .sparkREnv)
@@ -221,7 +208,7 @@ sparkR.sparkContext <- function(
 
   .sparkREnv$backendPort <- backendPort
   tryCatch({
-    connectBackend("localhost", backendPort, timeout = connectionTimeout, authSecret = authSecret)
+    connectBackend("localhost", backendPort, timeout = connectionTimeout)
   },
   error = function(err) {
     stop("Failed to connect JVM\n")
@@ -283,6 +270,7 @@ sparkR.sparkContext <- function(
 #' @param jsc The existing JavaSparkContext created with SparkR.init()
 #' @seealso \link{sparkR.session}
 #' @rdname sparkRSQL.init-deprecated
+#' @export
 #' @examples
 #'\dontrun{
 #' sc <- sparkR.init()
@@ -310,6 +298,7 @@ sparkRSQL.init <- function(jsc = NULL) {
 #' @param jsc The existing JavaSparkContext created with SparkR.init()
 #' @seealso \link{sparkR.session}
 #' @rdname sparkRHive.init-deprecated
+#' @export
 #' @examples
 #'\dontrun{
 #' sc <- sparkR.init()
@@ -358,6 +347,7 @@ sparkRHive.init <- function(jsc = NULL) {
 #' @param enableHiveSupport enable support for Hive, fallback if not built with Hive support; once
 #'        set, this cannot be turned off on an existing session
 #' @param ... named Spark properties passed to the method.
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -452,6 +442,7 @@ sparkR.session <- function(
 #' @return the SparkUI URL, or NA if it is disabled, or not started.
 #' @rdname sparkR.uiWebUrl
 #' @name sparkR.uiWebUrl
+#' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -703,17 +694,3 @@ sparkCheckInstall <- function(sparkHome, master, deployMode) {
     NULL
   }
 }
-
-# Utility function for sending auth data over a socket and checking the server's reply.
-doServerAuth <- function(con, authSecret) {
-  if (nchar(authSecret) == 0) {
-    stop("Auth secret not provided.")
-  }
-  writeString(con, authSecret)
-  flush(con)
-  reply <- readString(con)
-  if (reply != "ok") {
-    close(con)
-    stop("Unexpected reply from server.")
-  }
-}
diff --git a/R/pkg/R/stats.R b/R/pkg/R/stats.R
index 497f18c763048..c8af798830b30 100644
--- a/R/pkg/R/stats.R
+++ b/R/pkg/R/stats.R
@@ -37,6 +37,7 @@ setOldClass("jobj")
 #' @name crosstab
 #' @aliases crosstab,SparkDataFrame,character,character-method
 #' @family stat functions
+#' @export
 #' @examples
 #' \dontrun{
 #' df <- read.json("/path/to/file.json")
@@ -62,6 +63,7 @@ setMethod("crosstab",
 #' @rdname cov
 #' @aliases cov,SparkDataFrame-method
 #' @family stat functions
+#' @export
 #' @examples
 #'
 #' \dontrun{
@@ -90,6 +92,7 @@ setMethod("cov",
 #' @name corr
 #' @aliases corr,SparkDataFrame-method
 #' @family stat functions
+#' @export
 #' @examples
 #'
 #' \dontrun{
@@ -121,6 +124,7 @@ setMethod("corr",
 #' @name freqItems
 #' @aliases freqItems,SparkDataFrame,character-method
 #' @family stat functions
+#' @export
 #' @examples
 #' \dontrun{
 #' df <- read.json("/path/to/file.json")
@@ -164,6 +168,7 @@ setMethod("freqItems", signature(x = "SparkDataFrame", cols = "character"),
 #' @name approxQuantile
 #' @aliases approxQuantile,SparkDataFrame,character,numeric,numeric-method
 #' @family stat functions
+#' @export
 #' @examples
 #' \dontrun{
 #' df <- read.json("/path/to/file.json")
@@ -200,6 +205,7 @@ setMethod("approxQuantile",
 #' @aliases sampleBy,SparkDataFrame,character,list,numeric-method
 #' @name sampleBy
 #' @family stat functions
+#' @export
 #' @examples
 #'\dontrun{
 #' df <- read.json("/path/to/file.json")
diff --git a/R/pkg/R/streaming.R b/R/pkg/R/streaming.R
index fc83463f72cd4..8390bd5e6de72 100644
--- a/R/pkg/R/streaming.R
+++ b/R/pkg/R/streaming.R
@@ -28,6 +28,7 @@ NULL
 #' @seealso \link{read.stream}
 #'
 #' @param ssq A Java object reference to the backing Scala StreamingQuery
+#' @export
 #' @note StreamingQuery since 2.2.0
 #' @note experimental
 setClass("StreamingQuery",
@@ -44,6 +45,7 @@ streamingQuery <- function(ssq) {
 }
 
 #' @rdname show
+#' @export
 #' @note show(StreamingQuery) since 2.2.0
 setMethod("show", "StreamingQuery",
           function(object) {
@@ -68,6 +70,7 @@ setMethod("show", "StreamingQuery",
 #' @aliases queryName,StreamingQuery-method
 #' @family StreamingQuery methods
 #' @seealso \link{write.stream}
+#' @export
 #' @examples
 #' \dontrun{ queryName(sq) }
 #' @note queryName(StreamingQuery) since 2.2.0
@@ -82,6 +85,7 @@ setMethod("queryName",
 #' @name explain
 #' @aliases explain,StreamingQuery-method
 #' @family StreamingQuery methods
+#' @export
 #' @examples
 #' \dontrun{ explain(sq) }
 #' @note explain(StreamingQuery) since 2.2.0
@@ -100,6 +104,7 @@ setMethod("explain",
 #' @name lastProgress
 #' @aliases lastProgress,StreamingQuery-method
 #' @family StreamingQuery methods
+#' @export
 #' @examples
 #' \dontrun{ lastProgress(sq) }
 #' @note lastProgress(StreamingQuery) since 2.2.0
@@ -124,6 +129,7 @@ setMethod("lastProgress",
 #' @name status
 #' @aliases status,StreamingQuery-method
 #' @family StreamingQuery methods
+#' @export
 #' @examples
 #' \dontrun{ status(sq) }
 #' @note status(StreamingQuery) since 2.2.0
@@ -144,6 +150,7 @@ setMethod("status",
 #' @name isActive
 #' @aliases isActive,StreamingQuery-method
 #' @family StreamingQuery methods
+#' @export
 #' @examples
 #' \dontrun{ isActive(sq) }
 #' @note isActive(StreamingQuery) since 2.2.0
@@ -170,6 +177,7 @@ setMethod("isActive",
 #' @name awaitTermination
 #' @aliases awaitTermination,StreamingQuery-method
 #' @family StreamingQuery methods
+#' @export
 #' @examples
 #' \dontrun{ awaitTermination(sq, 10000) }
 #' @note awaitTermination(StreamingQuery) since 2.2.0
@@ -194,6 +202,7 @@ setMethod("awaitTermination",
 #' @name stopQuery
 #' @aliases stopQuery,StreamingQuery-method
 #' @family StreamingQuery methods
+#' @export
 #' @examples
 #' \dontrun{ stopQuery(sq) }
 #' @note stopQuery(StreamingQuery) since 2.2.0
diff --git a/R/pkg/R/utils.R b/R/pkg/R/utils.R
index c3501977e64bc..164cd6d01a347 100644
--- a/R/pkg/R/utils.R
+++ b/R/pkg/R/utils.R
@@ -108,6 +108,7 @@ isRDD <- function(name, env) {
 #'
 #' @param key the object to be hashed
 #' @return the hash code as an integer
+#' @export
 #' @examples
 #'\dontrun{
 #' hashCode(1L) # 1
@@ -746,7 +747,7 @@ varargsToJProperties <- function(...) {
   props
 }
 
-launchScript <- function(script, combinedArgs, wait = FALSE, stdout = "", stderr = "") {
+launchScript <- function(script, combinedArgs, wait = FALSE) {
   if (.Platform$OS.type == "windows") {
     scriptWithArgs <- paste(script, combinedArgs, sep = " ")
     # on Windows, intern = F seems to mean output to the console. (documentation on this is missing)
@@ -756,7 +757,7 @@ launchScript <- function(script, combinedArgs, wait = FALSE, stdout = "", stderr
     # stdout = F means discard output
     # stdout = "" means to its console (default)
     # Note that the console of this child process might not be the same as the running R process.
-    system2(script, combinedArgs, stdout = stdout, wait = wait, stderr = stderr)
+    system2(script, combinedArgs, stdout = "", wait = wait)
   }
 }
 
diff --git a/R/pkg/R/window.R b/R/pkg/R/window.R
index 396b27bee80c6..0799d841e5dc9 100644
--- a/R/pkg/R/window.R
+++ b/R/pkg/R/window.R
@@ -29,6 +29,7 @@
 #' @rdname windowPartitionBy
 #' @name windowPartitionBy
 #' @aliases windowPartitionBy,character-method
+#' @export
 #' @examples
 #' \dontrun{
 #'   ws <- orderBy(windowPartitionBy("key1", "key2"), "key3")
@@ -51,6 +52,7 @@ setMethod("windowPartitionBy",
 #' @rdname windowPartitionBy
 #' @name windowPartitionBy
 #' @aliases windowPartitionBy,Column-method
+#' @export
 #' @note windowPartitionBy(Column) since 2.0.0
 setMethod("windowPartitionBy",
           signature(col = "Column"),
@@ -76,6 +78,7 @@ setMethod("windowPartitionBy",
 #' @rdname windowOrderBy
 #' @name windowOrderBy
 #' @aliases windowOrderBy,character-method
+#' @export
 #' @examples
 #' \dontrun{
 #'   ws <- windowOrderBy("key1", "key2")
@@ -98,6 +101,7 @@ setMethod("windowOrderBy",
 #' @rdname windowOrderBy
 #' @name windowOrderBy
 #' @aliases windowOrderBy,Column-method
+#' @export
 #' @note windowOrderBy(Column) since 2.0.0
 setMethod("windowOrderBy",
           signature(col = "Column"),
diff --git a/R/pkg/inst/worker/daemon.R b/R/pkg/inst/worker/daemon.R
index fb9db63b07cd0..2e31dc5f728cd 100644
--- a/R/pkg/inst/worker/daemon.R
+++ b/R/pkg/inst/worker/daemon.R
@@ -28,9 +28,7 @@ suppressPackageStartupMessages(library(SparkR))
 
 port <- as.integer(Sys.getenv("SPARKR_WORKER_PORT"))
 inputCon <- socketConnection(
-    port = port, open = "wb", blocking = TRUE, timeout = connectionTimeout)
-
-SparkR:::doServerAuth(inputCon, Sys.getenv("SPARKR_WORKER_SECRET"))
+    port = port, open = "rb", blocking = TRUE, timeout = connectionTimeout)
 
 # Waits indefinitely for a socket connection by default.
 selectTimeout <- NULL
diff --git a/R/pkg/inst/worker/worker.R b/R/pkg/inst/worker/worker.R
index ba458d2b9ddfb..00789d815bba8 100644
--- a/R/pkg/inst/worker/worker.R
+++ b/R/pkg/inst/worker/worker.R
@@ -100,12 +100,9 @@ suppressPackageStartupMessages(library(SparkR))
 
 port <- as.integer(Sys.getenv("SPARKR_WORKER_PORT"))
 inputCon <- socketConnection(
-    port = port, blocking = TRUE, open = "wb", timeout = connectionTimeout)
-SparkR:::doServerAuth(inputCon, Sys.getenv("SPARKR_WORKER_SECRET"))
-
+    port = port, blocking = TRUE, open = "rb", timeout = connectionTimeout)
 outputCon <- socketConnection(
     port = port, blocking = TRUE, open = "wb", timeout = connectionTimeout)
-SparkR:::doServerAuth(outputCon, Sys.getenv("SPARKR_WORKER_SECRET"))
 
 # read the index of the current partition inside the RDD
 partition <- SparkR:::readInt(inputCon)
diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R
index 16c1fd5a065eb..5197838eaac66 100644
--- a/R/pkg/tests/fulltests/test_sparkSQL.R
+++ b/R/pkg/tests/fulltests/test_sparkSQL.R
@@ -67,8 +67,6 @@ sparkSession <- if (windows_with_hadoop()) {
     sparkR.session(master = sparkRTestMaster, enableHiveSupport = FALSE)
   }
 sc <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", "getJavaSparkContext", sparkSession)
-# materialize the catalog implementation
-listTables()
 
 mockLines <- c("{\"name\":\"Michael\"}",
                "{\"name\":\"Andy\", \"age\":30}",
@@ -1479,89 +1477,24 @@ test_that("column functions", {
   df5 <- createDataFrame(list(list(a = "010101")))
   expect_equal(collect(select(df5, conv(df5$a, 2, 16)))[1, 1], "15")
 
-  # Test array_contains(), array_max(), array_min(), array_position(), element_at() and reverse()
+  # Test array_contains() and sort_array()
   df <- createDataFrame(list(list(list(1L, 2L, 3L)), list(list(6L, 5L, 4L))))
   result <- collect(select(df, array_contains(df[[1]], 1L)))[[1]]
   expect_equal(result, c(TRUE, FALSE))
 
-  result <- collect(select(df, array_max(df[[1]])))[[1]]
-  expect_equal(result, c(3, 6))
-
-  result <- collect(select(df, array_min(df[[1]])))[[1]]
-  expect_equal(result, c(1, 4))
-
-  result <- collect(select(df, array_position(df[[1]], 1L)))[[1]]
-  expect_equal(result, c(1, 0))
-
-  result <- collect(select(df, element_at(df[[1]], 1L)))[[1]]
-  expect_equal(result, c(1, 6))
-
-  result <- collect(select(df, reverse(df[[1]])))[[1]]
-  expect_equal(result, list(list(3L, 2L, 1L), list(4L, 5L, 6L)))
-
-  df2 <- createDataFrame(list(list("abc")))
-  result <- collect(select(df2, reverse(df2[[1]])))[[1]]
-  expect_equal(result, "cba")
-
-  # Test array_repeat()
-  df <- createDataFrame(list(list("a", 3L), list("b", 2L)))
-  result <- collect(select(df, array_repeat(df[[1]], df[[2]])))[[1]]
-  expect_equal(result, list(list("a", "a", "a"), list("b", "b")))
-
-  result <- collect(select(df, array_repeat(df[[1]], 2L)))[[1]]
-  expect_equal(result, list(list("a", "a"), list("b", "b")))
-
-  # Test arrays_overlap()
-  df <- createDataFrame(list(list(list(1L, 2L), list(3L, 1L)),
-                             list(list(1L, 2L), list(3L, 4L)),
-                             list(list(1L, NA), list(3L, 4L))))
-  result <- collect(select(df, arrays_overlap(df[[1]], df[[2]])))[[1]]
-  expect_equal(result, c(TRUE, FALSE, NA))
-
-  # Test array_sort() and sort_array()
-  df <- createDataFrame(list(list(list(2L, 1L, 3L, NA)), list(list(NA, 6L, 5L, NA, 4L))))
-
-  result <- collect(select(df, array_sort(df[[1]])))[[1]]
-  expect_equal(result, list(list(1L, 2L, 3L, NA), list(4L, 5L, 6L, NA, NA)))
-
   result <- collect(select(df, sort_array(df[[1]], FALSE)))[[1]]
-  expect_equal(result, list(list(3L, 2L, 1L, NA), list(6L, 5L, 4L, NA, NA)))
+  expect_equal(result, list(list(3L, 2L, 1L), list(6L, 5L, 4L)))
   result <- collect(select(df, sort_array(df[[1]])))[[1]]
-  expect_equal(result, list(list(NA, 1L, 2L, 3L), list(NA, NA, 4L, 5L, 6L)))
-
-  # Test slice()
-  df <- createDataFrame(list(list(list(1L, 2L, 3L)), list(list(4L, 5L))))
-  result <- collect(select(df, slice(df[[1]], 2L, 2L)))[[1]]
-  expect_equal(result, list(list(2L, 3L), list(5L)))
-
-  # Test concat()
-  df <- createDataFrame(list(list(list(1L, 2L, 3L), list(4L, 5L, 6L)),
-                        list(list(7L, 8L, 9L), list(10L, 11L, 12L))))
-  result <- collect(select(df, concat(df[[1]], df[[2]])))[[1]]
-  expect_equal(result, list(list(1L, 2L, 3L, 4L, 5L, 6L), list(7L, 8L, 9L, 10L, 11L, 12L)))
-
-  # Test flatten()
-  df <- createDataFrame(list(list(list(list(1L, 2L), list(3L, 4L))),
-                        list(list(list(5L, 6L), list(7L, 8L)))))
-  result <- collect(select(df, flatten(df[[1]])))[[1]]
-  expect_equal(result, list(list(1L, 2L, 3L, 4L), list(5L, 6L, 7L, 8L)))
-
-  # Test map_entries(), map_keys(), map_values() and element_at()
-  df <- createDataFrame(list(list(map = as.environment(list(x = 1, y = 2)))))
-  result <- collect(select(df, map_entries(df$map)))[[1]]
-  expected_entries <-  list(listToStruct(list(key = "x", value = 1)),
-                            listToStruct(list(key = "y", value = 2)))
-  expect_equal(result, list(expected_entries))
+  expect_equal(result, list(list(1L, 2L, 3L), list(4L, 5L, 6L)))
 
+  # Test map_keys() and map_values()
+  df <- createDataFrame(list(list(map = as.environment(list(x = 1, y = 2)))))
   result <- collect(select(df, map_keys(df$map)))[[1]]
   expect_equal(result, list(list("x", "y")))
 
   result <- collect(select(df, map_values(df$map)))[[1]]
   expect_equal(result, list(list(1, 2)))
 
-  result <- collect(select(df, element_at(df$map, "y")))[[1]]
-  expect_equal(result, 2)
-
   # Test that stats::lag is working
   expect_equal(length(lag(ldeaths, 12)), 72)
 
@@ -1716,7 +1649,6 @@ test_that("string operators", {
   expect_false(first(select(df, startsWith(df$name, "m")))[[1]])
   expect_true(first(select(df, endsWith(df$name, "el")))[[1]])
   expect_equal(first(select(df, substr(df$name, 1, 2)))[[1]], "Mi")
-  expect_equal(first(select(df, substr(df$name, 4, 6)))[[1]], "hae")
   if (as.numeric(R.version$major) >= 3 && as.numeric(R.version$minor) >= 3) {
     expect_true(startsWith("Hello World", "Hello"))
     expect_false(endsWith("Hello World", "a"))
@@ -2253,8 +2185,8 @@ test_that("join(), crossJoin() and merge() on a DataFrame", {
   expect_equal(count(where(join(df, df2), df$name == df2$name)), 3)
   # cartesian join
   expect_error(tryCatch(count(join(df, df2)), error = function(e) { stop(e) }),
-               paste0(".*(org.apache.spark.sql.AnalysisException: Detected implicit cartesian",
-                      " product for INNER join between logical plans).*"))
+               paste0(".*(org.apache.spark.sql.AnalysisException: Detected cartesian product for",
+                      " INNER join between logical plans).*"))
 
   joined <- crossJoin(df, df2)
   expect_equal(names(joined), c("age", "name", "name", "test"))
@@ -3169,51 +3101,6 @@ test_that("repartition by columns on DataFrame", {
   })
 })
 
-test_that("repartitionByRange on a DataFrame", {
-  # The tasks here launch R workers with shuffles. So, we decrease the number of shuffle
-  # partitions to reduce the number of the tasks to speed up the test. This is particularly
-  # slow on Windows because the R workers are unable to be forked. See also SPARK-21693.
-  conf <- callJMethod(sparkSession, "conf")
-  shufflepartitionsvalue <- callJMethod(conf, "get", "spark.sql.shuffle.partitions")
-  callJMethod(conf, "set", "spark.sql.shuffle.partitions", "5")
-  tryCatch({
-    df <- createDataFrame(mtcars)
-    expect_error(repartitionByRange(df, "haha", df$mpg),
-                 "numPartitions and col must be numeric and Column.*")
-    expect_error(repartitionByRange(df),
-                 ".*specify a column.*or the number of partitions with a column.*")
-    expect_error(repartitionByRange(df, col = "haha"),
-                 "col must be Column; however, got.*")
-    expect_error(repartitionByRange(df, 3),
-                 "At least one partition-by column must be specified.")
-
-    # The order of rows should be different with a normal repartition.
-    actual <- repartitionByRange(df, 3, df$mpg)
-    expect_equal(getNumPartitions(actual), 3)
-    expect_false(identical(collect(actual), collect(repartition(df, 3, df$mpg))))
-
-    actual <- repartitionByRange(df, col = df$mpg)
-    expect_false(identical(collect(actual), collect(repartition(df, col = df$mpg))))
-
-    # They should have same data.
-    actual <- collect(repartitionByRange(df, 3, df$mpg))
-    actual <- actual[order(actual$mpg), ]
-    expected <- collect(repartition(df, 3, df$mpg))
-    expected <- expected[order(expected$mpg), ]
-    expect_true(all(actual == expected))
-
-    actual <- collect(repartitionByRange(df, col = df$mpg))
-    actual <- actual[order(actual$mpg), ]
-    expected <- collect(repartition(df, col = df$mpg))
-    expected <- expected[order(expected$mpg), ]
-    expect_true(all(actual == expected))
-  },
-  finally = {
-    # Resetting the conf back to default value
-    callJMethod(conf, "set", "spark.sql.shuffle.partitions", shufflepartitionsvalue)
-  })
-})
-
 test_that("coalesce, repartition, numPartitions", {
   df <- as.DataFrame(cars, numPartitions = 5)
   expect_equal(getNumPartitions(df), 5)
diff --git a/R/pkg/tests/fulltests/test_streaming.R b/R/pkg/tests/fulltests/test_streaming.R
index bfb1a046490ec..a354d50c6b54e 100644
--- a/R/pkg/tests/fulltests/test_streaming.R
+++ b/R/pkg/tests/fulltests/test_streaming.R
@@ -236,7 +236,7 @@ test_that("Watermark", {
 
   times <- collect(sql("SELECT * FROM times"))
   # looks like write timing can affect the first bucket; but it should be t
-  expect_equal(times[order(times$eventTime), ][1, 2], 2)
+  expect_equal(times[order(times$eventTime),][1, 2], 2)
 
   stopQuery(q)
   unlink(parquetPath)
diff --git a/R/pkg/vignettes/sparkr-vignettes.Rmd b/R/pkg/vignettes/sparkr-vignettes.Rmd
index d4713de7806a1..feca617c2554c 100644
--- a/R/pkg/vignettes/sparkr-vignettes.Rmd
+++ b/R/pkg/vignettes/sparkr-vignettes.Rmd
@@ -46,7 +46,7 @@ Sys.setenv("_JAVA_OPTIONS" = paste("-XX:-UsePerfData", old_java_opt, sep = " "))
 
 ## Overview
 
-SparkR is an R package that provides a light-weight frontend to use Apache Spark from R. With Spark `r packageVersion("SparkR")`, SparkR provides a distributed data frame implementation that supports data processing operations like selection, filtering, aggregation etc. and distributed machine learning using [MLlib](https://spark.apache.org/mllib/).
+SparkR is an R package that provides a light-weight frontend to use Apache Spark from R. With Spark `r packageVersion("SparkR")`, SparkR provides a distributed data frame implementation that supports data processing operations like selection, filtering, aggregation etc. and distributed machine learning using [MLlib](http://spark.apache.org/mllib/).
 
 ## Getting Started
 
@@ -132,7 +132,7 @@ sparkR.session.stop()
 
 Different from many other R packages, to use SparkR, you need an additional installation of Apache Spark. The Spark installation will be used to run a backend process that will compile and execute SparkR programs.
 
-After installing the SparkR package, you can call `sparkR.session` as explained in the previous section to start and it will check for the Spark installation. If you are working with SparkR from an interactive shell (eg. R, RStudio) then Spark is downloaded and cached automatically if it is not found. Alternatively, we provide an easy-to-use function `install.spark` for running this manually. If you don't have Spark installed on the computer, you may download it from [Apache Spark Website](https://spark.apache.org/downloads.html).
+After installing the SparkR package, you can call `sparkR.session` as explained in the previous section to start and it will check for the Spark installation. If you are working with SparkR from an interactive shell (eg. R, RStudio) then Spark is downloaded and cached automatically if it is not found. Alternatively, we provide an easy-to-use function `install.spark` for running this manually. If you don't have Spark installed on the computer, you may download it from [Apache Spark Website](http://spark.apache.org/downloads.html).
 
 ```{r, eval=FALSE}
 install.spark()
@@ -147,7 +147,7 @@ sparkR.session(sparkHome = "/HOME/spark")
 ### Spark Session {#SetupSparkSession}
 
 
-In addition to `sparkHome`, many other options can be specified in `sparkR.session`. For a complete list, see [Starting up: SparkSession](https://spark.apache.org/docs/latest/sparkr.html#starting-up-sparksession) and [SparkR API doc](https://spark.apache.org/docs/latest/api/R/sparkR.session.html).
+In addition to `sparkHome`, many other options can be specified in `sparkR.session`. For a complete list, see [Starting up: SparkSession](http://spark.apache.org/docs/latest/sparkr.html#starting-up-sparksession) and [SparkR API doc](http://spark.apache.org/docs/latest/api/R/sparkR.session.html).
 
 In particular, the following Spark driver properties can be set in `sparkConfig`.
 
@@ -169,7 +169,7 @@ sparkR.session(spark.sql.warehouse.dir = spark_warehouse_path)
 
 
 #### Cluster Mode
-SparkR can connect to remote Spark clusters. [Cluster Mode Overview](https://spark.apache.org/docs/latest/cluster-overview.html) is a good introduction to different Spark cluster modes.
+SparkR can connect to remote Spark clusters. [Cluster Mode Overview](http://spark.apache.org/docs/latest/cluster-overview.html) is a good introduction to different Spark cluster modes.
 
 When connecting SparkR to a remote Spark cluster, make sure that the Spark version and Hadoop version on the machine match the corresponding versions on the cluster. Current SparkR package is compatible with
 ```{r, echo=FALSE, tidy = TRUE}
@@ -177,7 +177,7 @@ paste("Spark", packageVersion("SparkR"))
 ```
 It should be used both on the local computer and on the remote cluster.
 
-To connect, pass the URL of the master node to `sparkR.session`. A complete list can be seen in [Spark Master URLs](https://spark.apache.org/docs/latest/submitting-applications.html#master-urls).
+To connect, pass the URL of the master node to `sparkR.session`. A complete list can be seen in [Spark Master URLs](http://spark.apache.org/docs/latest/submitting-applications.html#master-urls).
 For example, to connect to a local standalone Spark master, we can call
 
 ```{r, eval=FALSE}
@@ -317,7 +317,7 @@ A common flow of grouping and aggregation is
 
 2. Feed the `GroupedData` object to `agg` or `summarize` functions, with some provided aggregation functions to compute a number within each group.
 
-A number of widely used functions are supported to aggregate data after grouping, including `avg`, `countDistinct`, `count`, `first`, `kurtosis`, `last`, `max`, `mean`, `min`, `sd`, `skewness`, `stddev_pop`, `stddev_samp`, `sumDistinct`, `sum`, `var_pop`, `var_samp`, `var`. See the [API doc for aggregate functions](https://spark.apache.org/docs/latest/api/R/column_aggregate_functions.html) linked there.
+A number of widely used functions are supported to aggregate data after grouping, including `avg`, `countDistinct`, `count`, `first`, `kurtosis`, `last`, `max`, `mean`, `min`, `sd`, `skewness`, `stddev_pop`, `stddev_samp`, `sumDistinct`, `sum`, `var_pop`, `var_samp`, `var`. See the [API doc for `mean`](http://spark.apache.org/docs/latest/api/R/mean.html) and other `agg_funcs` linked there.
 
 For example we can compute a histogram of the number of cylinders in the `mtcars` dataset as shown below.
 
@@ -935,7 +935,7 @@ perplexity
 
 #### Alternating Least Squares
 
-`spark.als` learns latent factors in [collaborative filtering](https://en.wikipedia.org/wiki/Recommender_system#Collaborative_filtering) via [alternating least squares](https://dl.acm.org/citation.cfm?id=1608614).
+`spark.als` learns latent factors in [collaborative filtering](https://en.wikipedia.org/wiki/Recommender_system#Collaborative_filtering) via [alternating least squares](http://dl.acm.org/citation.cfm?id=1608614).
 
 There are multiple options that can be configured in `spark.als`, including `rank`, `reg`, and `nonnegative`. For a complete list, refer to the help file.
 
@@ -1171,11 +1171,11 @@ env | map
 
 ## References
 
-* [Spark Cluster Mode Overview](https://spark.apache.org/docs/latest/cluster-overview.html)
+* [Spark Cluster Mode Overview](http://spark.apache.org/docs/latest/cluster-overview.html)
 
-* [Submitting Spark Applications](https://spark.apache.org/docs/latest/submitting-applications.html)
+* [Submitting Spark Applications](http://spark.apache.org/docs/latest/submitting-applications.html)
 
-* [Machine Learning Library Guide (MLlib)](https://spark.apache.org/docs/latest/ml-guide.html)
+* [Machine Learning Library Guide (MLlib)](http://spark.apache.org/docs/latest/ml-guide.html)
 
 * [SparkR: Scaling R Programs with Spark](https://people.csail.mit.edu/matei/papers/2016/sigmod_sparkr.pdf), Shivaram Venkataraman, Zongheng Yang, Davies Liu, Eric Liang, Hossein Falaki, Xiangrui Meng, Reynold Xin, Ali Ghodsi, Michael Franklin, Ion Stoica, and Matei Zaharia. SIGMOD 2016. June 2016.
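The grouping-and-aggregation flow described in the vignette hunk above (group first, then feed the grouped data to agg/summarize) maps directly onto Spark's Dataset API. A minimal sketch in Java, assuming a local SparkSession and a few mtcars-like rows; the class name, column names, and values are illustrative and not part of this PR:

import static org.apache.spark.sql.functions.avg;
import static org.apache.spark.sql.functions.count;

import java.util.Arrays;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructType;

public class GroupAggSketch {
  public static void main(String[] args) {
    SparkSession spark = SparkSession.builder()
        .master("local[*]")                       // assumption: local run for the sketch
        .appName("group-agg-sketch")
        .getOrCreate();

    // A few mtcars-like rows; column names are illustrative.
    Dataset<Row> cars = spark.createDataFrame(
        Arrays.asList(RowFactory.create(6, 21.0), RowFactory.create(4, 22.8),
            RowFactory.create(6, 21.4)),
        new StructType().add("cyl", DataTypes.IntegerType).add("mpg", DataTypes.DoubleType));

    // Step 1: group; step 2: aggregate one number per group.
    cars.groupBy("cyl")
        .agg(count("*").alias("n"), avg("mpg").alias("avg_mpg"))
        .show();

    spark.stop();
  }
}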
 
diff --git a/assembly/pom.xml b/assembly/pom.xml
index 9608c96fd5369..a207dae5a74ff 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -254,14 +254,6 @@
           <artifactId>spark-hadoop-cloud_${scala.binary.version}</artifactId>
           <version>${project.version}</version>
         </dependency>
-        <!--
-        Redeclare this dependency to force it into the distribution.
-        -->
-        <dependency>
-          <groupId>org.eclipse.jetty</groupId>
-          <artifactId>jetty-util</artifactId>
-          <scope>${hadoop.deps.scope}</scope>
-        </dependency>
       </dependencies>
     </profile>
   </profiles>
diff --git a/bin/docker-image-tool.sh b/bin/docker-image-tool.sh
index f090240065bf1..071406336d1b1 100755
--- a/bin/docker-image-tool.sh
+++ b/bin/docker-image-tool.sh
@@ -57,18 +57,15 @@ function build {
   else
     # Not passed as an argument to docker, but used to validate the Spark directory.
     IMG_PATH="kubernetes/dockerfiles"
-    BUILD_ARGS=()
   fi
 
   if [ ! -d "$IMG_PATH" ]; then
     error "Cannot find docker image. This script must be run from a runnable distribution of Apache Spark."
   fi
 
-  local DOCKERFILE=${DOCKERFILE:-"$IMG_PATH/spark/Dockerfile"}
-
   docker build "${BUILD_ARGS[@]}" \
     -t $(image_ref spark) \
-    -f "$DOCKERFILE" .
+    -f "$IMG_PATH/spark/Dockerfile" .
 }
 
 function push {
@@ -86,7 +83,6 @@ Commands:
   push        Push a pre-built image to a registry. Requires a repository address to be provided.
 
 Options:
-  -f file     Dockerfile to build. By default builds the Dockerfile shipped with Spark.
   -r repo     Repository address.
   -t tag      Tag to apply to the built image, or to identify the image to be pushed.
   -m          Use minikube's Docker daemon.
@@ -116,12 +112,10 @@ fi
 
 REPO=
 TAG=
-DOCKERFILE=
-while getopts f:mr:t: option
+while getopts mr:t: option
 do
  case "${option}"
  in
- f) DOCKERFILE=${OPTARG};;
  r) REPO=${OPTARG};;
  t) TAG=${OPTARG};;
  m)
diff --git a/bin/pyspark b/bin/pyspark
index 5d5affb1f97c3..dd286277c1fc1 100755
--- a/bin/pyspark
+++ b/bin/pyspark
@@ -25,14 +25,14 @@ source "${SPARK_HOME}"/bin/load-spark-env.sh
 export _SPARK_CMD_USAGE="Usage: ./bin/pyspark [options]"
 
 # In Spark 2.0, IPYTHON and IPYTHON_OPTS are removed and pyspark fails to launch if either option
-# is set in the user's environment. Instead, users should set PYSPARK_DRIVER_PYTHON=ipython
+# is set in the user's environment. Instead, users should set PYSPARK_DRIVER_PYTHON=ipython 
 # to use IPython and set PYSPARK_DRIVER_PYTHON_OPTS to pass options when starting the Python driver
 # (e.g. PYSPARK_DRIVER_PYTHON_OPTS='notebook').  This supports full customization of the IPython
 # and executor Python executables.
 
 # Fail noisily if removed options are set
 if [[ -n "$IPYTHON" || -n "$IPYTHON_OPTS" ]]; then
-  echo "Error in pyspark startup:"
+  echo "Error in pyspark startup:" 
   echo "IPYTHON and IPYTHON_OPTS are removed in Spark 2.0+. Remove these from the environment and set PYSPARK_DRIVER_PYTHON and PYSPARK_DRIVER_PYTHON_OPTS instead."
   exit 1
 fi
@@ -57,7 +57,7 @@ export PYSPARK_PYTHON
 
 # Add the PySpark classes to the Python path:
 export PYTHONPATH="${SPARK_HOME}/python/:$PYTHONPATH"
-export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.7-src.zip:$PYTHONPATH"
+export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.6-src.zip:$PYTHONPATH"
 
 # Load the PySpark shell.py script when ./pyspark is used interactively:
 export OLD_PYTHONSTARTUP="$PYTHONSTARTUP"
diff --git a/bin/pyspark2.cmd b/bin/pyspark2.cmd
index 15fa910c277b3..663670f2fddaf 100644
--- a/bin/pyspark2.cmd
+++ b/bin/pyspark2.cmd
@@ -30,7 +30,7 @@ if "x%PYSPARK_DRIVER_PYTHON%"=="x" (
 )
 
 set PYTHONPATH=%SPARK_HOME%\python;%PYTHONPATH%
-set PYTHONPATH=%SPARK_HOME%\python\lib\py4j-0.10.7-src.zip;%PYTHONPATH%
+set PYTHONPATH=%SPARK_HOME%\python\lib\py4j-0.10.6-src.zip;%PYTHONPATH%
 
 set OLD_PYTHONSTARTUP=%PYTHONSTARTUP%
 set PYTHONSTARTUP=%SPARK_HOME%\python\pyspark\shell.py
diff --git a/common/network-common/src/main/java/org/apache/spark/network/server/TransportServer.java b/common/network-common/src/main/java/org/apache/spark/network/server/TransportServer.java
index 60f51125c07fd..0719fa7647bcc 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/server/TransportServer.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/server/TransportServer.java
@@ -32,7 +32,6 @@
 import io.netty.channel.ChannelOption;
 import io.netty.channel.EventLoopGroup;
 import io.netty.channel.socket.SocketChannel;
-import org.apache.commons.lang3.SystemUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -99,7 +98,6 @@ private void init(String hostToBind, int portToBind) {
       .group(bossGroup, workerGroup)
       .channel(NettyUtils.getServerChannelClass(ioMode))
       .option(ChannelOption.ALLOCATOR, allocator)
-      .option(ChannelOption.SO_REUSEADDR, !SystemUtils.IS_OS_WINDOWS)
       .childOption(ChannelOption.ALLOCATOR, allocator);
 
     this.metrics = new NettyMemoryMetrics(
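For context on the ChannelOption line removed above: SO_REUSEADDR lets the server rebind its port quickly after a restart and is skipped on Windows, where the option behaves differently. A minimal Netty bootstrap sketch under those assumptions; the class name and ephemeral port are illustrative, not Spark's code:

import io.netty.bootstrap.ServerBootstrap;
import io.netty.channel.Channel;
import io.netty.channel.ChannelInitializer;
import io.netty.channel.ChannelOption;
import io.netty.channel.EventLoopGroup;
import io.netty.channel.nio.NioEventLoopGroup;
import io.netty.channel.socket.SocketChannel;
import io.netty.channel.socket.nio.NioServerSocketChannel;
import org.apache.commons.lang3.SystemUtils;

public class ReuseAddrBootstrapSketch {
  public static void main(String[] args) throws Exception {
    EventLoopGroup boss = new NioEventLoopGroup(1);
    EventLoopGroup workers = new NioEventLoopGroup();
    try {
      ServerBootstrap bootstrap = new ServerBootstrap()
          .group(boss, workers)
          .channel(NioServerSocketChannel.class)
          // Request fast address reuse only where it behaves as expected.
          .option(ChannelOption.SO_REUSEADDR, !SystemUtils.IS_OS_WINDOWS)
          .childHandler(new ChannelInitializer<SocketChannel>() {
            @Override
            protected void initChannel(SocketChannel ch) {
              // no handlers needed for this sketch
            }
          });
      Channel ch = bootstrap.bind(0).sync().channel();   // 0 = pick an ephemeral port
      System.out.println("Bound to " + ch.localAddress());
      ch.close().sync();
    } finally {
      workers.shutdownGracefully();
      boss.shutdownGracefully();
    }
  }
}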
diff --git a/common/network-common/src/main/java/org/apache/spark/network/util/JavaUtils.java b/common/network-common/src/main/java/org/apache/spark/network/util/JavaUtils.java
index b5497087634ce..afc59efaef810 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/util/JavaUtils.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/util/JavaUtils.java
@@ -17,7 +17,10 @@
 
 package org.apache.spark.network.util;
 
-import java.io.*;
+import java.io.Closeable;
+import java.io.EOFException;
+import java.io.File;
+import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.nio.channels.ReadableByteChannel;
 import java.nio.charset.StandardCharsets;
@@ -88,24 +91,11 @@ public static String bytesToString(ByteBuffer b) {
    * @throws IOException if deletion is unsuccessful
    */
   public static void deleteRecursively(File file) throws IOException {
-    deleteRecursively(file, null);
-  }
-
-  /**
-   * Delete a file or directory and its contents recursively.
-   * Don't follow directories if they are symlinks.
-   *
-   * @param file Input file / dir to be deleted
-   * @param filter A filename filter that make sure only files / dirs with the satisfied filenames
-   *               are deleted.
-   * @throws IOException if deletion is unsuccessful
-   */
-  public static void deleteRecursively(File file, FilenameFilter filter) throws IOException {
     if (file == null) { return; }
 
     // On Unix systems, use operating system command to run faster
     // If that does not work out, fallback to the Java IO way
-    if (SystemUtils.IS_OS_UNIX && filter == null) {
+    if (SystemUtils.IS_OS_UNIX) {
       try {
         deleteRecursivelyUsingUnixNative(file);
         return;
@@ -115,17 +105,15 @@ public static void deleteRecursively(File file, FilenameFilter filter) throws IO
       }
     }
 
-    deleteRecursivelyUsingJavaIO(file, filter);
+    deleteRecursivelyUsingJavaIO(file);
   }
 
-  private static void deleteRecursivelyUsingJavaIO(
-      File file,
-      FilenameFilter filter) throws IOException {
+  private static void deleteRecursivelyUsingJavaIO(File file) throws IOException {
     if (file.isDirectory() && !isSymlink(file)) {
       IOException savedIOException = null;
-      for (File child : listFilesSafely(file, filter)) {
+      for (File child : listFilesSafely(file)) {
         try {
-          deleteRecursively(child, filter);
+          deleteRecursively(child);
         } catch (IOException e) {
           // In case of multiple exceptions, only last one will be thrown
           savedIOException = e;
@@ -136,13 +124,10 @@ private static void deleteRecursivelyUsingJavaIO(
       }
     }
 
-    // Delete file only when it's a normal file or an empty directory.
-    if (file.isFile() || (file.isDirectory() && listFilesSafely(file, null).length == 0)) {
-      boolean deleted = file.delete();
-      // Delete can also fail if the file simply did not exist.
-      if (!deleted && file.exists()) {
-        throw new IOException("Failed to delete: " + file.getAbsolutePath());
-      }
+    boolean deleted = file.delete();
+    // Delete can also fail if the file simply did not exist.
+    if (!deleted && file.exists()) {
+      throw new IOException("Failed to delete: " + file.getAbsolutePath());
     }
   }
 
@@ -172,9 +157,9 @@ private static void deleteRecursivelyUsingUnixNative(File file) throws IOExcepti
     }
   }
 
-  private static File[] listFilesSafely(File file, FilenameFilter filter) throws IOException {
+  private static File[] listFilesSafely(File file) throws IOException {
     if (file.exists()) {
-      File[] files = file.listFiles(filter);
+      File[] files = file.listFiles();
       if (files == null) {
         throw new IOException("Failed to list files for dir: " + file);
       }
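The deleteRecursively(File, FilenameFilter) overload removed above walks a directory tree, deletes every entry the filter accepts, and removes a directory only once it has been emptied. A self-contained plain-java sketch of that behaviour; symlink handling and the Unix fast path are omitted, and the /tmp path and *.keep filter are just examples:

import java.io.File;
import java.io.FilenameFilter;
import java.io.IOException;

public class FilteredDeleteSketch {
  public static void deleteRecursively(File file, FilenameFilter filter) throws IOException {
    if (file == null || !file.exists()) {
      return;
    }
    if (file.isDirectory()) {
      File[] children = file.listFiles(filter);   // a null filter lists everything
      if (children == null) {
        throw new IOException("Failed to list files for dir: " + file);
      }
      for (File child : children) {
        deleteRecursively(child, filter);
      }
    }
    // Delete plain files, and directories only when nothing is left inside them.
    File[] remaining = file.listFiles();
    boolean emptyDir = file.isDirectory() && remaining != null && remaining.length == 0;
    if ((file.isFile() || emptyDir) && !file.delete() && file.exists()) {
      throw new IOException("Failed to delete: " + file.getAbsolutePath());
    }
  }

  public static void main(String[] args) throws IOException {
    // Example: remove everything under /tmp/scratch except *.keep files.
    deleteRecursively(new File("/tmp/scratch"), (dir, name) -> !name.endsWith(".keep"));
  }
}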
diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleBlockHandler.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleBlockHandler.java
index 098fa7974b87b..fc7bba41185f0 100644
--- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleBlockHandler.java
+++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleBlockHandler.java
@@ -138,13 +138,6 @@ public void applicationRemoved(String appId, boolean cleanupLocalDirs) {
     blockManager.applicationRemoved(appId, cleanupLocalDirs);
   }
 
-  /**
-   * Clean up any non-shuffle files in any local directories associated with an finished executor.
-   */
-  public void executorRemoved(String executorId, String appId) {
-    blockManager.executorRemoved(executorId, appId);
-  }
-
   /**
    * Register an (application, executor) with the given shuffle info.
    *
diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolver.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolver.java
index 0b7a27402369d..e6399897be9c2 100644
--- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolver.java
+++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolver.java
@@ -24,8 +24,6 @@
 import java.util.concurrent.ExecutionException;
 import java.util.concurrent.Executor;
 import java.util.concurrent.Executors;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
 
 import com.fasterxml.jackson.annotation.JsonCreator;
 import com.fasterxml.jackson.annotation.JsonProperty;
@@ -61,7 +59,6 @@
   private static final Logger logger = LoggerFactory.getLogger(ExternalShuffleBlockResolver.class);
 
   private static final ObjectMapper mapper = new ObjectMapper();
-
   /**
    * This a common prefix to the key for each app registration we stick in leveldb, so they
    * are easy to find, since leveldb lets you search based on prefix.
@@ -69,8 +66,6 @@
   private static final String APP_KEY_PREFIX = "AppExecShuffleInfo";
   private static final StoreVersion CURRENT_VERSION = new StoreVersion(1, 0);
 
-  private static final Pattern MULTIPLE_SEPARATORS = Pattern.compile(File.separator + "{2,}");
-
   // Map containing all registered executors' metadata.
   @VisibleForTesting
   final ConcurrentMap<AppExecId, ExecutorShuffleInfo> executors;
@@ -216,26 +211,6 @@ public void applicationRemoved(String appId, boolean cleanupLocalDirs) {
     }
   }
 
-  /**
-   * Removes all the non-shuffle files in any local directories associated with the finished
-   * executor.
-   */
-  public void executorRemoved(String executorId, String appId) {
-    logger.info("Clean up non-shuffle files associated with the finished executor {}", executorId);
-    AppExecId fullId = new AppExecId(appId, executorId);
-    final ExecutorShuffleInfo executor = executors.get(fullId);
-    if (executor == null) {
-      // Executor not registered, skip clean up of the local directories.
-      logger.info("Executor is not registered (appId={}, execId={})", appId, executorId);
-    } else {
-      logger.info("Cleaning up non-shuffle files in executor {}'s {} local dirs", fullId,
-              executor.localDirs.length);
-
-      // Execute the actual deletion in a different thread, as it may take some time.
-      directoryCleaner.execute(() -> deleteNonShuffleFiles(executor.localDirs));
-    }
-  }
-
   /**
    * Synchronously deletes each directory one at a time.
    * Should be executed in its own thread, as this may take a long time.
@@ -251,29 +226,6 @@ private void deleteExecutorDirs(String[] dirs) {
     }
   }
 
-  /**
-   * Synchronously deletes non-shuffle files in each directory recursively.
-   * Should be executed in its own thread, as this may take a long time.
-   */
-  private void deleteNonShuffleFiles(String[] dirs) {
-    FilenameFilter filter = new FilenameFilter() {
-      @Override
-      public boolean accept(File dir, String name) {
-        // Don't delete shuffle data or shuffle index files.
-        return !name.endsWith(".index") && !name.endsWith(".data");
-      }
-    };
-
-    for (String localDir : dirs) {
-      try {
-        JavaUtils.deleteRecursively(new File(localDir), filter);
-        logger.debug("Successfully cleaned up non-shuffle files in directory: {}", localDir);
-      } catch (Exception e) {
-        logger.error("Failed to delete non-shuffle files in directory: " + localDir, e);
-      }
-    }
-  }
-
   /**
    * Sort-based shuffle data uses an index called "shuffle_ShuffleId_MapId_0.index" into a data file
    * called "shuffle_ShuffleId_MapId_0.data". This logic is from IndexShuffleBlockResolver,
@@ -307,8 +259,7 @@ static File getFile(String[] localDirs, int subDirsPerLocalDir, String filename)
     int hash = JavaUtils.nonNegativeHash(filename);
     String localDir = localDirs[hash % localDirs.length];
     int subDirId = (hash / localDirs.length) % subDirsPerLocalDir;
-    return new File(createNormalizedInternedPathname(
-        localDir, String.format("%02x", subDirId), filename));
+    return new File(new File(localDir, String.format("%02x", subDirId)), filename);
   }
 
   void close() {
@@ -321,28 +272,6 @@ void close() {
     }
   }
 
-  /**
-   * This method is needed to avoid the situation when multiple File instances for the
-   * same pathname "foo/bar" are created, each with a separate copy of the "foo/bar" String.
-   * According to measurements, in some scenarios such duplicate strings may waste a lot
-   * of memory (~ 10% of the heap). To avoid that, we intern the pathname, and before that
-   * we make sure that it's in a normalized form (contains no "//", "///" etc.) Otherwise,
-   * the internal code in java.io.File would normalize it later, creating a new "foo/bar"
-   * String copy. Unfortunately, we cannot just reuse the normalization code that java.io.File
-   * uses, since it is in the package-private class java.io.FileSystem.
-   */
-  @VisibleForTesting
-  static String createNormalizedInternedPathname(String dir1, String dir2, String fname) {
-    String pathname = dir1 + File.separator + dir2 + File.separator + fname;
-    Matcher m = MULTIPLE_SEPARATORS.matcher(pathname);
-    pathname = m.replaceAll("/");
-    // A single trailing slash needs to be taken care of separately
-    if (pathname.length() > 1 && pathname.endsWith("/")) {
-      pathname = pathname.substring(0, pathname.length() - 1);
-    }
-    return pathname.intern();
-  }
-
   /** Simply encodes an executor's full ID, which is appId + execId. */
   public static class AppExecId {
     public final String appId;
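The removed createNormalizedInternedPathname helper exists to avoid holding many duplicate copies of the same pathname String: build the path once, collapse repeated separators so java.io.File will not re-normalize it into a fresh String, trim a trailing slash, and intern the result. A standalone sketch of that idea, not the Spark class itself:

import java.io.File;
import java.util.regex.Pattern;

public class InternedPathSketch {
  private static final Pattern MULTIPLE_SEPARATORS = Pattern.compile(File.separator + "{2,}");

  static String normalizedInternedPathname(String dir1, String dir2, String fname) {
    String pathname = dir1 + File.separator + dir2 + File.separator + fname;
    pathname = MULTIPLE_SEPARATORS.matcher(pathname).replaceAll("/");
    // A single trailing slash is handled separately.
    if (pathname.length() > 1 && pathname.endsWith("/")) {
      pathname = pathname.substring(0, pathname.length() - 1);
    }
    return pathname.intern();
  }

  public static void main(String[] args) {
    String a = normalizedInternedPathname("//foo/", "bar/", "//baz");
    String b = normalizedInternedPathname("/foo", "bar", "baz");
    // Both normalize to "/foo/bar/baz" and, thanks to intern(), are the same String object.
    System.out.println(a + " sameInstance=" + (a == b));
  }
}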
diff --git a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolverSuite.java b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolverSuite.java
index d2072a54fa415..6d201b8fe8d7d 100644
--- a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolverSuite.java
+++ b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolverSuite.java
@@ -17,7 +17,6 @@
 
 package org.apache.spark.network.shuffle;
 
-import java.io.File;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
@@ -136,23 +135,4 @@ public void jsonSerializationOfExecutorRegistration() throws IOException {
       "\"subDirsPerLocalDir\": 7, \"shuffleManager\": " + "\"" + SORT_MANAGER + "\"}";
     assertEquals(shuffleInfo, mapper.readValue(legacyShuffleJson, ExecutorShuffleInfo.class));
   }
-
-  @Test
-  public void testNormalizeAndInternPathname() {
-    assertPathsMatch("/foo", "bar", "baz", "/foo/bar/baz");
-    assertPathsMatch("//foo/", "bar/", "//baz", "/foo/bar/baz");
-    assertPathsMatch("foo", "bar", "baz///", "foo/bar/baz");
-    assertPathsMatch("/foo/", "/bar//", "/baz", "/foo/bar/baz");
-    assertPathsMatch("/", "", "", "/");
-    assertPathsMatch("/", "/", "/", "/");
-  }
-
-  private void assertPathsMatch(String p1, String p2, String p3, String expectedPathname) {
-    String normPathname =
-        ExternalShuffleBlockResolver.createNormalizedInternedPathname(p1, p2, p3);
-    assertEquals(expectedPathname, normPathname);
-    File file = new File(normPathname);
-    String returnedPath = file.getPath();
-    assertTrue(normPathname == returnedPath);
-  }
 }
diff --git a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/NonShuffleFilesCleanupSuite.java b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/NonShuffleFilesCleanupSuite.java
deleted file mode 100644
index d22f3ace4103b..0000000000000
--- a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/NonShuffleFilesCleanupSuite.java
+++ /dev/null
@@ -1,221 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.network.shuffle;
-
-import java.io.File;
-import java.io.FilenameFilter;
-import java.io.IOException;
-import java.nio.charset.StandardCharsets;
-import java.util.Random;
-import java.util.concurrent.Executor;
-import java.util.concurrent.atomic.AtomicBoolean;
-
-import com.google.common.util.concurrent.MoreExecutors;
-import org.junit.Test;
-import static org.junit.Assert.assertTrue;
-
-import org.apache.spark.network.util.MapConfigProvider;
-import org.apache.spark.network.util.TransportConf;
-
-public class NonShuffleFilesCleanupSuite {
-
-  // Same-thread Executor used to ensure cleanup happens synchronously in test thread.
-  private Executor sameThreadExecutor = MoreExecutors.sameThreadExecutor();
-  private TransportConf conf = new TransportConf("shuffle", MapConfigProvider.EMPTY);
-  private static final String SORT_MANAGER = "org.apache.spark.shuffle.sort.SortShuffleManager";
-
-  @Test
-  public void cleanupOnRemovedExecutorWithShuffleFiles() throws IOException {
-    cleanupOnRemovedExecutor(true);
-  }
-
-  @Test
-  public void cleanupOnRemovedExecutorWithoutShuffleFiles() throws IOException {
-    cleanupOnRemovedExecutor(false);
-  }
-
-  private void cleanupOnRemovedExecutor(boolean withShuffleFiles) throws IOException {
-    TestShuffleDataContext dataContext = initDataContext(withShuffleFiles);
-
-    ExternalShuffleBlockResolver resolver =
-      new ExternalShuffleBlockResolver(conf, null, sameThreadExecutor);
-    resolver.registerExecutor("app", "exec0", dataContext.createExecutorInfo(SORT_MANAGER));
-    resolver.executorRemoved("exec0", "app");
-
-    assertCleanedUp(dataContext);
-  }
-
-  @Test
-  public void cleanupUsesExecutorWithShuffleFiles() throws IOException {
-    cleanupUsesExecutor(true);
-  }
-
-  @Test
-  public void cleanupUsesExecutorWithoutShuffleFiles() throws IOException {
-    cleanupUsesExecutor(false);
-  }
-
-  private void cleanupUsesExecutor(boolean withShuffleFiles) throws IOException {
-    TestShuffleDataContext dataContext = initDataContext(withShuffleFiles);
-
-    AtomicBoolean cleanupCalled = new AtomicBoolean(false);
-
-    // Executor which does nothing to ensure we're actually using it.
-    Executor noThreadExecutor = runnable -> cleanupCalled.set(true);
-
-    ExternalShuffleBlockResolver manager =
-      new ExternalShuffleBlockResolver(conf, null, noThreadExecutor);
-
-    manager.registerExecutor("app", "exec0", dataContext.createExecutorInfo(SORT_MANAGER));
-    manager.executorRemoved("exec0", "app");
-
-    assertTrue(cleanupCalled.get());
-    assertStillThere(dataContext);
-  }
-
-  @Test
-  public void cleanupOnlyRemovedExecutorWithShuffleFiles() throws IOException {
-    cleanupOnlyRemovedExecutor(true);
-  }
-
-  @Test
-  public void cleanupOnlyRemovedExecutorWithoutShuffleFiles() throws IOException {
-    cleanupOnlyRemovedExecutor(false);
-  }
-
-  private void cleanupOnlyRemovedExecutor(boolean withShuffleFiles) throws IOException {
-    TestShuffleDataContext dataContext0 = initDataContext(withShuffleFiles);
-    TestShuffleDataContext dataContext1 = initDataContext(withShuffleFiles);
-
-    ExternalShuffleBlockResolver resolver =
-      new ExternalShuffleBlockResolver(conf, null, sameThreadExecutor);
-    resolver.registerExecutor("app", "exec0", dataContext0.createExecutorInfo(SORT_MANAGER));
-    resolver.registerExecutor("app", "exec1", dataContext1.createExecutorInfo(SORT_MANAGER));
-
-
-    resolver.executorRemoved("exec-nonexistent", "app");
-    assertStillThere(dataContext0);
-    assertStillThere(dataContext1);
-
-    resolver.executorRemoved("exec0", "app");
-    assertCleanedUp(dataContext0);
-    assertStillThere(dataContext1);
-
-    resolver.executorRemoved("exec1", "app");
-    assertCleanedUp(dataContext0);
-    assertCleanedUp(dataContext1);
-
-    // Make sure it's not an error to cleanup multiple times
-    resolver.executorRemoved("exec1", "app");
-    assertCleanedUp(dataContext0);
-    assertCleanedUp(dataContext1);
-  }
-
-  @Test
-  public void cleanupOnlyRegisteredExecutorWithShuffleFiles() throws IOException {
-    cleanupOnlyRegisteredExecutor(true);
-  }
-
-  @Test
-  public void cleanupOnlyRegisteredExecutorWithoutShuffleFiles() throws IOException {
-    cleanupOnlyRegisteredExecutor(false);
-  }
-
-  private void cleanupOnlyRegisteredExecutor(boolean withShuffleFiles) throws IOException {
-    TestShuffleDataContext dataContext = initDataContext(withShuffleFiles);
-
-    ExternalShuffleBlockResolver resolver =
-      new ExternalShuffleBlockResolver(conf, null, sameThreadExecutor);
-    resolver.registerExecutor("app", "exec0", dataContext.createExecutorInfo(SORT_MANAGER));
-
-    resolver.executorRemoved("exec1", "app");
-    assertStillThere(dataContext);
-
-    resolver.executorRemoved("exec0", "app");
-    assertCleanedUp(dataContext);
-  }
-
-  private static void assertStillThere(TestShuffleDataContext dataContext) {
-    for (String localDir : dataContext.localDirs) {
-      assertTrue(localDir + " was cleaned up prematurely", new File(localDir).exists());
-    }
-  }
-
-  private static FilenameFilter filter = new FilenameFilter() {
-    @Override
-    public boolean accept(File dir, String name) {
-      // Don't delete shuffle data or shuffle index files.
-      return !name.endsWith(".index") && !name.endsWith(".data");
-    }
-  };
-
-  private static boolean assertOnlyShuffleDataInDir(File[] dirs) {
-    for (File dir : dirs) {
-      assertTrue(dir.getName() + " wasn't cleaned up", !dir.exists() ||
-        dir.listFiles(filter).length == 0 || assertOnlyShuffleDataInDir(dir.listFiles()));
-    }
-    return true;
-  }
-
-  private static void assertCleanedUp(TestShuffleDataContext dataContext) {
-    for (String localDir : dataContext.localDirs) {
-      File[] dirs = new File[] {new File(localDir)};
-      assertOnlyShuffleDataInDir(dirs);
-    }
-  }
-
-  private static TestShuffleDataContext initDataContext(boolean withShuffleFiles)
-      throws IOException {
-    if (withShuffleFiles) {
-      return initDataContextWithShuffleFiles();
-    } else {
-      return initDataContextWithoutShuffleFiles();
-    }
-  }
-
-  private static TestShuffleDataContext initDataContextWithShuffleFiles() throws IOException {
-    TestShuffleDataContext dataContext = createDataContext();
-    createShuffleFiles(dataContext);
-    createNonShuffleFiles(dataContext);
-    return dataContext;
-  }
-
-  private static TestShuffleDataContext initDataContextWithoutShuffleFiles() throws IOException {
-    TestShuffleDataContext dataContext = createDataContext();
-    createNonShuffleFiles(dataContext);
-    return dataContext;
-  }
-
-  private static TestShuffleDataContext createDataContext() {
-    TestShuffleDataContext dataContext = new TestShuffleDataContext(10, 5);
-    dataContext.create();
-    return dataContext;
-  }
-
-  private static void createShuffleFiles(TestShuffleDataContext dataContext) throws IOException {
-    Random rand = new Random(123);
-    dataContext.insertSortShuffleData(rand.nextInt(1000), rand.nextInt(1000), new byte[][] {
-        "ABC".getBytes(StandardCharsets.UTF_8),
-        "DEF".getBytes(StandardCharsets.UTF_8)});
-  }
-
-  private static void createNonShuffleFiles(TestShuffleDataContext dataContext) throws IOException {
-    // Create spill file(s)
-    dataContext.insertSpillData();
-  }
-}
diff --git a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/TestShuffleDataContext.java b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/TestShuffleDataContext.java
index 6989c3baf2e28..81e01949e50fa 100644
--- a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/TestShuffleDataContext.java
+++ b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/TestShuffleDataContext.java
@@ -22,7 +22,6 @@
 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.OutputStream;
-import java.util.UUID;
 
 import com.google.common.io.Closeables;
 import com.google.common.io.Files;
@@ -95,20 +94,6 @@ public void insertSortShuffleData(int shuffleId, int mapId, byte[][] blocks) thr
     }
   }
 
-  /** Creates spill file(s) within the local dirs. */
-  public void insertSpillData() throws IOException {
-    String filename = "temp_local_" + UUID.randomUUID();
-    OutputStream dataStream = null;
-
-    try {
-      dataStream = new FileOutputStream(
-        ExternalShuffleBlockResolver.getFile(localDirs, subDirsPerLocalDir, filename));
-      dataStream.write(42);
-    } finally {
-      Closeables.close(dataStream, false);
-    }
-  }
-
   /**
    * Creates an ExecutorShuffleInfo object based on the given shuffle manager which targets this
    * context's directories.
diff --git a/common/sketch/src/main/java/org/apache/spark/util/sketch/Murmur3_x86_32.java b/common/sketch/src/main/java/org/apache/spark/util/sketch/Murmur3_x86_32.java
index e83b331391e39..a61ce4fb7241d 100644
--- a/common/sketch/src/main/java/org/apache/spark/util/sketch/Murmur3_x86_32.java
+++ b/common/sketch/src/main/java/org/apache/spark/util/sketch/Murmur3_x86_32.java
@@ -60,8 +60,6 @@ public static int hashUnsafeWords(Object base, long offset, int lengthInBytes, i
   }
 
   public static int hashUnsafeBytes(Object base, long offset, int lengthInBytes, int seed) {
-    // This is not compatible with original and another implementations.
-    // But remain it for backward compatibility for the components existing before 2.3.
     assert (lengthInBytes >= 0): "lengthInBytes cannot be negative";
     int lengthAligned = lengthInBytes - lengthInBytes % 4;
     int h1 = hashBytesByInt(base, offset, lengthAligned, seed);
@@ -73,20 +71,6 @@ public static int hashUnsafeBytes(Object base, long offset, int lengthInBytes, i
     return fmix(h1, lengthInBytes);
   }
 
-  public static int hashUnsafeBytes2(Object base, long offset, int lengthInBytes, int seed) {
-    // This is compatible with original and another implementations.
-    // Use this method for new components after Spark 2.3.
-    assert (lengthInBytes >= 0): "lengthInBytes cannot be negative";
-    int lengthAligned = lengthInBytes - lengthInBytes % 4;
-    int h1 = hashBytesByInt(base, offset, lengthAligned, seed);
-    int k1 = 0;
-    for (int i = lengthAligned, shift = 0; i < lengthInBytes; i++, shift += 8) {
-      k1 ^= (Platform.getByte(base, offset + i) & 0xFF) << shift;
-    }
-    h1 ^= mixK1(k1);
-    return fmix(h1, lengthInBytes);
-  }
-
   private static int hashBytesByInt(Object base, long offset, int lengthInBytes, int seed) {
     assert (lengthInBytes % 4 == 0);
     int h1 = seed;
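The compatibility comments removed above concern how trailing bytes are folded in when the input length is not a multiple of 4: the pre-2.3 variant mixes each trailing byte as its own k1, while hashUnsafeBytes2 packs them into a single little-endian k1 first, matching other Murmur3 implementations. A self-contained sketch of the two tail treatments over a byte[]; the constants follow the standard Murmur3_x86_32 reference and the int assembly assumes little-endian order, and none of this code is taken from the PR:

import java.nio.charset.StandardCharsets;

public class Murmur3TailSketch {
  private static final int C1 = 0xcc9e2d51;
  private static final int C2 = 0x1b873593;

  private static int mixK1(int k1) {
    k1 *= C1;
    k1 = Integer.rotateLeft(k1, 15);
    return k1 * C2;
  }

  private static int mixH1(int h1, int k1) {
    h1 ^= k1;
    h1 = Integer.rotateLeft(h1, 13);
    return h1 * 5 + 0xe6546b64;
  }

  private static int fmix(int h1, int length) {
    h1 ^= length;
    h1 ^= h1 >>> 16;
    h1 *= 0x85ebca6b;
    h1 ^= h1 >>> 13;
    h1 *= 0xc2b2ae35;
    return h1 ^ (h1 >>> 16);
  }

  // Hash the 4-byte-aligned prefix, one little-endian int at a time.
  private static int bodyHash(byte[] data, int alignedLen, int seed) {
    int h1 = seed;
    for (int i = 0; i < alignedLen; i += 4) {
      int halfWord = (data[i] & 0xFF) | ((data[i + 1] & 0xFF) << 8)
          | ((data[i + 2] & 0xFF) << 16) | ((data[i + 3] & 0xFF) << 24);
      h1 = mixH1(h1, mixK1(halfWord));
    }
    return h1;
  }

  /** Pre-2.3 style: each trailing byte is mixed on its own. */
  static int hashBytesLegacy(byte[] data, int seed) {
    int aligned = data.length - data.length % 4;
    int h1 = bodyHash(data, aligned, seed);
    for (int i = aligned; i < data.length; i++) {
      h1 = mixH1(h1, mixK1(data[i]));
    }
    return fmix(h1, data.length);
  }

  /** Compatible style: trailing bytes are packed little-endian into a single k1. */
  static int hashBytesCompatible(byte[] data, int seed) {
    int aligned = data.length - data.length % 4;
    int h1 = bodyHash(data, aligned, seed);
    int k1 = 0;
    for (int i = aligned, shift = 0; i < data.length; i++, shift += 8) {
      k1 ^= (data[i] & 0xFF) << shift;
    }
    h1 ^= mixK1(k1);
    return fmix(h1, data.length);
  }

  public static void main(String[] args) {
    byte[] bytes = "hello".getBytes(StandardCharsets.UTF_8);  // length 5, so the tails differ
    System.out.println(hashBytesLegacy(bytes, 42) + " vs " + hashBytesCompatible(bytes, 42));
  }
}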
diff --git a/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/expressions/HiveHasher.java b/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/expressions/HiveHasher.java
index 62b75ae8aa01d..73577437ac506 100644
--- a/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/expressions/HiveHasher.java
+++ b/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/expressions/HiveHasher.java
@@ -17,8 +17,7 @@
 
 package org.apache.spark.sql.catalyst.expressions;
 
-import org.apache.spark.unsafe.memory.MemoryBlock;
-import org.apache.spark.unsafe.types.UTF8String;
+import org.apache.spark.unsafe.Platform;
 
 /**
  * Simulates Hive's hashing function from Hive v1.2.1
@@ -39,21 +38,12 @@ public static int hashLong(long input) {
     return (int) ((input >>> 32) ^ input);
   }
 
-  public static int hashUnsafeBytesBlock(MemoryBlock mb) {
-    long lengthInBytes = mb.size();
+  public static int hashUnsafeBytes(Object base, long offset, int lengthInBytes) {
     assert (lengthInBytes >= 0): "lengthInBytes cannot be negative";
     int result = 0;
-    for (long i = 0; i < lengthInBytes; i++) {
-      result = (result * 31) + (int) mb.getByte(i);
+    for (int i = 0; i < lengthInBytes; i++) {
+      result = (result * 31) + (int) Platform.getByte(base, offset + i);
     }
     return result;
   }
-
-  public static int hashUnsafeBytes(Object base, long offset, int lengthInBytes) {
-    return hashUnsafeBytesBlock(MemoryBlock.allocateFromObject(base, offset, lengthInBytes));
-  }
-
-  public static int hashUTF8String(UTF8String str) {
-    return hashUnsafeBytesBlock(str.getMemoryBlock());
-  }
 }
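The hashUnsafeBytes body shown above is Hive's running 31-based hash over sign-extended bytes, the same shape as java.lang.String.hashCode applied to raw bytes. A tiny illustrative sketch over a plain byte[]; the input string is just an example:

import java.nio.charset.StandardCharsets;

public class HiveHashSketch {
  static int hiveHashBytes(byte[] bytes) {
    int result = 0;
    for (byte b : bytes) {
      result = result * 31 + b;   // bytes are sign-extended, matching the loop above
    }
    return result;
  }

  public static void main(String[] args) {
    System.out.println(hiveHashBytes("spark".getBytes(StandardCharsets.UTF_8)));
  }
}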
diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java
index 54dcadf3a7754..aca6fca00c48b 100644
--- a/common/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java
+++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java
@@ -187,7 +187,7 @@ public static void setMemory(long address, byte value, long size) {
   }
 
   public static void copyMemory(
-      Object src, long srcOffset, Object dst, long dstOffset, long length) {
+    Object src, long srcOffset, Object dst, long dstOffset, long length) {
     // Check if dstOffset is before or after srcOffset to determine if we should copy
     // forward or backwards. This is necessary in case src and dst overlap.
     if (dstOffset < srcOffset) {
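The comment in the copyMemory hunk above captures the usual memmove rule: when the source and destination ranges may overlap, copy front-to-back if the destination starts earlier and back-to-front otherwise, so no byte is overwritten before it is read. A small sketch of that rule on a plain byte[], illustrative only:

public class OverlapCopySketch {
  static void copyWithinArray(byte[] buf, int srcOff, int dstOff, int length) {
    if (dstOff < srcOff) {
      for (int i = 0; i < length; i++) {            // forward copy
        buf[dstOff + i] = buf[srcOff + i];
      }
    } else {
      for (int i = length - 1; i >= 0; i--) {       // backward copy
        buf[dstOff + i] = buf[srcOff + i];
      }
    }
  }

  public static void main(String[] args) {
    byte[] buf = {1, 2, 3, 4, 5, 6};
    copyWithinArray(buf, 0, 2, 4);                  // overlapping shift right by two
    System.out.println(java.util.Arrays.toString(buf));  // [1, 2, 1, 2, 3, 4]
  }
}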
diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/array/ByteArrayMethods.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/array/ByteArrayMethods.java
index ef0f78d95d1ee..a6b1f7a16d605 100644
--- a/common/unsafe/src/main/java/org/apache/spark/unsafe/array/ByteArrayMethods.java
+++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/array/ByteArrayMethods.java
@@ -18,7 +18,6 @@
 package org.apache.spark.unsafe.array;
 
 import org.apache.spark.unsafe.Platform;
-import org.apache.spark.unsafe.memory.MemoryBlock;
 
 public class ByteArrayMethods {
 
@@ -33,11 +32,7 @@ public static long nextPowerOf2(long num) {
   }
 
   public static int roundNumberOfBytesToNearestWord(int numBytes) {
-    return (int)roundNumberOfBytesToNearestWord((long)numBytes);
-  }
-
-  public static long roundNumberOfBytesToNearestWord(long numBytes) {
-    long remainder = numBytes & 0x07;  // This is equivalent to `numBytes % 8`
+    int remainder = numBytes & 0x07;  // This is equivalent to `numBytes % 8`
     if (remainder == 0) {
       return numBytes;
     } else {
@@ -53,25 +48,15 @@ public static long roundNumberOfBytesToNearestWord(long numBytes) {
   public static int MAX_ROUNDED_ARRAY_LENGTH = Integer.MAX_VALUE - 15;
 
   private static final boolean unaligned = Platform.unaligned();
-  /**
-   * MemoryBlock equality check for MemoryBlocks.
-   * @return true if the arrays are equal, false otherwise
-   */
-  public static boolean arrayEqualsBlock(
-      MemoryBlock leftBase, long leftOffset, MemoryBlock rightBase, long rightOffset, long length) {
-    return arrayEquals(leftBase.getBaseObject(), leftBase.getBaseOffset() + leftOffset,
-      rightBase.getBaseObject(), rightBase.getBaseOffset() + rightOffset, length);
-  }
-
   /**
    * Optimized byte array equality check for byte arrays.
    * @return true if the arrays are equal, false otherwise
    */
   public static boolean arrayEquals(
-      Object leftBase, long leftOffset, Object rightBase, long rightOffset, long length) {
+      Object leftBase, long leftOffset, Object rightBase, long rightOffset, final long length) {
     int i = 0;
 
-    // check if starts align and we can get both offsets to be aligned
+    // check if stars align and we can get both offsets to be aligned
     if ((leftOffset % 8) == (rightOffset % 8)) {
       while ((leftOffset + i) % 8 != 0 && i < length) {
         if (Platform.getByte(leftBase, leftOffset + i) !=
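roundNumberOfBytesToNearestWord in the hunk above relies on `numBytes & 0x07` being the remainder modulo 8 for non-negative sizes, so the next 8-byte boundary is reached by adding `8 - remainder` unless the size is already aligned. A quick sketch with example values:

public class WordRoundingSketch {
  static long roundToNearestWord(long numBytes) {
    long remainder = numBytes & 0x07;   // same as numBytes % 8 for numBytes >= 0
    return remainder == 0 ? numBytes : numBytes + (8 - remainder);
  }

  public static void main(String[] args) {
    for (long n : new long[] {0, 1, 7, 8, 9, 23}) {
      System.out.println(n + " -> " + roundToNearestWord(n));   // 0, 8, 8, 8, 16, 24
    }
  }
}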
diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/array/LongArray.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/array/LongArray.java
index b74d2de0691d5..2cd39bd60c2ac 100644
--- a/common/unsafe/src/main/java/org/apache/spark/unsafe/array/LongArray.java
+++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/array/LongArray.java
@@ -17,6 +17,7 @@
 
 package org.apache.spark.unsafe.array;
 
+import org.apache.spark.unsafe.Platform;
 import org.apache.spark.unsafe.memory.MemoryBlock;
 
 /**
@@ -32,12 +33,16 @@
   private static final long WIDTH = 8;
 
   private final MemoryBlock memory;
+  private final Object baseObj;
+  private final long baseOffset;
 
   private final long length;
 
   public LongArray(MemoryBlock memory) {
     assert memory.size() < (long) Integer.MAX_VALUE * 8: "Array size >= Integer.MAX_VALUE elements";
     this.memory = memory;
+    this.baseObj = memory.getBaseObject();
+    this.baseOffset = memory.getBaseOffset();
     this.length = memory.size() / WIDTH;
   }
 
@@ -46,11 +51,11 @@ public MemoryBlock memoryBlock() {
   }
 
   public Object getBaseObject() {
-    return memory.getBaseObject();
+    return baseObj;
   }
 
   public long getBaseOffset() {
-    return memory.getBaseOffset();
+    return baseOffset;
   }
 
   /**
@@ -64,8 +69,8 @@ public long size() {
    * Fill this all with 0L.
    */
   public void zeroOut() {
-    for (long off = 0; off < length * WIDTH; off += WIDTH) {
-      memory.putLong(off, 0);
+    for (long off = baseOffset; off < baseOffset + length * WIDTH; off += WIDTH) {
+      Platform.putLong(baseObj, off, 0);
     }
   }
 
@@ -75,7 +80,7 @@ public void zeroOut() {
   public void set(int index, long value) {
     assert index >= 0 : "index (" + index + ") should >= 0";
     assert index < length : "index (" + index + ") should < length (" + length + ")";
-    memory.putLong(index * WIDTH, value);
+    Platform.putLong(baseObj, baseOffset + index * WIDTH, value);
   }
 
   /**
@@ -84,6 +89,6 @@ public void set(int index, long value) {
   public long get(int index) {
     assert index >= 0 : "index (" + index + ") should >= 0";
     assert index < length : "index (" + index + ") should < length (" + length + ")";
-    return memory.getLong(index * WIDTH);
+    return Platform.getLong(baseObj, baseOffset + index * WIDTH);
   }
 }
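LongArray, shown above, treats its backing memory as 8-byte words, so logical index i always lives at byte offset i * 8 from the base; the revert only changes whether that offset goes through Platform or through the MemoryBlock accessors. A simplified sketch of the indexing arithmetic backed by a plain long[]:

public class LongArraySketch {
  private static final long WIDTH = 8;   // bytes per element
  private final long[] words;

  LongArraySketch(int numElements) {
    this.words = new long[numElements];
  }

  long byteOffsetOf(int index) {
    return index * WIDTH;                // what the putLong/getLong calls above receive
  }

  void set(int index, long value) {
    assert index >= 0 && index < words.length;
    words[index] = value;
  }

  long get(int index) {
    assert index >= 0 && index < words.length;
    return words[index];
  }

  public static void main(String[] args) {
    LongArraySketch arr = new LongArraySketch(4);
    arr.set(3, 42L);
    System.out.println(arr.get(3) + " at byte offset " + arr.byteOffsetOf(3));  // 42 at 24
  }
}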
diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/hash/Murmur3_x86_32.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/hash/Murmur3_x86_32.java
index aff6e93d647fe..5e7ee480cafd1 100644
--- a/common/unsafe/src/main/java/org/apache/spark/unsafe/hash/Murmur3_x86_32.java
+++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/hash/Murmur3_x86_32.java
@@ -17,10 +17,7 @@
 
 package org.apache.spark.unsafe.hash;
 
-import com.google.common.primitives.Ints;
-
-import org.apache.spark.unsafe.memory.MemoryBlock;
-import org.apache.spark.unsafe.types.UTF8String;
+import org.apache.spark.unsafe.Platform;
 
 /**
  * 32-bit Murmur3 hasher.  This is based on Guava's Murmur3_32HashFunction.
@@ -52,70 +49,33 @@ public static int hashInt(int input, int seed) {
   }
 
   public int hashUnsafeWords(Object base, long offset, int lengthInBytes) {
-    return hashUnsafeWordsBlock(MemoryBlock.allocateFromObject(base, offset, lengthInBytes), seed);
+    return hashUnsafeWords(base, offset, lengthInBytes, seed);
   }
 
-  public static int hashUnsafeWordsBlock(MemoryBlock base, int seed) {
+  public static int hashUnsafeWords(Object base, long offset, int lengthInBytes, int seed) {
     // This is based on Guava's `Murmur32_Hasher.processRemaining(ByteBuffer)` method.
-    int lengthInBytes = Ints.checkedCast(base.size());
     assert (lengthInBytes % 8 == 0): "lengthInBytes must be a multiple of 8 (word-aligned)";
-    int h1 = hashBytesByIntBlock(base, seed);
+    int h1 = hashBytesByInt(base, offset, lengthInBytes, seed);
     return fmix(h1, lengthInBytes);
   }
 
-  public static int hashUnsafeWords(Object base, long offset, int lengthInBytes, int seed) {
-    // This is based on Guava's `Murmur32_Hasher.processRemaining(ByteBuffer)` method.
-    return hashUnsafeWordsBlock(MemoryBlock.allocateFromObject(base, offset, lengthInBytes), seed);
-  }
-
-  public static int hashUnsafeBytesBlock(MemoryBlock base, int seed) {
-    // This is not compatible with original and another implementations.
-    // But remain it for backward compatibility for the components existing before 2.3.
-    int lengthInBytes = Ints.checkedCast(base.size());
+  public static int hashUnsafeBytes(Object base, long offset, int lengthInBytes, int seed) {
     assert (lengthInBytes >= 0): "lengthInBytes cannot be negative";
     int lengthAligned = lengthInBytes - lengthInBytes % 4;
-    int h1 = hashBytesByIntBlock(base.subBlock(0, lengthAligned), seed);
+    int h1 = hashBytesByInt(base, offset, lengthAligned, seed);
     for (int i = lengthAligned; i < lengthInBytes; i++) {
-      int halfWord = base.getByte(i);
+      int halfWord = Platform.getByte(base, offset + i);
       int k1 = mixK1(halfWord);
       h1 = mixH1(h1, k1);
     }
     return fmix(h1, lengthInBytes);
   }
 
-  public static int hashUTF8String(UTF8String str, int seed) {
-    return hashUnsafeBytesBlock(str.getMemoryBlock(), seed);
-  }
-
-  public static int hashUnsafeBytes(Object base, long offset, int lengthInBytes, int seed) {
-    return hashUnsafeBytesBlock(MemoryBlock.allocateFromObject(base, offset, lengthInBytes), seed);
-  }
-
-  public static int hashUnsafeBytes2(Object base, long offset, int lengthInBytes, int seed) {
-    return hashUnsafeBytes2Block(MemoryBlock.allocateFromObject(base, offset, lengthInBytes), seed);
-  }
-
-  public static int hashUnsafeBytes2Block(MemoryBlock base, int seed) {
-    // This is compatible with original and other implementations.
-    // Use this method for new components after Spark 2.3.
-    int lengthInBytes = Ints.checkedCast(base.size());
-    assert (lengthInBytes >= 0) : "lengthInBytes cannot be negative";
-    int lengthAligned = lengthInBytes - lengthInBytes % 4;
-    int h1 = hashBytesByIntBlock(base.subBlock(0, lengthAligned), seed);
-    int k1 = 0;
-    for (int i = lengthAligned, shift = 0; i < lengthInBytes; i++, shift += 8) {
-      k1 ^= (base.getByte(i) & 0xFF) << shift;
-    }
-    h1 ^= mixK1(k1);
-    return fmix(h1, lengthInBytes);
-  }
-
-  private static int hashBytesByIntBlock(MemoryBlock base, int seed) {
-    long lengthInBytes = base.size();
+  private static int hashBytesByInt(Object base, long offset, int lengthInBytes, int seed) {
     assert (lengthInBytes % 4 == 0);
     int h1 = seed;
-    for (long i = 0; i < lengthInBytes; i += 4) {
-      int halfWord = base.getInt(i);
+    for (int i = 0; i < lengthInBytes; i += 4) {
+      int halfWord = Platform.getInt(base, offset + i);
       int k1 = mixK1(halfWord);
       h1 = mixH1(h1, k1);
     }
diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/ByteArrayMemoryBlock.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/ByteArrayMemoryBlock.java
deleted file mode 100644
index 9f238632bc87a..0000000000000
--- a/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/ByteArrayMemoryBlock.java
+++ /dev/null
@@ -1,128 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.unsafe.memory;
-
-import com.google.common.primitives.Ints;
-
-import org.apache.spark.unsafe.Platform;
-
-/**
- * A consecutive block of memory with a byte array on Java heap.
- */
-public final class ByteArrayMemoryBlock extends MemoryBlock {
-
-  private final byte[] array;
-
-  public ByteArrayMemoryBlock(byte[] obj, long offset, long size) {
-    super(obj, offset, size);
-    this.array = obj;
-    assert(offset + size <= Platform.BYTE_ARRAY_OFFSET + obj.length) :
-      "The sum of size " + size + " and offset " + offset + " should not be larger than " +
-        "the size of the given memory space " + (obj.length + Platform.BYTE_ARRAY_OFFSET);
-  }
-
-  public ByteArrayMemoryBlock(long length) {
-    this(new byte[Ints.checkedCast(length)], Platform.BYTE_ARRAY_OFFSET, length);
-  }
-
-  @Override
-  public MemoryBlock subBlock(long offset, long size) {
-    checkSubBlockRange(offset, size);
-    if (offset == 0 && size == this.size()) return this;
-    return new ByteArrayMemoryBlock(array, this.offset + offset, size);
-  }
-
-  public byte[] getByteArray() { return array; }
-
-  /**
-   * Creates a memory block pointing to the memory used by the byte array.
-   */
-  public static ByteArrayMemoryBlock fromArray(final byte[] array) {
-    return new ByteArrayMemoryBlock(array, Platform.BYTE_ARRAY_OFFSET, array.length);
-  }
-
-  @Override
-  public int getInt(long offset) {
-    return Platform.getInt(array, this.offset + offset);
-  }
-
-  @Override
-  public void putInt(long offset, int value) {
-    Platform.putInt(array, this.offset + offset, value);
-  }
-
-  @Override
-  public boolean getBoolean(long offset) {
-    return Platform.getBoolean(array, this.offset + offset);
-  }
-
-  @Override
-  public void putBoolean(long offset, boolean value) {
-    Platform.putBoolean(array, this.offset + offset, value);
-  }
-
-  @Override
-  public byte getByte(long offset) {
-    return array[(int)(this.offset + offset - Platform.BYTE_ARRAY_OFFSET)];
-  }
-
-  @Override
-  public void putByte(long offset, byte value) {
-    array[(int)(this.offset + offset - Platform.BYTE_ARRAY_OFFSET)] = value;
-  }
-
-  @Override
-  public short getShort(long offset) {
-    return Platform.getShort(array, this.offset + offset);
-  }
-
-  @Override
-  public void putShort(long offset, short value) {
-    Platform.putShort(array, this.offset + offset, value);
-  }
-
-  @Override
-  public long getLong(long offset) {
-    return Platform.getLong(array, this.offset + offset);
-  }
-
-  @Override
-  public void putLong(long offset, long value) {
-    Platform.putLong(array, this.offset + offset, value);
-  }
-
-  @Override
-  public float getFloat(long offset) {
-    return Platform.getFloat(array, this.offset + offset);
-  }
-
-  @Override
-  public void putFloat(long offset, float value) {
-    Platform.putFloat(array, this.offset + offset, value);
-  }
-
-  @Override
-  public double getDouble(long offset) {
-    return Platform.getDouble(array, this.offset + offset);
-  }
-
-  @Override
-  public void putDouble(long offset, double value) {
-    Platform.putDouble(array, this.offset + offset, value);
-  }
-}
diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/HeapMemoryAllocator.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/HeapMemoryAllocator.java
index 36caf80888cda..a9603c1aba051 100644
--- a/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/HeapMemoryAllocator.java
+++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/HeapMemoryAllocator.java
@@ -23,6 +23,8 @@
 import java.util.LinkedList;
 import java.util.Map;
 
+import org.apache.spark.unsafe.Platform;
+
 /**
  * A simple {@link MemoryAllocator} that can allocate up to 16GB using a JVM long primitive array.
  */
@@ -44,31 +46,28 @@ private boolean shouldPool(long size) {
 
   @Override
   public MemoryBlock allocate(long size) throws OutOfMemoryError {
-    int numWords = (int) ((size + 7) / 8);
-    long alignedSize = numWords * 8L;
-    assert (alignedSize >= size);
-    if (shouldPool(alignedSize)) {
+    if (shouldPool(size)) {
       synchronized (this) {
-        final LinkedList<WeakReference<long[]>> pool = bufferPoolsBySize.get(alignedSize);
+        final LinkedList<WeakReference<long[]>> pool = bufferPoolsBySize.get(size);
         if (pool != null) {
           while (!pool.isEmpty()) {
             final WeakReference<long[]> arrayReference = pool.pop();
             final long[] array = arrayReference.get();
             if (array != null) {
               assert (array.length * 8L >= size);
-              MemoryBlock memory = OnHeapMemoryBlock.fromArray(array, size);
+              MemoryBlock memory = new MemoryBlock(array, Platform.LONG_ARRAY_OFFSET, size);
               if (MemoryAllocator.MEMORY_DEBUG_FILL_ENABLED) {
                 memory.fill(MemoryAllocator.MEMORY_DEBUG_FILL_CLEAN_VALUE);
               }
               return memory;
             }
           }
-          bufferPoolsBySize.remove(alignedSize);
+          bufferPoolsBySize.remove(size);
         }
       }
     }
-    long[] array = new long[numWords];
-    MemoryBlock memory = OnHeapMemoryBlock.fromArray(array, size);
+    long[] array = new long[(int) ((size + 7) / 8)];
+    MemoryBlock memory = new MemoryBlock(array, Platform.LONG_ARRAY_OFFSET, size);
     if (MemoryAllocator.MEMORY_DEBUG_FILL_ENABLED) {
       memory.fill(MemoryAllocator.MEMORY_DEBUG_FILL_CLEAN_VALUE);
     }
@@ -77,13 +76,12 @@ public MemoryBlock allocate(long size) throws OutOfMemoryError {
 
   @Override
   public void free(MemoryBlock memory) {
-    assert(memory instanceof OnHeapMemoryBlock);
-    assert (memory.getBaseObject() != null) :
+    assert (memory.obj != null) :
       "baseObject was null; are you trying to use the on-heap allocator to free off-heap memory?";
-    assert (memory.getPageNumber() != MemoryBlock.FREED_IN_ALLOCATOR_PAGE_NUMBER) :
+    assert (memory.pageNumber != MemoryBlock.FREED_IN_ALLOCATOR_PAGE_NUMBER) :
       "page has already been freed";
-    assert ((memory.getPageNumber() == MemoryBlock.NO_PAGE_NUMBER)
-            || (memory.getPageNumber() == MemoryBlock.FREED_IN_TMM_PAGE_NUMBER)) :
+    assert ((memory.pageNumber == MemoryBlock.NO_PAGE_NUMBER)
+            || (memory.pageNumber == MemoryBlock.FREED_IN_TMM_PAGE_NUMBER)) :
       "TMM-allocated pages must first be freed via TMM.freePage(), not directly in allocator " +
         "free()";
 
@@ -93,20 +91,19 @@ public void free(MemoryBlock memory) {
     }
 
     // Mark the page as freed (so we can detect double-frees).
-    memory.setPageNumber(MemoryBlock.FREED_IN_ALLOCATOR_PAGE_NUMBER);
+    memory.pageNumber = MemoryBlock.FREED_IN_ALLOCATOR_PAGE_NUMBER;
 
     // As an additional layer of defense against use-after-free bugs, we mutate the
     // MemoryBlock to null out its reference to the long[] array.
-    long[] array = ((OnHeapMemoryBlock)memory).getLongArray();
-    memory.resetObjAndOffset();
+    long[] array = (long[]) memory.obj;
+    memory.setObjAndOffset(null, 0);
 
-    long alignedSize = ((size + 7) / 8) * 8;
-    if (shouldPool(alignedSize)) {
+    if (shouldPool(size)) {
       synchronized (this) {
-        LinkedList<WeakReference<long[]>> pool = bufferPoolsBySize.get(alignedSize);
+        LinkedList<WeakReference<long[]>> pool = bufferPoolsBySize.get(size);
         if (pool == null) {
           pool = new LinkedList<>();
-          bufferPoolsBySize.put(alignedSize, pool);
+          bufferPoolsBySize.put(size, pool);
         }
         pool.add(new WeakReference<>(array));
       }
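The HeapMemoryAllocator hunks above pool large long[] buffers in per-size lists of WeakReferences, so repeated allocations of the same word-aligned size can reuse a buffer while the GC remains free to reclaim pooled arrays under memory pressure. A condensed sketch of that pooling scheme; the threshold value and class name are assumptions, not Spark's:

import java.lang.ref.WeakReference;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.Map;

public class BufferPoolSketch {
  private static final long POOLING_THRESHOLD_BYTES = 1024 * 1024;  // assumed threshold
  private final Map<Long, LinkedList<WeakReference<long[]>>> poolsBySize = new HashMap<>();

  synchronized long[] allocate(long sizeInBytes) {
    long alignedSize = ((sizeInBytes + 7) / 8) * 8;                 // round up to whole words
    if (alignedSize >= POOLING_THRESHOLD_BYTES) {
      LinkedList<WeakReference<long[]>> pool = poolsBySize.get(alignedSize);
      while (pool != null && !pool.isEmpty()) {
        long[] array = pool.pop().get();
        if (array != null) {
          return array;                                             // reuse a pooled buffer
        }
      }
    }
    return new long[(int) (alignedSize / 8)];
  }

  synchronized void free(long[] array) {
    long alignedSize = array.length * 8L;
    if (alignedSize >= POOLING_THRESHOLD_BYTES) {
      poolsBySize.computeIfAbsent(alignedSize, k -> new LinkedList<>())
                 .add(new WeakReference<>(array));
    }
  }

  public static void main(String[] args) {
    BufferPoolSketch pool = new BufferPoolSketch();
    long[] a = pool.allocate(2 * 1024 * 1024);
    pool.free(a);
    long[] b = pool.allocate(2 * 1024 * 1024);
    System.out.println("reused=" + (a == b));                       // true unless GC'd in between
  }
}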
diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/MemoryAllocator.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/MemoryAllocator.java
index 38315fb97b46a..7b588681d9790 100644
--- a/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/MemoryAllocator.java
+++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/MemoryAllocator.java
@@ -38,7 +38,7 @@
 
   void free(MemoryBlock memory);
 
-  UnsafeMemoryAllocator UNSAFE = new UnsafeMemoryAllocator();
+  MemoryAllocator UNSAFE = new UnsafeMemoryAllocator();
 
-  HeapMemoryAllocator HEAP = new HeapMemoryAllocator();
+  MemoryAllocator HEAP = new HeapMemoryAllocator();
 }
diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/MemoryBlock.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/MemoryBlock.java
index ca7213bbf92da..c333857358d30 100644
--- a/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/MemoryBlock.java
+++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/MemoryBlock.java
@@ -22,10 +22,10 @@
 import org.apache.spark.unsafe.Platform;
 
 /**
- * A representation of a consecutive memory block in Spark. It defines the common interfaces
- * for memory accessing and mutating.
+ * A consecutive block of memory, starting at a {@link MemoryLocation} with a fixed size.
  */
-public abstract class MemoryBlock {
+public class MemoryBlock extends MemoryLocation {
+
   /** Special `pageNumber` value for pages which were not allocated by TaskMemoryManagers */
   public static final int NO_PAGE_NUMBER = -1;
 
@@ -45,163 +45,38 @@
    */
   public static final int FREED_IN_ALLOCATOR_PAGE_NUMBER = -3;
 
-  @Nullable
-  protected Object obj;
-
-  protected long offset;
-
-  protected long length;
+  private final long length;
 
   /**
    * Optional page number; used when this MemoryBlock represents a page allocated by a
-   * TaskMemoryManager. This field can be updated using setPageNumber method so that
-   * this can be modified by the TaskMemoryManager, which lives in a different package.
+   * TaskMemoryManager. This field is public so that it can be modified by the TaskMemoryManager,
+   * which lives in a different package.
    */
-  private int pageNumber = NO_PAGE_NUMBER;
+  public int pageNumber = NO_PAGE_NUMBER;
 
-  protected MemoryBlock(@Nullable Object obj, long offset, long length) {
-    if (offset < 0 || length < 0) {
-      throw new IllegalArgumentException(
-        "Length " + length + " and offset " + offset + "must be non-negative");
-    }
-    this.obj = obj;
-    this.offset = offset;
+  public MemoryBlock(@Nullable Object obj, long offset, long length) {
+    super(obj, offset);
     this.length = length;
   }
 
-  protected MemoryBlock() {
-    this(null, 0, 0);
-  }
-
-  public final Object getBaseObject() {
-    return obj;
-  }
-
-  public final long getBaseOffset() {
-    return offset;
-  }
-
-  public void resetObjAndOffset() {
-    this.obj = null;
-    this.offset = 0;
-  }
-
   /**
    * Returns the size of the memory block.
    */
-  public final long size() {
+  public long size() {
     return length;
   }
 
-  public final void setPageNumber(int pageNum) {
-    pageNumber = pageNum;
-  }
-
-  public final int getPageNumber() {
-    return pageNumber;
-  }
-
-  /**
-   * Fills the memory block with the specified byte value.
-   */
-  public final void fill(byte value) {
-    Platform.setMemory(obj, offset, length, value);
-  }
-
-  /**
-   * Instantiate MemoryBlock for given object type with new offset
-   */
-  public static final MemoryBlock allocateFromObject(Object obj, long offset, long length) {
-    MemoryBlock mb = null;
-    if (obj instanceof byte[]) {
-      byte[] array = (byte[])obj;
-      mb = new ByteArrayMemoryBlock(array, offset, length);
-    } else if (obj instanceof long[]) {
-      long[] array = (long[])obj;
-      mb = new OnHeapMemoryBlock(array, offset, length);
-    } else if (obj == null) {
-      // we assume that to pass null pointer means off-heap
-      mb = new OffHeapMemoryBlock(offset, length);
-    } else {
-      throw new UnsupportedOperationException(
-        "Instantiate MemoryBlock for type " + obj.getClass() + " is not supported now");
-    }
-    return mb;
-  }
-
   /**
-   * Just instantiate the sub-block with the same type of MemoryBlock with the new size and relative
-   * offset from the original offset. The data is not copied.
-   * If parameters are invalid, an exception is thrown.
+   * Creates a memory block pointing to the memory used by the long array.
    */
-  public abstract MemoryBlock subBlock(long offset, long size);
-
-  protected void checkSubBlockRange(long offset, long size) {
-    if (offset < 0 || size < 0) {
-      throw new ArrayIndexOutOfBoundsException(
-        "Size " + size + " and offset " + offset + " must be non-negative");
-    }
-    if (offset + size > length) {
-      throw new ArrayIndexOutOfBoundsException("The sum of size " + size + " and offset " +
-        offset + " should not be larger than the length " + length + " in the MemoryBlock");
-    }
+  public static MemoryBlock fromLongArray(final long[] array) {
+    return new MemoryBlock(array, Platform.LONG_ARRAY_OFFSET, array.length * 8L);
   }
 
   /**
-   * getXXX/putXXX does not ensure guarantee behavior if the offset is invalid. e.g  cause illegal
-   * memory access, throw an exception, or etc.
-   * getXXX/putXXX uses an index based on this.offset that includes the size of metadata such as
-   * JVM object header. The offset is 0-based and is expected as an logical offset in the memory
-   * block.
+   * Fills the memory block with the specified byte value.
    */
-  public abstract int getInt(long offset);
-
-  public abstract void putInt(long offset, int value);
-
-  public abstract boolean getBoolean(long offset);
-
-  public abstract void putBoolean(long offset, boolean value);
-
-  public abstract byte getByte(long offset);
-
-  public abstract void putByte(long offset, byte value);
-
-  public abstract short getShort(long offset);
-
-  public abstract void putShort(long offset, short value);
-
-  public abstract long getLong(long offset);
-
-  public abstract void putLong(long offset, long value);
-
-  public abstract float getFloat(long offset);
-
-  public abstract void putFloat(long offset, float value);
-
-  public abstract double getDouble(long offset);
-
-  public abstract void putDouble(long offset, double value);
-
-  public static final void copyMemory(
-      MemoryBlock src, long srcOffset, MemoryBlock dst, long dstOffset, long length) {
-    assert(srcOffset + length <= src.length && dstOffset + length <= dst.length);
-    Platform.copyMemory(src.getBaseObject(), src.getBaseOffset() + srcOffset,
-      dst.getBaseObject(), dst.getBaseOffset() + dstOffset, length);
-  }
-
-  public static final void copyMemory(MemoryBlock src, MemoryBlock dst, long length) {
-    assert(length <= src.length && length <= dst.length);
-    Platform.copyMemory(src.getBaseObject(), src.getBaseOffset(),
-      dst.getBaseObject(), dst.getBaseOffset(), length);
-  }
-
-  public final void copyFrom(Object src, long srcOffset, long dstOffset, long length) {
-    assert(length <= this.length - srcOffset);
-    Platform.copyMemory(src, srcOffset, obj, offset + dstOffset, length);
-  }
-
-  public final void writeTo(long srcOffset, Object dst, long dstOffset, long length) {
-    assert(length <= this.length - srcOffset);
-    Platform.copyMemory(obj, offset + srcOffset, dst, dstOffset, length);
+  public void fill(byte value) {
+    Platform.setMemory(obj, offset, length, value);
   }
 }
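
To make the reverted access pattern concrete, here is a minimal sketch assuming only the APIs shown in this file and in Platform: a MemoryBlock carries just (object, offset, length), and reads and writes go through Platform using the base object and base offset directly.

    import org.apache.spark.unsafe.Platform;
    import org.apache.spark.unsafe.memory.MemoryBlock;

    public class MemoryBlockExample {
      public static void main(String[] args) {
        long[] backing = new long[4];
        MemoryBlock block = MemoryBlock.fromLongArray(backing);

        // Write and read a long at logical offset 8 inside the block.
        Platform.putLong(block.getBaseObject(), block.getBaseOffset() + 8, 42L);
        long value = Platform.getLong(block.getBaseObject(), block.getBaseOffset() + 8);
        System.out.println(value + ", size = " + block.size());  // 42, size = 32
      }
    }
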
diff --git a/core/src/main/scala/org/apache/spark/api/r/RAuthHelper.scala b/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/MemoryLocation.java
similarity index 54%
rename from core/src/main/scala/org/apache/spark/api/r/RAuthHelper.scala
rename to common/unsafe/src/main/java/org/apache/spark/unsafe/memory/MemoryLocation.java
index ac6826a9ec774..74ebc87dc978c 100644
--- a/core/src/main/scala/org/apache/spark/api/r/RAuthHelper.scala
+++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/MemoryLocation.java
@@ -15,24 +15,40 @@
  * limitations under the License.
  */
 
-package org.apache.spark.api.r
+package org.apache.spark.unsafe.memory;
 
-import java.io.{DataInputStream, DataOutputStream}
-import java.net.Socket
+import javax.annotation.Nullable;
 
-import org.apache.spark.SparkConf
-import org.apache.spark.security.SocketAuthHelper
+/**
+ * A memory location. Tracked either by a memory address (with off-heap allocation),
+ * or by an offset from a JVM object (in-heap allocation).
+ */
+public class MemoryLocation {
+
+  @Nullable
+  Object obj;
 
-private[spark] class RAuthHelper(conf: SparkConf) extends SocketAuthHelper(conf) {
+  long offset;
+
+  public MemoryLocation(@Nullable Object obj, long offset) {
+    this.obj = obj;
+    this.offset = offset;
+  }
 
-  override protected def readUtf8(s: Socket): String = {
-    SerDe.readString(new DataInputStream(s.getInputStream()))
+  public MemoryLocation() {
+    this(null, 0);
   }
 
-  override protected def writeUtf8(str: String, s: Socket): Unit = {
-    val out = s.getOutputStream()
-    SerDe.writeString(new DataOutputStream(out), str)
-    out.flush()
+  public void setObjAndOffset(Object newObj, long newOffset) {
+    this.obj = newObj;
+    this.offset = newOffset;
   }
 
+  public final Object getBaseObject() {
+    return obj;
+  }
+
+  public final long getBaseOffset() {
+    return offset;
+  }
 }
diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/OffHeapMemoryBlock.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/OffHeapMemoryBlock.java
deleted file mode 100644
index 3431b08980eb8..0000000000000
--- a/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/OffHeapMemoryBlock.java
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.unsafe.memory;
-
-import org.apache.spark.unsafe.Platform;
-
-public class OffHeapMemoryBlock extends MemoryBlock {
-  public static final OffHeapMemoryBlock NULL = new OffHeapMemoryBlock(0, 0);
-
-  public OffHeapMemoryBlock(long address, long size) {
-    super(null, address, size);
-  }
-
-  @Override
-  public MemoryBlock subBlock(long offset, long size) {
-    checkSubBlockRange(offset, size);
-    if (offset == 0 && size == this.size()) return this;
-    return new OffHeapMemoryBlock(this.offset + offset, size);
-  }
-
-  @Override
-  public final int getInt(long offset) {
-    return Platform.getInt(null, this.offset + offset);
-  }
-
-  @Override
-  public final void putInt(long offset, int value) {
-    Platform.putInt(null, this.offset + offset, value);
-  }
-
-  @Override
-  public final boolean getBoolean(long offset) {
-    return Platform.getBoolean(null, this.offset + offset);
-  }
-
-  @Override
-  public final void putBoolean(long offset, boolean value) {
-    Platform.putBoolean(null, this.offset + offset, value);
-  }
-
-  @Override
-  public final byte getByte(long offset) {
-    return Platform.getByte(null, this.offset + offset);
-  }
-
-  @Override
-  public final void putByte(long offset, byte value) {
-    Platform.putByte(null, this.offset + offset, value);
-  }
-
-  @Override
-  public final short getShort(long offset) {
-    return Platform.getShort(null, this.offset + offset);
-  }
-
-  @Override
-  public final void putShort(long offset, short value) {
-    Platform.putShort(null, this.offset + offset, value);
-  }
-
-  @Override
-  public final long getLong(long offset) {
-    return Platform.getLong(null, this.offset + offset);
-  }
-
-  @Override
-  public final void putLong(long offset, long value) {
-    Platform.putLong(null, this.offset + offset, value);
-  }
-
-  @Override
-  public final float getFloat(long offset) {
-    return Platform.getFloat(null, this.offset + offset);
-  }
-
-  @Override
-  public final void putFloat(long offset, float value) {
-    Platform.putFloat(null, this.offset + offset, value);
-  }
-
-  @Override
-  public final double getDouble(long offset) {
-    return Platform.getDouble(null, this.offset + offset);
-  }
-
-  @Override
-  public final void putDouble(long offset, double value) {
-    Platform.putDouble(null, this.offset + offset, value);
-  }
-}
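
The deleted class wrapped off-heap reads and writes behind typed accessors; the underlying mechanism is Platform calls with a null base object, where the offset is the absolute native address. A hedged sketch of that mechanism (OffHeapExample is an invented name):

    import org.apache.spark.unsafe.Platform;

    public class OffHeapExample {
      public static void main(String[] args) {
        long address = Platform.allocateMemory(64);
        try {
          // With a null base object, the second argument is a raw native address.
          Platform.putInt(null, address + 4, 0x20000002);
          System.out.println(Integer.toHexString(Platform.getInt(null, address + 4)));
        } finally {
          Platform.freeMemory(address);
        }
      }
    }
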
diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/OnHeapMemoryBlock.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/OnHeapMemoryBlock.java
deleted file mode 100644
index ee42bc27c9c5f..0000000000000
--- a/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/OnHeapMemoryBlock.java
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.unsafe.memory;
-
-import com.google.common.primitives.Ints;
-
-import org.apache.spark.unsafe.Platform;
-
-/**
- * A consecutive block of memory with a long array on Java heap.
- */
-public final class OnHeapMemoryBlock extends MemoryBlock {
-
-  private final long[] array;
-
-  public OnHeapMemoryBlock(long[] obj, long offset, long size) {
-    super(obj, offset, size);
-    this.array = obj;
-    assert(offset + size <= obj.length * 8L + Platform.LONG_ARRAY_OFFSET) :
-      "The sum of size " + size + " and offset " + offset + " should not be larger than " +
-        "the size of the given memory space " + (obj.length * 8L + Platform.LONG_ARRAY_OFFSET);
-  }
-
-  public OnHeapMemoryBlock(long size) {
-    this(new long[Ints.checkedCast((size + 7) / 8)], Platform.LONG_ARRAY_OFFSET, size);
-  }
-
-  @Override
-  public MemoryBlock subBlock(long offset, long size) {
-    checkSubBlockRange(offset, size);
-    if (offset == 0 && size == this.size()) return this;
-    return new OnHeapMemoryBlock(array, this.offset + offset, size);
-  }
-
-  public long[] getLongArray() { return array; }
-
-  /**
-   * Creates a memory block pointing to the memory used by the long array.
-   */
-  public static OnHeapMemoryBlock fromArray(final long[] array) {
-    return new OnHeapMemoryBlock(array, Platform.LONG_ARRAY_OFFSET, array.length * 8L);
-  }
-
-  public static OnHeapMemoryBlock fromArray(final long[] array, long size) {
-    return new OnHeapMemoryBlock(array, Platform.LONG_ARRAY_OFFSET, size);
-  }
-
-  @Override
-  public int getInt(long offset) {
-    return Platform.getInt(array, this.offset + offset);
-  }
-
-  @Override
-  public void putInt(long offset, int value) {
-    Platform.putInt(array, this.offset + offset, value);
-  }
-
-  @Override
-  public boolean getBoolean(long offset) {
-    return Platform.getBoolean(array, this.offset + offset);
-  }
-
-  @Override
-  public void putBoolean(long offset, boolean value) {
-    Platform.putBoolean(array, this.offset + offset, value);
-  }
-
-  @Override
-  public byte getByte(long offset) {
-    return Platform.getByte(array, this.offset + offset);
-  }
-
-  @Override
-  public void putByte(long offset, byte value) {
-    Platform.putByte(array, this.offset + offset, value);
-  }
-
-  @Override
-  public short getShort(long offset) {
-    return Platform.getShort(array, this.offset + offset);
-  }
-
-  @Override
-  public void putShort(long offset, short value) {
-    Platform.putShort(array, this.offset + offset, value);
-  }
-
-  @Override
-  public long getLong(long offset) {
-    return Platform.getLong(array, this.offset + offset);
-  }
-
-  @Override
-  public void putLong(long offset, long value) {
-    Platform.putLong(array, this.offset + offset, value);
-  }
-
-  @Override
-  public float getFloat(long offset) {
-    return Platform.getFloat(array, this.offset + offset);
-  }
-
-  @Override
-  public void putFloat(long offset, float value) {
-    Platform.putFloat(array, this.offset + offset, value);
-  }
-
-  @Override
-  public double getDouble(long offset) {
-    return Platform.getDouble(array, this.offset + offset);
-  }
-
-  @Override
-  public void putDouble(long offset, double value) {
-    Platform.putDouble(array, this.offset + offset, value);
-  }
-}
diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/UnsafeMemoryAllocator.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/UnsafeMemoryAllocator.java
index 5310bdf2779a9..4368fb615ba1e 100644
--- a/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/UnsafeMemoryAllocator.java
+++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/UnsafeMemoryAllocator.java
@@ -25,9 +25,9 @@
 public class UnsafeMemoryAllocator implements MemoryAllocator {
 
   @Override
-  public OffHeapMemoryBlock allocate(long size) throws OutOfMemoryError {
+  public MemoryBlock allocate(long size) throws OutOfMemoryError {
     long address = Platform.allocateMemory(size);
-    OffHeapMemoryBlock memory = new OffHeapMemoryBlock(address, size);
+    MemoryBlock memory = new MemoryBlock(null, address, size);
     if (MemoryAllocator.MEMORY_DEBUG_FILL_ENABLED) {
       memory.fill(MemoryAllocator.MEMORY_DEBUG_FILL_CLEAN_VALUE);
     }
@@ -36,25 +36,22 @@ public OffHeapMemoryBlock allocate(long size) throws OutOfMemoryError {
 
   @Override
   public void free(MemoryBlock memory) {
-    assert(memory instanceof OffHeapMemoryBlock) :
-      "UnsafeMemoryAllocator can only free OffHeapMemoryBlock.";
-    if (memory == OffHeapMemoryBlock.NULL) return;
-    assert (memory.getPageNumber() != MemoryBlock.FREED_IN_ALLOCATOR_PAGE_NUMBER) :
+    assert (memory.obj == null) :
+      "baseObject not null; are you trying to use the off-heap allocator to free on-heap memory?";
+    assert (memory.pageNumber != MemoryBlock.FREED_IN_ALLOCATOR_PAGE_NUMBER) :
       "page has already been freed";
-    assert ((memory.getPageNumber() == MemoryBlock.NO_PAGE_NUMBER)
-            || (memory.getPageNumber() == MemoryBlock.FREED_IN_TMM_PAGE_NUMBER)) :
+    assert ((memory.pageNumber == MemoryBlock.NO_PAGE_NUMBER)
+            || (memory.pageNumber == MemoryBlock.FREED_IN_TMM_PAGE_NUMBER)) :
       "TMM-allocated pages must be freed via TMM.freePage(), not directly in allocator free()";
 
     if (MemoryAllocator.MEMORY_DEBUG_FILL_ENABLED) {
       memory.fill(MemoryAllocator.MEMORY_DEBUG_FILL_FREED_VALUE);
     }
-
     Platform.freeMemory(memory.offset);
-
     // As an additional layer of defense against use-after-free bugs, we mutate the
     // MemoryBlock to reset its pointer.
-    memory.resetObjAndOffset();
+    memory.offset = 0;
     // Mark the page as freed (so we can detect double-frees).
-    memory.setPageNumber(MemoryBlock.FREED_IN_ALLOCATOR_PAGE_NUMBER);
+    memory.pageNumber = MemoryBlock.FREED_IN_ALLOCATOR_PAGE_NUMBER;
   }
 }
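
A small illustrative sketch of the allocate/free lifecycle these assertions guard (UnsafeAllocatorExample is an invented name; the comments assume -ea so the assertions are active):

    import org.apache.spark.unsafe.memory.MemoryAllocator;
    import org.apache.spark.unsafe.memory.MemoryBlock;

    public class UnsafeAllocatorExample {
      public static void main(String[] args) {
        MemoryBlock block = MemoryAllocator.UNSAFE.allocate(256);
        block.fill((byte) 0);
        MemoryAllocator.UNSAFE.free(block);
        // Freeing the same block again would now trip the "already been freed"
        // assertion, because free() set pageNumber to FREED_IN_ALLOCATOR_PAGE_NUMBER.
      }
    }
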
diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/ByteArray.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/ByteArray.java
index ecd7c19f2c634..c03caf0076f61 100644
--- a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/ByteArray.java
+++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/ByteArray.java
@@ -17,12 +17,10 @@
 
 package org.apache.spark.unsafe.types;
 
-import java.util.Arrays;
-
-import com.google.common.primitives.Ints;
-
 import org.apache.spark.unsafe.Platform;
 
+import java.util.Arrays;
+
 public final class ByteArray {
 
   public static final byte[] EMPTY_BYTE = new byte[0];
@@ -79,17 +77,17 @@ public static long getPrefix(byte[] bytes) {
 
   public static byte[] concat(byte[]... inputs) {
     // Compute the total length of the result
-    long totalLength = 0;
+    int totalLength = 0;
     for (int i = 0; i < inputs.length; i++) {
       if (inputs[i] != null) {
-        totalLength += (long)inputs[i].length;
+        totalLength += inputs[i].length;
       } else {
         return null;
       }
     }
 
     // Allocate a new byte array, and copy the inputs one by one into it
-    final byte[] result = new byte[Ints.checkedCast(totalLength)];
+    final byte[] result = new byte[totalLength];
     int offset = 0;
     for (int i = 0; i < inputs.length; i++) {
       int len = inputs[i].length;
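
For context, a self-contained sketch of byte[] concatenation using only JDK calls; the int accumulator restored by this hunk can overflow once the combined length exceeds Integer.MAX_VALUE, which is what the removed long/Ints.checkedCast guard protected against. ConcatExample and Math.toIntExact stand in for the Spark/Guava code and are not part of the patch:

    import java.util.Arrays;

    public class ConcatExample {
      public static byte[] concat(byte[]... inputs) {
        long totalLength = 0;
        for (byte[] input : inputs) {
          if (input == null) return null;
          totalLength += input.length;
        }
        byte[] result = new byte[Math.toIntExact(totalLength)];  // throws on int overflow
        int offset = 0;
        for (byte[] input : inputs) {
          System.arraycopy(input, 0, result, offset, input.length);
          offset += input.length;
        }
        return result;
      }

      public static void main(String[] args) {
        System.out.println(Arrays.toString(concat(new byte[]{1, 2}, new byte[]{3})));  // [1, 2, 3]
      }
    }
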
diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
index e91fc4391425c..b0d0c44823e68 100644
--- a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
+++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
@@ -29,13 +29,10 @@
 import com.esotericsoftware.kryo.KryoSerializable;
 import com.esotericsoftware.kryo.io.Input;
 import com.esotericsoftware.kryo.io.Output;
-import com.google.common.primitives.Ints;
 
 import org.apache.spark.unsafe.Platform;
 import org.apache.spark.unsafe.array.ByteArrayMethods;
 import org.apache.spark.unsafe.hash.Murmur3_x86_32;
-import org.apache.spark.unsafe.memory.ByteArrayMemoryBlock;
-import org.apache.spark.unsafe.memory.MemoryBlock;
 
 import static org.apache.spark.unsafe.Platform.*;
 
@@ -53,51 +50,19 @@
 
   // These are only updated by readExternal() or read()
   @Nonnull
-  private MemoryBlock base;
-  // While numBytes has the same value as base.size(), to keep as int avoids cast from long to int
+  private Object base;
+  private long offset;
   private int numBytes;
 
-  public MemoryBlock getMemoryBlock() { return base; }
-  public Object getBaseObject() { return base.getBaseObject(); }
-  public long getBaseOffset() { return base.getBaseOffset(); }
+  public Object getBaseObject() { return base; }
+  public long getBaseOffset() { return offset; }
 
-  /**
-   * A char in UTF-8 encoding can take 1-4 bytes depending on the first byte which
-   * indicates the size of the char. See Unicode standard in page 126, Table 3-6:
-   * http://www.unicode.org/versions/Unicode10.0.0/UnicodeStandard-10.0.pdf
-   *
-   * Binary    Hex          Comments
-   * 0xxxxxxx  0x00..0x7F   Only byte of a 1-byte character encoding
-   * 10xxxxxx  0x80..0xBF   Continuation bytes (1-3 continuation bytes)
-   * 110xxxxx  0xC0..0xDF   First byte of a 2-byte character encoding
-   * 1110xxxx  0xE0..0xEF   First byte of a 3-byte character encoding
-   * 11110xxx  0xF0..0xF7   First byte of a 4-byte character encoding
-   *
-   * As a consequence of the well-formedness conditions specified in
-   * Table 3-7 (page 126), the following byte values are disallowed in UTF-8:
-   *   C0–C1, F5–FF.
-   */
-  private static byte[] bytesOfCodePointInUTF8 = {
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x00..0x0F
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x10..0x1F
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x20..0x2F
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x30..0x3F
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x40..0x4F
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x50..0x5F
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x60..0x6F
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x70..0x7F
-    // Continuation bytes cannot appear as the first byte
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x80..0x8F
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x90..0x9F
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xA0..0xAF
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xB0..0xBF
-    0, 0, // 0xC0..0xC1 - disallowed in UTF-8
-    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xC2..0xCF
-    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xD0..0xDF
-    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0xE0..0xEF
-    4, 4, 4, 4, 4, // 0xF0..0xF4
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 // 0xF5..0xFF - disallowed in UTF-8
-  };
+  private static int[] bytesOfCodePointInUTF8 = {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+    4, 4, 4, 4, 4, 4, 4, 4,
+    5, 5, 5, 5,
+    6, 6};
 
   private static final boolean IS_LITTLE_ENDIAN =
       ByteOrder.nativeOrder() == ByteOrder.LITTLE_ENDIAN;
@@ -112,8 +77,7 @@
    */
   public static UTF8String fromBytes(byte[] bytes) {
     if (bytes != null) {
-      return new UTF8String(
-        new ByteArrayMemoryBlock(bytes, BYTE_ARRAY_OFFSET, bytes.length));
+      return new UTF8String(bytes, BYTE_ARRAY_OFFSET, bytes.length);
     } else {
       return null;
     }
@@ -126,13 +90,19 @@ public static UTF8String fromBytes(byte[] bytes) {
    */
   public static UTF8String fromBytes(byte[] bytes, int offset, int numBytes) {
     if (bytes != null) {
-      return new UTF8String(
-        new ByteArrayMemoryBlock(bytes, BYTE_ARRAY_OFFSET + offset, numBytes));
+      return new UTF8String(bytes, BYTE_ARRAY_OFFSET + offset, numBytes);
     } else {
       return null;
     }
   }
 
+  /**
+   * Creates an UTF8String from given address (base and offset) and length.
+   */
+  public static UTF8String fromAddress(Object base, long offset, int numBytes) {
+    return new UTF8String(base, offset, numBytes);
+  }
+
   /**
    * Creates an UTF8String from String.
    */
@@ -149,13 +119,16 @@ public static UTF8String blankString(int length) {
     return fromBytes(spaces);
   }
 
-  public UTF8String(MemoryBlock base) {
+  protected UTF8String(Object base, long offset, int numBytes) {
     this.base = base;
-    this.numBytes = Ints.checkedCast(base.size());
+    this.offset = offset;
+    this.numBytes = numBytes;
   }
 
   // for serialization
-  public UTF8String() {}
+  public UTF8String() {
+    this(null, 0, 0);
+  }
 
   /**
    * Writes the content of this string into a memory address, identified by an object and an offset.
@@ -163,7 +136,7 @@ public UTF8String() {}
    * bytes in this string.
    */
   public void writeToMemory(Object target, long targetOffset) {
-    base.writeTo(0, target, targetOffset, numBytes);
+    Platform.copyMemory(base, offset, target, targetOffset, numBytes);
   }
 
   public void writeTo(ByteBuffer buffer) {
@@ -183,9 +156,8 @@ public void writeTo(ByteBuffer buffer) {
    */
   @Nonnull
   public ByteBuffer getByteBuffer() {
-    long offset = base.getBaseOffset();
-    if (base instanceof ByteArrayMemoryBlock && offset >= BYTE_ARRAY_OFFSET) {
-      final byte[] bytes = ((ByteArrayMemoryBlock) base).getByteArray();
+    if (base instanceof byte[] && offset >= BYTE_ARRAY_OFFSET) {
+      final byte[] bytes = (byte[]) base;
 
       // the offset includes an object header... this is only needed for unsafe copies
       final long arrayOffset = offset - BYTE_ARRAY_OFFSET;
@@ -215,9 +187,8 @@ public void writeTo(OutputStream out) throws IOException {
    * @param b The first byte of a code point
    */
   private static int numBytesForFirstByte(final byte b) {
-    final int offset = b & 0xFF;
-    byte numBytes = bytesOfCodePointInUTF8[offset];
-    return (numBytes == 0) ? 1: numBytes; // Skip the first byte disallowed in UTF-8
+    final int offset = (b & 0xFF) - 192;
+    return (offset >= 0) ? bytesOfCodePointInUTF8[offset] : 1;
   }
 
   /**
@@ -252,12 +223,12 @@ public long getPrefix() {
     long mask = 0;
     if (IS_LITTLE_ENDIAN) {
       if (numBytes >= 8) {
-        p = base.getLong(0);
+        p = Platform.getLong(base, offset);
       } else if (numBytes > 4) {
-        p = base.getLong(0);
+        p = Platform.getLong(base, offset);
         mask = (1L << (8 - numBytes) * 8) - 1;
       } else if (numBytes > 0) {
-        p = (long) base.getInt(0);
+        p = (long) Platform.getInt(base, offset);
         mask = (1L << (8 - numBytes) * 8) - 1;
       } else {
         p = 0;
@@ -266,12 +237,12 @@ public long getPrefix() {
     } else {
       // byteOrder == ByteOrder.BIG_ENDIAN
       if (numBytes >= 8) {
-        p = base.getLong(0);
+        p = Platform.getLong(base, offset);
       } else if (numBytes > 4) {
-        p = base.getLong(0);
+        p = Platform.getLong(base, offset);
         mask = (1L << (8 - numBytes) * 8) - 1;
       } else if (numBytes > 0) {
-        p = ((long) base.getInt(0)) << 32;
+        p = ((long) Platform.getInt(base, offset)) << 32;
         mask = (1L << (8 - numBytes) * 8) - 1;
       } else {
         p = 0;
@@ -286,13 +257,12 @@ public long getPrefix() {
    */
   public byte[] getBytes() {
     // avoid copy if `base` is `byte[]`
-    long offset = base.getBaseOffset();
-    if (offset == BYTE_ARRAY_OFFSET && base instanceof ByteArrayMemoryBlock
-      && (((ByteArrayMemoryBlock) base).getByteArray()).length == numBytes) {
-      return ((ByteArrayMemoryBlock) base).getByteArray();
+    if (offset == BYTE_ARRAY_OFFSET && base instanceof byte[]
+      && ((byte[]) base).length == numBytes) {
+      return (byte[]) base;
     } else {
       byte[] bytes = new byte[numBytes];
-      base.writeTo(0, bytes, BYTE_ARRAY_OFFSET, numBytes);
+      copyMemory(base, offset, bytes, BYTE_ARRAY_OFFSET, numBytes);
       return bytes;
     }
   }
@@ -322,7 +292,7 @@ public UTF8String substring(final int start, final int until) {
 
     if (i > j) {
       byte[] bytes = new byte[i - j];
-      base.writeTo(j, bytes, BYTE_ARRAY_OFFSET, i - j);
+      copyMemory(base, offset + j, bytes, BYTE_ARRAY_OFFSET, i - j);
       return fromBytes(bytes);
     } else {
       return EMPTY_UTF8;
@@ -363,14 +333,14 @@ public boolean contains(final UTF8String substring) {
    * Returns the byte at position `i`.
    */
   private byte getByte(int i) {
-    return base.getByte(i);
+    return Platform.getByte(base, offset + i);
   }
 
   private boolean matchAt(final UTF8String s, int pos) {
     if (s.numBytes + pos > numBytes || pos < 0) {
       return false;
     }
-    return ByteArrayMethods.arrayEqualsBlock(base, pos, s.base, 0, s.numBytes);
+    return ByteArrayMethods.arrayEquals(base, offset + pos, s.base, s.offset, s.numBytes);
   }
 
   public boolean startsWith(final UTF8String prefix) {
@@ -497,7 +467,8 @@ public int findInSet(UTF8String match) {
     for (int i = 0; i < numBytes; i++) {
       if (getByte(i) == (byte) ',') {
         if (i - (lastComma + 1) == match.numBytes &&
-          ByteArrayMethods.arrayEqualsBlock(base, lastComma + 1, match.base, 0, match.numBytes)) {
+          ByteArrayMethods.arrayEquals(base, offset + (lastComma + 1), match.base, match.offset,
+            match.numBytes)) {
           return n;
         }
         lastComma = i;
@@ -505,7 +476,8 @@ public int findInSet(UTF8String match) {
       }
     }
     if (numBytes - (lastComma + 1) == match.numBytes &&
-      ByteArrayMethods.arrayEqualsBlock(base, lastComma + 1, match.base, 0, match.numBytes)) {
+      ByteArrayMethods.arrayEquals(base, offset + (lastComma + 1), match.base, match.offset,
+        match.numBytes)) {
       return n;
     }
     return 0;
@@ -520,7 +492,7 @@ public int findInSet(UTF8String match) {
   private UTF8String copyUTF8String(int start, int end) {
     int len = end - start + 1;
     byte[] newBytes = new byte[len];
-    base.writeTo(start, newBytes, BYTE_ARRAY_OFFSET, len);
+    copyMemory(base, offset + start, newBytes, BYTE_ARRAY_OFFSET, len);
     return UTF8String.fromBytes(newBytes);
   }
 
@@ -667,7 +639,8 @@ public UTF8String reverse() {
     int i = 0; // position in byte
     while (i < numBytes) {
       int len = numBytesForFirstByte(getByte(i));
-      base.writeTo(i, result, BYTE_ARRAY_OFFSET + result.length - i - len, len);
+      copyMemory(this.base, this.offset + i, result,
+        BYTE_ARRAY_OFFSET + result.length - i - len, len);
 
       i += len;
     }
@@ -681,7 +654,7 @@ public UTF8String repeat(int times) {
     }
 
     byte[] newBytes = new byte[numBytes * times];
-    base.writeTo(0, newBytes, BYTE_ARRAY_OFFSET, numBytes);
+    copyMemory(this.base, this.offset, newBytes, BYTE_ARRAY_OFFSET, numBytes);
 
     int copied = 1;
     while (copied < times) {
@@ -718,7 +691,7 @@ public int indexOf(UTF8String v, int start) {
       if (i + v.numBytes > numBytes) {
         return -1;
       }
-      if (ByteArrayMethods.arrayEqualsBlock(base, i, v.base, 0, v.numBytes)) {
+      if (ByteArrayMethods.arrayEquals(base, offset + i, v.base, v.offset, v.numBytes)) {
         return c;
       }
       i += numBytesForFirstByte(getByte(i));
@@ -734,7 +707,7 @@ public int indexOf(UTF8String v, int start) {
   private int find(UTF8String str, int start) {
     assert (str.numBytes > 0);
     while (start <= numBytes - str.numBytes) {
-      if (ByteArrayMethods.arrayEqualsBlock(base, start, str.base, 0, str.numBytes)) {
+      if (ByteArrayMethods.arrayEquals(base, offset + start, str.base, str.offset, str.numBytes)) {
         return start;
       }
       start += 1;
@@ -748,7 +721,7 @@ private int find(UTF8String str, int start) {
   private int rfind(UTF8String str, int start) {
     assert (str.numBytes > 0);
     while (start >= 0) {
-      if (ByteArrayMethods.arrayEqualsBlock(base, start, str.base, 0, str.numBytes)) {
+      if (ByteArrayMethods.arrayEquals(base, offset + start, str.base, str.offset, str.numBytes)) {
         return start;
       }
       start -= 1;
@@ -781,7 +754,7 @@ public UTF8String subStringIndex(UTF8String delim, int count) {
         return EMPTY_UTF8;
       }
       byte[] bytes = new byte[idx];
-      base.writeTo(0, bytes, BYTE_ARRAY_OFFSET, idx);
+      copyMemory(base, offset, bytes, BYTE_ARRAY_OFFSET, idx);
       return fromBytes(bytes);
 
     } else {
@@ -801,7 +774,7 @@ public UTF8String subStringIndex(UTF8String delim, int count) {
       }
       int size = numBytes - delim.numBytes - idx;
       byte[] bytes = new byte[size];
-      base.writeTo(idx + delim.numBytes, bytes, BYTE_ARRAY_OFFSET, size);
+      copyMemory(base, offset + idx + delim.numBytes, bytes, BYTE_ARRAY_OFFSET, size);
       return fromBytes(bytes);
     }
   }
@@ -824,15 +797,15 @@ public UTF8String rpad(int len, UTF8String pad) {
       UTF8String remain = pad.substring(0, spaces - padChars * count);
 
       byte[] data = new byte[this.numBytes + pad.numBytes * count + remain.numBytes];
-      base.writeTo(0, data, BYTE_ARRAY_OFFSET, this.numBytes);
+      copyMemory(this.base, this.offset, data, BYTE_ARRAY_OFFSET, this.numBytes);
       int offset = this.numBytes;
       int idx = 0;
       while (idx < count) {
-        pad.base.writeTo(0, data, BYTE_ARRAY_OFFSET + offset, pad.numBytes);
+        copyMemory(pad.base, pad.offset, data, BYTE_ARRAY_OFFSET + offset, pad.numBytes);
         ++ idx;
         offset += pad.numBytes;
       }
-      remain.base.writeTo(0, data, BYTE_ARRAY_OFFSET + offset, remain.numBytes);
+      copyMemory(remain.base, remain.offset, data, BYTE_ARRAY_OFFSET + offset, remain.numBytes);
 
       return UTF8String.fromBytes(data);
     }
@@ -860,13 +833,13 @@ public UTF8String lpad(int len, UTF8String pad) {
       int offset = 0;
       int idx = 0;
       while (idx < count) {
-        pad.base.writeTo(0, data, BYTE_ARRAY_OFFSET + offset, pad.numBytes);
+        copyMemory(pad.base, pad.offset, data, BYTE_ARRAY_OFFSET + offset, pad.numBytes);
         ++ idx;
         offset += pad.numBytes;
       }
-      remain.base.writeTo(0, data, BYTE_ARRAY_OFFSET + offset, remain.numBytes);
+      copyMemory(remain.base, remain.offset, data, BYTE_ARRAY_OFFSET + offset, remain.numBytes);
       offset += remain.numBytes;
-      base.writeTo(0, data, BYTE_ARRAY_OFFSET + offset, numBytes());
+      copyMemory(this.base, this.offset, data, BYTE_ARRAY_OFFSET + offset, numBytes());
 
       return UTF8String.fromBytes(data);
     }
@@ -877,22 +850,22 @@ public UTF8String lpad(int len, UTF8String pad) {
    */
   public static UTF8String concat(UTF8String... inputs) {
     // Compute the total length of the result.
-    long totalLength = 0;
+    int totalLength = 0;
     for (int i = 0; i < inputs.length; i++) {
       if (inputs[i] != null) {
-        totalLength += (long)inputs[i].numBytes;
+        totalLength += inputs[i].numBytes;
       } else {
         return null;
       }
     }
 
     // Allocate a new byte array, and copy the inputs one by one into it.
-    final byte[] result = new byte[Ints.checkedCast(totalLength)];
+    final byte[] result = new byte[totalLength];
     int offset = 0;
     for (int i = 0; i < inputs.length; i++) {
       int len = inputs[i].numBytes;
-      inputs[i].base.writeTo(
-        0,
+      copyMemory(
+        inputs[i].base, inputs[i].offset,
         result, BYTE_ARRAY_OFFSET + offset,
         len);
       offset += len;
@@ -931,8 +904,8 @@ public static UTF8String concatWs(UTF8String separator, UTF8String... inputs) {
     for (int i = 0, j = 0; i < inputs.length; i++) {
       if (inputs[i] != null) {
         int len = inputs[i].numBytes;
-        inputs[i].base.writeTo(
-          0,
+        copyMemory(
+          inputs[i].base, inputs[i].offset,
           result, BYTE_ARRAY_OFFSET + offset,
           len);
         offset += len;
@@ -940,8 +913,8 @@ public static UTF8String concatWs(UTF8String separator, UTF8String... inputs) {
         j++;
         // Add separator if this is not the last input.
         if (j < numInputs) {
-          separator.base.writeTo(
-            0,
+          copyMemory(
+            separator.base, separator.offset,
             result, BYTE_ARRAY_OFFSET + offset,
             separator.numBytes);
           offset += separator.numBytes;
@@ -1215,7 +1188,7 @@ public UTF8String clone() {
 
   public UTF8String copy() {
     byte[] bytes = new byte[numBytes];
-    base.writeTo(0, bytes, BYTE_ARRAY_OFFSET, numBytes);
+    copyMemory(base, offset, bytes, BYTE_ARRAY_OFFSET, numBytes);
     return fromBytes(bytes);
   }
 
@@ -1223,10 +1196,11 @@ public UTF8String copy() {
   public int compareTo(@Nonnull final UTF8String other) {
     int len = Math.min(numBytes, other.numBytes);
     int wordMax = (len / 8) * 8;
-    MemoryBlock rbase = other.base;
+    long roffset = other.offset;
+    Object rbase = other.base;
     for (int i = 0; i < wordMax; i += 8) {
-      long left = base.getLong(i);
-      long right = rbase.getLong(i);
+      long left = getLong(base, offset + i);
+      long right = getLong(rbase, roffset + i);
       if (left != right) {
         if (IS_LITTLE_ENDIAN) {
           return Long.compareUnsigned(Long.reverseBytes(left), Long.reverseBytes(right));
@@ -1237,7 +1211,7 @@ public int compareTo(@Nonnull final UTF8String other) {
     }
     for (int i = wordMax; i < len; i++) {
       // In UTF-8, the byte should be unsigned, so we should compare them as unsigned int.
-      int res = (getByte(i) & 0xFF) - (rbase.getByte(i) & 0xFF);
+      int res = (getByte(i) & 0xFF) - (Platform.getByte(rbase, roffset + i) & 0xFF);
       if (res != 0) {
         return res;
       }
@@ -1256,7 +1230,7 @@ public boolean equals(final Object other) {
       if (numBytes != o.numBytes) {
         return false;
       }
-      return ByteArrayMethods.arrayEqualsBlock(base, 0, o.base, 0, numBytes);
+      return ByteArrayMethods.arrayEquals(base, offset, o.base, o.offset, numBytes);
     } else {
       return false;
     }
@@ -1312,8 +1286,8 @@ public int levenshteinDistance(UTF8String other) {
               num_bytes_j != numBytesForFirstByte(s.getByte(i_bytes))) {
           cost = 1;
         } else {
-          cost = (ByteArrayMethods.arrayEqualsBlock(t.base, j_bytes, s.base,
-            i_bytes, num_bytes_j)) ? 0 : 1;
+          cost = (ByteArrayMethods.arrayEquals(t.base, t.offset + j_bytes, s.base,
+              s.offset + i_bytes, num_bytes_j)) ? 0 : 1;
         }
         d[i + 1] = Math.min(Math.min(d[i] + 1, p[i + 1] + 1), p[i] + cost);
       }
@@ -1328,7 +1302,7 @@ public int levenshteinDistance(UTF8String other) {
 
   @Override
   public int hashCode() {
-    return Murmur3_x86_32.hashUnsafeBytesBlock(base,42);
+    return Murmur3_x86_32.hashUnsafeBytes(base, offset, numBytes, 42);
   }
 
   /**
@@ -1391,10 +1365,10 @@ public void writeExternal(ObjectOutput out) throws IOException {
   }
 
   public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException {
+    offset = BYTE_ARRAY_OFFSET;
     numBytes = in.readInt();
-    byte[] bytes = new byte[numBytes];
-    in.readFully(bytes);
-    base = ByteArrayMemoryBlock.fromArray(bytes);
+    base = new byte[numBytes];
+    in.readFully((byte[]) base);
   }
 
   @Override
@@ -1406,10 +1380,10 @@ public void write(Kryo kryo, Output out) {
 
   @Override
   public void read(Kryo kryo, Input in) {
-    numBytes = in.readInt();
-    byte[] bytes = new byte[numBytes];
-    in.read(bytes);
-    base = ByteArrayMemoryBlock.fromArray(bytes);
+    this.offset = BYTE_ARRAY_OFFSET;
+    this.numBytes = in.readInt();
+    this.base = new byte[numBytes];
+    in.read((byte[]) base);
   }
 
 }
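
To illustrate the first-byte dispatch behind the restored lookup table: the leading byte of a UTF-8 sequence alone determines how many bytes the code point occupies. This sketch caps lengths at 4 bytes per the current Unicode standard, whereas the reverted table also keeps legacy 5- and 6-byte entries; Utf8FirstByteExample is an invented name:

    import java.nio.charset.StandardCharsets;

    public class Utf8FirstByteExample {
      static int numBytesForFirstByte(byte b) {
        int v = b & 0xFF;
        if (v < 0xC0) return 1;  // ASCII, or a stray continuation byte treated as length 1
        if (v < 0xE0) return 2;  // 110xxxxx
        if (v < 0xF0) return 3;  // 1110xxxx
        return 4;                // 11110xxx
      }

      public static void main(String[] args) {
        byte[] bytes = "a¡世\uD83E\uDD19".getBytes(StandardCharsets.UTF_8);
        int i = 0;
        while (i < bytes.length) {
          int len = numBytesForFirstByte(bytes[i]);
          System.out.println("code point at byte " + i + " uses " + len + " byte(s)");
          i += len;
        }
      }
    }
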
diff --git a/common/unsafe/src/test/java/org/apache/spark/unsafe/PlatformUtilSuite.java b/common/unsafe/src/test/java/org/apache/spark/unsafe/PlatformUtilSuite.java
index 583a148b3845d..62854837b05ed 100644
--- a/common/unsafe/src/test/java/org/apache/spark/unsafe/PlatformUtilSuite.java
+++ b/common/unsafe/src/test/java/org/apache/spark/unsafe/PlatformUtilSuite.java
@@ -17,7 +17,6 @@
 
 package org.apache.spark.unsafe;
 
-import org.apache.spark.unsafe.memory.HeapMemoryAllocator;
 import org.apache.spark.unsafe.memory.MemoryAllocator;
 import org.apache.spark.unsafe.memory.MemoryBlock;
 
@@ -81,7 +80,7 @@ public void freeingOnHeapMemoryBlockResetsBaseObjectAndOffset() {
     MemoryAllocator.HEAP.free(block);
     Assert.assertNull(block.getBaseObject());
     Assert.assertEquals(0, block.getBaseOffset());
-    Assert.assertEquals(MemoryBlock.FREED_IN_ALLOCATOR_PAGE_NUMBER, block.getPageNumber());
+    Assert.assertEquals(MemoryBlock.FREED_IN_ALLOCATOR_PAGE_NUMBER, block.pageNumber);
   }
 
   @Test
@@ -92,7 +91,7 @@ public void freeingOffHeapMemoryBlockResetsOffset() {
     MemoryAllocator.UNSAFE.free(block);
     Assert.assertNull(block.getBaseObject());
     Assert.assertEquals(0, block.getBaseOffset());
-    Assert.assertEquals(MemoryBlock.FREED_IN_ALLOCATOR_PAGE_NUMBER, block.getPageNumber());
+    Assert.assertEquals(MemoryBlock.FREED_IN_ALLOCATOR_PAGE_NUMBER, block.pageNumber);
   }
 
   @Test(expected = AssertionError.class)
@@ -135,26 +134,4 @@ public void memoryDebugFillEnabledInTest() {
       MemoryAllocator.MEMORY_DEBUG_FILL_CLEAN_VALUE);
     MemoryAllocator.UNSAFE.free(offheap);
   }
-
-  @Test
-  public void heapMemoryReuse() {
-    MemoryAllocator heapMem = new HeapMemoryAllocator();
-    // The size is less than `HeapMemoryAllocator.POOLING_THRESHOLD_BYTES`,
-    // allocate new memory every time.
-    MemoryBlock onheap1 = heapMem.allocate(513);
-    Object obj1 = onheap1.getBaseObject();
-    heapMem.free(onheap1);
-    MemoryBlock onheap2 = heapMem.allocate(514);
-    Assert.assertNotEquals(obj1, onheap2.getBaseObject());
-
-    // The size is greater than `HeapMemoryAllocator.POOLING_THRESHOLD_BYTES`,
-    // reuse the previous memory which has released.
-    MemoryBlock onheap3 = heapMem.allocate(1024 * 1024 + 1);
-    Assert.assertEquals(onheap3.size(), 1024 * 1024 + 1);
-    Object obj3 = onheap3.getBaseObject();
-    heapMem.free(onheap3);
-    MemoryBlock onheap4 = heapMem.allocate(1024 * 1024 + 7);
-    Assert.assertEquals(onheap4.size(), 1024 * 1024 + 7);
-    Assert.assertEquals(obj3, onheap4.getBaseObject());
-  }
 }
diff --git a/common/unsafe/src/test/java/org/apache/spark/unsafe/array/LongArraySuite.java b/common/unsafe/src/test/java/org/apache/spark/unsafe/array/LongArraySuite.java
index 8c2e98c2bfc54..fb8e53b3348f3 100644
--- a/common/unsafe/src/test/java/org/apache/spark/unsafe/array/LongArraySuite.java
+++ b/common/unsafe/src/test/java/org/apache/spark/unsafe/array/LongArraySuite.java
@@ -20,13 +20,14 @@
 import org.junit.Assert;
 import org.junit.Test;
 
-import org.apache.spark.unsafe.memory.OnHeapMemoryBlock;
+import org.apache.spark.unsafe.memory.MemoryBlock;
 
 public class LongArraySuite {
 
   @Test
   public void basicTest() {
-    LongArray arr = new LongArray(new OnHeapMemoryBlock(16));
+    long[] bytes = new long[2];
+    LongArray arr = new LongArray(MemoryBlock.fromLongArray(bytes));
     arr.set(0, 1L);
     arr.set(1, 2L);
     arr.set(1, 3L);
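
A compact sketch of the pattern this test exercises, assuming only the LongArray and MemoryBlock APIs visible in this diff (LongArrayExample is an invented name): the array view indexes 8-byte slots inside the backing block.

    import org.apache.spark.unsafe.array.LongArray;
    import org.apache.spark.unsafe.memory.MemoryBlock;

    public class LongArrayExample {
      public static void main(String[] args) {
        long[] backing = new long[2];
        LongArray arr = new LongArray(MemoryBlock.fromLongArray(backing));
        arr.set(0, 1L);
        arr.set(1, 3L);   // overwrites any earlier value in slot 1
        System.out.println(arr.get(0) + ", " + arr.get(1));  // 1, 3
      }
    }
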
diff --git a/common/unsafe/src/test/java/org/apache/spark/unsafe/hash/Murmur3_x86_32Suite.java b/common/unsafe/src/test/java/org/apache/spark/unsafe/hash/Murmur3_x86_32Suite.java
index d7ed005db1891..e759cb33b3e6a 100644
--- a/common/unsafe/src/test/java/org/apache/spark/unsafe/hash/Murmur3_x86_32Suite.java
+++ b/common/unsafe/src/test/java/org/apache/spark/unsafe/hash/Murmur3_x86_32Suite.java
@@ -22,8 +22,6 @@
 import java.util.Random;
 import java.util.Set;
 
-import scala.util.hashing.MurmurHash3$;
-
 import org.apache.spark.unsafe.Platform;
 import org.junit.Assert;
 import org.junit.Test;
@@ -53,41 +51,6 @@ public void testKnownLongInputs() {
     Assert.assertEquals(-2106506049, hasher.hashLong(Long.MAX_VALUE));
   }
 
-  // SPARK-23381 Check whether the hash of the byte array is the same as another implementations
-  @Test
-  public void testKnownBytesInputs() {
-    byte[] test = "test".getBytes(StandardCharsets.UTF_8);
-    Assert.assertEquals(MurmurHash3$.MODULE$.bytesHash(test, 0),
-      Murmur3_x86_32.hashUnsafeBytes2(test, Platform.BYTE_ARRAY_OFFSET, test.length, 0));
-    byte[] test1 = "test1".getBytes(StandardCharsets.UTF_8);
-    Assert.assertEquals(MurmurHash3$.MODULE$.bytesHash(test1, 0),
-      Murmur3_x86_32.hashUnsafeBytes2(test1, Platform.BYTE_ARRAY_OFFSET, test1.length, 0));
-    byte[] te = "te".getBytes(StandardCharsets.UTF_8);
-    Assert.assertEquals(MurmurHash3$.MODULE$.bytesHash(te, 0),
-      Murmur3_x86_32.hashUnsafeBytes2(te, Platform.BYTE_ARRAY_OFFSET, te.length, 0));
-    byte[] tes = "tes".getBytes(StandardCharsets.UTF_8);
-    Assert.assertEquals(MurmurHash3$.MODULE$.bytesHash(tes, 0),
-      Murmur3_x86_32.hashUnsafeBytes2(tes, Platform.BYTE_ARRAY_OFFSET, tes.length, 0));
-  }
-
-  @Test
-  public void testKnownWordsInputs() {
-    byte[] bytes = new byte[16];
-    long offset = Platform.BYTE_ARRAY_OFFSET;
-    for (int i = 0; i < 16; i++) {
-      bytes[i] = 0;
-    }
-    Assert.assertEquals(-300363099, hasher.hashUnsafeWords(bytes, offset, 16, 42));
-    for (int i = 0; i < 16; i++) {
-      bytes[i] = -1;
-    }
-    Assert.assertEquals(-1210324667, hasher.hashUnsafeWords(bytes, offset, 16, 42));
-    for (int i = 0; i < 16; i++) {
-      bytes[i] = (byte)i;
-    }
-    Assert.assertEquals(-634919701, hasher.hashUnsafeWords(bytes, offset, 16, 42));
-  }
-
   @Test
   public void randomizedStressTest() {
     int size = 65536;
diff --git a/common/unsafe/src/test/java/org/apache/spark/unsafe/memory/MemoryBlockSuite.java b/common/unsafe/src/test/java/org/apache/spark/unsafe/memory/MemoryBlockSuite.java
deleted file mode 100644
index ef5ff8ee70ec0..0000000000000
--- a/common/unsafe/src/test/java/org/apache/spark/unsafe/memory/MemoryBlockSuite.java
+++ /dev/null
@@ -1,179 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.unsafe.memory;
-
-import org.apache.spark.unsafe.Platform;
-import org.junit.Assert;
-import org.junit.Test;
-
-import java.nio.ByteOrder;
-
-import static org.hamcrest.core.StringContains.containsString;
-
-public class MemoryBlockSuite {
-  private static final boolean bigEndianPlatform =
-    ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN);
-
-  private void check(MemoryBlock memory, Object obj, long offset, int length) {
-    memory.setPageNumber(1);
-    memory.fill((byte)-1);
-    memory.putBoolean(0, true);
-    memory.putByte(1, (byte)127);
-    memory.putShort(2, (short)257);
-    memory.putInt(4, 0x20000002);
-    memory.putLong(8, 0x1234567089ABCDEFL);
-    memory.putFloat(16, 1.0F);
-    memory.putLong(20, 0x1234567089ABCDEFL);
-    memory.putDouble(28, 2.0);
-    MemoryBlock.copyMemory(memory, 0L, memory, 36, 4);
-    int[] a = new int[2];
-    a[0] = 0x12345678;
-    a[1] = 0x13579BDF;
-    memory.copyFrom(a, Platform.INT_ARRAY_OFFSET, 40, 8);
-    byte[] b = new byte[8];
-    memory.writeTo(40, b, Platform.BYTE_ARRAY_OFFSET, 8);
-
-    Assert.assertEquals(obj, memory.getBaseObject());
-    Assert.assertEquals(offset, memory.getBaseOffset());
-    Assert.assertEquals(length, memory.size());
-    Assert.assertEquals(1, memory.getPageNumber());
-    Assert.assertEquals(true, memory.getBoolean(0));
-    Assert.assertEquals((byte)127, memory.getByte(1 ));
-    Assert.assertEquals((short)257, memory.getShort(2));
-    Assert.assertEquals(0x20000002, memory.getInt(4));
-    Assert.assertEquals(0x1234567089ABCDEFL, memory.getLong(8));
-    Assert.assertEquals(1.0F, memory.getFloat(16), 0);
-    Assert.assertEquals(0x1234567089ABCDEFL, memory.getLong(20));
-    Assert.assertEquals(2.0, memory.getDouble(28), 0);
-    Assert.assertEquals(true, memory.getBoolean(36));
-    Assert.assertEquals((byte)127, memory.getByte(37 ));
-    Assert.assertEquals((short)257, memory.getShort(38));
-    Assert.assertEquals(a[0], memory.getInt(40));
-    Assert.assertEquals(a[1], memory.getInt(44));
-    if (bigEndianPlatform) {
-      Assert.assertEquals(a[0],
-        ((int)b[0] & 0xff) << 24 | ((int)b[1] & 0xff) << 16 |
-        ((int)b[2] & 0xff) << 8 | ((int)b[3] & 0xff));
-      Assert.assertEquals(a[1],
-        ((int)b[4] & 0xff) << 24 | ((int)b[5] & 0xff) << 16 |
-        ((int)b[6] & 0xff) << 8 | ((int)b[7] & 0xff));
-    } else {
-      Assert.assertEquals(a[0],
-        ((int)b[3] & 0xff) << 24 | ((int)b[2] & 0xff) << 16 |
-        ((int)b[1] & 0xff) << 8 | ((int)b[0] & 0xff));
-      Assert.assertEquals(a[1],
-        ((int)b[7] & 0xff) << 24 | ((int)b[6] & 0xff) << 16 |
-        ((int)b[5] & 0xff) << 8 | ((int)b[4] & 0xff));
-    }
-    for (int i = 48; i < memory.size(); i++) {
-      Assert.assertEquals((byte) -1, memory.getByte(i));
-    }
-
-    assert(memory.subBlock(0, memory.size()) == memory);
-
-    try {
-      memory.subBlock(-8, 8);
-      Assert.fail();
-    } catch (Exception expected) {
-      Assert.assertThat(expected.getMessage(), containsString("non-negative"));
-    }
-
-    try {
-      memory.subBlock(0, -8);
-      Assert.fail();
-    } catch (Exception expected) {
-      Assert.assertThat(expected.getMessage(), containsString("non-negative"));
-    }
-
-    try {
-      memory.subBlock(0, length + 8);
-      Assert.fail();
-    } catch (Exception expected) {
-      Assert.assertThat(expected.getMessage(), containsString("should not be larger than"));
-    }
-
-    try {
-      memory.subBlock(8, length - 4);
-      Assert.fail();
-    } catch (Exception expected) {
-      Assert.assertThat(expected.getMessage(), containsString("should not be larger than"));
-    }
-
-    try {
-      memory.subBlock(length + 8, 4);
-      Assert.fail();
-    } catch (Exception expected) {
-      Assert.assertThat(expected.getMessage(), containsString("should not be larger than"));
-    }
-
-    memory.setPageNumber(MemoryBlock.NO_PAGE_NUMBER);
-  }
-
-  @Test
-  public void testByteArrayMemoryBlock() {
-    byte[] obj = new byte[56];
-    long offset = Platform.BYTE_ARRAY_OFFSET;
-    int length = obj.length;
-
-    MemoryBlock memory = new ByteArrayMemoryBlock(obj, offset, length);
-    check(memory, obj, offset, length);
-
-    memory = ByteArrayMemoryBlock.fromArray(obj);
-    check(memory, obj, offset, length);
-
-    obj = new byte[112];
-    memory = new ByteArrayMemoryBlock(obj, offset, length);
-    check(memory, obj, offset, length);
-  }
-
-  @Test
-  public void testOnHeapMemoryBlock() {
-    long[] obj = new long[7];
-    long offset = Platform.LONG_ARRAY_OFFSET;
-    int length = obj.length * 8;
-
-    MemoryBlock memory = new OnHeapMemoryBlock(obj, offset, length);
-    check(memory, obj, offset, length);
-
-    memory = OnHeapMemoryBlock.fromArray(obj);
-    check(memory, obj, offset, length);
-
-    obj = new long[14];
-    memory = new OnHeapMemoryBlock(obj, offset, length);
-    check(memory, obj, offset, length);
-  }
-
-  @Test
-  public void testOffHeapArrayMemoryBlock() {
-    MemoryAllocator memoryAllocator = new UnsafeMemoryAllocator();
-    MemoryBlock memory = memoryAllocator.allocate(56);
-    Object obj = memory.getBaseObject();
-    long offset = memory.getBaseOffset();
-    int length = 56;
-
-    check(memory, obj, offset, length);
-    memoryAllocator.free(memory);
-
-    long address = Platform.allocateMemory(112);
-    memory = new OffHeapMemoryBlock(address, length);
-    obj = memory.getBaseObject();
-    offset = memory.getBaseOffset();
-    check(memory, obj, offset, length);
-    Platform.freeMemory(address);
-  }
-}
diff --git a/common/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java
index 42dda30480702..9b303fa5bc6c5 100644
--- a/common/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java
+++ b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java
@@ -25,8 +25,7 @@
 import java.util.*;
 
 import com.google.common.collect.ImmutableMap;
-import org.apache.spark.unsafe.memory.ByteArrayMemoryBlock;
-import org.apache.spark.unsafe.memory.OnHeapMemoryBlock;
+import org.apache.spark.unsafe.Platform;
 import org.junit.Test;
 
 import static org.junit.Assert.*;
@@ -52,19 +51,15 @@ private static void checkBasic(String str, int len) {
 
     assertTrue(s1.contains(s2));
     assertTrue(s2.contains(s1));
-    assertTrue(s1.startsWith(s2));
-    assertTrue(s1.endsWith(s2));
+    assertTrue(s1.startsWith(s1));
+    assertTrue(s1.endsWith(s1));
   }
 
   @Test
   public void basicTest() {
     checkBasic("", 0);
-    checkBasic("¡", 1); // 2 bytes char
-    checkBasic("ку", 2); // 2 * 2 bytes chars
-    checkBasic("hello", 5); // 5 * 1 byte chars
+    checkBasic("hello", 5);
     checkBasic("大 千 世 界", 7);
-    checkBasic("︽﹋%", 3); // 3 * 3 bytes chars
-    checkBasic("\uD83E\uDD19", 1); // 4 bytes char
   }
 
   @Test
@@ -513,6 +508,21 @@ public void soundex() {
     assertEquals(fromString("世界千世").soundex(), fromString("世界千世"));
   }
 
+  @Test
+  public void writeToOutputStreamUnderflow() throws IOException {
+    // offset underflow is apparently supported?
+    final ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
+    final byte[] test = "01234567".getBytes(StandardCharsets.UTF_8);
+
+    for (int i = 1; i <= Platform.BYTE_ARRAY_OFFSET; ++i) {
+      UTF8String.fromAddress(test, Platform.BYTE_ARRAY_OFFSET - i, test.length + i)
+          .writeTo(outputStream);
+      final ByteBuffer buffer = ByteBuffer.wrap(outputStream.toByteArray(), i, test.length);
+      assertEquals("01234567", StandardCharsets.UTF_8.decode(buffer).toString());
+      outputStream.reset();
+    }
+  }
+
   @Test
   public void writeToOutputStreamSlice() throws IOException {
     final ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
@@ -520,7 +530,7 @@ public void writeToOutputStreamSlice() throws IOException {
 
     for (int i = 0; i < test.length; ++i) {
       for (int j = 0; j < test.length - i; ++j) {
-        new UTF8String(ByteArrayMemoryBlock.fromArray(test).subBlock(i, j))
+        UTF8String.fromAddress(test, Platform.BYTE_ARRAY_OFFSET + i, j)
             .writeTo(outputStream);
 
         assertArrayEquals(Arrays.copyOfRange(test, i, i + j), outputStream.toByteArray());
@@ -551,7 +561,7 @@ public void writeToOutputStreamOverflow() throws IOException {
 
     for (final long offset : offsets) {
       try {
-        new UTF8String(ByteArrayMemoryBlock.fromArray(test).subBlock(offset, test.length))
+        fromAddress(test, BYTE_ARRAY_OFFSET + offset, test.length)
             .writeTo(outputStream);
 
         throw new IllegalStateException(Long.toString(offset));
@@ -578,25 +588,26 @@ public void writeToOutputStream() throws IOException {
   }
 
   @Test
-  public void writeToOutputStreamLongArray() throws IOException {
+  public void writeToOutputStreamIntArray() throws IOException {
     // verify that writes work on objects that are not byte arrays
-    final ByteBuffer buffer = StandardCharsets.UTF_8.encode("3千大千世界");
+    final ByteBuffer buffer = StandardCharsets.UTF_8.encode("大千世界");
     buffer.position(0);
     buffer.order(ByteOrder.nativeOrder());
 
     final int length = buffer.limit();
-    assertEquals(16, length);
+    assertEquals(12, length);
 
-    final int longs = length / 8;
-    final long[] array = new long[longs];
+    final int ints = length / 4;
+    final int[] array = new int[ints];
 
-    for (int i = 0; i < longs; ++i) {
-      array[i] = buffer.getLong();
+    for (int i = 0; i < ints; ++i) {
+      array[i] = buffer.getInt();
     }
 
     final ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
-    new UTF8String(OnHeapMemoryBlock.fromArray(array)).writeTo(outputStream);
-    assertEquals("3千大千世界", outputStream.toString("UTF-8"));
+    fromAddress(array, Platform.INT_ARRAY_OFFSET, length)
+        .writeTo(outputStream);
+    assertEquals("大千世界", outputStream.toString("UTF-8"));
   }
 
   @Test
@@ -780,21 +791,4 @@ public void trimRightWithTrimString() {
     assertEquals(fromString("头"), fromString("头a???/").trimRight(fromString("数?/*&^%a")));
     assertEquals(fromString("头"), fromString("头数b数数 [").trimRight(fromString(" []数b")));
   }
-
-  @Test
-  public void skipWrongFirstByte() {
-    int[] wrongFirstBytes = {
-      0x80, 0x9F, 0xBF, // Skip Continuation bytes
-      0xC0, 0xC2, // 0xC0..0xC1 - disallowed in UTF-8
-      // 0xF5..0xFF - disallowed in UTF-8
-      0xF5, 0xF6, 0xF7, 0xF8, 0xF9,
-      0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF
-    };
-    byte[] c = new byte[1];
-
-    for (int i = 0; i < wrongFirstBytes.length; ++i) {
-      c[0] = (byte)wrongFirstBytes[i];
-      assertEquals(fromBytes(c).numChars(), 1);
-    }
-  }
 }
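
For readers unfamiliar with the addressing used by the restored tests above, here is a minimal, self-contained sketch (not part of the patch) of wrapping an on-heap byte[] with UTF8String.fromAddress and writing it back out; it assumes Spark's spark-unsafe module is on the classpath.

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;

import org.apache.spark.unsafe.Platform;
import org.apache.spark.unsafe.types.UTF8String;

public class FromAddressSketch {
  public static void main(String[] args) throws IOException {
    // A UTF8String can point directly into an existing byte[]: the base object is the
    // array itself and the offset starts at Platform.BYTE_ARRAY_OFFSET.
    byte[] bytes = "大千世界".getBytes(StandardCharsets.UTF_8);
    UTF8String s = UTF8String.fromAddress(bytes, Platform.BYTE_ARRAY_OFFSET, bytes.length);

    // writeTo(OutputStream) copies exactly numBytes starting at the stored offset.
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    s.writeTo(out);
    System.out.println(out.toString("UTF-8")); // 大千世界
  }
}
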
diff --git a/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringPropertyCheckSuite.scala b/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringPropertyCheckSuite.scala
index 48004e812a8bf..62d4176d00f94 100644
--- a/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringPropertyCheckSuite.scala
+++ b/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringPropertyCheckSuite.scala
@@ -164,7 +164,7 @@ class UTF8StringPropertyCheckSuite extends FunSuite with GeneratorDrivenProperty
     def padding(origin: String, pad: String, length: Int, isLPad: Boolean): String = {
       if (length <= 0) return ""
       if (length <= origin.length) {
-        origin.substring(0, length)
+        if (length <= 0) "" else origin.substring(0, length)
       } else {
         if (pad.length == 0) return origin
         val toPad = length - origin.length
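
The Scala padding helper above is the reference implementation the property test compares against. As a point of reference only, this is a hedged Java rendering of the usual lpad/rpad semantics it models; the class name and the padding loop are illustrative, not taken from the suite.

// Hypothetical stand-alone rendering of lpad/rpad-style padding, for illustration only.
final class PaddingSketch {
  static String pad(String origin, String pad, int length, boolean isLPad) {
    if (length <= 0) return "";                      // non-positive target -> empty result
    if (length <= origin.length()) {
      return origin.substring(0, length);            // truncate; no padding needed
    }
    if (pad.isEmpty()) return origin;                // nothing to pad with
    StringBuilder sb = new StringBuilder();
    while (sb.length() < length - origin.length()) {
      sb.append(pad);                                // repeat the pad string as needed
    }
    String part = sb.substring(0, length - origin.length());
    return isLPad ? part + origin : origin + part;
  }

  public static void main(String[] args) {
    System.out.println(pad("hi", "*", 5, true));     // ***hi
    System.out.println(pad("hello", "*", 3, false)); // hel (truncated)
  }
}
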
diff --git a/core/pom.xml b/core/pom.xml
index 220522d3a8296..9258a856028a0 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -95,12 +95,6 @@
       <groupId>org.apache.curator</groupId>
       <artifactId>curator-recipes</artifactId>
     </dependency>
-    <!-- With curator 2.12  SBT/Ivy doesn't get ZK on the build classpath.
-         Explicitly declaring it as a dependency fixes this. -->
-    <dependency>
-      <groupId>org.apache.zookeeper</groupId>
-      <artifactId>zookeeper</artifactId>
-    </dependency>
 
     <!-- Jetty dependencies promoted to compile here so they are shaded
          and inlined into spark-core jar -->
@@ -350,7 +344,7 @@
     <dependency>
       <groupId>net.sf.py4j</groupId>
       <artifactId>py4j</artifactId>
-      <version>0.10.7</version>
+      <version>0.10.6</version>
     </dependency>
     <dependency>
       <groupId>org.apache.spark</groupId>
diff --git a/core/src/main/java/org/apache/spark/SparkExecutorInfo.java b/core/src/main/java/org/apache/spark/SparkExecutorInfo.java
index 2b93385adf103..dc3e826475987 100644
--- a/core/src/main/java/org/apache/spark/SparkExecutorInfo.java
+++ b/core/src/main/java/org/apache/spark/SparkExecutorInfo.java
@@ -30,8 +30,4 @@
   int port();
   long cacheSize();
   int numRunningTasks();
-  long usedOnHeapStorageMemory();
-  long usedOffHeapStorageMemory();
-  long totalOnHeapStorageMemory();
-  long totalOffHeapStorageMemory();
 }
diff --git a/core/src/main/java/org/apache/spark/io/ReadAheadInputStream.java b/core/src/main/java/org/apache/spark/io/ReadAheadInputStream.java
index 0cced9e222952..5b45d268ace8d 100644
--- a/core/src/main/java/org/apache/spark/io/ReadAheadInputStream.java
+++ b/core/src/main/java/org/apache/spark/io/ReadAheadInputStream.java
@@ -27,7 +27,6 @@
 import java.nio.ByteBuffer;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.TimeUnit;
-import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.locks.Condition;
 import java.util.concurrent.locks.ReentrantLock;
 
@@ -79,8 +78,9 @@
   // whether there is a read ahead task running,
   private boolean isReading;
 
-  // whether there is a reader waiting for data.
-  private AtomicBoolean isWaiting = new AtomicBoolean(false);
+  // If the remaining data size in the current buffer is below this threshold,
+  // we issue an async read from the underlying input stream.
+  private final int readAheadThresholdInBytes;
 
   private final InputStream underlyingInputStream;
 
@@ -97,13 +97,20 @@
    *
    * @param inputStream The underlying input stream.
    * @param bufferSizeInBytes The buffer size.
+   * @param readAheadThresholdInBytes If the active buffer has less data than the read-ahead
+   *                                  threshold, an async read is triggered.
    */
   public ReadAheadInputStream(
-      InputStream inputStream, int bufferSizeInBytes) {
+      InputStream inputStream, int bufferSizeInBytes, int readAheadThresholdInBytes) {
     Preconditions.checkArgument(bufferSizeInBytes > 0,
         "bufferSizeInBytes should be greater than 0, but the value is " + bufferSizeInBytes);
+    Preconditions.checkArgument(readAheadThresholdInBytes > 0 &&
+            readAheadThresholdInBytes < bufferSizeInBytes,
+        "readAheadThresholdInBytes should be greater than 0 and less than bufferSizeInBytes, " +
+            "but the value is " + readAheadThresholdInBytes);
     activeBuffer = ByteBuffer.allocate(bufferSizeInBytes);
     readAheadBuffer = ByteBuffer.allocate(bufferSizeInBytes);
+    this.readAheadThresholdInBytes = readAheadThresholdInBytes;
     this.underlyingInputStream = inputStream;
     activeBuffer.flip();
     readAheadBuffer.flip();
@@ -159,17 +166,12 @@ public void run() {
         // in that case the reader waits for this async read to complete.
         // So there is no race condition in both the situations.
         int read = 0;
-        int off = 0, len = arr.length;
         Throwable exception = null;
         try {
-          // try to fill the read ahead buffer.
-          // if a reader is waiting, possibly return early.
-          do {
-            read = underlyingInputStream.read(arr, off, len);
-            if (read <= 0) break;
-            off += read;
-            len -= read;
-          } while (len > 0 && !isWaiting.get());
+          while (true) {
+            read = underlyingInputStream.read(arr);
+            if (0 != read) break;
+          }
         } catch (Throwable ex) {
           exception = ex;
           if (ex instanceof Error) {
@@ -179,12 +181,13 @@ public void run() {
           }
         } finally {
           stateChangeLock.lock();
-          readAheadBuffer.limit(off);
           if (read < 0 || (exception instanceof EOFException)) {
             endOfStream = true;
           } else if (exception != null) {
             readAborted = true;
             readException = exception;
+          } else {
+            readAheadBuffer.limit(read);
           }
           readInProgress = false;
           signalAsyncReadComplete();
@@ -227,10 +230,7 @@ private void signalAsyncReadComplete() {
 
   private void waitForAsyncReadComplete() throws IOException {
     stateChangeLock.lock();
-    isWaiting.set(true);
     try {
-      // There is only one reader, and one writer, so the writer should signal only once,
-      // but a while loop checking the wake up condition is still needed to avoid spurious wakeups.
       while (readInProgress) {
         asyncReadComplete.await();
       }
@@ -239,7 +239,6 @@ private void waitForAsyncReadComplete() throws IOException {
       iio.initCause(e);
       throw iio;
     } finally {
-      isWaiting.set(false);
       stateChangeLock.unlock();
     }
     checkReadException();
@@ -247,13 +246,8 @@ private void waitForAsyncReadComplete() throws IOException {
 
   @Override
   public int read() throws IOException {
-    if (activeBuffer.hasRemaining()) {
-      // short path - just get one byte.
-      return activeBuffer.get() & 0xFF;
-    } else {
-      byte[] oneByteArray = oneByte.get();
-      return read(oneByteArray, 0, 1) == -1 ? -1 : oneByteArray[0] & 0xFF;
-    }
+    byte[] oneByteArray = oneByte.get();
+    return read(oneByteArray, 0, 1) == -1 ? -1 : oneByteArray[0] & 0xFF;
   }
 
   @Override
@@ -264,43 +258,54 @@ public int read(byte[] b, int offset, int len) throws IOException {
     if (len == 0) {
       return 0;
     }
+    stateChangeLock.lock();
+    try {
+      return readInternal(b, offset, len);
+    } finally {
+      stateChangeLock.unlock();
+    }
+  }
 
+  /**
+   * flip the active and read ahead buffer
+   */
+  private void swapBuffers() {
+    ByteBuffer temp = activeBuffer;
+    activeBuffer = readAheadBuffer;
+    readAheadBuffer = temp;
+  }
+
+  /**
+   * Internal read function which should be called only from read() api. The assumption is that
+   * the stateChangeLock is already acquired in the caller before calling this function.
+   */
+  private int readInternal(byte[] b, int offset, int len) throws IOException {
+    assert (stateChangeLock.isLocked());
     if (!activeBuffer.hasRemaining()) {
-      // No remaining in active buffer - lock and switch to write ahead buffer.
-      stateChangeLock.lock();
-      try {
+      waitForAsyncReadComplete();
+      if (readAheadBuffer.hasRemaining()) {
+        swapBuffers();
+      } else {
+        // The first read or activeBuffer is skipped.
+        readAsync();
         waitForAsyncReadComplete();
-        if (!readAheadBuffer.hasRemaining()) {
-          // The first read.
-          readAsync();
-          waitForAsyncReadComplete();
-          if (isEndOfStream()) {
-            return -1;
-          }
+        if (isEndOfStream()) {
+          return -1;
         }
-        // Swap the newly read read ahead buffer in place of empty active buffer.
         swapBuffers();
-        // After swapping buffers, trigger another async read for read ahead buffer.
-        readAsync();
-      } finally {
-        stateChangeLock.unlock();
       }
+    } else {
+      checkReadException();
     }
     len = Math.min(len, activeBuffer.remaining());
     activeBuffer.get(b, offset, len);
 
+    if (activeBuffer.remaining() <= readAheadThresholdInBytes && !readAheadBuffer.hasRemaining()) {
+      readAsync();
+    }
     return len;
   }
 
-  /**
-   * flip the active and read ahead buffer
-   */
-  private void swapBuffers() {
-    ByteBuffer temp = activeBuffer;
-    activeBuffer = readAheadBuffer;
-    readAheadBuffer = temp;
-  }
-
   @Override
   public int available() throws IOException {
     stateChangeLock.lock();
@@ -318,11 +323,6 @@ public long skip(long n) throws IOException {
     if (n <= 0L) {
       return 0L;
     }
-    if (n <= activeBuffer.remaining()) {
-      // Only skipping from active buffer is sufficient
-      activeBuffer.position((int) n + activeBuffer.position());
-      return n;
-    }
     stateChangeLock.lock();
     long skipped;
     try {
@@ -346,14 +346,21 @@ private long skipInternal(long n) throws IOException {
     if (available() >= n) {
       // we can skip from the internal buffers
       int toSkip = (int) n;
+      if (toSkip <= activeBuffer.remaining()) {
+        // Only skipping from active buffer is sufficient
+        activeBuffer.position(toSkip + activeBuffer.position());
+        if (activeBuffer.remaining() <= readAheadThresholdInBytes
+            && !readAheadBuffer.hasRemaining()) {
+          readAsync();
+        }
+        return n;
+      }
       // We need to skip from both active buffer and read ahead buffer
       toSkip -= activeBuffer.remaining();
-      assert(toSkip > 0); // skipping from activeBuffer already handled.
       activeBuffer.position(0);
       activeBuffer.flip();
       readAheadBuffer.position(toSkip + readAheadBuffer.position());
       swapBuffers();
-      // Trigger async read to emptied read ahead buffer.
       readAsync();
       return n;
     } else {
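
The hunks above restore the three-argument constructor, where readAheadThresholdInBytes decides when the inactive buffer is refilled asynchronously. A minimal usage sketch of that restored constructor follows; the file name and sizes are illustrative, not from the patch.

import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;

import org.apache.spark.io.ReadAheadInputStream;

public class ReadAheadSketch {
  public static void main(String[] args) throws IOException {
    int bufferSize = 1 << 20;            // 1 MiB per internal buffer (illustrative)
    int threshold  = bufferSize / 2;     // refill asynchronously once half the active buffer is consumed

    // Constructor shape restored by this revert: (stream, bufferSizeInBytes, readAheadThresholdInBytes),
    // with 0 < threshold < bufferSize enforced by the preconditions shown above.
    try (InputStream in = new ReadAheadInputStream(
        new FileInputStream("/tmp/spill.bin"), bufferSize, threshold)) {
      byte[] chunk = new byte[8192];
      int n;
      while ((n = in.read(chunk)) != -1) {
        // consume n bytes; reads are served from the active buffer while the
        // read-ahead buffer is filled on a background thread
      }
    }
  }
}
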
diff --git a/core/src/main/java/org/apache/spark/memory/TaskMemoryManager.java b/core/src/main/java/org/apache/spark/memory/TaskMemoryManager.java
index 8651a639c07f7..d07faf1da1248 100644
--- a/core/src/main/java/org/apache/spark/memory/TaskMemoryManager.java
+++ b/core/src/main/java/org/apache/spark/memory/TaskMemoryManager.java
@@ -311,7 +311,7 @@ public MemoryBlock allocatePage(long size, MemoryConsumer consumer) {
       // this could trigger spilling to free some pages.
       return allocatePage(size, consumer);
     }
-    page.setPageNumber(pageNumber);
+    page.pageNumber = pageNumber;
     pageTable[pageNumber] = page;
     if (logger.isTraceEnabled()) {
       logger.trace("Allocate page number {} ({} bytes)", pageNumber, acquired);
@@ -323,25 +323,25 @@ public MemoryBlock allocatePage(long size, MemoryConsumer consumer) {
    * Free a block of memory allocated via {@link TaskMemoryManager#allocatePage}.
    */
   public void freePage(MemoryBlock page, MemoryConsumer consumer) {
-    assert (page.getPageNumber() != MemoryBlock.NO_PAGE_NUMBER) :
+    assert (page.pageNumber != MemoryBlock.NO_PAGE_NUMBER) :
       "Called freePage() on memory that wasn't allocated with allocatePage()";
-    assert (page.getPageNumber() != MemoryBlock.FREED_IN_ALLOCATOR_PAGE_NUMBER) :
+    assert (page.pageNumber != MemoryBlock.FREED_IN_ALLOCATOR_PAGE_NUMBER) :
       "Called freePage() on a memory block that has already been freed";
-    assert (page.getPageNumber() != MemoryBlock.FREED_IN_TMM_PAGE_NUMBER) :
+    assert (page.pageNumber != MemoryBlock.FREED_IN_TMM_PAGE_NUMBER) :
             "Called freePage() on a memory block that has already been freed";
-    assert(allocatedPages.get(page.getPageNumber()));
-    pageTable[page.getPageNumber()] = null;
+    assert(allocatedPages.get(page.pageNumber));
+    pageTable[page.pageNumber] = null;
     synchronized (this) {
-      allocatedPages.clear(page.getPageNumber());
+      allocatedPages.clear(page.pageNumber);
     }
     if (logger.isTraceEnabled()) {
-      logger.trace("Freed page number {} ({} bytes)", page.getPageNumber(), page.size());
+      logger.trace("Freed page number {} ({} bytes)", page.pageNumber, page.size());
     }
     long pageSize = page.size();
     // Clear the page number before passing the block to the MemoryAllocator's free().
     // Doing this allows the MemoryAllocator to detect when a TaskMemoryManager-managed
     // page has been inappropriately directly freed without calling TMM.freePage().
-    page.setPageNumber(MemoryBlock.FREED_IN_TMM_PAGE_NUMBER);
+    page.pageNumber = MemoryBlock.FREED_IN_TMM_PAGE_NUMBER;
     memoryManager.tungstenMemoryAllocator().free(page);
     releaseExecutionMemory(pageSize, consumer);
   }
@@ -363,7 +363,7 @@ public long encodePageNumberAndOffset(MemoryBlock page, long offsetInPage) {
       // relative to the page's base offset; this relative offset will fit in 51 bits.
       offsetInPage -= page.getBaseOffset();
     }
-    return encodePageNumberAndOffset(page.getPageNumber(), offsetInPage);
+    return encodePageNumberAndOffset(page.pageNumber, offsetInPage);
   }
 
   @VisibleForTesting
@@ -434,7 +434,7 @@ public long cleanUpAllAllocatedMemory() {
       for (MemoryBlock page : pageTable) {
         if (page != null) {
           logger.debug("unreleased page: " + page + " in task " + taskAttemptId);
-          page.setPageNumber(MemoryBlock.FREED_IN_TMM_PAGE_NUMBER);
+          page.pageNumber = MemoryBlock.FREED_IN_TMM_PAGE_NUMBER;
           memoryManager.tungstenMemoryAllocator().free(page);
         }
       }
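
The encodePageNumberAndOffset call above relies on the in-page offset fitting in 51 bits, leaving 64 - 51 = 13 bits for the page number. A hedged arithmetic sketch of that packing, with constants inferred from the comment rather than copied from TaskMemoryManager:

// Illustrative packing of (pageNumber, offsetInPage) into one long address.
final class PageAddressSketch {
  static final int OFFSET_BITS = 51;                        // per the comment in the hunk above
  static final long OFFSET_MASK = (1L << OFFSET_BITS) - 1;  // low 51 bits

  static long encode(int pageNumber, long offsetInPage) {
    return (((long) pageNumber) << OFFSET_BITS) | (offsetInPage & OFFSET_MASK);
  }

  static int pageNumber(long address) {
    return (int) (address >>> OFFSET_BITS);
  }

  static long offsetInPage(long address) {
    return address & OFFSET_MASK;
  }

  public static void main(String[] args) {
    long addr = encode(3, 4096L);
    System.out.println(pageNumber(addr) + " / " + offsetInPage(addr)); // 3 / 4096
  }
}
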
diff --git a/core/src/main/java/org/apache/spark/shuffle/sort/ShuffleInMemorySorter.java b/core/src/main/java/org/apache/spark/shuffle/sort/ShuffleInMemorySorter.java
index 8f49859746b89..dc36809d8911f 100644
--- a/core/src/main/java/org/apache/spark/shuffle/sort/ShuffleInMemorySorter.java
+++ b/core/src/main/java/org/apache/spark/shuffle/sort/ShuffleInMemorySorter.java
@@ -20,6 +20,7 @@
 import java.util.Comparator;
 
 import org.apache.spark.memory.MemoryConsumer;
+import org.apache.spark.unsafe.Platform;
 import org.apache.spark.unsafe.array.LongArray;
 import org.apache.spark.unsafe.memory.MemoryBlock;
 import org.apache.spark.util.collection.Sorter;
@@ -104,7 +105,13 @@ public void reset() {
 
   public void expandPointerArray(LongArray newArray) {
     assert(newArray.size() > array.size());
-    MemoryBlock.copyMemory(array.memoryBlock(), newArray.memoryBlock(), pos * 8L);
+    Platform.copyMemory(
+      array.getBaseObject(),
+      array.getBaseOffset(),
+      newArray.getBaseObject(),
+      newArray.getBaseOffset(),
+      pos * 8L
+    );
     consumer.freeArray(array);
     array = newArray;
     usableCapacity = getUsableCapacity();
@@ -173,7 +180,10 @@ public ShuffleSorterIterator getSortedIterator() {
         PackedRecordPointer.PARTITION_ID_START_BYTE_INDEX,
         PackedRecordPointer.PARTITION_ID_END_BYTE_INDEX, false, false);
     } else {
-      MemoryBlock unused = array.memoryBlock().subBlock(pos * 8L, (array.size() - pos) * 8L);
+      MemoryBlock unused = new MemoryBlock(
+        array.getBaseObject(),
+        array.getBaseOffset() + pos * 8L,
+        (array.size() - pos) * 8L);
       LongArray buffer = new LongArray(unused);
       Sorter<PackedRecordPointer, LongArray> sorter =
         new Sorter<>(new ShuffleSortDataFormat(buffer));
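
expandPointerArray above goes back to raw Platform.copyMemory over each array's base object and base offset. A small stand-alone sketch of the same call shape against plain long[] arrays; it assumes Platform.LONG_ARRAY_OFFSET, the long[] counterpart of the BYTE_ARRAY_OFFSET constant used elsewhere in this patch.

import org.apache.spark.unsafe.Platform;

public class CopyMemorySketch {
  public static void main(String[] args) {
    long[] src = {1L, 2L, 3L, 4L};
    long[] dst = new long[8];
    int pos = 3;                                    // number of 8-byte records in use

    // Same call shape as expandPointerArray: copy pos * 8 bytes from the old
    // backing array to the start of the new, larger one.
    Platform.copyMemory(
        src, Platform.LONG_ARRAY_OFFSET,
        dst, Platform.LONG_ARRAY_OFFSET,
        pos * 8L);

    System.out.println(dst[0] + "," + dst[1] + "," + dst[2] + "," + dst[3]); // 1,2,3,0
  }
}
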
diff --git a/core/src/main/java/org/apache/spark/shuffle/sort/ShuffleSortDataFormat.java b/core/src/main/java/org/apache/spark/shuffle/sort/ShuffleSortDataFormat.java
index 254449e95443e..717bdd79d47ef 100644
--- a/core/src/main/java/org/apache/spark/shuffle/sort/ShuffleSortDataFormat.java
+++ b/core/src/main/java/org/apache/spark/shuffle/sort/ShuffleSortDataFormat.java
@@ -17,8 +17,8 @@
 
 package org.apache.spark.shuffle.sort;
 
+import org.apache.spark.unsafe.Platform;
 import org.apache.spark.unsafe.array.LongArray;
-import org.apache.spark.unsafe.memory.MemoryBlock;
 import org.apache.spark.util.collection.SortDataFormat;
 
 final class ShuffleSortDataFormat extends SortDataFormat<PackedRecordPointer, LongArray> {
@@ -60,8 +60,13 @@ public void copyElement(LongArray src, int srcPos, LongArray dst, int dstPos) {
 
   @Override
   public void copyRange(LongArray src, int srcPos, LongArray dst, int dstPos, int length) {
-    MemoryBlock.copyMemory(src.memoryBlock(), srcPos * 8L,
-      dst.memoryBlock(),dstPos * 8L,length * 8L);
+    Platform.copyMemory(
+      src.getBaseObject(),
+      src.getBaseOffset() + srcPos * 8L,
+      dst.getBaseObject(),
+      dst.getBaseOffset() + dstPos * 8L,
+      length * 8L
+    );
   }
 
   @Override
diff --git a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java
index 4fc19b1721518..66118f454159b 100644
--- a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java
+++ b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java
@@ -544,7 +544,7 @@ public long spill() throws IOException {
           // is accessing the current record. We free this page in that caller's next loadNext()
           // call.
           for (MemoryBlock page : allocatedPages) {
-            if (!loaded || page.getPageNumber() !=
+            if (!loaded || page.pageNumber !=
                     ((UnsafeInMemorySorter.SortedIterator)upstream).getCurrentPageNumber()) {
               released += page.size();
               freePage(page);
diff --git a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorter.java b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorter.java
index 717823ebbd320..b3c27d83da172 100644
--- a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorter.java
+++ b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorter.java
@@ -26,6 +26,7 @@
 import org.apache.spark.memory.MemoryConsumer;
 import org.apache.spark.memory.SparkOutOfMemoryError;
 import org.apache.spark.memory.TaskMemoryManager;
+import org.apache.spark.unsafe.Platform;
 import org.apache.spark.unsafe.UnsafeAlignedOffset;
 import org.apache.spark.unsafe.array.LongArray;
 import org.apache.spark.unsafe.memory.MemoryBlock;
@@ -124,7 +125,7 @@ public UnsafeInMemorySorter(
     int initialSize,
     boolean canUseRadixSort) {
     this(consumer, memoryManager, recordComparator, prefixComparator,
-      consumer.allocateArray(initialSize * 2L), canUseRadixSort);
+      consumer.allocateArray(initialSize * 2), canUseRadixSort);
   }
 
   public UnsafeInMemorySorter(
@@ -215,7 +216,12 @@ public void expandPointerArray(LongArray newArray) {
     if (newArray.size() < array.size()) {
       throw new SparkOutOfMemoryError("Not enough memory to grow pointer array");
     }
-    MemoryBlock.copyMemory(array.memoryBlock(), newArray.memoryBlock(), pos * 8L);
+    Platform.copyMemory(
+      array.getBaseObject(),
+      array.getBaseOffset(),
+      newArray.getBaseObject(),
+      newArray.getBaseOffset(),
+      pos * 8L);
     consumer.freeArray(array);
     array = newArray;
     usableCapacity = getUsableCapacity();
@@ -342,7 +348,10 @@ public UnsafeSorterIterator getSortedIterator() {
           array, nullBoundaryPos, (pos - nullBoundaryPos) / 2L, 0, 7,
           radixSortSupport.sortDescending(), radixSortSupport.sortSigned());
       } else {
-        MemoryBlock unused = array.memoryBlock().subBlock(pos * 8L, (array.size() - pos) * 8L);
+        MemoryBlock unused = new MemoryBlock(
+          array.getBaseObject(),
+          array.getBaseOffset() + pos * 8L,
+          (array.size() - pos) * 8L);
         LongArray buffer = new LongArray(unused);
         Sorter<RecordPointerAndKeyPrefix, LongArray> sorter =
           new Sorter<>(new UnsafeSortDataFormat(buffer));
diff --git a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSortDataFormat.java b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSortDataFormat.java
index 37772f41caa87..d9f84d10e9051 100644
--- a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSortDataFormat.java
+++ b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSortDataFormat.java
@@ -84,7 +84,7 @@ public void copyRange(LongArray src, int srcPos, LongArray dst, int dstPos, int
 
   @Override
   public LongArray allocate(int length) {
-    assert (length * 2L <= buffer.size()) :
+    assert (length * 2 <= buffer.size()) :
       "the buffer is smaller than required: " + buffer.size() + " < " + (length * 2);
     return buffer;
   }
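
The assert above drops the 2L back to a plain int multiply, which only differs for very large lengths, where the int product wraps around before the comparison. A tiny stand-alone illustration of that overflow; the value is arbitrary.

public class IntOverflowSketch {
  public static void main(String[] args) {
    int length = 1_500_000_000;        // fits in an int
    long asInt  = length * 2;          // int multiply overflows, then widens: -1294967296
    long asLong = length * 2L;         // long multiply: 3000000000
    System.out.println(asInt + " vs " + asLong);
  }
}
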
diff --git a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSorterSpillReader.java b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSorterSpillReader.java
index fb179d07edebc..2c53c8d809d2e 100644
--- a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSorterSpillReader.java
+++ b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSorterSpillReader.java
@@ -72,15 +72,21 @@ public UnsafeSorterSpillReader(
       bufferSizeBytes = DEFAULT_BUFFER_SIZE_BYTES;
     }
 
+    final double readAheadFraction =
+        SparkEnv.get() == null ? 0.5 :
+             SparkEnv.get().conf().getDouble("spark.unsafe.sorter.spill.read.ahead.fraction", 0.5);
+
+    // SPARK-23310: Disable read-ahead input stream, because it is causing lock contention and perf
+    // regression for TPC-DS queries.
     final boolean readAheadEnabled = SparkEnv.get() != null &&
-        SparkEnv.get().conf().getBoolean("spark.unsafe.sorter.spill.read.ahead.enabled", true);
+        SparkEnv.get().conf().getBoolean("spark.unsafe.sorter.spill.read.ahead.enabled", false);
 
     final InputStream bs =
         new NioBufferedFileInputStream(file, (int) bufferSizeBytes);
     try {
       if (readAheadEnabled) {
         this.in = new ReadAheadInputStream(serializerManager.wrapStream(blockId, bs),
-                (int) bufferSizeBytes);
+                (int) bufferSizeBytes, (int) (bufferSizeBytes * readAheadFraction));
       } else {
         this.in = serializerManager.wrapStream(blockId, bs);
       }
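
The spill-reader hunk restores the spark.unsafe.sorter.spill.read.ahead.fraction knob and flips the read-ahead default to off (SPARK-23310). A hedged sketch of how the threshold handed to ReadAheadInputStream is derived from those settings; the buffer size is illustrative.

import org.apache.spark.SparkConf;

public class SpillReadAheadConfSketch {
  public static void main(String[] args) {
    // Re-enable read-ahead (default false after this revert) and size the threshold
    // at half of each internal buffer, matching the 0.5 default fraction above.
    SparkConf conf = new SparkConf()
        .set("spark.unsafe.sorter.spill.read.ahead.enabled", "true")
        .set("spark.unsafe.sorter.spill.read.ahead.fraction", "0.5");

    long bufferSizeBytes = 1L << 20;                                  // illustrative 1 MiB buffer
    double fraction = conf.getDouble("spark.unsafe.sorter.spill.read.ahead.fraction", 0.5);
    int thresholdBytes = (int) (bufferSizeBytes * fraction);          // 524288
    System.out.println(thresholdBytes);
  }
}
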
diff --git a/core/src/main/resources/org/apache/spark/ui/static/executorspage.js b/core/src/main/resources/org/apache/spark/ui/static/executorspage.js
index 6717af3ac4daf..d430d8c5fb35a 100644
--- a/core/src/main/resources/org/apache/spark/ui/static/executorspage.js
+++ b/core/src/main/resources/org/apache/spark/ui/static/executorspage.js
@@ -25,18 +25,12 @@ function getThreadDumpEnabled() {
     return threadDumpEnabled;
 }
 
-function formatStatus(status, type, row) {
-    if (row.isBlacklisted) {
-        return "Blacklisted";
-    }
-
+function formatStatus(status, type) {
     if (status) {
-        if (row.blacklistedInStages.length == 0) {
-            return "Active"
-        }
-        return "Active (Blacklisted in Stages: [" + row.blacklistedInStages.join(", ") + "])";
+        return "Active"
+    } else {
+        return "Dead"
     }
-    return "Dead"
 }
 
 jQuery.extend(jQuery.fn.dataTableExt.oSort, {
@@ -421,10 +415,9 @@ $(document).ready(function () {
                             }
                         },
                         {data: 'hostPort'},
-                        {
-                            data: 'isActive',
-                            render: function (data, type, row) {
-                                return formatStatus (data, type, row);
+                        {data: 'isActive', render: function (data, type, row) {
+                            if (row.isBlacklisted) return "Blacklisted";
+                            else return formatStatus (data, type);
                             }
                         },
                         {data: 'rddBlocks'},
diff --git a/core/src/main/resources/org/apache/spark/ui/static/historypage.js b/core/src/main/resources/org/apache/spark/ui/static/historypage.js
index abc2ec0fa6531..f0b2a5a833a99 100644
--- a/core/src/main/resources/org/apache/spark/ui/static/historypage.js
+++ b/core/src/main/resources/org/apache/spark/ui/static/historypage.js
@@ -113,7 +113,7 @@ $(document).ready(function() {
       status: (requestedIncomplete ? "running" : "completed")
     };
 
-    $.getJSON(uiRoot + "/api/v1/applications", appParams, function(response,status,jqXHR) {
+    $.getJSON("api/v1/applications", appParams, function(response,status,jqXHR) {
       var array = [];
       var hasMultipleAttempts = false;
       for (i in response) {
@@ -151,7 +151,7 @@ $(document).ready(function() {
         "showCompletedColumns": !requestedIncomplete,
       }
 
-      $.get(uiRoot + "/static/historypage-template.html", function(template) {
+      $.get("static/historypage-template.html", function(template) {
         var sibling = historySummary.prev();
         historySummary.detach();
         var apps = $(Mustache.render($(template).filter("#history-summary-template").html(),data));
diff --git a/core/src/main/resources/org/apache/spark/ui/static/webui.js b/core/src/main/resources/org/apache/spark/ui/static/webui.js
index f01c567ba58ad..e575c4c78970d 100644
--- a/core/src/main/resources/org/apache/spark/ui/static/webui.js
+++ b/core/src/main/resources/org/apache/spark/ui/static/webui.js
@@ -72,7 +72,6 @@ $(function() {
   collapseTablePageLoad('collapse-aggregated-allActiveStages','aggregated-allActiveStages');
   collapseTablePageLoad('collapse-aggregated-allPendingStages','aggregated-allPendingStages');
   collapseTablePageLoad('collapse-aggregated-allCompletedStages','aggregated-allCompletedStages');
-  collapseTablePageLoad('collapse-aggregated-allSkippedStages','aggregated-allSkippedStages');
   collapseTablePageLoad('collapse-aggregated-allFailedStages','aggregated-allFailedStages');
   collapseTablePageLoad('collapse-aggregated-activeStages','aggregated-activeStages');
   collapseTablePageLoad('collapse-aggregated-pendingOrSkippedStages','aggregated-pendingOrSkippedStages');
@@ -81,6 +80,4 @@ $(function() {
   collapseTablePageLoad('collapse-aggregated-poolActiveStages','aggregated-poolActiveStages');
   collapseTablePageLoad('collapse-aggregated-tasks','aggregated-tasks');
   collapseTablePageLoad('collapse-aggregated-rdds','aggregated-rdds');
-  collapseTablePageLoad('collapse-aggregated-activeBatches','aggregated-activeBatches');
-  collapseTablePageLoad('collapse-aggregated-completedBatches','aggregated-completedBatches');
 });
\ No newline at end of file
diff --git a/core/src/main/scala/org/apache/spark/Dependency.scala b/core/src/main/scala/org/apache/spark/Dependency.scala
index 9ea6d2fa2fd95..ca52ecafa2cc8 100644
--- a/core/src/main/scala/org/apache/spark/Dependency.scala
+++ b/core/src/main/scala/org/apache/spark/Dependency.scala
@@ -76,9 +76,6 @@ class ShuffleDependency[K: ClassTag, V: ClassTag, C: ClassTag](
     val mapSideCombine: Boolean = false)
   extends Dependency[Product2[K, V]] {
 
-  if (mapSideCombine) {
-    require(aggregator.isDefined, "Map-side combine without Aggregator specified!")
-  }
   override def rdd: RDD[Product2[K, V]] = _rdd.asInstanceOf[RDD[Product2[K, V]]]
 
   private[spark] val keyClassName: String = reflect.classTag[K].runtimeClass.getName
diff --git a/core/src/main/scala/org/apache/spark/ExecutorAllocationClient.scala b/core/src/main/scala/org/apache/spark/ExecutorAllocationClient.scala
index 63d87b4cd385c..9112d93a86b2a 100644
--- a/core/src/main/scala/org/apache/spark/ExecutorAllocationClient.scala
+++ b/core/src/main/scala/org/apache/spark/ExecutorAllocationClient.scala
@@ -55,18 +55,18 @@ private[spark] trait ExecutorAllocationClient {
   /**
    * Request that the cluster manager kill the specified executors.
    *
+   * When asking the executor to be replaced, the executor loss is considered a failure, and
+   * killed tasks that are running on the executor will count towards the failure limits. If no
+   * replacement is being requested, then the tasks will not count towards the limit.
+   *
    * @param executorIds identifiers of executors to kill
-   * @param adjustTargetNumExecutors whether the target number of executors will be adjusted down
-   *                                 after these executors have been killed
-   * @param countFailures if there are tasks running on the executors when they are killed, whether
-    *                     to count those failures toward task failure limits
+   * @param replace whether to replace the killed executors with new ones, default false
    * @param force whether to force kill busy executors, default false
    * @return the ids of the executors acknowledged by the cluster manager to be removed.
    */
   def killExecutors(
     executorIds: Seq[String],
-    adjustTargetNumExecutors: Boolean,
-    countFailures: Boolean,
+    replace: Boolean = false,
     force: Boolean = false): Seq[String]
 
   /**
@@ -81,8 +81,7 @@ private[spark] trait ExecutorAllocationClient {
    * @return whether the request is acknowledged by the cluster manager.
    */
   def killExecutor(executorId: String): Boolean = {
-    val killedExecutors = killExecutors(Seq(executorId), adjustTargetNumExecutors = true,
-      countFailures = false)
+    val killedExecutors = killExecutors(Seq(executorId))
     killedExecutors.nonEmpty && killedExecutors(0).equals(executorId)
   }
 }
diff --git a/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala b/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala
index aa363eeffffb8..6c59038f2a6c1 100644
--- a/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala
+++ b/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala
@@ -26,10 +26,9 @@ import scala.util.control.{ControlThrowable, NonFatal}
 import com.codahale.metrics.{Gauge, MetricRegistry}
 
 import org.apache.spark.internal.Logging
-import org.apache.spark.internal.config._
+import org.apache.spark.internal.config.{DYN_ALLOCATION_MAX_EXECUTORS, DYN_ALLOCATION_MIN_EXECUTORS}
 import org.apache.spark.metrics.source.Source
 import org.apache.spark.scheduler._
-import org.apache.spark.storage.BlockManagerMaster
 import org.apache.spark.util.{Clock, SystemClock, ThreadUtils, Utils}
 
 /**
@@ -69,10 +68,6 @@ import org.apache.spark.util.{Clock, SystemClock, ThreadUtils, Utils}
  *   spark.dynamicAllocation.maxExecutors - Upper bound on the number of executors
  *   spark.dynamicAllocation.initialExecutors - Number of executors to start with
  *
- *   spark.dynamicAllocation.executorAllocationRatio -
- *     This is used to reduce the parallelism of the dynamic allocation that can waste
- *     resources when tasks are small
- *
  *   spark.dynamicAllocation.schedulerBacklogTimeout (M) -
  *     If there are backlogged tasks for this duration, add new executors
  *
@@ -86,8 +81,7 @@ import org.apache.spark.util.{Clock, SystemClock, ThreadUtils, Utils}
 private[spark] class ExecutorAllocationManager(
     client: ExecutorAllocationClient,
     listenerBus: LiveListenerBus,
-    conf: SparkConf,
-    blockManagerMaster: BlockManagerMaster)
+    conf: SparkConf)
   extends Logging {
 
   allocationManager =>
@@ -120,12 +114,9 @@ private[spark] class ExecutorAllocationManager(
   // TODO: The default value of 1 for spark.executor.cores works right now because dynamic
   // allocation is only supported for YARN and the default number of cores per executor in YARN is
   // 1, but it might need to be attained differently for different cluster managers
-  private val tasksPerExecutorForFullParallelism =
+  private val tasksPerExecutor =
     conf.getInt("spark.executor.cores", 1) / conf.getInt("spark.task.cpus", 1)
 
-  private val executorAllocationRatio =
-    conf.get(DYN_ALLOCATION_EXECUTOR_ALLOCATION_RATIO)
-
   validateSettings()
 
   // Number of executors to add in the next round
@@ -160,7 +151,7 @@ private[spark] class ExecutorAllocationManager(
   private var clock: Clock = new SystemClock()
 
   // Listener for Spark events that impact the allocation policy
-  val listener = new ExecutorAllocationListener
+  private val listener = new ExecutorAllocationListener
 
   // Executor that handles the scheduling task.
   private val executor =
@@ -216,13 +207,8 @@ private[spark] class ExecutorAllocationManager(
       throw new SparkException("Dynamic allocation of executors requires the external " +
         "shuffle service. You may enable this through spark.shuffle.service.enabled.")
     }
-    if (tasksPerExecutorForFullParallelism == 0) {
-      throw new SparkException("spark.executor.cores must not be < spark.task.cpus.")
-    }
-
-    if (executorAllocationRatio > 1.0 || executorAllocationRatio <= 0.0) {
-      throw new SparkException(
-        "spark.dynamicAllocation.executorAllocationRatio must be > 0 and <= 1.0")
+    if (tasksPerExecutor == 0) {
+      throw new SparkException("spark.executor.cores must not be less than spark.task.cpus.")
     }
   }
 
@@ -285,9 +271,7 @@ private[spark] class ExecutorAllocationManager(
    */
   private def maxNumExecutorsNeeded(): Int = {
     val numRunningOrPendingTasks = listener.totalPendingTasks + listener.totalRunningTasks
-    math.ceil(numRunningOrPendingTasks * executorAllocationRatio /
-              tasksPerExecutorForFullParallelism)
-      .toInt
+    (numRunningOrPendingTasks + tasksPerExecutor - 1) / tasksPerExecutor
   }
 
   private def totalRunningTasks(): Int = synchronized {
@@ -350,11 +334,6 @@ private[spark] class ExecutorAllocationManager(
 
       // If the new target has not changed, avoid sending a message to the cluster manager
       if (numExecutorsTarget < oldNumExecutorsTarget) {
-        // We lower the target number of executors but don't actively kill any yet.  Killing is
-        // controlled separately by an idle timeout.  It's still helpful to reduce the target number
-        // in case an executor just happens to get lost (eg., bad hardware, or the cluster manager
-        // preempts it) -- in that case, there is no point in trying to immediately  get a new
-        // executor, since we wouldn't even use it yet.
         client.requestTotalExecutors(numExecutorsTarget, localityAwareTasks, hostToLocalTaskCount)
         logDebug(s"Lowering target number of executors to $numExecutorsTarget (previously " +
           s"$oldNumExecutorsTarget) because not all requested executors are actually needed")
@@ -476,10 +455,7 @@ private[spark] class ExecutorAllocationManager(
     val executorsRemoved = if (testing) {
       executorIdsToBeRemoved
     } else {
-      // We don't want to change our target number of executors, because we already did that
-      // when the task backlog decreased.
-      client.killExecutors(executorIdsToBeRemoved, adjustTargetNumExecutors = false,
-        countFailures = false, force = false)
+      client.killExecutors(executorIdsToBeRemoved)
     }
     // [SPARK-21834] killExecutors api reduces the target number of executors.
     // So we need to update the target with desired value.
@@ -599,7 +575,7 @@ private[spark] class ExecutorAllocationManager(
         // Note that it is not necessary to query the executors since all the cached
         // blocks we are concerned with are reported to the driver. Note that this
         // does not include broadcast blocks.
-        val hasCachedBlocks = blockManagerMaster.hasCachedBlocks(executorId)
+        val hasCachedBlocks = SparkEnv.get.blockManager.master.hasCachedBlocks(executorId)
         val now = clock.getTimeMillis()
         val timeout = {
           if (hasCachedBlocks) {
@@ -634,7 +610,7 @@ private[spark] class ExecutorAllocationManager(
    * This class is intentionally conservative in its assumptions about the relative ordering
    * and consistency of events returned by the listener.
    */
-  private[spark] class ExecutorAllocationListener extends SparkListener {
+  private class ExecutorAllocationListener extends SparkListener {
 
     private val stageIdToNumTasks = new mutable.HashMap[Int, Int]
     // Number of running tasks per stage including speculative tasks.
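
maxNumExecutorsNeeded above goes back from the ratio-weighted ceiling to plain integer ceiling division. Both formulas side by side in a small sketch; the method names are illustrative.

public class ExecutorCountSketch {
  // Restored formula: integer ceiling of tasks / tasksPerExecutor.
  static int ceilDiv(int tasks, int tasksPerExecutor) {
    return (tasks + tasksPerExecutor - 1) / tasksPerExecutor;
  }

  // Removed formula: the same ceiling, scaled down by executorAllocationRatio.
  static int ratioScaled(int tasks, int tasksPerExecutor, double ratio) {
    return (int) Math.ceil(tasks * ratio / tasksPerExecutor);
  }

  public static void main(String[] args) {
    System.out.println(ceilDiv(10, 4));              // 3 executors for 10 pending/running tasks
    System.out.println(ratioScaled(10, 4, 0.5));     // 2 executors with a 0.5 allocation ratio
  }
}
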
diff --git a/core/src/main/scala/org/apache/spark/MapOutputTracker.scala b/core/src/main/scala/org/apache/spark/MapOutputTracker.scala
index 73646051f264c..195fd4f818b36 100644
--- a/core/src/main/scala/org/apache/spark/MapOutputTracker.scala
+++ b/core/src/main/scala/org/apache/spark/MapOutputTracker.scala
@@ -22,7 +22,7 @@ import java.util.concurrent.{ConcurrentHashMap, LinkedBlockingQueue, ThreadPoolE
 import java.util.zip.{GZIPInputStream, GZIPOutputStream}
 
 import scala.collection.JavaConverters._
-import scala.collection.mutable.{HashMap, HashSet, ListBuffer, Map}
+import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet, Map}
 import scala.concurrent.{ExecutionContext, Future}
 import scala.concurrent.duration.Duration
 import scala.reflect.ClassTag
@@ -282,7 +282,7 @@ private[spark] abstract class MapOutputTracker(conf: SparkConf) extends Logging
 
   // For testing
   def getMapSizesByExecutorId(shuffleId: Int, reduceId: Int)
-      : Iterator[(BlockManagerId, Seq[(BlockId, Long)])] = {
+      : Seq[(BlockManagerId, Seq[(BlockId, Long)])] = {
     getMapSizesByExecutorId(shuffleId, reduceId, reduceId + 1)
   }
 
@@ -296,7 +296,7 @@ private[spark] abstract class MapOutputTracker(conf: SparkConf) extends Logging
    *         describing the shuffle blocks that are stored at that block manager.
    */
   def getMapSizesByExecutorId(shuffleId: Int, startPartition: Int, endPartition: Int)
-      : Iterator[(BlockManagerId, Seq[(BlockId, Long)])]
+      : Seq[(BlockManagerId, Seq[(BlockId, Long)])]
 
   /**
    * Deletes map output status information for the specified shuffle stage.
@@ -632,10 +632,9 @@ private[spark] class MapOutputTrackerMaster(
     }
   }
 
-  // Get blocks sizes by executor Id. Note that zero-sized blocks are excluded in the result.
   // This method is only called in local-mode.
   def getMapSizesByExecutorId(shuffleId: Int, startPartition: Int, endPartition: Int)
-      : Iterator[(BlockManagerId, Seq[(BlockId, Long)])] = {
+      : Seq[(BlockManagerId, Seq[(BlockId, Long)])] = {
     logDebug(s"Fetching outputs for shuffle $shuffleId, partitions $startPartition-$endPartition")
     shuffleStatuses.get(shuffleId) match {
       case Some (shuffleStatus) =>
@@ -643,7 +642,7 @@ private[spark] class MapOutputTrackerMaster(
           MapOutputTracker.convertMapStatuses(shuffleId, startPartition, endPartition, statuses)
         }
       case None =>
-        Iterator.empty
+        Seq.empty
     }
   }
 
@@ -670,9 +669,8 @@ private[spark] class MapOutputTrackerWorker(conf: SparkConf) extends MapOutputTr
   /** Remembers which map output locations are currently being fetched on an executor. */
   private val fetching = new HashSet[Int]
 
-  // Get blocks sizes by executor Id. Note that zero-sized blocks are excluded in the result.
   override def getMapSizesByExecutorId(shuffleId: Int, startPartition: Int, endPartition: Int)
-      : Iterator[(BlockManagerId, Seq[(BlockId, Long)])] = {
+      : Seq[(BlockManagerId, Seq[(BlockId, Long)])] = {
     logDebug(s"Fetching outputs for shuffle $shuffleId, partitions $startPartition-$endPartition")
     val statuses = getStatuses(shuffleId)
     try {
@@ -843,7 +841,6 @@ private[spark] object MapOutputTracker extends Logging {
    * Given an array of map statuses and a range of map output partitions, returns a sequence that,
    * for each block manager ID, lists the shuffle block IDs and corresponding shuffle block sizes
    * stored at that block manager.
-   * Note that empty blocks are filtered in the result.
    *
    * If any of the statuses is null (indicating a missing location due to a failed mapper),
    * throws a FetchFailedException.
@@ -860,24 +857,22 @@ private[spark] object MapOutputTracker extends Logging {
       shuffleId: Int,
       startPartition: Int,
       endPartition: Int,
-      statuses: Array[MapStatus]): Iterator[(BlockManagerId, Seq[(BlockId, Long)])] = {
+      statuses: Array[MapStatus]): Seq[(BlockManagerId, Seq[(BlockId, Long)])] = {
     assert (statuses != null)
-    val splitsByAddress = new HashMap[BlockManagerId, ListBuffer[(BlockId, Long)]]
-    for ((status, mapId) <- statuses.iterator.zipWithIndex) {
+    val splitsByAddress = new HashMap[BlockManagerId, ArrayBuffer[(BlockId, Long)]]
+    for ((status, mapId) <- statuses.zipWithIndex) {
       if (status == null) {
         val errorMessage = s"Missing an output location for shuffle $shuffleId"
         logError(errorMessage)
         throw new MetadataFetchFailedException(shuffleId, startPartition, errorMessage)
       } else {
         for (part <- startPartition until endPartition) {
-          val size = status.getSizeForBlock(part)
-          if (size != 0) {
-            splitsByAddress.getOrElseUpdate(status.location, ListBuffer()) +=
-                ((ShuffleBlockId(shuffleId, mapId, part), size))
-          }
+          splitsByAddress.getOrElseUpdate(status.location, ArrayBuffer()) +=
+            ((ShuffleBlockId(shuffleId, mapId, part), status.getSizeForBlock(part)))
         }
       }
     }
-    splitsByAddress.iterator
+
+    splitsByAddress.toSeq
   }
 }
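
convertMapStatuses above returns to grouping every (block, size) pair by its BlockManagerId without filtering zero-sized blocks. A simplified Java sketch of that grouping step, with plain strings standing in for BlockManagerId and block ids (both hypothetical simplifications):

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class GroupBlocksSketch {
  public static void main(String[] args) {
    // location -> list of (mapId, size); strings stand in for BlockManagerId / ShuffleBlockId.
    Map<String, List<long[]>> splitsByAddress = new HashMap<>();

    String[] locations = {"exec-1", "exec-2", "exec-1"};
    long[] sizes = {1024L, 0L, 2048L};
    boolean skipZeroSized = false;   // the restored behaviour keeps zero-sized blocks

    for (int mapId = 0; mapId < locations.length; mapId++) {
      if (skipZeroSized && sizes[mapId] == 0L) continue;
      splitsByAddress
          .computeIfAbsent(locations[mapId], k -> new ArrayList<>())
          .add(new long[]{mapId, sizes[mapId]});
    }
    System.out.println(splitsByAddress.get("exec-1").size() + " blocks on exec-1"); // 2 blocks on exec-1
  }
}
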
diff --git a/core/src/main/scala/org/apache/spark/SSLOptions.scala b/core/src/main/scala/org/apache/spark/SSLOptions.scala
index 04c38f12acc78..477b01968c6ef 100644
--- a/core/src/main/scala/org/apache/spark/SSLOptions.scala
+++ b/core/src/main/scala/org/apache/spark/SSLOptions.scala
@@ -128,7 +128,7 @@ private[spark] case class SSLOptions(
   }
 
   /** Returns a string representation of this SSLOptions with all the passwords masked. */
-  override def toString: String = s"SSLOptions{enabled=$enabled, port=$port, " +
+  override def toString: String = s"SSLOptions{enabled=$enabled, " +
       s"keyStore=$keyStore, keyStorePassword=${keyStorePassword.map(_ => "xxx")}, " +
       s"trustStore=$trustStore, trustStorePassword=${trustStorePassword.map(_ => "xxx")}, " +
       s"protocol=$protocol, enabledAlgorithms=$enabledAlgorithms}"
@@ -142,7 +142,6 @@ private[spark] object SSLOptions extends Logging {
    *
    * The following settings are allowed:
    * $ - `[ns].enabled` - `true` or `false`, to enable or disable SSL respectively
-   * $ - `[ns].port` - the port where to bind the SSL server
    * $ - `[ns].keyStore` - a path to the key-store file; can be relative to the current directory
    * $ - `[ns].keyStorePassword` - a password to the key-store file
    * $ - `[ns].keyPassword` - a password to the private key
diff --git a/core/src/main/scala/org/apache/spark/SecurityManager.scala b/core/src/main/scala/org/apache/spark/SecurityManager.scala
index b87476322573d..4c1dbe3ffb4ad 100644
--- a/core/src/main/scala/org/apache/spark/SecurityManager.scala
+++ b/core/src/main/scala/org/apache/spark/SecurityManager.scala
@@ -17,10 +17,15 @@
 
 package org.apache.spark
 
+import java.lang.{Byte => JByte}
 import java.net.{Authenticator, PasswordAuthentication}
 import java.nio.charset.StandardCharsets.UTF_8
+import java.security.{KeyStore, SecureRandom}
+import java.security.cert.X509Certificate
 import javax.net.ssl._
 
+import com.google.common.hash.HashCodes
+import com.google.common.io.Files
 import org.apache.hadoop.io.Text
 import org.apache.hadoop.security.{Credentials, UserGroupInformation}
 
@@ -37,10 +42,148 @@ import org.apache.spark.util.Utils
  * should access it from that. There are some cases where the SparkEnv hasn't been
  * initialized yet and this class must be instantiated directly.
  *
- * This class implements all of the configuration related to security features described
- * in the "Security" document. Please refer to that document for specific features implemented
- * here.
+ * Spark currently supports authentication via a shared secret.
+ * Authentication can be configured to be on via the 'spark.authenticate' configuration
+ * parameter. This parameter controls whether the Spark communication protocols do
+ * authentication using the shared secret. This authentication is a basic handshake to
+ * make sure both sides have the same shared secret and are allowed to communicate.
+ * If the shared secret is not identical they will not be allowed to communicate.
+ *
+ * The Spark UI can also be secured by using javax servlet filters. A user may want to
+ * secure the UI if it has data that other users should not be allowed to see. The javax
+ * servlet filter specified by the user can authenticate the user and then once the user
+ * is logged in, Spark can compare that user versus the view acls to make sure they are
+ * authorized to view the UI. The configs 'spark.acls.enable', 'spark.ui.view.acls' and
+ * 'spark.ui.view.acls.groups' control the behavior of the acls. Note that the person who
+ * started the application always has view access to the UI.
+ *
+ * Spark has a set of individual and group modify acls (`spark.modify.acls`) and
+ * (`spark.modify.acls.groups`) that controls which users and groups have permission to
+ * modify a single application. This would include things like killing the application.
+ * By default the person who started the application has modify access. For modify access
+ * through the UI, you must have a filter that does authentication in place for the modify
+ * acls to work properly.
+ *
+ * Spark also has a set of individual and group admin acls (`spark.admin.acls`) and
+ * (`spark.admin.acls.groups`) which is a set of users/administrators and admin groups
+ * who always have permission to view or modify the Spark application.
+ *
+ * Starting from version 1.3, Spark has partial support for encrypted connections with SSL.
+ *
+ * At this point spark has multiple communication protocols that need to be secured and
+ * different underlying mechanisms are used depending on the protocol:
+ *
+ *  - HTTP for broadcast and file server (via HttpServer) ->  Spark currently uses Jetty
+ *            for the HttpServer. Jetty supports multiple authentication mechanisms -
+ *            Basic, Digest, Form, Spnego, etc. It also supports multiple different login
+ *            services - Hash, JAAS, Spnego, JDBC, etc.  Spark currently uses the HashLoginService
+ *            to authenticate using DIGEST-MD5 via a single user and the shared secret.
+ *            Since we are using DIGEST-MD5, the shared secret is not passed on the wire
+ *            in plaintext.
+ *
+ *            We currently support SSL (https) for this communication protocol (see the details
+ *            below).
+ *
+ *            The Spark HttpServer installs the HashLoginServer and configures it to DIGEST-MD5.
+ *            Any clients must specify the user and password. There is a default
+ *            Authenticator installed in the SecurityManager to how it does the authentication
+ *            and in this case gets the user name and password from the request.
+ *
+ *  - BlockTransferService -> The Spark BlockTransferServices uses java nio to asynchronously
+ *            exchange messages.  For this we use the Java SASL
+ *            (Simple Authentication and Security Layer) API and again use DIGEST-MD5
+ *            as the authentication mechanism. This means the shared secret is not passed
+ *            over the wire in plaintext.
+ *            Note that SASL is pluggable as to what mechanism it uses.  We currently use
+ *            DIGEST-MD5 but this could be changed to use Kerberos or other in the future.
+ *            Spark currently supports "auth" for the quality of protection, which means
+ *            the connection does not support integrity or privacy protection (encryption)
+ *            after authentication. SASL also supports "auth-int" and "auth-conf" which
+ *            SPARK could support in the future to allow the user to specify the quality
+ *            of protection they want. If we support those, the messages will also have to
+ *            be wrapped and unwrapped via the SaslServer/SaslClient.wrap/unwrap API's.
+ *
+ *            Since the NioBlockTransferService does asynchronous messages passing, the SASL
+ *            authentication is a bit more complex. A ConnectionManager can be both a client
+ *            and a Server, so for a particular connection it has to determine what to do.
+ *            A ConnectionId was added to be able to track connections and is used to
+ *            match up incoming messages with connections waiting for authentication.
+ *            The ConnectionManager tracks all the sendingConnections using the ConnectionId,
+ *            waits for the response from the server, and does the handshake before sending
+ *            the real message.
+ *
+ *            The NettyBlockTransferService ensures that SASL authentication is performed
+ *            synchronously prior to any other communication on a connection. This is done in
+ *            SaslClientBootstrap on the client side and SaslRpcHandler on the server side.
+ *
+ *  - HTTP for the Spark UI -> the UI was changed to use servlets so that javax servlet filters
+ *            can be used. Yarn requires a specific AmIpFilter be installed for security to work
+ *            properly. For non-Yarn deployments, users can write a filter to go through their
+ *            organization's normal login service. If an authentication filter is in place then the
+ *            SparkUI can be configured to check the logged in user against the list of users who
+ *            have view acls to see if that user is authorized.
+ *            The filters can also be used for many different purposes. For instance filters
+ *            could be used for logging, encryption, or compression.
+ *
+ *  The exact mechanisms used to generate/distribute the shared secret are deployment-specific.
+ *
+ *  For YARN deployments, the secret is automatically generated. The secret is placed in the Hadoop
+ *  UGI which gets passed around via the Hadoop RPC mechanism. Hadoop RPC can be configured to
+ *  support different levels of protection. See the Hadoop documentation for more details. Each
+ *  Spark application on YARN gets a different shared secret.
+ *
+ *  On YARN, the Spark UI gets configured to use the Hadoop YARN AmIpFilter which requires the user
+ *  to go through the ResourceManager Proxy. That proxy is there to reduce the possibility of web
+ *  based attacks through YARN. Hadoop can be configured to use filters to do authentication. That
+ *  authentication then happens via the ResourceManager Proxy and Spark will use that to do
+ *  authorization against the view acls.
+ *
+ *  For other Spark deployments, the shared secret must be specified via the
+ *  spark.authenticate.secret config.
+ *  All the nodes (Master and Workers) and the applications need to have the same shared secret.
+ *  This again is not ideal as one user could potentially affect another users application.
+ *  This should be enhanced in the future to provide better protection.
+ *  If the UI needs to be secure, the user needs to install a javax servlet filter to do the
+ *  authentication. Spark will then use that user to compare against the view acls to do
+ *  authorization. If not filter is in place the user is generally null and no authorization
+ *  can take place.
+ *
+ *  When authentication is being used, encryption can also be enabled by setting the option
+ *  spark.authenticate.enableSaslEncryption to true. This is only supported by communication
+ *  channels that use the network-common library, and can be used as an alternative to SSL in those
+ *  cases.
+ *
+ *  SSL can be used for encryption for certain communication channels. The user can configure the
+ *  default SSL settings which will be used for all the supported communication protocols unless
+ *  they are overwritten by protocol specific settings. This way the user can easily provide the
+ *  common settings for all the protocols without disabling the ability to configure each one
+ *  individually.
+ *
+ *  All the SSL settings like `spark.ssl.xxx` where `xxx` is a particular configuration property,
+ *  denote the global configuration for all the supported protocols. In order to override the global
+ *  configuration for the particular protocol, the properties must be overwritten in the
+ *  protocol-specific namespace. Use `spark.ssl.yyy.xxx` settings to overwrite the global
+ *  configuration for particular protocol denoted by `yyy`. Currently `yyy` can be only`fs` for
+ *  broadcast and file server.
+ *
+ *  Refer to [[org.apache.spark.SSLOptions]] documentation for the list of
+ *  options that can be specified.
+ *
+ *  SecurityManager initializes SSLOptions objects for different protocols separately. SSLOptions
+ *  object parses Spark configuration at a given namespace and builds the common representation
+ *  of SSL settings. SSLOptions is then used to provide protocol-specific SSLContextFactory for
+ *  Jetty.
+ *
+ *  SSL must be configured on each node and configured for each component involved in
+ *  communication using the particular protocol. In YARN clusters, the key-store can be prepared on
+ *  the client side then distributed and used by the executors as the part of the application
+ *  (YARN allows the user to deploy files before the application is started).
+ *  In standalone deployment, the user needs to provide key-stores and configuration
+ *  options for master and workers. In this mode, the user may allow the executors to use the SSL
+ *  settings inherited from the worker which spawned that executor. It can be accomplished by
+ *  setting `spark.ssl.useNodeLocalConf` to `true`.
  */
+
 private[spark] class SecurityManager(
     sparkConf: SparkConf,
     val ioEncryptionKey: Option[Array[Byte]] = None)
@@ -84,7 +227,6 @@ private[spark] class SecurityManager(
   setViewAclsGroups(sparkConf.get("spark.ui.view.acls.groups", ""));
   setModifyAclsGroups(sparkConf.get("spark.modify.acls.groups", ""));
 
-  private var secretKey: String = _
   logInfo("SecurityManager: authentication " + (if (authOn) "enabled" else "disabled") +
     "; ui acls " + (if (aclsOn) "enabled" else "disabled") +
     "; users  with view permissions: " + viewAcls.toString() +
@@ -114,6 +256,51 @@ private[spark] class SecurityManager(
   // the default SSL configuration - it will be used by all communication layers unless overwritten
   private val defaultSSLOptions = SSLOptions.parse(sparkConf, "spark.ssl", defaults = None)
 
+  // SSL configuration for the file server. This is used by Utils.setupSecureURLConnection().
+  val fileServerSSLOptions = getSSLOptions("fs")
+  val (sslSocketFactory, hostnameVerifier) = if (fileServerSSLOptions.enabled) {
+    val trustStoreManagers =
+      for (trustStore <- fileServerSSLOptions.trustStore) yield {
+        val input = Files.asByteSource(fileServerSSLOptions.trustStore.get).openStream()
+
+        try {
+          val ks = KeyStore.getInstance(KeyStore.getDefaultType)
+          ks.load(input, fileServerSSLOptions.trustStorePassword.get.toCharArray)
+
+          val tmf = TrustManagerFactory.getInstance(TrustManagerFactory.getDefaultAlgorithm)
+          tmf.init(ks)
+          tmf.getTrustManagers
+        } finally {
+          input.close()
+        }
+      }
+
+    lazy val credulousTrustStoreManagers = Array({
+      logWarning("Using 'accept-all' trust manager for SSL connections.")
+      new X509TrustManager {
+        override def getAcceptedIssuers: Array[X509Certificate] = null
+
+        override def checkClientTrusted(x509Certificates: Array[X509Certificate], s: String) {}
+
+        override def checkServerTrusted(x509Certificates: Array[X509Certificate], s: String) {}
+      }: TrustManager
+    })
+
+    require(fileServerSSLOptions.protocol.isDefined,
+      "spark.ssl.protocol is required when enabling SSL connections.")
+
+    val sslContext = SSLContext.getInstance(fileServerSSLOptions.protocol.get)
+    sslContext.init(null, trustStoreManagers.getOrElse(credulousTrustStoreManagers), null)
+
+    val hostVerifier = new HostnameVerifier {
+      override def verify(s: String, sslSession: SSLSession): Boolean = true
+    }
+
+    (Some(sslContext.getSocketFactory), Some(hostVerifier))
+  } else {
+    (None, None)
+  }
+
   def getSSLOptions(module: String): SSLOptions = {
     val opts = SSLOptions.parse(sparkConf, s"spark.ssl.$module", Some(defaultSSLOptions))
     logDebug(s"Created SSL options for $module: $opts")
@@ -317,12 +504,6 @@ private[spark] class SecurityManager(
       val creds = UserGroupInformation.getCurrentUser().getCredentials()
       Option(creds.getSecretKey(SECRET_LOOKUP_KEY))
         .map { bytes => new String(bytes, UTF_8) }
-        // Secret key may not be found in current UGI's credentials.
-        // This happens when UGI is refreshed in the driver side by UGI's loginFromKeytab but not
-        // copy secret key from original UGI to the new one. This exists in ThriftServer's Hive
-        // logic. So as a workaround, storing secret key in a local variable to make it visible
-        // in different context.
-        .orElse(Option(secretKey))
         .orElse(Option(sparkConf.getenv(ENV_AUTH_SECRET)))
         .orElse(sparkConf.getOption(SPARK_AUTH_SECRET_CONF))
         .getOrElse {
@@ -339,30 +520,28 @@ private[spark] class SecurityManager(
    *
    * If authentication is disabled, do nothing.
    *
-   * In YARN and local mode, generate a new secret and store it in the current user's credentials.
+   * In YARN mode, generate a new secret and store it in the current user's credentials.
    *
    * In other modes, assert that the auth secret is set in the configuration.
    */
   def initializeAuth(): Unit = {
-    import SparkMasterRegex._
-
     if (!sparkConf.get(NETWORK_AUTH_ENABLED)) {
       return
     }
 
-    val master = sparkConf.get(SparkLauncher.SPARK_MASTER, "")
-    master match {
-      case "yarn" | "local" | LOCAL_N_REGEX(_) | LOCAL_N_FAILURES_REGEX(_, _) =>
-        // Secret generation allowed here
-      case _ =>
-        require(sparkConf.contains(SPARK_AUTH_SECRET_CONF),
-          s"A secret key must be specified via the $SPARK_AUTH_SECRET_CONF config.")
-        return
+    if (sparkConf.get(SparkLauncher.SPARK_MASTER, null) != "yarn") {
+      require(sparkConf.contains(SPARK_AUTH_SECRET_CONF),
+        s"A secret key must be specified via the $SPARK_AUTH_SECRET_CONF config.")
+      return
     }
 
-    secretKey = Utils.createSecret(sparkConf)
+    val rnd = new SecureRandom()
+    val length = sparkConf.getInt("spark.authenticate.secretBitLength", 256) / JByte.SIZE
+    val secretBytes = new Array[Byte](length)
+    rnd.nextBytes(secretBytes)
+
     val creds = new Credentials()
-    creds.addSecretKey(SECRET_LOOKUP_KEY, secretKey.getBytes(UTF_8))
+    creds.addSecretKey(SECRET_LOOKUP_KEY, secretBytes)
     UserGroupInformation.getCurrentUser().addCredentials(creds)
   }
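
For context on the two authentication paths in the hunk above, a minimal configuration sketch follows. The literal key names are assumed expansions of the NETWORK_AUTH_ENABLED and SPARK_AUTH_SECRET_CONF constants, which this diff only references by name.

    import org.apache.spark.SparkConf

    // Non-YARN deployment under the reverted logic: the shared secret must be
    // supplied explicitly, otherwise initializeAuth() fails its require() check.
    val conf = new SparkConf()
      .set("spark.authenticate", "true")               // assumed key for NETWORK_AUTH_ENABLED
      .set("spark.authenticate.secret", "not-a-real-secret") // assumed key for SPARK_AUTH_SECRET_CONF

    // On YARN no secret is configured: initializeAuth() draws
    // spark.authenticate.secretBitLength random bits from SecureRandom and stores
    // them in the current user's Hadoop Credentials under SECRET_LOOKUP_KEY.
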
 
diff --git a/core/src/main/scala/org/apache/spark/SparkConf.scala b/core/src/main/scala/org/apache/spark/SparkConf.scala
index 6c4c5c94cfa28..f53b2bed74c6e 100644
--- a/core/src/main/scala/org/apache/spark/SparkConf.scala
+++ b/core/src/main/scala/org/apache/spark/SparkConf.scala
@@ -265,18 +265,16 @@ class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging with Seria
    * Get a time parameter as seconds; throws a NoSuchElementException if it's not set. If no
    * suffix is provided then seconds are assumed.
    * @throws java.util.NoSuchElementException If the time parameter is not set
-   * @throws NumberFormatException If the value cannot be interpreted as seconds
    */
-  def getTimeAsSeconds(key: String): Long = catchIllegalValue(key) {
+  def getTimeAsSeconds(key: String): Long = {
     Utils.timeStringAsSeconds(get(key))
   }
 
   /**
    * Get a time parameter as seconds, falling back to a default if not set. If no
    * suffix is provided then seconds are assumed.
-   * @throws NumberFormatException If the value cannot be interpreted as seconds
    */
-  def getTimeAsSeconds(key: String, defaultValue: String): Long = catchIllegalValue(key) {
+  def getTimeAsSeconds(key: String, defaultValue: String): Long = {
     Utils.timeStringAsSeconds(get(key, defaultValue))
   }
 
@@ -284,18 +282,16 @@ class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging with Seria
    * Get a time parameter as milliseconds; throws a NoSuchElementException if it's not set. If no
    * suffix is provided then milliseconds are assumed.
    * @throws java.util.NoSuchElementException If the time parameter is not set
-   * @throws NumberFormatException If the value cannot be interpreted as milliseconds
    */
-  def getTimeAsMs(key: String): Long = catchIllegalValue(key) {
+  def getTimeAsMs(key: String): Long = {
     Utils.timeStringAsMs(get(key))
   }
 
   /**
    * Get a time parameter as milliseconds, falling back to a default if not set. If no
    * suffix is provided then milliseconds are assumed.
-   * @throws NumberFormatException If the value cannot be interpreted as milliseconds
    */
-  def getTimeAsMs(key: String, defaultValue: String): Long = catchIllegalValue(key) {
+  def getTimeAsMs(key: String, defaultValue: String): Long = {
     Utils.timeStringAsMs(get(key, defaultValue))
   }
 
@@ -303,26 +299,23 @@ class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging with Seria
    * Get a size parameter as bytes; throws a NoSuchElementException if it's not set. If no
    * suffix is provided then bytes are assumed.
    * @throws java.util.NoSuchElementException If the size parameter is not set
-   * @throws NumberFormatException If the value cannot be interpreted as bytes
    */
-  def getSizeAsBytes(key: String): Long = catchIllegalValue(key) {
+  def getSizeAsBytes(key: String): Long = {
     Utils.byteStringAsBytes(get(key))
   }
 
   /**
    * Get a size parameter as bytes, falling back to a default if not set. If no
    * suffix is provided then bytes are assumed.
-   * @throws NumberFormatException If the value cannot be interpreted as bytes
    */
-  def getSizeAsBytes(key: String, defaultValue: String): Long = catchIllegalValue(key) {
+  def getSizeAsBytes(key: String, defaultValue: String): Long = {
     Utils.byteStringAsBytes(get(key, defaultValue))
   }
 
   /**
    * Get a size parameter as bytes, falling back to a default if not set.
-   * @throws NumberFormatException If the value cannot be interpreted as bytes
    */
-  def getSizeAsBytes(key: String, defaultValue: Long): Long = catchIllegalValue(key) {
+  def getSizeAsBytes(key: String, defaultValue: Long): Long = {
     Utils.byteStringAsBytes(get(key, defaultValue + "B"))
   }
 
@@ -330,18 +323,16 @@ class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging with Seria
    * Get a size parameter as Kibibytes; throws a NoSuchElementException if it's not set. If no
    * suffix is provided then Kibibytes are assumed.
    * @throws java.util.NoSuchElementException If the size parameter is not set
-   * @throws NumberFormatException If the value cannot be interpreted as Kibibytes
    */
-  def getSizeAsKb(key: String): Long = catchIllegalValue(key) {
+  def getSizeAsKb(key: String): Long = {
     Utils.byteStringAsKb(get(key))
   }
 
   /**
    * Get a size parameter as Kibibytes, falling back to a default if not set. If no
    * suffix is provided then Kibibytes are assumed.
-   * @throws NumberFormatException If the value cannot be interpreted as Kibibytes
    */
-  def getSizeAsKb(key: String, defaultValue: String): Long = catchIllegalValue(key) {
+  def getSizeAsKb(key: String, defaultValue: String): Long = {
     Utils.byteStringAsKb(get(key, defaultValue))
   }
 
@@ -349,18 +340,16 @@ class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging with Seria
    * Get a size parameter as Mebibytes; throws a NoSuchElementException if it's not set. If no
    * suffix is provided then Mebibytes are assumed.
    * @throws java.util.NoSuchElementException If the size parameter is not set
-   * @throws NumberFormatException If the value cannot be interpreted as Mebibytes
    */
-  def getSizeAsMb(key: String): Long = catchIllegalValue(key) {
+  def getSizeAsMb(key: String): Long = {
     Utils.byteStringAsMb(get(key))
   }
 
   /**
    * Get a size parameter as Mebibytes, falling back to a default if not set. If no
    * suffix is provided then Mebibytes are assumed.
-   * @throws NumberFormatException If the value cannot be interpreted as Mebibytes
    */
-  def getSizeAsMb(key: String, defaultValue: String): Long = catchIllegalValue(key) {
+  def getSizeAsMb(key: String, defaultValue: String): Long = {
     Utils.byteStringAsMb(get(key, defaultValue))
   }
 
@@ -368,18 +357,16 @@ class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging with Seria
    * Get a size parameter as Gibibytes; throws a NoSuchElementException if it's not set. If no
    * suffix is provided then Gibibytes are assumed.
    * @throws java.util.NoSuchElementException If the size parameter is not set
-   * @throws NumberFormatException If the value cannot be interpreted as Gibibytes
    */
-  def getSizeAsGb(key: String): Long = catchIllegalValue(key) {
+  def getSizeAsGb(key: String): Long = {
     Utils.byteStringAsGb(get(key))
   }
 
   /**
    * Get a size parameter as Gibibytes, falling back to a default if not set. If no
    * suffix is provided then Gibibytes are assumed.
-   * @throws NumberFormatException If the value cannot be interpreted as Gibibytes
    */
-  def getSizeAsGb(key: String, defaultValue: String): Long = catchIllegalValue(key) {
+  def getSizeAsGb(key: String, defaultValue: String): Long = {
     Utils.byteStringAsGb(get(key, defaultValue))
   }
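
The suffix handling referred to in the Javadoc hunks above can be shown with a short, self-contained sketch; the configuration keys are made up for the example.

    import org.apache.spark.SparkConf

    // Hypothetical keys, purely to show how the getters interpret suffixes.
    val conf = new SparkConf()
      .set("example.timeout", "2m")
      .set("example.buffer", "64k")

    conf.getTimeAsSeconds("example.timeout")    // 120
    conf.getTimeAsMs("example.timeout")         // 120000
    conf.getSizeAsKb("example.buffer")          // 64
    conf.getSizeAsBytes("example.buffer")       // 65536
    conf.getTimeAsSeconds("missing.key", "30s") // 30, falls back to the default
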
 
@@ -407,35 +394,23 @@ class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging with Seria
   }
 
 
-  /**
-   * Get a parameter as an integer, falling back to a default if not set
-   * @throws NumberFormatException If the value cannot be interpreted as an integer
-   */
-  def getInt(key: String, defaultValue: Int): Int = catchIllegalValue(key) {
+  /** Get a parameter as an integer, falling back to a default if not set */
+  def getInt(key: String, defaultValue: Int): Int = {
     getOption(key).map(_.toInt).getOrElse(defaultValue)
   }
 
-  /**
-   * Get a parameter as a long, falling back to a default if not set
-   * @throws NumberFormatException If the value cannot be interpreted as a long
-   */
-  def getLong(key: String, defaultValue: Long): Long = catchIllegalValue(key) {
+  /** Get a parameter as a long, falling back to a default if not set */
+  def getLong(key: String, defaultValue: Long): Long = {
     getOption(key).map(_.toLong).getOrElse(defaultValue)
   }
 
-  /**
-   * Get a parameter as a double, falling back to a default if not ste
-   * @throws NumberFormatException If the value cannot be interpreted as a double
-   */
-  def getDouble(key: String, defaultValue: Double): Double = catchIllegalValue(key) {
+  /** Get a parameter as a double, falling back to a default if not set */
+  def getDouble(key: String, defaultValue: Double): Double = {
     getOption(key).map(_.toDouble).getOrElse(defaultValue)
   }
 
-  /**
-   * Get a parameter as a boolean, falling back to a default if not set
-   * @throws IllegalArgumentException If the value cannot be interpreted as a boolean
-   */
-  def getBoolean(key: String, defaultValue: Boolean): Boolean = catchIllegalValue(key) {
+  /** Get a parameter as a boolean, falling back to a default if not set */
+  def getBoolean(key: String, defaultValue: Boolean): Boolean = {
     getOption(key).map(_.toBoolean).getOrElse(defaultValue)
   }
 
@@ -473,33 +448,14 @@ class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging with Seria
    */
   private[spark] def getenv(name: String): String = System.getenv(name)
 
-  /**
-   * Wrapper method for get() methods which require some specific value format. This catches
-   * any [[NumberFormatException]] or [[IllegalArgumentException]] and re-raises it with the
-   * incorrectly configured key in the exception message.
-   */
-  private def catchIllegalValue[T](key: String)(getValue: => T): T = {
-    try {
-      getValue
-    } catch {
-      case e: NumberFormatException =>
-        // NumberFormatException doesn't have a constructor that takes a cause for some reason.
-        throw new NumberFormatException(s"Illegal value for config key $key: ${e.getMessage}")
-            .initCause(e)
-      case e: IllegalArgumentException =>
-        throw new IllegalArgumentException(s"Illegal value for config key $key: ${e.getMessage}", e)
-    }
-  }
-
   /**
    * Checks for illegal or deprecated config settings. Throws an exception for the former. Not
    * idempotent - may mutate this conf object to convert deprecated settings to supported ones.
    */
   private[spark] def validateSettings() {
     if (contains("spark.local.dir")) {
-      val msg = "Note that spark.local.dir will be overridden by the value set by " +
-        "the cluster manager (via SPARK_LOCAL_DIRS in mesos/standalone/kubernetes and LOCAL_DIRS" +
-        " in YARN)."
+      val msg = "In Spark 1.0 and later spark.local.dir will be overridden by the value set by " +
+        "the cluster manager (via SPARK_LOCAL_DIRS in mesos/standalone and LOCAL_DIRS in YARN)."
       logWarning(msg)
     }
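
For reference, the catchIllegalValue wrapper removed above changes only the error message, not the exception type. A sketch of the observable difference, with an illustrative key name:

    import org.apache.spark.SparkConf

    val conf = new SparkConf().set("example.cores", "not-a-number") // illustrative key

    try {
      conf.getInt("example.cores", 1)
    } catch {
      // With the wrapper (removed by this revert) the message reads
      // "Illegal value for config key example.cores: For input string: ...";
      // after the revert the bare NumberFormatException from _.toInt propagates.
      case e: NumberFormatException => println(e.getMessage)
    }
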
 
@@ -647,15 +603,13 @@ private[spark] object SparkConf extends Logging {
         "Please use spark.kryoserializer.buffer instead. The default value for " +
           "spark.kryoserializer.buffer.mb was previously specified as '0.064'. Fractional values " +
           "are no longer accepted. To specify the equivalent now, one may use '64k'."),
-      DeprecatedConfig("spark.rpc", "2.0", "Not used anymore."),
+      DeprecatedConfig("spark.rpc", "2.0", "Not used any more."),
       DeprecatedConfig("spark.scheduler.executorTaskBlacklistTime", "2.1.0",
         "Please use the new blacklisting options, spark.blacklist.*"),
-      DeprecatedConfig("spark.yarn.am.port", "2.0.0", "Not used anymore"),
-      DeprecatedConfig("spark.executor.port", "2.0.0", "Not used anymore"),
+      DeprecatedConfig("spark.yarn.am.port", "2.0.0", "Not used any more"),
+      DeprecatedConfig("spark.executor.port", "2.0.0", "Not used any more"),
       DeprecatedConfig("spark.shuffle.service.index.cache.entries", "2.3.0",
-        "Not used anymore. Please use spark.shuffle.service.index.cache.size"),
-      DeprecatedConfig("spark.yarn.credentials.file.retention.count", "2.4.0", "Not used anymore."),
-      DeprecatedConfig("spark.yarn.credentials.file.retention.days", "2.4.0", "Not used anymore.")
+        "Not used any more. Please use spark.shuffle.service.index.cache.size")
     )
 
     Map(configs.map { cfg => (cfg.key -> cfg) } : _*)
@@ -794,7 +748,7 @@ private[spark] object SparkConf extends Logging {
     }
     if (key.startsWith("spark.akka") || key.startsWith("spark.ssl.akka")) {
       logWarning(
-        s"The configuration key $key is not supported anymore " +
+        s"The configuration key $key is not supported any more " +
           s"because Spark doesn't use Akka since 2.0")
     }
   }
diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala
index 5e8595603cc90..3828d4f703247 100644
--- a/core/src/main/scala/org/apache/spark/SparkContext.scala
+++ b/core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -54,7 +54,6 @@ import org.apache.spark.scheduler._
 import org.apache.spark.scheduler.cluster.{CoarseGrainedSchedulerBackend, StandaloneSchedulerBackend}
 import org.apache.spark.scheduler.local.LocalSchedulerBackend
 import org.apache.spark.status.AppStatusStore
-import org.apache.spark.status.api.v1.ThreadStackTrace
 import org.apache.spark.storage._
 import org.apache.spark.storage.BlockManagerMessages.TriggerThreadDump
 import org.apache.spark.ui.{ConsoleProgressBar, SparkUI}
@@ -534,8 +533,7 @@ class SparkContext(config: SparkConf) extends Logging {
         schedulerBackend match {
           case b: ExecutorAllocationClient =>
             Some(new ExecutorAllocationManager(
-              schedulerBackend.asInstanceOf[ExecutorAllocationClient], listenerBus, _conf,
-              _env.blockManager.master))
+              schedulerBackend.asInstanceOf[ExecutorAllocationClient], listenerBus, _conf))
           case _ =>
             None
         }
@@ -1634,8 +1632,6 @@ class SparkContext(config: SparkConf) extends Logging {
    * :: DeveloperApi ::
    * Request that the cluster manager kill the specified executors.
    *
-   * This is not supported when dynamic allocation is turned on.
-   *
    * @note This is an indication to the cluster manager that the application wishes to adjust
    * its resource usage downwards. If the application wishes to replace the executors it kills
    * through this method with new ones, it should follow up explicitly with a call to
@@ -1647,10 +1643,7 @@ class SparkContext(config: SparkConf) extends Logging {
   def killExecutors(executorIds: Seq[String]): Boolean = {
     schedulerBackend match {
       case b: ExecutorAllocationClient =>
-        require(executorAllocationManager.isEmpty,
-          "killExecutors() unsupported with Dynamic Allocation turned on")
-        b.killExecutors(executorIds, adjustTargetNumExecutors = true, countFailures = false,
-          force = true).nonEmpty
+        b.killExecutors(executorIds, replace = false, force = true).nonEmpty
       case _ =>
         logWarning("Killing executors is not supported by current scheduler.")
         false
@@ -1688,8 +1681,7 @@ class SparkContext(config: SparkConf) extends Logging {
   private[spark] def killAndReplaceExecutor(executorId: String): Boolean = {
     schedulerBackend match {
       case b: ExecutorAllocationClient =>
-        b.killExecutors(Seq(executorId), adjustTargetNumExecutors = false, countFailures = true,
-          force = true).nonEmpty
+        b.killExecutors(Seq(executorId), replace = true, force = true).nonEmpty
       case _ =>
         logWarning("Killing executors is not supported by current scheduler.")
         false
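
A minimal usage sketch of the developer API touched in this hunk; the executor IDs and master URL are invented for illustration.

    import org.apache.spark.{SparkConf, SparkContext}

    val sc = new SparkContext(new SparkConf().setAppName("kill-example").setMaster("local[2]"))

    // Ask the cluster manager to kill specific executors. With the revert applied
    // the call is again permitted while dynamic allocation is on and maps to
    // killExecutors(ids, replace = false, force = true). On a local master (as
    // here) the scheduler only logs a warning and the call returns false.
    sc.killExecutors(Seq("1", "2")) // executor IDs are illustrative
    sc.stop()
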
@@ -1723,13 +1715,7 @@ class SparkContext(config: SparkConf) extends Logging {
   private[spark] def getRDDStorageInfo(filter: RDD[_] => Boolean): Array[RDDInfo] = {
     assertNotStopped()
     val rddInfos = persistentRdds.values.filter(filter).map(RDDInfo.fromRdd).toArray
-    rddInfos.foreach { rddInfo =>
-      val rddId = rddInfo.id
-      val rddStorageInfo = statusStore.asOption(statusStore.rdd(rddId))
-      rddInfo.numCachedPartitions = rddStorageInfo.map(_.numCachedPartitions).getOrElse(0)
-      rddInfo.memSize = rddStorageInfo.map(_.memoryUsed).getOrElse(0L)
-      rddInfo.diskSize = rddStorageInfo.map(_.diskUsed).getOrElse(0L)
-    }
+    StorageUtils.updateRddInfo(rddInfos, getExecutorStorageStatus)
     rddInfos.filter(_.isCached)
   }
 
@@ -1740,6 +1726,17 @@ class SparkContext(config: SparkConf) extends Logging {
    */
   def getPersistentRDDs: Map[Int, RDD[_]] = persistentRdds.toMap
 
+  /**
+   * :: DeveloperApi ::
+   * Return information about blocks stored in all of the slaves
+   */
+  @DeveloperApi
+  @deprecated("This method may change or be removed in a future release.", "2.2.0")
+  def getExecutorStorageStatus: Array[StorageStatus] = {
+    assertNotStopped()
+    env.blockManager.master.getStorageStatus
+  }
+
   /**
    * :: DeveloperApi ::
    * Return pools for fair scheduler
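
The restored getExecutorStorageStatus method (deprecated since 2.2.0) can be exercised roughly as below; the StorageStatus accessors used here (blockManagerId, memUsed, memRemaining) are assumed from that class and not shown in this diff.

    import org.apache.spark.SparkContext

    def printStorageStatus(sc: SparkContext): Unit = {
      // Deprecated developer API restored by this revert: one StorageStatus per
      // block manager (driver and executors).
      sc.getExecutorStorageStatus.foreach { status =>
        println(s"${status.blockManagerId.executorId}: " +
          s"${status.memUsed} bytes used, ${status.memRemaining} bytes free")
      }
    }
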
diff --git a/core/src/main/scala/org/apache/spark/SparkStatusTracker.scala b/core/src/main/scala/org/apache/spark/SparkStatusTracker.scala
index 815237eba0174..70865cb58c571 100644
--- a/core/src/main/scala/org/apache/spark/SparkStatusTracker.scala
+++ b/core/src/main/scala/org/apache/spark/SparkStatusTracker.scala
@@ -97,8 +97,7 @@ class SparkStatusTracker private[spark] (sc: SparkContext, store: AppStatusStore
   }
 
   /**
-   * Returns information of all known executors, including host, port, cacheSize, numRunningTasks
-   * and memory metrics.
+   * Returns information of all known executors, including host, port, cacheSize, numRunningTasks.
    */
   def getExecutorInfos: Array[SparkExecutorInfo] = {
     store.executorList(true).map { exec =>
@@ -114,11 +113,7 @@ class SparkStatusTracker private[spark] (sc: SparkContext, store: AppStatusStore
         host,
         port,
         cachedMem,
-        exec.activeTasks,
-        exec.memoryMetrics.map(_.usedOffHeapStorageMemory).getOrElse(0L),
-        exec.memoryMetrics.map(_.usedOnHeapStorageMemory).getOrElse(0L),
-        exec.memoryMetrics.map(_.totalOffHeapStorageMemory).getOrElse(0L),
-        exec.memoryMetrics.map(_.totalOnHeapStorageMemory).getOrElse(0L))
+        exec.activeTasks)
     }.toArray
   }
 }
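
A short usage sketch of the tracker API changed above; after the revert SparkExecutorInfo exposes only the four fields listed in the reverted Javadoc.

    import org.apache.spark.SparkContext

    def printExecutorInfos(sc: SparkContext): Unit = {
      // host, port, cacheSize and numRunningTasks remain; the storage-memory
      // accessors removed in this hunk are no longer available.
      sc.statusTracker.getExecutorInfos.foreach { e =>
        println(s"${e.host}:${e.port} cached=${e.cacheSize} running=${e.numRunningTasks}")
      }
    }
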
diff --git a/core/src/main/scala/org/apache/spark/StatusAPIImpl.scala b/core/src/main/scala/org/apache/spark/StatusAPIImpl.scala
index 6a888c1e9e772..c1f24a6377788 100644
--- a/core/src/main/scala/org/apache/spark/StatusAPIImpl.scala
+++ b/core/src/main/scala/org/apache/spark/StatusAPIImpl.scala
@@ -38,9 +38,5 @@ private class SparkExecutorInfoImpl(
     val host: String,
     val port: Int,
     val cacheSize: Long,
-    val numRunningTasks: Int,
-    val usedOnHeapStorageMemory: Long,
-    val usedOffHeapStorageMemory: Long,
-    val totalOnHeapStorageMemory: Long,
-    val totalOffHeapStorageMemory: Long)
+    val numRunningTasks: Int)
   extends SparkExecutorInfo
diff --git a/core/src/main/scala/org/apache/spark/TaskContextImpl.scala b/core/src/main/scala/org/apache/spark/TaskContextImpl.scala
index 0791fe856ef15..cccd3ea457ba4 100644
--- a/core/src/main/scala/org/apache/spark/TaskContextImpl.scala
+++ b/core/src/main/scala/org/apache/spark/TaskContextImpl.scala
@@ -178,6 +178,4 @@ private[spark] class TaskContextImpl(
 
   private[spark] def fetchFailed: Option[FetchFailedException] = _fetchFailedException
 
-  // TODO: shall we publish it and define it in `TaskContext`?
-  private[spark] def getLocalProperties(): Properties = localProperties
 }
diff --git a/core/src/main/scala/org/apache/spark/TaskEndReason.scala b/core/src/main/scala/org/apache/spark/TaskEndReason.scala
index 33901bc8380e9..a76283e33fa65 100644
--- a/core/src/main/scala/org/apache/spark/TaskEndReason.scala
+++ b/core/src/main/scala/org/apache/spark/TaskEndReason.scala
@@ -212,15 +212,9 @@ case object TaskResultLost extends TaskFailedReason {
  * Task was killed intentionally and needs to be rescheduled.
  */
 @DeveloperApi
-case class TaskKilled(
-    reason: String,
-    accumUpdates: Seq[AccumulableInfo] = Seq.empty,
-    private[spark] val accums: Seq[AccumulatorV2[_, _]] = Nil)
-  extends TaskFailedReason {
-
+case class TaskKilled(reason: String) extends TaskFailedReason {
   override def toErrorString: String = s"TaskKilled ($reason)"
   override def countTowardsTaskFailures: Boolean = false
-
 }
 
 /**
diff --git a/core/src/main/scala/org/apache/spark/TestUtils.scala b/core/src/main/scala/org/apache/spark/TestUtils.scala
index b5c4c705dcbc7..93e7ee3d2a404 100644
--- a/core/src/main/scala/org/apache/spark/TestUtils.scala
+++ b/core/src/main/scala/org/apache/spark/TestUtils.scala
@@ -22,7 +22,7 @@ import java.net.{HttpURLConnection, URI, URL}
 import java.nio.charset.StandardCharsets
 import java.security.SecureRandom
 import java.security.cert.X509Certificate
-import java.util.{Arrays, Properties}
+import java.util.Arrays
 import java.util.concurrent.{CountDownLatch, TimeoutException, TimeUnit}
 import java.util.jar.{JarEntry, JarOutputStream}
 import javax.net.ssl._
@@ -35,7 +35,6 @@ import scala.sys.process.{Process, ProcessLogger}
 import scala.util.Try
 
 import com.google.common.io.{ByteStreams, Files}
-import org.apache.log4j.PropertyConfigurator
 
 import org.apache.spark.executor.TaskMetrics
 import org.apache.spark.scheduler._
@@ -257,29 +256,6 @@ private[spark] object TestUtils {
       s"Can't find $numExecutors executors before $timeout milliseconds elapsed")
   }
 
-  /**
-   * config a log4j properties used for testsuite
-   */
-  def configTestLog4j(level: String): Unit = {
-    val pro = new Properties()
-    pro.put("log4j.rootLogger", s"$level, console")
-    pro.put("log4j.appender.console", "org.apache.log4j.ConsoleAppender")
-    pro.put("log4j.appender.console.target", "System.err")
-    pro.put("log4j.appender.console.layout", "org.apache.log4j.PatternLayout")
-    pro.put("log4j.appender.console.layout.ConversionPattern",
-      "%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n")
-    PropertyConfigurator.configure(pro)
-  }
-
-  /**
-   * Lists files recursively.
-   */
-  def recursiveList(f: File): Array[File] = {
-    require(f.isDirectory)
-    val current = f.listFiles
-    current ++ current.filter(_.isDirectory).flatMap(recursiveList)
-  }
-
 }
 
 
diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonGatewayServer.scala b/core/src/main/scala/org/apache/spark/api/python/PythonGatewayServer.scala
index 9ddc4a4910180..11f2432575d84 100644
--- a/core/src/main/scala/org/apache/spark/api/python/PythonGatewayServer.scala
+++ b/core/src/main/scala/org/apache/spark/api/python/PythonGatewayServer.scala
@@ -17,39 +17,26 @@
 
 package org.apache.spark.api.python
 
-import java.io.{DataOutputStream, File, FileOutputStream}
-import java.net.InetAddress
-import java.nio.charset.StandardCharsets.UTF_8
-import java.nio.file.Files
+import java.io.DataOutputStream
+import java.net.Socket
 
 import py4j.GatewayServer
 
-import org.apache.spark.SparkConf
 import org.apache.spark.internal.Logging
 import org.apache.spark.util.Utils
 
 /**
- * Process that starts a Py4J GatewayServer on an ephemeral port.
+ * Process that starts a Py4J GatewayServer on an ephemeral port and communicates the bound port
+ * back to its caller via a callback port specified by the caller.
  *
  * This process is launched (via SparkSubmit) by the PySpark driver (see java_gateway.py).
  */
 private[spark] object PythonGatewayServer extends Logging {
   initializeLogIfNecessary(true)
 
-  def main(args: Array[String]): Unit = {
-    val secret = Utils.createSecret(new SparkConf())
-
-    // Start a GatewayServer on an ephemeral port. Make sure the callback client is configured
-    // with the same secret, in case the app needs callbacks from the JVM to the underlying
-    // python processes.
-    val localhost = InetAddress.getLoopbackAddress()
-    val gatewayServer: GatewayServer = new GatewayServer.GatewayServerBuilder()
-      .authToken(secret)
-      .javaPort(0)
-      .javaAddress(localhost)
-      .callbackClient(GatewayServer.DEFAULT_PYTHON_PORT, localhost, secret)
-      .build()
-
+  def main(args: Array[String]): Unit = Utils.tryOrExit {
+    // Start a GatewayServer on an ephemeral port
+    val gatewayServer: GatewayServer = new GatewayServer(null, 0)
     gatewayServer.start()
     val boundPort: Int = gatewayServer.getListeningPort
     if (boundPort == -1) {
@@ -59,24 +46,15 @@ private[spark] object PythonGatewayServer extends Logging {
       logDebug(s"Started PythonGatewayServer on port $boundPort")
     }
 
-    // Communicate the connection information back to the python process by writing the
-    // information in the requested file. This needs to match the read side in java_gateway.py.
-    val connectionInfoPath = new File(sys.env("_PYSPARK_DRIVER_CONN_INFO_PATH"))
-    val tmpPath = Files.createTempFile(connectionInfoPath.getParentFile().toPath(),
-      "connection", ".info").toFile()
-
-    val dos = new DataOutputStream(new FileOutputStream(tmpPath))
+    // Communicate the bound port back to the caller via the caller-specified callback port
+    val callbackHost = sys.env("_PYSPARK_DRIVER_CALLBACK_HOST")
+    val callbackPort = sys.env("_PYSPARK_DRIVER_CALLBACK_PORT").toInt
+    logDebug(s"Communicating GatewayServer port to Python driver at $callbackHost:$callbackPort")
+    val callbackSocket = new Socket(callbackHost, callbackPort)
+    val dos = new DataOutputStream(callbackSocket.getOutputStream)
     dos.writeInt(boundPort)
-
-    val secretBytes = secret.getBytes(UTF_8)
-    dos.writeInt(secretBytes.length)
-    dos.write(secretBytes, 0, secretBytes.length)
     dos.close()
-
-    if (!tmpPath.renameTo(connectionInfoPath)) {
-      logError(s"Unable to write connection information to $connectionInfoPath.")
-      System.exit(1)
-    }
+    callbackSocket.close()
 
     // Exit on EOF or broken pipe to ensure that this process dies when the Python driver dies:
     while (System.in.read() != -1) {
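
To make the reverted callback handshake concrete: the real receiving end lives in PySpark's java_gateway.py, but the wire protocol is just a single int written over a caller-specified callback socket. A Scala stand-in, only to illustrate that protocol:

    import java.io.DataInputStream
    import java.net.{InetAddress, ServerSocket}

    def awaitGatewayPort(): Int = {
      // Listen on an ephemeral callback port; the JVM side connects and writes the
      // Py4J gateway's bound port as one int, then closes the connection. The
      // callback host/port would be passed to the JVM via the
      // _PYSPARK_DRIVER_CALLBACK_HOST / _PYSPARK_DRIVER_CALLBACK_PORT env vars.
      val server = new ServerSocket(0, 1, InetAddress.getByName("localhost"))
      try {
        val sock = server.accept()
        try new DataInputStream(sock.getInputStream).readInt()
        finally sock.close()
      } finally {
        server.close()
      }
    }
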
diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala
index a1ee2f7d1b119..f6293c0dc5091 100644
--- a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala
+++ b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala
@@ -38,7 +38,6 @@ import org.apache.spark.broadcast.Broadcast
 import org.apache.spark.input.PortableDataStream
 import org.apache.spark.internal.Logging
 import org.apache.spark.rdd.RDD
-import org.apache.spark.security.SocketAuthHelper
 import org.apache.spark.util._
 
 
@@ -108,12 +107,6 @@ private[spark] object PythonRDD extends Logging {
   // remember the broadcasts sent to each worker
   private val workerBroadcasts = new mutable.WeakHashMap[Socket, mutable.Set[Long]]()
 
-  // Authentication helper used when serving iterator data.
-  private lazy val authHelper = {
-    val conf = Option(SparkEnv.get).map(_.conf).getOrElse(new SparkConf())
-    new SocketAuthHelper(conf)
-  }
-
   def getWorkerBroadcasts(worker: Socket): mutable.Set[Long] = {
     synchronized {
       workerBroadcasts.getOrElseUpdate(worker, new mutable.HashSet[Long]())
@@ -136,13 +129,12 @@ private[spark] object PythonRDD extends Logging {
    * (effectively a collect()), but allows you to run on a certain subset of partitions,
    * or to enable local execution.
    *
-   * @return 2-tuple (as a Java array) with the port number of a local socket which serves the
-   *         data collected from this job, and the secret for authentication.
+   * @return the port number of a local socket which serves the data collected from this job.
    */
   def runJob(
       sc: SparkContext,
       rdd: JavaRDD[Array[Byte]],
-      partitions: JArrayList[Int]): Array[Any] = {
+      partitions: JArrayList[Int]): Int = {
     type ByteArray = Array[Byte]
     type UnrolledPartition = Array[ByteArray]
     val allPartitions: Array[UnrolledPartition] =
@@ -155,14 +147,13 @@ private[spark] object PythonRDD extends Logging {
   /**
    * A helper function to collect an RDD as an iterator, then serve it via socket.
    *
-   * @return 2-tuple (as a Java array) with the port number of a local socket which serves the
-   *         data collected from this job, and the secret for authentication.
+   * @return the port number of a local socket which serves the data collected from this job.
    */
-  def collectAndServe[T](rdd: RDD[T]): Array[Any] = {
+  def collectAndServe[T](rdd: RDD[T]): Int = {
     serveIterator(rdd.collect().iterator, s"serve RDD ${rdd.id}")
   }
 
-  def toLocalIteratorAndServe[T](rdd: RDD[T]): Array[Any] = {
+  def toLocalIteratorAndServe[T](rdd: RDD[T]): Int = {
     serveIterator(rdd.toLocalIterator, s"serve toLocalIterator")
   }
 
@@ -393,11 +384,8 @@ private[spark] object PythonRDD extends Logging {
    * and send them into this connection.
    *
    * The thread will terminate after all the data are sent or any exceptions happen.
-   *
-   * @return 2-tuple (as a Java array) with the port number of a local socket which serves the
-   *         data collected from this job, and the secret for authentication.
    */
-  def serveIterator(items: Iterator[_], threadName: String): Array[Any] = {
+  def serveIterator[T](items: Iterator[T], threadName: String): Int = {
     val serverSocket = new ServerSocket(0, 1, InetAddress.getByName("localhost"))
     // Close the socket if no connection in 15 seconds
     serverSocket.setSoTimeout(15000)
@@ -407,14 +395,11 @@ private[spark] object PythonRDD extends Logging {
       override def run() {
         try {
           val sock = serverSocket.accept()
-          authHelper.authClient(sock)
-
           val out = new DataOutputStream(new BufferedOutputStream(sock.getOutputStream))
           Utils.tryWithSafeFinally {
             writeIteratorToStream(items, out)
           } {
             out.close()
-            sock.close()
           }
         } catch {
           case NonFatal(e) =>
@@ -425,7 +410,7 @@ private[spark] object PythonRDD extends Logging {
       }
     }.start()
 
-    Array(serverSocket.getLocalPort, authHelper.secret)
+    serverSocket.getLocalPort
   }
 
   private def getMergedConf(confAsMap: java.util.HashMap[String, String],
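
For orientation, a rough sketch of the consumer side of serveIterator after this revert. The actual consumer is PySpark's socket reader, and the framing is defined by writeIteratorToStream (not shown in this diff), so the helper below only drains raw bytes from the returned port.

    import java.io.DataInputStream
    import java.net.{InetAddress, Socket}

    def drainServedIterator(port: Int): Array[Byte] = {
      // Connect to the ephemeral localhost port returned by serveIterator and read
      // until the serving thread closes the stream; decoding is left to the caller.
      val sock = new Socket(InetAddress.getByName("localhost"), port)
      try {
        val in = new DataInputStream(sock.getInputStream)
        val out = new java.io.ByteArrayOutputStream()
        val chunk = new Array[Byte](8192)
        var n = in.read(chunk)
        while (n != -1) {
          out.write(chunk, 0, n)
          n = in.read(chunk)
        }
        out.toByteArray
      } finally {
        sock.close()
      }
    }
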
diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonRunner.scala b/core/src/main/scala/org/apache/spark/api/python/PythonRunner.scala
index 41eac10d9b267..f075a7e0eb0b4 100644
--- a/core/src/main/scala/org/apache/spark/api/python/PythonRunner.scala
+++ b/core/src/main/scala/org/apache/spark/api/python/PythonRunner.scala
@@ -183,13 +183,6 @@ private[spark] abstract class BasePythonRunner[IN, OUT](
         dataOut.writeInt(context.partitionId())
         dataOut.writeInt(context.attemptNumber())
         dataOut.writeLong(context.taskAttemptId())
-        val localProps = context.asInstanceOf[TaskContextImpl].getLocalProperties.asScala
-        dataOut.writeInt(localProps.size)
-        localProps.foreach { case (k, v) =>
-          PythonRDD.writeUTF(k, dataOut)
-          PythonRDD.writeUTF(v, dataOut)
-        }
-
         // sparkFilesDir
         PythonRDD.writeUTF(SparkFiles.getRootDirectory(), dataOut)
         // Python includes (*.zip and *.egg files)
diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala b/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala
index 27a5e19f96a14..92e228a9dd10c 100644
--- a/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala
+++ b/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala
@@ -32,7 +32,7 @@ private[spark] object PythonUtils {
     val pythonPath = new ArrayBuffer[String]
     for (sparkHome <- sys.env.get("SPARK_HOME")) {
       pythonPath += Seq(sparkHome, "python", "lib", "pyspark.zip").mkString(File.separator)
-      pythonPath += Seq(sparkHome, "python", "lib", "py4j-0.10.7-src.zip").mkString(File.separator)
+      pythonPath += Seq(sparkHome, "python", "lib", "py4j-0.10.6-src.zip").mkString(File.separator)
     }
     pythonPath ++= SparkContext.jarOfObject(this)
     pythonPath.mkString(File.pathSeparator)
diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonWorkerFactory.scala b/core/src/main/scala/org/apache/spark/api/python/PythonWorkerFactory.scala
index 6afa37aa36fd3..30976ac752a8a 100644
--- a/core/src/main/scala/org/apache/spark/api/python/PythonWorkerFactory.scala
+++ b/core/src/main/scala/org/apache/spark/api/python/PythonWorkerFactory.scala
@@ -17,7 +17,7 @@
 
 package org.apache.spark.api.python
 
-import java.io.{DataInputStream, DataOutputStream, EOFException, InputStream, OutputStreamWriter}
+import java.io.{DataInputStream, DataOutputStream, InputStream, OutputStreamWriter}
 import java.net.{InetAddress, ServerSocket, Socket, SocketException}
 import java.nio.charset.StandardCharsets
 import java.util.Arrays
@@ -27,7 +27,6 @@ import scala.collection.mutable
 
 import org.apache.spark._
 import org.apache.spark.internal.Logging
-import org.apache.spark.security.SocketAuthHelper
 import org.apache.spark.util.{RedirectThread, Utils}
 
 private[spark] class PythonWorkerFactory(pythonExec: String, envVars: Map[String, String])
@@ -68,8 +67,6 @@ private[spark] class PythonWorkerFactory(pythonExec: String, envVars: Map[String
     value
   }.getOrElse("pyspark.worker")
 
-  private val authHelper = new SocketAuthHelper(SparkEnv.get.conf)
-
   var daemon: Process = null
   val daemonHost = InetAddress.getByAddress(Array(127, 0, 0, 1))
   var daemonPort: Int = 0
@@ -111,8 +108,6 @@ private[spark] class PythonWorkerFactory(pythonExec: String, envVars: Map[String
       if (pid < 0) {
         throw new IllegalStateException("Python daemon failed to launch worker with code " + pid)
       }
-
-      authHelper.authToServer(socket)
       daemonWorkers.put(socket, pid)
       socket
     }
@@ -150,24 +145,25 @@ private[spark] class PythonWorkerFactory(pythonExec: String, envVars: Map[String
       workerEnv.put("PYTHONPATH", pythonPath)
       // This is equivalent to setting the -u flag; we use it because ipython doesn't support -u:
       workerEnv.put("PYTHONUNBUFFERED", "YES")
-      workerEnv.put("PYTHON_WORKER_FACTORY_PORT", serverSocket.getLocalPort.toString)
-      workerEnv.put("PYTHON_WORKER_FACTORY_SECRET", authHelper.secret)
       val worker = pb.start()
 
       // Redirect worker stdout and stderr
       redirectStreamsToStderr(worker.getInputStream, worker.getErrorStream)
 
-      // Wait for it to connect to our socket, and validate the auth secret.
-      serverSocket.setSoTimeout(10000)
+      // Tell the worker our port
+      val out = new OutputStreamWriter(worker.getOutputStream, StandardCharsets.UTF_8)

+      out.write(serverSocket.getLocalPort + "\n")
+      out.flush()
 
+      // Wait for it to connect to our socket
+      serverSocket.setSoTimeout(10000)
       try {
         val socket = serverSocket.accept()
-        authHelper.authClient(socket)
         simpleWorkers.put(socket, worker)
         return socket
       } catch {
         case e: Exception =>
-          throw new SparkException("Python worker failed to connect back.", e)
+          throw new SparkException("Python worker did not connect back in time", e)
       }
     } finally {
       if (serverSocket != null) {
@@ -186,43 +182,20 @@ private[spark] class PythonWorkerFactory(pythonExec: String, envVars: Map[String
 
       try {
         // Create and start the daemon
-        val command = Arrays.asList(pythonExec, "-m", daemonModule)
-        val pb = new ProcessBuilder(command)
+        val pb = new ProcessBuilder(Arrays.asList(pythonExec, "-m", daemonModule))
         val workerEnv = pb.environment()
         workerEnv.putAll(envVars.asJava)
         workerEnv.put("PYTHONPATH", pythonPath)
-        workerEnv.put("PYTHON_WORKER_FACTORY_SECRET", authHelper.secret)
         // This is equivalent to setting the -u flag; we use it because ipython doesn't support -u:
         workerEnv.put("PYTHONUNBUFFERED", "YES")
         daemon = pb.start()
 
         val in = new DataInputStream(daemon.getInputStream)
-        try {
-          daemonPort = in.readInt()
-        } catch {
-          case _: EOFException =>
-            throw new SparkException(s"No port number in $daemonModule's stdout")
-        }
-
-        // test that the returned port number is within a valid range.
-        // note: this does not cover the case where the port number
-        // is arbitrary data but is also coincidentally within range
-        if (daemonPort < 1 || daemonPort > 0xffff) {
-          val exceptionMessage = f"""
-            |Bad data in $daemonModule's standard output. Invalid port number:
-            |  $daemonPort (0x$daemonPort%08x)
-            |Python command to execute the daemon was:
-            |  ${command.asScala.mkString(" ")}
-            |Check that you don't have any unexpected modules or libraries in
-            |your PYTHONPATH:
-            |  $pythonPath
-            |Also, check if you have a sitecustomize.py module in your python path,
-            |or in your python installation, that is printing to standard output"""
-          throw new SparkException(exceptionMessage.stripMargin)
-        }
+        daemonPort = in.readInt()
 
         // Redirect daemon stdout and stderr
         redirectStreamsToStderr(in, daemon.getErrorStream)
+
       } catch {
         case e: Exception =>
 
diff --git a/core/src/main/scala/org/apache/spark/api/r/RBackend.scala b/core/src/main/scala/org/apache/spark/api/r/RBackend.scala
index 3b2e809408e0f..2d1152a036449 100644
--- a/core/src/main/scala/org/apache/spark/api/r/RBackend.scala
+++ b/core/src/main/scala/org/apache/spark/api/r/RBackend.scala
@@ -17,8 +17,8 @@
 
 package org.apache.spark.api.r
 
-import java.io.{DataInputStream, DataOutputStream, File, FileOutputStream, IOException}
-import java.net.{InetAddress, InetSocketAddress, ServerSocket, Socket}
+import java.io.{DataOutputStream, File, FileOutputStream, IOException}
+import java.net.{InetAddress, InetSocketAddress, ServerSocket}
 import java.util.concurrent.TimeUnit
 
 import io.netty.bootstrap.ServerBootstrap
@@ -32,8 +32,6 @@ import io.netty.handler.timeout.ReadTimeoutHandler
 
 import org.apache.spark.SparkConf
 import org.apache.spark.internal.Logging
-import org.apache.spark.network.util.JavaUtils
-import org.apache.spark.util.Utils
 
 /**
  * Netty-based backend server that is used to communicate between R and Java.
@@ -47,7 +45,7 @@ private[spark] class RBackend {
   /** Tracks JVM objects returned to R for this RBackend instance. */
   private[r] val jvmObjectTracker = new JVMObjectTracker
 
-  def init(): (Int, RAuthHelper) = {
+  def init(): Int = {
     val conf = new SparkConf()
     val backendConnectionTimeout = conf.getInt(
       "spark.r.backendConnectionTimeout", SparkRDefaults.DEFAULT_CONNECTION_TIMEOUT)
@@ -55,7 +53,6 @@ private[spark] class RBackend {
       conf.getInt("spark.r.numRBackendThreads", SparkRDefaults.DEFAULT_NUM_RBACKEND_THREADS))
     val workerGroup = bossGroup
     val handler = new RBackendHandler(this)
-    val authHelper = new RAuthHelper(conf)
 
     bootstrap = new ServerBootstrap()
       .group(bossGroup, workerGroup)
@@ -74,16 +71,13 @@ private[spark] class RBackend {
             new LengthFieldBasedFrameDecoder(Integer.MAX_VALUE, 0, 4, 0, 4))
           .addLast("decoder", new ByteArrayDecoder())
           .addLast("readTimeoutHandler", new ReadTimeoutHandler(backendConnectionTimeout))
-          .addLast(new RBackendAuthHandler(authHelper.secret))
           .addLast("handler", handler)
       }
     })
 
     channelFuture = bootstrap.bind(new InetSocketAddress("localhost", 0))
     channelFuture.syncUninterruptibly()
-
-    val port = channelFuture.channel().localAddress().asInstanceOf[InetSocketAddress].getPort()
-    (port, authHelper)
+    channelFuture.channel().localAddress().asInstanceOf[InetSocketAddress].getPort()
   }
 
   def run(): Unit = {
@@ -122,7 +116,7 @@ private[spark] object RBackend extends Logging {
     val sparkRBackend = new RBackend()
     try {
       // bind to random port
-      val (boundPort, authHelper) = sparkRBackend.init()
+      val boundPort = sparkRBackend.init()
       val serverSocket = new ServerSocket(0, 1, InetAddress.getByName("localhost"))
       val listenPort = serverSocket.getLocalPort()
       // Connection timeout is set by socket client. To make it configurable we will pass the
@@ -139,7 +133,6 @@ private[spark] object RBackend extends Logging {
       dos.writeInt(listenPort)
       SerDe.writeString(dos, RUtils.rPackages.getOrElse(""))
       dos.writeInt(backendConnectionTimeout)
-      SerDe.writeString(dos, authHelper.secret)
       dos.close()
       f.renameTo(new File(path))
 
@@ -151,35 +144,12 @@ private[spark] object RBackend extends Logging {
           val buf = new Array[Byte](1024)
           // shutdown JVM if R does not connect back in 10 seconds
           serverSocket.setSoTimeout(10000)
-
-          // Wait for the R process to connect back, ignoring any failed auth attempts. Allow
-          // a max number of connection attempts to avoid looping forever.
           try {
-            var remainingAttempts = 10
-            var inSocket: Socket = null
-            while (inSocket == null) {
-              inSocket = serverSocket.accept()
-              try {
-                authHelper.authClient(inSocket)
-              } catch {
-                case e: Exception =>
-                  remainingAttempts -= 1
-                  if (remainingAttempts == 0) {
-                    val msg = "Too many failed authentication attempts."
-                    logError(msg)
-                    throw new IllegalStateException(msg)
-                  }
-                  logInfo("Client connection failed authentication.")
-                  inSocket = null
-              }
-            }
-
+            val inSocket = serverSocket.accept()
             serverSocket.close()
-
             // wait for the end of socket, closed if R process die
             inSocket.getInputStream().read(buf)
           } finally {
-            serverSocket.close()
             sparkRBackend.close()
             System.exit(0)
           }
@@ -195,5 +165,4 @@ private[spark] object RBackend extends Logging {
     }
     System.exit(0)
   }
-
 }
diff --git a/core/src/main/scala/org/apache/spark/api/r/RBackendAuthHandler.scala b/core/src/main/scala/org/apache/spark/api/r/RBackendAuthHandler.scala
deleted file mode 100644
index 4162e4a6c7476..0000000000000
--- a/core/src/main/scala/org/apache/spark/api/r/RBackendAuthHandler.scala
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.api.r
-
-import java.io.{ByteArrayOutputStream, DataOutputStream}
-import java.nio.charset.StandardCharsets.UTF_8
-
-import io.netty.channel.{Channel, ChannelHandlerContext, SimpleChannelInboundHandler}
-
-import org.apache.spark.internal.Logging
-import org.apache.spark.util.Utils
-
-/**
- * Authentication handler for connections from the R process.
- */
-private class RBackendAuthHandler(secret: String)
-  extends SimpleChannelInboundHandler[Array[Byte]] with Logging {
-
-  override def channelRead0(ctx: ChannelHandlerContext, msg: Array[Byte]): Unit = {
-    // The R code adds a null terminator to serialized strings, so ignore it here.
-    val clientSecret = new String(msg, 0, msg.length - 1, UTF_8)
-    try {
-      require(secret == clientSecret, "Auth secret mismatch.")
-      ctx.pipeline().remove(this)
-      writeReply("ok", ctx.channel())
-    } catch {
-      case e: Exception =>
-        logInfo("Authentication failure.", e)
-        writeReply("err", ctx.channel())
-        ctx.close()
-    }
-  }
-
-  private def writeReply(reply: String, chan: Channel): Unit = {
-    val out = new ByteArrayOutputStream()
-    SerDe.writeString(new DataOutputStream(out), reply)
-    chan.writeAndFlush(out.toByteArray())
-  }
-
-}
diff --git a/core/src/main/scala/org/apache/spark/api/r/RRunner.scala b/core/src/main/scala/org/apache/spark/api/r/RRunner.scala
index e7fdc3963945a..88118392003e8 100644
--- a/core/src/main/scala/org/apache/spark/api/r/RRunner.scala
+++ b/core/src/main/scala/org/apache/spark/api/r/RRunner.scala
@@ -74,19 +74,14 @@ private[spark] class RRunner[U](
 
     // the socket used to send out the input of task
     serverSocket.setSoTimeout(10000)
-    dataStream = try {
-      val inSocket = serverSocket.accept()
-      RRunner.authHelper.authClient(inSocket)
-      startStdinThread(inSocket.getOutputStream(), inputIterator, partitionIndex)
-
-      // the socket used to receive the output of task
-      val outSocket = serverSocket.accept()
-      RRunner.authHelper.authClient(outSocket)
-      val inputStream = new BufferedInputStream(outSocket.getInputStream)
-      new DataInputStream(inputStream)
-    } finally {
-      serverSocket.close()
-    }
+    val inSocket = serverSocket.accept()
+    startStdinThread(inSocket.getOutputStream(), inputIterator, partitionIndex)
+
+    // the socket used to receive the output of task
+    val outSocket = serverSocket.accept()
+    val inputStream = new BufferedInputStream(outSocket.getInputStream)
+    dataStream = new DataInputStream(inputStream)
+    serverSocket.close()
 
     try {
       return new Iterator[U] {
@@ -320,11 +315,6 @@ private[r] object RRunner {
   private[this] var errThread: BufferedStreamThread = _
   private[this] var daemonChannel: DataOutputStream = _
 
-  private lazy val authHelper = {
-    val conf = Option(SparkEnv.get).map(_.conf).getOrElse(new SparkConf())
-    new RAuthHelper(conf)
-  }
-
   /**
    * Start a thread to print the process's stderr to ours
    */
@@ -359,7 +349,6 @@ private[r] object RRunner {
     pb.environment().put("SPARKR_BACKEND_CONNECTION_TIMEOUT", rConnectionTimeout.toString)
     pb.environment().put("SPARKR_SPARKFILES_ROOT_DIR", SparkFiles.getRootDirectory())
     pb.environment().put("SPARKR_IS_RUNNING_ON_WORKER", "TRUE")
-    pb.environment().put("SPARKR_WORKER_SECRET", authHelper.secret)
     pb.redirectErrorStream(true)  // redirect stderr into stdout
     val proc = pb.start()
     val errThread = startStdoutThread(proc)
@@ -381,12 +370,8 @@ private[r] object RRunner {
           // the socket used to send out the input of task
           serverSocket.setSoTimeout(10000)
           val sock = serverSocket.accept()
-          try {
-            authHelper.authClient(sock)
-            daemonChannel = new DataOutputStream(new BufferedOutputStream(sock.getOutputStream))
-          } finally {
-            serverSocket.close()
-          }
+          daemonChannel = new DataOutputStream(new BufferedOutputStream(sock.getOutputStream))
+          serverSocket.close()
         }
         try {
           daemonChannel.writeInt(port)
diff --git a/core/src/main/scala/org/apache/spark/deploy/DependencyUtils.scala b/core/src/main/scala/org/apache/spark/deploy/DependencyUtils.scala
index 178bdcfccb603..ecc82d7ac8001 100644
--- a/core/src/main/scala/org/apache/spark/deploy/DependencyUtils.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/DependencyUtils.scala
@@ -18,24 +18,21 @@
 package org.apache.spark.deploy
 
 import java.io.File
-import java.net.URI
 
 import org.apache.commons.lang3.StringUtils
 import org.apache.hadoop.conf.Configuration
 import org.apache.hadoop.fs.{FileSystem, Path}
 
-import org.apache.spark.{SecurityManager, SparkConf, SparkException}
-import org.apache.spark.internal.Logging
+import org.apache.spark.{SecurityManager, SparkConf}
 import org.apache.spark.util.{MutableURLClassLoader, Utils}
 
-private[deploy] object DependencyUtils extends Logging {
+private[deploy] object DependencyUtils {
 
   def resolveMavenDependencies(
       packagesExclusions: String,
       packages: String,
       repositories: String,
-      ivyRepoPath: String,
-      ivySettingsPath: Option[String]): String = {
+      ivyRepoPath: String): String = {
     val exclusions: Seq[String] =
       if (!StringUtils.isBlank(packagesExclusions)) {
         packagesExclusions.split(",")
@@ -43,12 +40,10 @@ private[deploy] object DependencyUtils extends Logging {
         Nil
       }
     // Create the IvySettings, either load from file or build defaults
-    val ivySettings = ivySettingsPath match {
-      case Some(path) =>
-        SparkSubmitUtils.loadIvySettings(path, Option(repositories), Option(ivyRepoPath))
-
-      case None =>
-        SparkSubmitUtils.buildIvySettings(Option(repositories), Option(ivyRepoPath))
+    val ivySettings = sys.props.get("spark.jars.ivySettings").map { ivySettingsFile =>
+      SparkSubmitUtils.loadIvySettings(ivySettingsFile, Option(repositories), Option(ivyRepoPath))
+    }.getOrElse {
+      SparkSubmitUtils.buildIvySettings(Option(repositories), Option(ivyRepoPath))
     }
 
     SparkSubmitUtils.resolveMavenCoordinates(packages, ivySettings, exclusions = exclusions)
@@ -76,7 +71,7 @@ private[deploy] object DependencyUtils extends Logging {
   def addJarsToClassPath(jars: String, loader: MutableURLClassLoader): Unit = {
     if (jars != null) {
       for (jar <- jars.split(",")) {
-        addJarToClasspath(jar, loader)
+        SparkSubmit.addJarToClasspath(jar, loader)
       }
     }
   }
@@ -142,56 +137,16 @@ private[deploy] object DependencyUtils extends Logging {
   def resolveGlobPaths(paths: String, hadoopConf: Configuration): String = {
     require(paths != null, "paths cannot be null.")
     Utils.stringToSeq(paths).flatMap { path =>
-      val (base, fragment) = splitOnFragment(path)
-      (resolveGlobPath(base, hadoopConf), fragment) match {
-        case (resolved, Some(_)) if resolved.length > 1 => throw new SparkException(
-            s"${base.toString} resolves ambiguously to multiple files: ${resolved.mkString(",")}")
-        case (resolved, Some(namedAs)) => resolved.map(_ + "#" + namedAs)
-        case (resolved, _) => resolved
+      val uri = Utils.resolveURI(path)
+      uri.getScheme match {
+        case "local" | "http" | "https" | "ftp" => Array(path)
+        case _ =>
+          val fs = FileSystem.get(uri, hadoopConf)
+          Option(fs.globStatus(new Path(uri))).map { status =>
+            status.filter(_.isFile).map(_.getPath.toUri.toString)
+          }.getOrElse(Array(path))
       }
     }.mkString(",")
   }
 
-  def addJarToClasspath(localJar: String, loader: MutableURLClassLoader): Unit = {
-    val uri = Utils.resolveURI(localJar)
-    uri.getScheme match {
-      case "file" | "local" =>
-        val file = new File(uri.getPath)
-        if (file.exists()) {
-          loader.addURL(file.toURI.toURL)
-        } else {
-          logWarning(s"Local jar $file does not exist, skipping.")
-        }
-      case _ =>
-        logWarning(s"Skip remote jar $uri.")
-    }
-  }
-
-  /**
-   * Merge a sequence of comma-separated file lists, some of which may be null to indicate
-   * no files, into a single comma-separated string.
-   */
-  def mergeFileLists(lists: String*): String = {
-    val merged = lists.filterNot(StringUtils.isBlank)
-      .flatMap(Utils.stringToSeq)
-    if (merged.nonEmpty) merged.mkString(",") else null
-  }
-
-  private def splitOnFragment(path: String): (URI, Option[String]) = {
-    val uri = Utils.resolveURI(path)
-    val withoutFragment = new URI(uri.getScheme, uri.getSchemeSpecificPart, null)
-    (withoutFragment, Option(uri.getFragment))
-  }
-
-  private def resolveGlobPath(uri: URI, hadoopConf: Configuration): Array[String] = {
-    uri.getScheme match {
-      case "local" | "http" | "https" | "ftp" => Array(uri.toString)
-      case _ =>
-        val fs = FileSystem.get(uri, hadoopConf)
-        Option(fs.globStatus(new Path(uri))).map { status =>
-          status.filter(_.isFile).map(_.getPath.toUri.toString)
-        }.getOrElse(Array(uri.toString))
-    }
-  }
-
 }
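
An illustrative call to the glob resolution shown above. The paths are hypothetical, and because DependencyUtils is private[deploy] the sketch is placed in that package; with the revert, the path#rename fragment handling from the removed splitOnFragment helper no longer applies.

    package org.apache.spark.deploy

    import org.apache.hadoop.conf.Configuration

    object ResolveGlobExample {
      def main(args: Array[String]): Unit = {
        val hadoopConf = new Configuration()
        // An HTTP URL is returned as-is; an HDFS glob is expanded through
        // FileSystem.globStatus; the results are re-joined with commas.
        val resolved = DependencyUtils.resolveGlobPaths(
          "http://repo.example.com/lib.jar,hdfs:///apps/jars/*.jar", hadoopConf)
        println(resolved)
      }
    }
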
diff --git a/core/src/main/scala/org/apache/spark/deploy/ExternalShuffleService.scala b/core/src/main/scala/org/apache/spark/deploy/ExternalShuffleService.scala
index b59a4fe66587c..f975fa5cb4e23 100644
--- a/core/src/main/scala/org/apache/spark/deploy/ExternalShuffleService.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/ExternalShuffleService.scala
@@ -94,11 +94,6 @@ class ExternalShuffleService(sparkConf: SparkConf, securityManager: SecurityMana
     blockHandler.applicationRemoved(appId, true /* cleanupLocalDirs */)
   }
 
-  /** Clean up all the non-shuffle files associated with an executor that has exited. */
-  def executorRemoved(executorId: String, appId: String): Unit = {
-    blockHandler.executorRemoved(executorId, appId)
-  }
-
   def stop() {
     if (server != null) {
       server.close()
diff --git a/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala b/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala
index ccb30e205ca40..7aca305783a7f 100644
--- a/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala
@@ -18,8 +18,7 @@
 package org.apache.spark.deploy
 
 import java.io.File
-import java.net.{InetAddress, URI}
-import java.nio.file.Files
+import java.net.URI
 
 import scala.collection.JavaConverters._
 import scala.collection.mutable.ArrayBuffer
@@ -40,7 +39,6 @@ object PythonRunner {
     val pyFiles = args(1)
     val otherArgs = args.slice(2, args.length)
     val sparkConf = new SparkConf()
-    val secret = Utils.createSecret(sparkConf)
     val pythonExec = sparkConf.get(PYSPARK_DRIVER_PYTHON)
       .orElse(sparkConf.get(PYSPARK_PYTHON))
       .orElse(sys.env.get("PYSPARK_DRIVER_PYTHON"))
@@ -49,17 +47,11 @@ object PythonRunner {
 
     // Format python file paths before adding them to the PYTHONPATH
     val formattedPythonFile = formatPath(pythonFile)
-    val formattedPyFiles = resolvePyFiles(formatPaths(pyFiles))
+    val formattedPyFiles = formatPaths(pyFiles)
 
     // Launch a Py4J gateway server for the process to connect to; this will let it see our
     // Java system properties and such
-    val localhost = InetAddress.getLoopbackAddress()
-    val gatewayServer = new py4j.GatewayServer.GatewayServerBuilder()
-      .authToken(secret)
-      .javaPort(0)
-      .javaAddress(localhost)
-      .callbackClient(py4j.GatewayServer.DEFAULT_PYTHON_PORT, localhost, secret)
-      .build()
+    val gatewayServer = new py4j.GatewayServer(null, 0)
     val thread = new Thread(new Runnable() {
       override def run(): Unit = Utils.logUncaughtExceptions {
         gatewayServer.start()
@@ -90,7 +82,6 @@ object PythonRunner {
     // This is equivalent to setting the -u flag; we use it because ipython doesn't support -u:
     env.put("PYTHONUNBUFFERED", "YES") // value is needed to be set to a non-empty string
     env.put("PYSPARK_GATEWAY_PORT", "" + gatewayServer.getListeningPort)
-    env.put("PYSPARK_GATEWAY_SECRET", secret)
     // pass conf spark.pyspark.python to python process, the only way to pass info to
     // python process is through environment variable.
     sparkConf.get(PYSPARK_PYTHON).foreach(env.put("PYSPARK_PYTHON", _))
@@ -154,30 +145,4 @@ object PythonRunner {
       .map { p => formatPath(p, testWindows) }
   }
 
-  /**
-   * Resolves the ".py" files. A ".py" file should not be added as-is because PYTHONPATH does
-   * not expect a file. This method creates a temporary directory and copies into it any ".py"
-   * files that exist in the given paths.
-   */
-  private def resolvePyFiles(pyFiles: Array[String]): Array[String] = {
-    lazy val dest = Utils.createTempDir(namePrefix = "localPyFiles")
-    pyFiles.flatMap { pyFile =>
-      // In case of client with submit, the python paths should be set before context
-      // initialization because the context initialization can be done later.
-      // We will copy the local ".py" files because ".py" file shouldn't be added
-      // alone but its parent directory in PYTHONPATH. See SPARK-24384.
-      if (pyFile.endsWith(".py")) {
-        val source = new File(pyFile)
-        if (source.exists() && source.isFile && source.canRead) {
-          Files.copy(source.toPath, new File(dest, source.getName).toPath)
-          Some(dest.getAbsolutePath)
-        } else {
-          // Don't have to add it if it doesn't exist or isn't readable.
-          None
-        }
-      } else {
-        Some(pyFile)
-      }
-    }.distinct
-  }
 }
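
The resolvePyFiles helper removed above exists because PYTHONPATH entries must be directories or archives, not bare ".py" files; each readable local ".py" file is copied into one temporary directory, and that directory is what lands on PYTHONPATH (SPARK-24384). A rough standalone sketch of the same idea, with the destination directory passed in explicitly instead of coming from Utils.createTempDir; names here are illustrative:

    import java.io.File
    import java.nio.file.Files

    object PyFilesSketch {
      // Copy bare ".py" entries into `dest` and return the PYTHONPATH entries to use instead.
      def resolvePyFiles(pyFiles: Array[String], dest: File): Array[String] = {
        pyFiles.flatMap { pyFile =>
          if (pyFile.endsWith(".py")) {
            val source = new File(pyFile)
            if (source.exists() && source.isFile && source.canRead) {
              Files.copy(source.toPath, new File(dest, source.getName).toPath)
              Some(dest.getAbsolutePath)
            } else {
              None // missing or unreadable files are simply skipped
            }
          } else {
            Some(pyFile) // zips, eggs and directories are already usable on PYTHONPATH
          }
        }.distinct
      }

      def main(args: Array[String]): Unit = {
        val dest = Files.createTempDirectory("localPyFiles").toFile
        println(resolvePyFiles(Array("deps.zip", "util.py"), dest).mkString(","))
      }
    }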
diff --git a/core/src/main/scala/org/apache/spark/deploy/RRunner.scala b/core/src/main/scala/org/apache/spark/deploy/RRunner.scala
index e86b362639e57..6eb53a8252205 100644
--- a/core/src/main/scala/org/apache/spark/deploy/RRunner.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/RRunner.scala
@@ -68,13 +68,10 @@ object RRunner {
     // Java system properties etc.
     val sparkRBackend = new RBackend()
     @volatile var sparkRBackendPort = 0
-    @volatile var sparkRBackendSecret: String = null
     val initialized = new Semaphore(0)
     val sparkRBackendThread = new Thread("SparkR backend") {
       override def run() {
-        val (port, authHelper) = sparkRBackend.init()
-        sparkRBackendPort = port
-        sparkRBackendSecret = authHelper.secret
+        sparkRBackendPort = sparkRBackend.init()
         initialized.release()
         sparkRBackend.run()
       }
@@ -94,7 +91,6 @@ object RRunner {
         env.put("SPARKR_PACKAGE_DIR", rPackageDir.mkString(","))
         env.put("R_PROFILE_USER",
           Seq(rPackageDir(0), "SparkR", "profile", "general.R").mkString(File.separator))
-        env.put("SPARKR_BACKEND_AUTH_SECRET", sparkRBackendSecret)
         builder.redirectErrorStream(true) // Ugly but needed for stdout and stderr to synchronize
         val process = builder.start()
 
diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala
index 8353e64a619cf..e14f9845e6db6 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala
@@ -40,7 +40,6 @@ import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenIdenti
 import org.apache.spark.{SparkConf, SparkException}
 import org.apache.spark.annotation.DeveloperApi
 import org.apache.spark.internal.Logging
-import org.apache.spark.internal.config._
 import org.apache.spark.util.Utils
 
 /**
@@ -112,9 +111,7 @@ class SparkHadoopUtil extends Logging {
    * subsystems.
    */
   def newConfiguration(conf: SparkConf): Configuration = {
-    val hadoopConf = SparkHadoopUtil.newConfiguration(conf)
-    hadoopConf.addResource(SparkHadoopUtil.SPARK_HADOOP_CONF_FILE)
-    hadoopConf
+    SparkHadoopUtil.newConfiguration(conf)
   }
 
   /**
@@ -147,8 +144,7 @@ class SparkHadoopUtil extends Logging {
   private[spark] def addDelegationTokens(tokens: Array[Byte], sparkConf: SparkConf) {
     UserGroupInformation.setConfiguration(newConfiguration(sparkConf))
     val creds = deserialize(tokens)
-    logInfo("Updating delegation tokens for current user.")
-    logDebug(s"Adding/updating delegation tokens ${dumpTokens(creds)}")
+    logInfo(s"Adding/updating delegation tokens ${dumpTokens(creds)}")
     addCurrentUserCredentials(creds)
   }
 
@@ -323,6 +319,19 @@ class SparkHadoopUtil extends Logging {
     }
   }
 
+  /**
+   * Return a fresh Hadoop configuration, bypassing the HDFS cache mechanism.
+   * This is to prevent the DFSClient from using an old cached token to connect to the NameNode.
+   */
+  private[spark] def getConfBypassingFSCache(
+      hadoopConf: Configuration,
+      scheme: String): Configuration = {
+    val newConf = new Configuration(hadoopConf)
+    val confKey = s"fs.${scheme}.impl.disable.cache"
+    newConf.setBoolean(confKey, true)
+    newConf
+  }
+
   /**
    * Dump the credentials' tokens to string values.
    *
@@ -426,27 +435,19 @@ object SparkHadoopUtil {
    */
   private[spark] val UPDATE_INPUT_METRICS_INTERVAL_RECORDS = 1000
 
-  /**
-   * Name of the file containing the gateway's Hadoop configuration, to be overlaid on top of the
-   * cluster's Hadoop config. It is up to the Spark code launching the application to create
-   * this file if it's desired. If the file doesn't exist, it will just be ignored.
-   */
-  private[spark] val SPARK_HADOOP_CONF_FILE = "__spark_hadoop_conf__.xml"
-
   def get: SparkHadoopUtil = instance
 
   /**
-   * Given an expiration date for the current set of credentials, calculate the time when new
-   * credentials should be created.
-   *
+   * Given an expiration date (e.g. for Hadoop Delegation Tokens) return the date
+   * when a given fraction of the duration until the expiration date has passed.
+   * Formula: current time + (fraction * (time until expiration))
    * @param expirationDate Drop-dead expiration date
-   * @param conf Spark configuration
-   * @return Timestamp when new credentials should be created.
+   * @param fraction Fraction of the time until expiration
+   * @return Date when the fraction of the time until expiration has passed
    */
-  private[spark] def nextCredentialRenewalTime(expirationDate: Long, conf: SparkConf): Long = {
+  private[spark] def getDateOfNextUpdate(expirationDate: Long, fraction: Double): Long = {
     val ct = System.currentTimeMillis
-    val ratio = conf.get(CREDENTIALS_RENEWAL_INTERVAL_RATIO)
-    (ct + (ratio * (expirationDate - ct))).toLong
+    (ct + (fraction * (expirationDate - ct))).toLong
   }
 
   /**
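
The getDateOfNextUpdate helper restored above is just the linear interpolation now + fraction * (expiration - now). A small worked example of that arithmetic; the 24-hour token lifetime and the 0.75 fraction are made-up figures:

    object RenewalTimeSketch {
      def getDateOfNextUpdate(expirationDate: Long, fraction: Double): Long = {
        val ct = System.currentTimeMillis
        (ct + (fraction * (expirationDate - ct))).toLong
      }

      def main(args: Array[String]): Unit = {
        val now = System.currentTimeMillis
        val expiration = now + 24L * 60 * 60 * 1000 // token valid for 24 hours
        val next = getDateOfNextUpdate(expiration, 0.75)
        // With fraction = 0.75 the next renewal lands roughly 18 hours from now.
        println((next - now) / (60 * 60 * 1000.0))
      }
    }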
diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
index a46af26feb061..1e381965c52ba 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
@@ -22,7 +22,6 @@ import java.lang.reflect.{InvocationTargetException, Modifier, UndeclaredThrowab
 import java.net.URL
 import java.security.PrivilegedExceptionAction
 import java.text.ParseException
-import java.util.UUID
 
 import scala.annotation.tailrec
 import scala.collection.mutable.{ArrayBuffer, HashMap, Map}
@@ -59,7 +58,7 @@ import org.apache.spark.util._
  */
 private[deploy] object SparkSubmitAction extends Enumeration {
   type SparkSubmitAction = Value
-  val SUBMIT, KILL, REQUEST_STATUS, PRINT_VERSION = Value
+  val SUBMIT, KILL, REQUEST_STATUS = Value
 }
 
 /**
@@ -68,32 +67,78 @@ private[deploy] object SparkSubmitAction extends Enumeration {
  * This program handles setting up the classpath with relevant Spark dependencies and provides
  * a layer over the different cluster managers and deploy modes that Spark supports.
  */
-private[spark] class SparkSubmit extends Logging {
+object SparkSubmit extends CommandLineUtils with Logging {
 
   import DependencyUtils._
-  import SparkSubmit._
 
-  def doSubmit(args: Array[String]): Unit = {
+  // Cluster managers
+  private val YARN = 1
+  private val STANDALONE = 2
+  private val MESOS = 4
+  private val LOCAL = 8
+  private val KUBERNETES = 16
+  private val ALL_CLUSTER_MGRS = YARN | STANDALONE | MESOS | LOCAL | KUBERNETES
+
+  // Deploy modes
+  private val CLIENT = 1
+  private val CLUSTER = 2
+  private val ALL_DEPLOY_MODES = CLIENT | CLUSTER
+
+  // Special primary resource names that represent shells rather than application jars.
+  private val SPARK_SHELL = "spark-shell"
+  private val PYSPARK_SHELL = "pyspark-shell"
+  private val SPARKR_SHELL = "sparkr-shell"
+  private val SPARKR_PACKAGE_ARCHIVE = "sparkr.zip"
+  private val R_PACKAGE_ARCHIVE = "rpkg.zip"
+
+  private val CLASS_NOT_FOUND_EXIT_STATUS = 101
+
+  // Following constants are visible for testing.
+  private[deploy] val YARN_CLUSTER_SUBMIT_CLASS =
+    "org.apache.spark.deploy.yarn.YarnClusterApplication"
+  private[deploy] val REST_CLUSTER_SUBMIT_CLASS = classOf[RestSubmissionClientApp].getName()
+  private[deploy] val STANDALONE_CLUSTER_SUBMIT_CLASS = classOf[ClientApp].getName()
+  private[deploy] val KUBERNETES_CLUSTER_SUBMIT_CLASS =
+    "org.apache.spark.deploy.k8s.submit.KubernetesClientApplication"
+
+  // scalastyle:off println
+  private[spark] def printVersionAndExit(): Unit = {
+    printStream.println("""Welcome to
+      ____              __
+     / __/__  ___ _____/ /__
+    _\ \/ _ \/ _ `/ __/  '_/
+   /___/ .__/\_,_/_/ /_/\_\   version %s
+      /_/
+                        """.format(SPARK_VERSION))
+    printStream.println("Using Scala %s, %s, %s".format(
+      Properties.versionString, Properties.javaVmName, Properties.javaVersion))
+    printStream.println("Branch %s".format(SPARK_BRANCH))
+    printStream.println("Compiled by user %s on %s".format(SPARK_BUILD_USER, SPARK_BUILD_DATE))
+    printStream.println("Revision %s".format(SPARK_REVISION))
+    printStream.println("Url %s".format(SPARK_REPO_URL))
+    printStream.println("Type --help for more information.")
+    exitFn(0)
+  }
+  // scalastyle:on println
+
+  override def main(args: Array[String]): Unit = {
     // Initialize logging if it hasn't been done yet. Keep track of whether logging needs to
     // be reset before the application starts.
     val uninitLog = initializeLogIfNecessary(true, silent = true)
 
-    val appArgs = parseArguments(args)
+    val appArgs = new SparkSubmitArguments(args)
     if (appArgs.verbose) {
-      logInfo(appArgs.toString)
+      // scalastyle:off println
+      printStream.println(appArgs)
+      // scalastyle:on println
     }
     appArgs.action match {
       case SparkSubmitAction.SUBMIT => submit(appArgs, uninitLog)
       case SparkSubmitAction.KILL => kill(appArgs)
       case SparkSubmitAction.REQUEST_STATUS => requestStatus(appArgs)
-      case SparkSubmitAction.PRINT_VERSION => printVersion()
     }
   }
 
-  protected def parseArguments(args: Array[String]): SparkSubmitArguments = {
-    new SparkSubmitArguments(args)
-  }
-
   /**
    * Kill an existing submission using the REST protocol. Standalone and Mesos cluster mode only.
    */
@@ -111,24 +156,6 @@ private[spark] class SparkSubmit extends Logging {
       .requestSubmissionStatus(args.submissionToRequestStatusFor)
   }
 
-  /** Print version information to the log. */
-  private def printVersion(): Unit = {
-    logInfo("""Welcome to
-      ____              __
-     / __/__  ___ _____/ /__
-    _\ \/ _ \/ _ `/ __/  '_/
-   /___/ .__/\_,_/_/ /_/\_\   version %s
-      /_/
-                        """.format(SPARK_VERSION))
-    logInfo("Using Scala %s, %s, %s".format(
-      Properties.versionString, Properties.javaVmName, Properties.javaVersion))
-    logInfo(s"Branch $SPARK_BRANCH")
-    logInfo(s"Compiled by user $SPARK_BUILD_USER on $SPARK_BUILD_DATE")
-    logInfo(s"Revision $SPARK_REVISION")
-    logInfo(s"Url $SPARK_REPO_URL")
-    logInfo("Type --help for more information.")
-  }
-
   /**
    * Submit the application using the provided parameters.
    *
@@ -158,7 +185,10 @@ private[spark] class SparkSubmit extends Logging {
             // makes the message printed to the output by the JVM not very helpful. Instead,
             // detect exceptions with empty stack traces here, and treat them differently.
             if (e.getStackTrace().length == 0) {
-              error(s"ERROR: ${e.getClass().getName()}: ${e.getMessage()}")
+              // scalastyle:off println
+              printStream.println(s"ERROR: ${e.getClass().getName()}: ${e.getMessage()}")
+              // scalastyle:on println
+              exitFn(1)
             } else {
               throw e
             }
@@ -180,11 +210,14 @@ private[spark] class SparkSubmit extends Logging {
     // to use the legacy gateway if the master endpoint turns out to be not a REST server.
     if (args.isStandaloneCluster && args.useRest) {
       try {
-        logInfo("Running Spark using the REST application submission protocol.")
+        // scalastyle:off println
+        printStream.println("Running Spark using the REST application submission protocol.")
+        // scalastyle:on println
+        doRunMain()
       } catch {
         // Fail over to use the legacy submission gateway
         case e: SubmitRestConnectionException =>
-          logWarning(s"Master endpoint ${args.master} was not a REST server. " +
+          printWarning(s"Master endpoint ${args.master} was not a REST server. " +
             "Falling back to legacy submission gateway instead.")
           args.useRest = false
           submit(args, false)
@@ -222,7 +255,7 @@ private[spark] class SparkSubmit extends Logging {
     val clusterManager: Int = args.master match {
       case "yarn" => YARN
       case "yarn-client" | "yarn-cluster" =>
-        logWarning(s"Master ${args.master} is deprecated since 2.0." +
+        printWarning(s"Master ${args.master} is deprecated since 2.0." +
           " Please use master \"yarn\" with specified deploy mode instead.")
         YARN
       case m if m.startsWith("spark") => STANDALONE
@@ -230,7 +263,7 @@ private[spark] class SparkSubmit extends Logging {
       case m if m.startsWith("k8s") => KUBERNETES
       case m if m.startsWith("local") => LOCAL
       case _ =>
-        error("Master must either be yarn or start with spark, mesos, k8s, or local")
+        printErrorAndExit("Master must either be yarn or start with spark, mesos, k8s, or local")
         -1
     }
 
@@ -238,9 +271,7 @@ private[spark] class SparkSubmit extends Logging {
     var deployMode: Int = args.deployMode match {
       case "client" | null => CLIENT
       case "cluster" => CLUSTER
-      case _ =>
-        error("Deploy mode must be either client or cluster")
-        -1
+      case _ => printErrorAndExit("Deploy mode must be either client or cluster"); -1
     }
 
     // Because the deprecated way of specifying "yarn-cluster" and "yarn-client" encapsulate both
@@ -252,16 +283,16 @@ private[spark] class SparkSubmit extends Logging {
           deployMode = CLUSTER
           args.master = "yarn"
         case ("yarn-cluster", "client") =>
-          error("Client deploy mode is not compatible with master \"yarn-cluster\"")
+          printErrorAndExit("Client deploy mode is not compatible with master \"yarn-cluster\"")
         case ("yarn-client", "cluster") =>
-          error("Cluster deploy mode is not compatible with master \"yarn-client\"")
+          printErrorAndExit("Cluster deploy mode is not compatible with master \"yarn-client\"")
         case (_, mode) =>
           args.master = "yarn"
       }
 
       // Make sure YARN is included in our build if we're trying to use it
       if (!Utils.classIsLoadable(YARN_CLUSTER_SUBMIT_CLASS) && !Utils.isTesting) {
-        error(
+        printErrorAndExit(
           "Could not load YARN classes. " +
           "This copy of Spark may not have been compiled with YARN support.")
       }
@@ -271,7 +302,7 @@ private[spark] class SparkSubmit extends Logging {
       args.master = Utils.checkAndGetK8sMasterUrl(args.master)
       // Make sure KUBERNETES is included in our build if we're trying to use it
       if (!Utils.classIsLoadable(KUBERNETES_CLUSTER_SUBMIT_CLASS) && !Utils.isTesting) {
-        error(
+        printErrorAndExit(
           "Could not load KUBERNETES classes. " +
             "This copy of Spark may not have been compiled with KUBERNETES support.")
       }
@@ -280,23 +311,25 @@ private[spark] class SparkSubmit extends Logging {
     // Fail fast, the following modes are not supported or applicable
     (clusterManager, deployMode) match {
       case (STANDALONE, CLUSTER) if args.isPython =>
-        error("Cluster deploy mode is currently not supported for python " +
+        printErrorAndExit("Cluster deploy mode is currently not supported for python " +
           "applications on standalone clusters.")
       case (STANDALONE, CLUSTER) if args.isR =>
-        error("Cluster deploy mode is currently not supported for R " +
+        printErrorAndExit("Cluster deploy mode is currently not supported for R " +
           "applications on standalone clusters.")
       case (KUBERNETES, _) if args.isPython =>
-        error("Python applications are currently not supported for Kubernetes.")
+        printErrorAndExit("Python applications are currently not supported for Kubernetes.")
       case (KUBERNETES, _) if args.isR =>
-        error("R applications are currently not supported for Kubernetes.")
+        printErrorAndExit("R applications are currently not supported for Kubernetes.")
+      case (KUBERNETES, CLIENT) =>
+        printErrorAndExit("Client mode is currently not supported for Kubernetes.")
       case (LOCAL, CLUSTER) =>
-        error("Cluster deploy mode is not compatible with master \"local\"")
+        printErrorAndExit("Cluster deploy mode is not compatible with master \"local\"")
       case (_, CLUSTER) if isShell(args.primaryResource) =>
-        error("Cluster deploy mode is not applicable to Spark shells.")
+        printErrorAndExit("Cluster deploy mode is not applicable to Spark shells.")
       case (_, CLUSTER) if isSqlShell(args.mainClass) =>
-        error("Cluster deploy mode is not applicable to Spark SQL shell.")
+        printErrorAndExit("Cluster deploy mode is not applicable to Spark SQL shell.")
       case (_, CLUSTER) if isThriftServer(args.mainClass) =>
-        error("Cluster deploy mode is not applicable to Spark Thrift server.")
+        printErrorAndExit("Cluster deploy mode is not applicable to Spark Thrift server.")
       case _ =>
     }
 
@@ -310,14 +343,12 @@ private[spark] class SparkSubmit extends Logging {
     val isMesosCluster = clusterManager == MESOS && deployMode == CLUSTER
     val isStandAloneCluster = clusterManager == STANDALONE && deployMode == CLUSTER
     val isKubernetesCluster = clusterManager == KUBERNETES && deployMode == CLUSTER
-    val isMesosClient = clusterManager == MESOS && deployMode == CLIENT
 
     if (!isMesosCluster && !isStandAloneCluster) {
       // Resolve maven dependencies if there are any and add classpath to jars. Add them to py-files
       // too for packages that include Python code
       val resolvedMavenCoordinates = DependencyUtils.resolveMavenDependencies(
-        args.packagesExclusions, args.packages, args.repositories, args.ivyRepoPath,
-        args.ivySettingsPath)
+        args.packagesExclusions, args.packages, args.repositories, args.ivyRepoPath)
 
       if (!StringUtils.isBlank(resolvedMavenCoordinates)) {
         args.jars = mergeFileLists(args.jars, resolvedMavenCoordinates)
@@ -338,7 +369,7 @@ private[spark] class SparkSubmit extends Logging {
     val targetDir = Utils.createTempDir()
 
     // assure a keytab is available from any place in a JVM
-    if (clusterManager == YARN || clusterManager == LOCAL || isMesosClient) {
+    if (clusterManager == YARN || clusterManager == LOCAL || clusterManager == MESOS) {
       if (args.principal != null) {
         if (args.keytab != null) {
           require(new File(args.keytab).exists(), s"Keytab file: ${args.keytab} does not exist")
@@ -430,15 +461,18 @@ private[spark] class SparkSubmit extends Logging {
         // Usage: PythonAppRunner <main python file> <extra python files> [app arguments]
         args.mainClass = "org.apache.spark.deploy.PythonRunner"
         args.childArgs = ArrayBuffer(localPrimaryResource, localPyFiles) ++ args.childArgs
+        if (clusterManager != YARN) {
+          // The YARN backend distributes the primary file differently, so don't merge it.
+          args.files = mergeFileLists(args.files, args.primaryResource)
+        }
       }
       if (clusterManager != YARN) {
         // The YARN backend handles python files differently, so don't merge the lists.
         args.files = mergeFileLists(args.files, args.pyFiles)
       }
-    }
-
-    if (localPyFiles != null) {
-      sparkConf.set("spark.submit.pyFiles", localPyFiles)
+      if (localPyFiles != null) {
+        sparkConf.set("spark.submit.pyFiles", localPyFiles)
+      }
     }
 
     // In YARN mode for an R app, add the SparkR package archive and the R package
@@ -447,11 +481,11 @@ private[spark] class SparkSubmit extends Logging {
     if (args.isR && clusterManager == YARN) {
       val sparkRPackagePath = RUtils.localSparkRPackagePath
       if (sparkRPackagePath.isEmpty) {
-        error("SPARK_HOME does not exist for R application in YARN mode.")
+        printErrorAndExit("SPARK_HOME does not exist for R application in YARN mode.")
       }
       val sparkRPackageFile = new File(sparkRPackagePath.get, SPARKR_PACKAGE_ARCHIVE)
       if (!sparkRPackageFile.exists()) {
-        error(s"$SPARKR_PACKAGE_ARCHIVE does not exist for R application in YARN mode.")
+        printErrorAndExit(s"$SPARKR_PACKAGE_ARCHIVE does not exist for R application in YARN mode.")
       }
       val sparkRPackageURI = Utils.resolveURI(sparkRPackageFile.getAbsolutePath).toString
 
@@ -464,7 +498,7 @@ private[spark] class SparkSubmit extends Logging {
         val rPackageFile =
           RPackageUtils.zipRLibraries(new File(RUtils.rPackages.get), R_PACKAGE_ARCHIVE)
         if (!rPackageFile.exists()) {
-          error("Failed to zip all the built R packages.")
+          printErrorAndExit("Failed to zip all the built R packages.")
         }
 
         val rPackageURI = Utils.resolveURI(rPackageFile.getAbsolutePath).toString
@@ -475,12 +509,12 @@ private[spark] class SparkSubmit extends Logging {
 
     // TODO: Support distributing R packages with standalone cluster
     if (args.isR && clusterManager == STANDALONE && !RUtils.rPackages.isEmpty) {
-      error("Distributing R packages with standalone cluster is not supported.")
+      printErrorAndExit("Distributing R packages with standalone cluster is not supported.")
     }
 
     // TODO: Support distributing R packages with mesos cluster
     if (args.isR && clusterManager == MESOS && !RUtils.rPackages.isEmpty) {
-      error("Distributing R packages with mesos cluster is not supported.")
+      printErrorAndExit("Distributing R packages with mesos cluster is not supported.")
     }
 
     // If we're running an R app, set the main class to our specific R runner
@@ -753,7 +787,9 @@ private[spark] class SparkSubmit extends Logging {
   private def setRMPrincipal(sparkConf: SparkConf): Unit = {
     val shortUserName = UserGroupInformation.getCurrentUser.getShortUserName
     val key = s"spark.hadoop.${YarnConfiguration.RM_PRINCIPAL}"
-    logInfo(s"Setting ${key} to ${shortUserName}")
+    // scalastyle:off println
+    printStream.println(s"Setting ${key} to ${shortUserName}")
+    // scalastyle:on println
     sparkConf.set(key, shortUserName)
   }
 
@@ -769,14 +805,16 @@ private[spark] class SparkSubmit extends Logging {
       sparkConf: SparkConf,
       childMainClass: String,
       verbose: Boolean): Unit = {
+    // scalastyle:off println
     if (verbose) {
-      logInfo(s"Main class:\n$childMainClass")
-      logInfo(s"Arguments:\n${childArgs.mkString("\n")}")
+      printStream.println(s"Main class:\n$childMainClass")
+      printStream.println(s"Arguments:\n${childArgs.mkString("\n")}")
       // sysProps may contain sensitive information, so redact before printing
-      logInfo(s"Spark config:\n${Utils.redact(sparkConf.getAll.toMap).mkString("\n")}")
-      logInfo(s"Classpath elements:\n${childClasspath.mkString("\n")}")
-      logInfo("\n")
+      printStream.println(s"Spark config:\n${Utils.redact(sparkConf.getAll.toMap).mkString("\n")}")
+      printStream.println(s"Classpath elements:\n${childClasspath.mkString("\n")}")
+      printStream.println("\n")
     }
+    // scalastyle:on println
 
     val loader =
       if (sparkConf.get(DRIVER_USER_CLASS_PATH_FIRST)) {
@@ -798,19 +836,23 @@ private[spark] class SparkSubmit extends Logging {
       mainClass = Utils.classForName(childMainClass)
     } catch {
       case e: ClassNotFoundException =>
-        logWarning(s"Failed to load $childMainClass.", e)
+        e.printStackTrace(printStream)
         if (childMainClass.contains("thriftserver")) {
-          logInfo(s"Failed to load main class $childMainClass.")
-          logInfo("You need to build Spark with -Phive and -Phive-thriftserver.")
+          // scalastyle:off println
+          printStream.println(s"Failed to load main class $childMainClass.")
+          printStream.println("You need to build Spark with -Phive and -Phive-thriftserver.")
+          // scalastyle:on println
         }
-        throw new SparkUserAppException(CLASS_NOT_FOUND_EXIT_STATUS)
+        System.exit(CLASS_NOT_FOUND_EXIT_STATUS)
       case e: NoClassDefFoundError =>
-        logWarning(s"Failed to load $childMainClass: ${e.getMessage()}")
+        e.printStackTrace(printStream)
         if (e.getMessage.contains("org/apache/hadoop/hive")) {
-          logInfo(s"Failed to load hive class.")
-          logInfo("You need to build Spark with -Phive and -Phive-thriftserver.")
+          // scalastyle:off println
+          printStream.println(s"Failed to load hive class.")
+          printStream.println("You need to build Spark with -Phive and -Phive-thriftserver.")
+          // scalastyle:on println
         }
-        throw new SparkUserAppException(CLASS_NOT_FOUND_EXIT_STATUS)
+        System.exit(CLASS_NOT_FOUND_EXIT_STATUS)
     }
 
     val app: SparkApplication = if (classOf[SparkApplication].isAssignableFrom(mainClass)) {
@@ -818,7 +860,7 @@ private[spark] class SparkSubmit extends Logging {
     } else {
       // SPARK-4170
       if (classOf[scala.App].isAssignableFrom(mainClass)) {
-        logWarning("Subclasses of scala.App may not work correctly. Use a main() method instead.")
+        printWarning("Subclasses of scala.App may not work correctly. Use a main() method instead.")
       }
       new JavaMainApplication(mainClass)
     }
@@ -837,90 +879,29 @@ private[spark] class SparkSubmit extends Logging {
       app.start(childArgs.toArray, sparkConf)
     } catch {
       case t: Throwable =>
-        throw findCause(t)
-    }
-  }
-
-  /** Throw a SparkException with the given error message. */
-  private def error(msg: String): Unit = throw new SparkException(msg)
-
-}
-
-
-/**
- * This entry point is used by the launcher library to start in-process Spark applications.
- */
-private[spark] object InProcessSparkSubmit {
-
-  def main(args: Array[String]): Unit = {
-    val submit = new SparkSubmit()
-    submit.doSubmit(args)
-  }
-
-}
-
-object SparkSubmit extends CommandLineUtils with Logging {
-
-  // Cluster managers
-  private val YARN = 1
-  private val STANDALONE = 2
-  private val MESOS = 4
-  private val LOCAL = 8
-  private val KUBERNETES = 16
-  private val ALL_CLUSTER_MGRS = YARN | STANDALONE | MESOS | LOCAL | KUBERNETES
-
-  // Deploy modes
-  private val CLIENT = 1
-  private val CLUSTER = 2
-  private val ALL_DEPLOY_MODES = CLIENT | CLUSTER
-
-  // Special primary resource names that represent shells rather than application jars.
-  private val SPARK_SHELL = "spark-shell"
-  private val PYSPARK_SHELL = "pyspark-shell"
-  private val SPARKR_SHELL = "sparkr-shell"
-  private val SPARKR_PACKAGE_ARCHIVE = "sparkr.zip"
-  private val R_PACKAGE_ARCHIVE = "rpkg.zip"
-
-  private val CLASS_NOT_FOUND_EXIT_STATUS = 101
-
-  // Following constants are visible for testing.
-  private[deploy] val YARN_CLUSTER_SUBMIT_CLASS =
-    "org.apache.spark.deploy.yarn.YarnClusterApplication"
-  private[deploy] val REST_CLUSTER_SUBMIT_CLASS = classOf[RestSubmissionClientApp].getName()
-  private[deploy] val STANDALONE_CLUSTER_SUBMIT_CLASS = classOf[ClientApp].getName()
-  private[deploy] val KUBERNETES_CLUSTER_SUBMIT_CLASS =
-    "org.apache.spark.deploy.k8s.submit.KubernetesClientApplication"
-
-  override def main(args: Array[String]): Unit = {
-    val submit = new SparkSubmit() {
-      self =>
-
-      override protected def parseArguments(args: Array[String]): SparkSubmitArguments = {
-        new SparkSubmitArguments(args) {
-          override protected def logInfo(msg: => String): Unit = self.logInfo(msg)
+        findCause(t) match {
+          case SparkUserAppException(exitCode) =>
+            System.exit(exitCode)
 
-          override protected def logWarning(msg: => String): Unit = self.logWarning(msg)
+          case t: Throwable =>
+            throw t
         }
-      }
-
-      override protected def logInfo(msg: => String): Unit = printMessage(msg)
-
-      override protected def logWarning(msg: => String): Unit = printMessage(s"Warning: $msg")
+    }
+  }
 
-      override def doSubmit(args: Array[String]): Unit = {
-        try {
-          super.doSubmit(args)
-        } catch {
-          case e: SparkUserAppException =>
-            exitFn(e.exitCode)
-          case e: SparkException =>
-            printErrorAndExit(e.getMessage())
+  private[deploy] def addJarToClasspath(localJar: String, loader: MutableURLClassLoader) {
+    val uri = Utils.resolveURI(localJar)
+    uri.getScheme match {
+      case "file" | "local" =>
+        val file = new File(uri.getPath)
+        if (file.exists()) {
+          loader.addURL(file.toURI.toURL)
+        } else {
+          printWarning(s"Local jar $file does not exist, skipping.")
         }
-      }
-
+      case _ =>
+        printWarning(s"Skip remote jar $uri.")
     }
-
-    submit.doSubmit(args)
   }
 
   /**
@@ -969,6 +950,17 @@ object SparkSubmit extends CommandLineUtils with Logging {
     res == SparkLauncher.NO_RESOURCE
   }
 
+  /**
+   * Merge a sequence of comma-separated file lists, some of which may be null to indicate
+   * no files, into a single comma-separated string.
+   */
+  private[deploy] def mergeFileLists(lists: String*): String = {
+    val merged = lists.filterNot(StringUtils.isBlank)
+                      .flatMap(_.split(","))
+                      .mkString(",")
+    if (merged == "") null else merged
+  }
+
 }
 
 /** Provides utility functions to be used inside SparkSubmit. */
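
The mergeFileLists helper restored a few lines above collapses several possibly-null comma-separated lists into one, falling back to null when nothing is left. A standalone sketch with a couple of example inputs; only the Apache Commons StringUtils import is assumed:

    import org.apache.commons.lang3.StringUtils

    object MergeFileListsSketch {
      def mergeFileLists(lists: String*): String = {
        val merged = lists.filterNot(StringUtils.isBlank)
          .flatMap(_.split(","))
          .mkString(",")
        if (merged == "") null else merged
      }

      def main(args: Array[String]): Unit = {
        println(mergeFileLists("a.jar,b.jar", null, "", "c.jar")) // a.jar,b.jar,c.jar
        println(mergeFileLists(null, " "))                        // null
      }
    }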
@@ -996,12 +988,12 @@ private[spark] object SparkSubmitUtils {
     override def toString: String = s"$groupId:$artifactId:$version"
   }
 
-  /**
-   * Extracts maven coordinates from a comma-delimited string. Coordinates should be provided
-   * in the format `groupId:artifactId:version` or `groupId/artifactId:version`.
-   * @param coordinates Comma-delimited string of maven coordinates
-   * @return Sequence of Maven coordinates
-   */
+/**
+ * Extracts maven coordinates from a comma-delimited string. Coordinates should be provided
+ * in the format `groupId:artifactId:version` or `groupId/artifactId:version`.
+ * @param coordinates Comma-delimited string of maven coordinates
+ * @return Sequence of Maven coordinates
+ */
   def extractMavenCoordinates(coordinates: String): Seq[MavenCoordinate] = {
     coordinates.split(",").map { p =>
       val splits = p.replace("/", ":").split(":")
@@ -1203,33 +1195,7 @@ private[spark] object SparkSubmitUtils {
 
   /** A nice function to use in tests as well. Values are dummy strings. */
   def getModuleDescriptor: DefaultModuleDescriptor = DefaultModuleDescriptor.newDefaultInstance(
-    // Include UUID in module name, so multiple clients resolving maven coordinate at the same time
-    // do not modify the same resolution file concurrently.
-    ModuleRevisionId.newInstance("org.apache.spark",
-      s"spark-submit-parent-${UUID.randomUUID.toString}",
-      "1.0"))
-
-  /**
-   * Clear ivy resolution from current launch. The resolution file is usually at
-   * ~/.ivy2/org.apache.spark-spark-submit-parent-$UUID-default.xml,
-   * ~/.ivy2/resolved-org.apache.spark-spark-submit-parent-$UUID-1.0.xml, and
-   * ~/.ivy2/resolved-org.apache.spark-spark-submit-parent-$UUID-1.0.properties.
-   * Since each launch will have its own resolution files created, delete them after
-   * each resolution to prevent accumulation of these files in the ivy cache dir.
-   */
-  private def clearIvyResolutionFiles(
-      mdId: ModuleRevisionId,
-      ivySettings: IvySettings,
-      ivyConfName: String): Unit = {
-    val currentResolutionFiles = Seq(
-      s"${mdId.getOrganisation}-${mdId.getName}-$ivyConfName.xml",
-      s"resolved-${mdId.getOrganisation}-${mdId.getName}-${mdId.getRevision}.xml",
-      s"resolved-${mdId.getOrganisation}-${mdId.getName}-${mdId.getRevision}.properties"
-    )
-    currentResolutionFiles.foreach { filename =>
-      new File(ivySettings.getDefaultCache, filename).delete()
-    }
-  }
+    ModuleRevisionId.newInstance("org.apache.spark", "spark-submit-parent", "1.0"))
 
   /**
    * Resolves any dependencies that were supplied through maven coordinates
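
The comment block removed above explains why the UUID-suffixed module name and the per-launch cleanup exist: without them, concurrent spark-submit invocations contend on the same resolution files in the Ivy cache. The cleanup itself boils down to deleting three files derived from the module revision id; a hypothetical flattened sketch, with plain strings standing in for ModuleRevisionId and IvySettings:

    import java.io.File

    object IvyCleanupSketch {
      // Delete the per-launch resolution files Ivy leaves in its default cache directory.
      def clearIvyResolutionFiles(org: String, name: String, rev: String,
                                  ivyConfName: String, defaultCache: File): Unit = {
        Seq(
          s"$org-$name-$ivyConfName.xml",
          s"resolved-$org-$name-$rev.xml",
          s"resolved-$org-$name-$rev.properties"
        ).foreach { filename =>
          new File(defaultCache, filename).delete()
        }
      }
    }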
@@ -1280,6 +1246,14 @@ private[spark] object SparkSubmitUtils {
 
         // A Module descriptor must be specified. Entries are dummy strings
         val md = getModuleDescriptor
+        // clear ivy resolution from previous launches. The resolution file is usually at
+        // ~/.ivy2/org.apache.spark-spark-submit-parent-default.xml. In between runs, this file
+        // leads to confusion with Ivy when the files can no longer be found at the repository
+        // declared in that file.
+        val mdId = md.getModuleRevisionId
+        val previousResolution = new File(ivySettings.getDefaultCache,
+          s"${mdId.getOrganisation}-${mdId.getName}-$ivyConfName.xml")
+        if (previousResolution.exists) previousResolution.delete
 
         md.setDefaultConf(ivyConfName)
 
@@ -1300,10 +1274,7 @@ private[spark] object SparkSubmitUtils {
           packagesDirectory.getAbsolutePath + File.separator +
             "[organization]_[artifact]-[revision](-[classifier]).[ext]",
           retrieveOptions.setConfs(Array(ivyConfName)))
-        val paths = resolveDependencyPaths(rr.getArtifacts.toArray, packagesDirectory)
-        val mdId = md.getModuleRevisionId
-        clearIvyResolutionFiles(mdId, ivySettings, ivyConfName)
-        paths
+        resolveDependencyPaths(rr.getArtifacts.toArray, packagesDirectory)
       } finally {
         System.setOut(sysOut)
       }
@@ -1321,13 +1292,6 @@ private[spark] object SparkSubmitUtils {
     rule
   }
 
-  def parseSparkConfProperty(pair: String): (String, String) = {
-    pair.split("=", 2).toSeq match {
-      case Seq(k, v) => (k, v)
-      case _ => throw new SparkException(s"Spark config without '=': $pair")
-    }
-  }
-
 }
 
 /**
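
The parseSparkConfProperty helper removed at the end of this file handles "--conf key=value" pairs by splitting on the first '=' only, so values that themselves contain '=' survive intact. A one-liner sketch of the same behaviour; IllegalArgumentException stands in for Spark's SparkException here:

    object ConfPropertySketch {
      def parseSparkConfProperty(pair: String): (String, String) =
        pair.split("=", 2) match {
          case Array(k, v) => (k, v)
          case _ => throw new IllegalArgumentException(s"Spark config without '=': $pair")
        }

      def main(args: Array[String]): Unit = {
        // Splitting with limit 2 keeps the '=' inside the value intact.
        println(parseSparkConfProperty("spark.executor.extraJavaOptions=-Dlevel=DEBUG"))
        // (spark.executor.extraJavaOptions,-Dlevel=DEBUG)
      }
    }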
diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
index fb232101114b9..9db7a1fe3106d 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
@@ -17,7 +17,7 @@
 
 package org.apache.spark.deploy
 
-import java.io.{ByteArrayOutputStream, File, PrintStream}
+import java.io.{ByteArrayOutputStream, PrintStream}
 import java.lang.reflect.InvocationTargetException
 import java.net.URI
 import java.nio.charset.StandardCharsets
@@ -29,19 +29,18 @@ import scala.collection.mutable.{ArrayBuffer, HashMap}
 import scala.io.Source
 import scala.util.Try
 
-import org.apache.spark.{SparkException, SparkUserAppException}
 import org.apache.spark.deploy.SparkSubmitAction._
-import org.apache.spark.internal.Logging
 import org.apache.spark.launcher.SparkSubmitArgumentsParser
 import org.apache.spark.network.util.JavaUtils
 import org.apache.spark.util.Utils
 
+
 /**
  * Parses and encapsulates arguments from the spark-submit script.
  * The env argument is used for testing.
  */
 private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, String] = sys.env)
-  extends SparkSubmitArgumentsParser with Logging {
+  extends SparkSubmitArgumentsParser {
   var master: String = null
   var deployMode: String = null
   var executorMemory: String = null
@@ -64,7 +63,6 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S
   var packages: String = null
   var repositories: String = null
   var ivyRepoPath: String = null
-  var ivySettingsPath: Option[String] = None
   var packagesExclusions: String = null
   var verbose: Boolean = false
   var isPython: Boolean = false
@@ -75,7 +73,6 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S
   var proxyUser: String = null
   var principal: String = null
   var keytab: String = null
-  private var dynamicAllocationEnabled: Boolean = false
 
   // Standalone cluster mode only
   var supervise: Boolean = false
@@ -87,9 +84,8 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S
   /** Default properties present in the currently defined defaults file. */
   lazy val defaultSparkProperties: HashMap[String, String] = {
     val defaultProperties = new HashMap[String, String]()
-    if (verbose) {
-      logInfo(s"Using properties file: $propertiesFile")
-    }
+    // scalastyle:off println
+    if (verbose) SparkSubmit.printStream.println(s"Using properties file: $propertiesFile")
     Option(propertiesFile).foreach { filename =>
       val properties = Utils.getPropertiesFromFile(filename)
       properties.foreach { case (k, v) =>
@@ -98,16 +94,21 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S
       // Property files may contain sensitive information, so redact before printing
       if (verbose) {
         Utils.redact(properties).foreach { case (k, v) =>
-          logInfo(s"Adding default property: $k=$v")
+          SparkSubmit.printStream.println(s"Adding default property: $k=$v")
         }
       }
     }
+    // scalastyle:on println
     defaultProperties
   }
 
   // Set parameters from command line arguments
-  parse(args.asJava)
-
+  try {
+    parse(args.asJava)
+  } catch {
+    case e: IllegalArgumentException =>
+      SparkSubmit.printErrorAndExit(e.getMessage())
+  }
   // Populate `sparkProperties` map from properties file
   mergeDefaultSparkProperties()
   // Remove keys that don't start with "spark." from `sparkProperties`.
@@ -139,7 +140,7 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S
     sparkProperties.foreach { case (k, v) =>
       if (!k.startsWith("spark.")) {
         sparkProperties -= k
-        logWarning(s"Ignoring non-spark config property: $k=$v")
+        SparkSubmit.printWarning(s"Ignoring non-spark config property: $k=$v")
       }
     }
   }
@@ -182,9 +183,7 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S
     name = Option(name).orElse(sparkProperties.get("spark.app.name")).orNull
     jars = Option(jars).orElse(sparkProperties.get("spark.jars")).orNull
     files = Option(files).orElse(sparkProperties.get("spark.files")).orNull
-    pyFiles = Option(pyFiles).orElse(sparkProperties.get("spark.submit.pyFiles")).orNull
     ivyRepoPath = sparkProperties.get("spark.jars.ivy").orNull
-    ivySettingsPath = sparkProperties.get("spark.jars.ivySettings")
     packages = Option(packages).orElse(sparkProperties.get("spark.jars.packages")).orNull
     packagesExclusions = Option(packagesExclusions)
       .orElse(sparkProperties.get("spark.jars.excludes")).orNull
@@ -199,8 +198,6 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S
     queue = Option(queue).orElse(sparkProperties.get("spark.yarn.queue")).orNull
     keytab = Option(keytab).orElse(sparkProperties.get("spark.yarn.keytab")).orNull
     principal = Option(principal).orElse(sparkProperties.get("spark.yarn.principal")).orNull
-    dynamicAllocationEnabled =
-      sparkProperties.get("spark.dynamicAllocation.enabled").exists("true".equalsIgnoreCase)
 
     // Try to set main class from JAR if no --class argument is given
     if (mainClass == null && !isPython && !isR && primaryResource != null) {
@@ -216,10 +213,10 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S
             }
           } catch {
             case _: Exception =>
-              error(s"Cannot load main class from JAR $primaryResource")
+              SparkSubmit.printErrorAndExit(s"Cannot load main class from JAR $primaryResource")
           }
         case _ =>
-          error(
+          SparkSubmit.printErrorAndExit(
             s"Cannot load main class from JAR $primaryResource with URI $uriScheme. " +
             "Please specify a class through --class.")
       }
@@ -236,7 +233,7 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S
     // Set name from main class if not given
     name = Option(name).orElse(Option(mainClass)).orNull
     if (name == null && primaryResource != null) {
-      name = new File(primaryResource).getName()
+      name = Utils.stripDirectory(primaryResource)
     }
 
     // Action should be SUBMIT unless otherwise specified
@@ -249,7 +246,6 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S
       case SUBMIT => validateSubmitArguments()
       case KILL => validateKillArguments()
       case REQUEST_STATUS => validateStatusRequestArguments()
-      case PRINT_VERSION =>
     }
   }
 
@@ -258,59 +254,62 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S
       printUsageAndExit(-1)
     }
     if (primaryResource == null) {
-      error("Must specify a primary resource (JAR or Python or R file)")
+      SparkSubmit.printErrorAndExit("Must specify a primary resource (JAR or Python or R file)")
     }
     if (mainClass == null && SparkSubmit.isUserJar(primaryResource)) {
-      error("No main class set in JAR; please specify one with --class")
+      SparkSubmit.printErrorAndExit("No main class set in JAR; please specify one with --class")
     }
     if (driverMemory != null
         && Try(JavaUtils.byteStringAsBytes(driverMemory)).getOrElse(-1L) <= 0) {
-      error("Driver memory must be a positive number")
+      SparkSubmit.printErrorAndExit("Driver Memory must be a positive number")
     }
     if (executorMemory != null
         && Try(JavaUtils.byteStringAsBytes(executorMemory)).getOrElse(-1L) <= 0) {
-      error("Executor memory must be a positive number")
+      SparkSubmit.printErrorAndExit("Executor Memory cores must be a positive number")
     }
     if (executorCores != null && Try(executorCores.toInt).getOrElse(-1) <= 0) {
-      error("Executor cores must be a positive number")
+      SparkSubmit.printErrorAndExit("Executor cores must be a positive number")
     }
     if (totalExecutorCores != null && Try(totalExecutorCores.toInt).getOrElse(-1) <= 0) {
-      error("Total executor cores must be a positive number")
+      SparkSubmit.printErrorAndExit("Total executor cores must be a positive number")
     }
-    if (!dynamicAllocationEnabled &&
-      numExecutors != null && Try(numExecutors.toInt).getOrElse(-1) <= 0) {
-      error("Number of executors must be a positive number")
+    if (numExecutors != null && Try(numExecutors.toInt).getOrElse(-1) <= 0) {
+      SparkSubmit.printErrorAndExit("Number of executors must be a positive number")
+    }
+    if (pyFiles != null && !isPython) {
+      SparkSubmit.printErrorAndExit("--py-files given but primary resource is not a Python script")
     }
 
     if (master.startsWith("yarn")) {
       val hasHadoopEnv = env.contains("HADOOP_CONF_DIR") || env.contains("YARN_CONF_DIR")
       if (!hasHadoopEnv && !Utils.isTesting) {
-        error(s"When running with master '$master' " +
+        throw new Exception(s"When running with master '$master' " +
           "either HADOOP_CONF_DIR or YARN_CONF_DIR must be set in the environment.")
       }
     }
 
     if (proxyUser != null && principal != null) {
-      error("Only one of --proxy-user or --principal can be provided.")
+      SparkSubmit.printErrorAndExit("Only one of --proxy-user or --principal can be provided.")
     }
   }
 
   private def validateKillArguments(): Unit = {
     if (!master.startsWith("spark://") && !master.startsWith("mesos://")) {
-      error("Killing submissions is only supported in standalone or Mesos mode!")
+      SparkSubmit.printErrorAndExit(
+        "Killing submissions is only supported in standalone or Mesos mode!")
     }
     if (submissionToKill == null) {
-      error("Please specify a submission to kill.")
+      SparkSubmit.printErrorAndExit("Please specify a submission to kill.")
     }
   }
 
   private def validateStatusRequestArguments(): Unit = {
     if (!master.startsWith("spark://") && !master.startsWith("mesos://")) {
-      error(
+      SparkSubmit.printErrorAndExit(
         "Requesting submission statuses is only supported in standalone or Mesos mode!")
     }
     if (submissionToRequestStatusFor == null) {
-      error("Please specify a submission to request status for.")
+      SparkSubmit.printErrorAndExit("Please specify a submission to request status for.")
     }
   }
 
@@ -367,7 +366,7 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S
 
       case DEPLOY_MODE =>
         if (value != "client" && value != "cluster") {
-          error("--deploy-mode must be either \"client\" or \"cluster\"")
+          SparkSubmit.printErrorAndExit("--deploy-mode must be either \"client\" or \"cluster\"")
         }
         deployMode = value
 
@@ -404,14 +403,14 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S
       case KILL_SUBMISSION =>
         submissionToKill = value
         if (action != null) {
-          error(s"Action cannot be both $action and $KILL.")
+          SparkSubmit.printErrorAndExit(s"Action cannot be both $action and $KILL.")
         }
         action = KILL
 
       case STATUS =>
         submissionToRequestStatusFor = value
         if (action != null) {
-          error(s"Action cannot be both $action and $REQUEST_STATUS.")
+          SparkSubmit.printErrorAndExit(s"Action cannot be both $action and $REQUEST_STATUS.")
         }
         action = REQUEST_STATUS
 
@@ -443,7 +442,7 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S
         repositories = value
 
       case CONF =>
-        val (confName, confValue) = SparkSubmitUtils.parseSparkConfProperty(value)
+        val (confName, confValue) = SparkSubmit.parseSparkConfProperty(value)
         sparkProperties(confName) = confValue
 
       case PROXY_USER =>
@@ -462,15 +461,15 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S
         verbose = true
 
       case VERSION =>
-        action = SparkSubmitAction.PRINT_VERSION
+        SparkSubmit.printVersionAndExit()
 
       case USAGE_ERROR =>
         printUsageAndExit(1)
 
       case _ =>
-        error(s"Unexpected argument '$opt'.")
+        throw new IllegalArgumentException(s"Unexpected argument '$opt'.")
     }
-    action != SparkSubmitAction.PRINT_VERSION
+    true
   }
 
   /**
@@ -481,7 +480,7 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S
    */
   override protected def handleUnknown(opt: String): Boolean = {
     if (opt.startsWith("-")) {
-      error(s"Unrecognized option '$opt'.")
+      SparkSubmit.printErrorAndExit(s"Unrecognized option '$opt'.")
     }
 
     primaryResource =
@@ -500,18 +499,20 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S
   }
 
   private def printUsageAndExit(exitCode: Int, unknownParam: Any = null): Unit = {
+    // scalastyle:off println
+    val outStream = SparkSubmit.printStream
     if (unknownParam != null) {
-      logInfo("Unknown/unsupported param " + unknownParam)
+      outStream.println("Unknown/unsupported param " + unknownParam)
     }
     val command = sys.env.get("_SPARK_CMD_USAGE").getOrElse(
       """Usage: spark-submit [options] <app jar | python file | R file> [app arguments]
         |Usage: spark-submit --kill [submission ID] --master [spark://...]
         |Usage: spark-submit --status [submission ID] --master [spark://...]
         |Usage: spark-submit run-example [options] example-class [example args]""".stripMargin)
-    logInfo(command)
+    outStream.println(command)
 
     val mem_mb = Utils.DEFAULT_DRIVER_MEM_MB
-    logInfo(
+    outStream.println(
       s"""
         |Options:
         |  --master MASTER_URL         spark://host:port, mesos://host:port, yarn,
@@ -593,11 +594,12 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S
     )
 
     if (SparkSubmit.isSqlShell(mainClass)) {
-      logInfo("CLI options:")
-      logInfo(getSqlShellOptions())
+      outStream.println("CLI options:")
+      outStream.println(getSqlShellOptions())
     }
+    // scalastyle:on println
 
-    throw new SparkUserAppException(exitCode)
+    SparkSubmit.exitFn(exitCode)
   }
 
   /**
@@ -651,7 +653,4 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S
       System.setErr(currentErr)
     }
   }
-
-  private def error(msg: String): Unit = throw new SparkException(msg)
-
 }
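
The validation changes above keep the same shape on both sides of the diff: each size or count option is parsed inside a Try, a parse failure collapses to -1, and anything that is not strictly positive is rejected (null simply means the option was not set). A compact sketch of that pattern; it uses Spark's JavaUtils.byteStringAsBytes, as the file itself does:

    import scala.util.Try

    import org.apache.spark.network.util.JavaUtils

    object MemoryValidationSketch {
      // null (option not given) is acceptable; otherwise the parsed size must be positive.
      def isValidSize(value: String): Boolean =
        value == null || Try(JavaUtils.byteStringAsBytes(value)).getOrElse(-1L) > 0

      def main(args: Array[String]): Unit = {
        println(isValidSize("512m")) // true
        println(isValidSize("2g"))   // true
        println(isValidSize("lots")) // false -- parse failure falls back to -1
      }
    }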
diff --git a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
index bf1eeb0c1bf59..f9d0b5ee4e23e 100644
--- a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
@@ -18,15 +18,12 @@
 package org.apache.spark.deploy.history
 
 import java.io.{File, FileNotFoundException, IOException}
-import java.nio.file.Files
-import java.nio.file.attribute.PosixFilePermissions
-import java.util.{Date, ServiceLoader}
+import java.util.{Date, ServiceLoader, UUID}
 import java.util.concurrent.{ExecutorService, TimeUnit}
 import java.util.zip.{ZipEntry, ZipOutputStream}
 
 import scala.collection.JavaConverters._
 import scala.collection.mutable
-import scala.io.Source
 import scala.util.Try
 import scala.xml.Node
 
@@ -61,10 +58,10 @@ import org.apache.spark.util.kvstore._
  *
  * == How new and updated attempts are detected ==
  *
- * - New attempts are detected in [[checkForLogs]]: the log dir is scanned, and any entries in the
- * log dir whose size changed since the last scan time are considered new or updated. These are
- * replayed to create a new attempt info entry and update or create a matching application info
- * element in the list of applications.
+ * - New attempts are detected in [[checkForLogs]]: the log dir is scanned, and any
+ * entries in the log dir whose modification time is greater than the last scan time
+ * are considered new or updated. These are replayed to create a new attempt info entry
+ * and update or create a matching application info element in the list of applications.
  * - Updated attempts are also found in [[checkForLogs]] -- if the attempt's log file has grown, the
  * attempt is replaced by another one with a larger log size.
  *
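
The class comment above states the rule informally: an attempt is re-processed when its event log has grown since the last scan. A toy version of that filter, independent of the KVStore-backed LogInfo used in the real code further down in checkForLogs; the names here are placeholders:

    object ReplayFilterSketch {
      // What the history provider remembered about a log file after the previous scan.
      final case class KnownLog(fileSize: Long)

      // Re-parse when the file was never seen before or is now larger than last time.
      def needsReplay(known: Option[KnownLog], currentLength: Long): Boolean =
        known.forall(_.fileSize < currentLength)

      def main(args: Array[String]): Unit = {
        println(needsReplay(None, 100L))                 // true  -- new log file
        println(needsReplay(Some(KnownLog(80L)), 100L))  // true  -- log grew, replay it
        println(needsReplay(Some(KnownLog(100L)), 100L)) // false -- unchanged, skip
      }
    }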
@@ -128,14 +125,11 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
   private val pendingReplayTasksCount = new java.util.concurrent.atomic.AtomicInteger(0)
 
   private val storePath = conf.get(LOCAL_STORE_DIR).map(new File(_))
-  private val fastInProgressParsing = conf.get(FAST_IN_PROGRESS_PARSING)
 
   // Visible for testing.
   private[history] val listing: KVStore = storePath.map { path =>
-    val perms = PosixFilePermissions.fromString("rwx------")
-    val dbPath = Files.createDirectories(new File(path, "listing.ldb").toPath(),
-      PosixFilePermissions.asFileAttribute(perms)).toFile()
-
+    require(path.isDirectory(), s"Configured store directory ($path) does not exist.")
+    val dbPath = new File(path, "listing.ldb")
     val metadata = new FsHistoryProviderMetadata(CURRENT_LISTING_VERSION,
       AppStatusStore.CURRENT_VERSION, logDir.toString())
 
@@ -179,7 +173,7 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
    * Fixed size thread pool to fetch and parse log files.
    */
   private val replayExecutor: ExecutorService = {
-    if (!Utils.isTesting) {
+    if (Utils.isTesting) {
       ThreadUtils.newDaemonFixedThreadPool(NUM_PROCESSING_THREADS, "log-replay-executor")
     } else {
       MoreExecutors.sameThreadExecutor()
@@ -408,13 +402,13 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
    */
   private[history] def checkForLogs(): Unit = {
     try {
-      val newLastScanTime = clock.getTimeMillis()
+      val newLastScanTime = getNewLastScanTime()
       logDebug(s"Scanning $logDir with lastScanTime==$lastScanTime")
 
       val updated = Option(fs.listStatus(new Path(logDir))).map(_.toSeq).getOrElse(Nil)
         .filter { entry =>
           !entry.isDirectory() &&
-            // FsHistoryProvider used to generate a hidden file which can't be read.  Accidentally
+            // FsHistoryProvider generates a hidden file which can't be read.  Accidentally
             // reading a garbage file is safe, but we would log an error which can be scary to
             // the end-user.
             !entry.getPath().getName().startsWith(".") &&
@@ -423,24 +417,15 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
         .filter { entry =>
           try {
             val info = listing.read(classOf[LogInfo], entry.getPath().toString())
-
-            if (info.appId.isDefined) {
-              // If the SHS view has a valid application, update the time the file was last seen so
-              // that the entry is not deleted from the SHS listing. Also update the file size, in
-              // case the code below decides we don't need to parse the log.
-              listing.write(info.copy(lastProcessed = newLastScanTime, fileSize = entry.getLen()))
-            }
-
             if (info.fileSize < entry.getLen()) {
-              if (info.appId.isDefined && fastInProgressParsing) {
-                // When fast in-progress parsing is on, we don't need to re-parse when the
-                // size changes, but we do need to invalidate any existing UIs.
-                invalidateUI(info.appId.get, info.attemptId)
-                false
-              } else {
-                true
-              }
+              // Log size has changed; it should be parsed.
+              true
             } else {
+              // If the SHS view has a valid application, update the time the file was last seen so
+              // that the entry is not deleted from the SHS listing.
+              if (info.appId.isDefined) {
+                listing.write(info.copy(lastProcessed = newLastScanTime))
+              }
               false
             }
           } catch {
@@ -464,7 +449,7 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
       val tasks = updated.map { entry =>
         try {
           replayExecutor.submit(new Runnable {
-            override def run(): Unit = mergeApplicationListing(entry, newLastScanTime, true)
+            override def run(): Unit = mergeApplicationListing(entry, newLastScanTime)
           })
         } catch {
           // let the iteration over the updated entries break, since an exception on
@@ -557,6 +542,25 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
     }
   }
 
+  private[history] def getNewLastScanTime(): Long = {
+    val fileName = "." + UUID.randomUUID().toString
+    val path = new Path(logDir, fileName)
+    val fos = fs.create(path)
+
+    try {
+      fos.close()
+      fs.getFileStatus(path).getModificationTime
+    } catch {
+      case e: Exception =>
+        logError("Exception encountered when attempting to update last scan time", e)
+        lastScanTime.get()
+    } finally {
+      if (!fs.delete(path, true)) {
+        logWarning(s"Error deleting ${path}")
+      }
+    }
+  }
+
   override def writeEventLogs(
       appId: String,
       attemptId: Option[String],
@@ -603,10 +607,7 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
   /**
    * Replay the given log file, saving the application in the listing db.
    */
-  protected def mergeApplicationListing(
-      fileStatus: FileStatus,
-      scanTime: Long,
-      enableOptimizations: Boolean): Unit = {
+  protected def mergeApplicationListing(fileStatus: FileStatus, scanTime: Long): Unit = {
     val eventsFilter: ReplayEventsFilter = { eventString =>
       eventString.startsWith(APPL_START_EVENT_PREFIX) ||
         eventString.startsWith(APPL_END_EVENT_PREFIX) ||
@@ -615,118 +616,32 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
     }
 
     val logPath = fileStatus.getPath()
-    val appCompleted = isCompleted(logPath.getName())
-    val reparseChunkSize = conf.get(END_EVENT_REPARSE_CHUNK_SIZE)
-
-    // Enable halt support in listener if:
-    // - app in progress && fast parsing enabled
-    // - skipping to end event is enabled (regardless of in-progress state)
-    val shouldHalt = enableOptimizations &&
-      ((!appCompleted && fastInProgressParsing) || reparseChunkSize > 0)
-
     val bus = new ReplayListenerBus()
-    val listener = new AppListingListener(fileStatus, clock, shouldHalt)
+    val listener = new AppListingListener(fileStatus, clock)
     bus.addListener(listener)
-
-    logInfo(s"Parsing $logPath for listing data...")
-    Utils.tryWithResource(EventLoggingListener.openEventLog(logPath, fs)) { in =>
-      bus.replay(in, logPath.toString, !appCompleted, eventsFilter)
-    }
-
-    // If enabled above, the listing listener will halt parsing when there's enough information to
-    // create a listing entry. When the app is completed, or fast parsing is disabled, we still need
-    // to replay until the end of the log file to try to find the app end event. Instead of reading
-    // and parsing line by line, this code skips bytes from the underlying stream so that it is
-    // positioned somewhere close to the end of the log file.
-    //
-    // Because the application end event is written while some Spark subsystems such as the
-    // scheduler are still active, there is no guarantee that the end event will be the last
-    // in the log. So, to be safe, the code uses a configurable chunk to be re-parsed at
-    // the end of the file, and retries parsing the whole log later if the needed data is
-    // still not found.
-    //
-    // Note that skipping bytes in compressed files is still not cheap, but there are still some
-    // minor gains over the normal log parsing done by the replay bus.
-    //
-    // This code re-opens the file so that it knows where it's skipping to. This isn't as cheap as
-    // just skipping from the current position, but there isn't a good way to detect what the
-    // current position is, since the replay listener bus buffers data internally.
-    val lookForEndEvent = shouldHalt && (appCompleted || !fastInProgressParsing)
-    if (lookForEndEvent && listener.applicationInfo.isDefined) {
-      Utils.tryWithResource(EventLoggingListener.openEventLog(logPath, fs)) { in =>
-        val target = fileStatus.getLen() - reparseChunkSize
-        if (target > 0) {
-          logInfo(s"Looking for end event; skipping $target bytes from $logPath...")
-          var skipped = 0L
-          while (skipped < target) {
-            skipped += in.skip(target - skipped)
+    replay(fileStatus, bus, eventsFilter = eventsFilter)
+
+    val (appId, attemptId) = listener.applicationInfo match {
+      case Some(app) =>
+        // Invalidate the existing UI for the reloaded app attempt, if any. See LoadedAppUI for a
+        // discussion on the UI lifecycle.
+        synchronized {
+          activeUIs.get((app.info.id, app.attempts.head.info.attemptId)).foreach { ui =>
+            ui.invalidate()
+            ui.ui.store.close()
           }
         }
 
-        val source = Source.fromInputStream(in).getLines()
-
-        // Because skipping may leave the stream in the middle of a line, read the next line
-        // before replaying.
-        if (target > 0) {
-          source.next()
-        }
-
-        bus.replay(source, logPath.toString, !appCompleted, eventsFilter)
-      }
-    }
-
-    logInfo(s"Finished parsing $logPath")
-
-    listener.applicationInfo match {
-      case Some(app) if !lookForEndEvent || app.attempts.head.info.completed =>
-        // In this case, we either didn't care about the end event, or we found it. So the
-        // listing data is good.
-        invalidateUI(app.info.id, app.attempts.head.info.attemptId)
         addListing(app)
-        listing.write(LogInfo(logPath.toString(), scanTime, Some(app.info.id),
-          app.attempts.head.info.attemptId, fileStatus.getLen()))
-
-        // For a finished log, remove the corresponding "in progress" entry from the listing DB if
-        // the file is really gone.
-        if (appCompleted) {
-          val inProgressLog = logPath.toString() + EventLoggingListener.IN_PROGRESS
-          try {
-            // Fetch the entry first to avoid an RPC when it's already removed.
-            listing.read(classOf[LogInfo], inProgressLog)
-            if (!fs.isFile(new Path(inProgressLog))) {
-              listing.delete(classOf[LogInfo], inProgressLog)
-            }
-          } catch {
-            case _: NoSuchElementException =>
-          }
-        }
-
-      case Some(_) =>
-        // In this case, the attempt is still not marked as finished but was expected to. This can
-        // mean the end event is before the configured threshold, so call the method again to
-        // re-parse the whole log.
-        logInfo(s"Reparsing $logPath since end event was not found.")
-        mergeApplicationListing(fileStatus, scanTime, false)
+        (Some(app.info.id), app.attempts.head.info.attemptId)
 
       case _ =>
         // If the app hasn't written down its app ID to the logs, still record the entry in the
         // listing db, with an empty ID. This will make the log eligible for deletion if the app
         // does not make progress after the configured max log age.
-        listing.write(LogInfo(logPath.toString(), scanTime, None, None, fileStatus.getLen()))
-    }
-  }
-
-  /**
-   * Invalidate an existing UI for a given app attempt. See LoadedAppUI for a discussion on the
-   * UI lifecycle.
-   */
-  private def invalidateUI(appId: String, attemptId: Option[String]): Unit = {
-    synchronized {
-      activeUIs.get((appId, attemptId)).foreach { ui =>
-        ui.invalidate()
-        ui.ui.store.close()
-      }
+        (None, None)
     }
+    listing.write(LogInfo(logPath.toString(), scanTime, appId, attemptId, fileStatus.getLen()))
   }
 
   /**
@@ -781,6 +696,29 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
     }
   }
 
+  /**
+   * Replays the events in the specified log file on the supplied `ReplayListenerBus`.
+   * `ReplayEventsFilter` determines what events are replayed.
+   */
+  private def replay(
+      eventLog: FileStatus,
+      bus: ReplayListenerBus,
+      eventsFilter: ReplayEventsFilter = SELECT_ALL_FILTER): Unit = {
+    val logPath = eventLog.getPath()
+    val isCompleted = !logPath.getName().endsWith(EventLoggingListener.IN_PROGRESS)
+    logInfo(s"Replaying log path: $logPath")
+    // Note that the eventLog may have *increased* in size since when we grabbed the filestatus,
+    // and when we read the file here.  That is OK -- it may result in an unnecessary refresh
+    // when there is no update, but will not result in missing an update.  We *must* prevent
+    // an error the other way -- if we report a size bigger (i.e. later) than the file that is
+    // actually read, we may never refresh the app.  FileStatus is guaranteed to be static
+    // after it's created, so we get a file size that is no bigger than what is actually read.
+    Utils.tryWithResource(EventLoggingListener.openEventLog(logPath, fs)) { in =>
+      bus.replay(in, logPath.toString, !isCompleted, eventsFilter)
+      logInfo(s"Finished parsing $logPath")
+    }
+  }
+
   /**
    * Rebuilds the application state store from its event log.
    */
@@ -803,13 +741,8 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
     } replayBus.addListener(listener)
 
     try {
-      val path = eventLog.getPath()
-      logInfo(s"Parsing $path to re-build UI...")
-      Utils.tryWithResource(EventLoggingListener.openEventLog(path, fs)) { in =>
-        replayBus.replay(in, path.toString(), maybeTruncated = !isCompleted(path.toString()))
-      }
+      replay(eventLog, replayBus)
       trackingStore.close(false)
-      logInfo(s"Finished parsing $path")
     } catch {
       case e: Exception =>
         Utils.tryLogNonFatalError {
@@ -948,10 +881,6 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
     }
   }
 
-  private def isCompleted(name: String): Boolean = {
-    !name.endsWith(EventLoggingListener.IN_PROGRESS)
-  }
-
 }
 
 private[history] object FsHistoryProvider {
@@ -1016,17 +945,11 @@ private[history] class ApplicationInfoWrapper(
 
 }
 
-private[history] class AppListingListener(
-    log: FileStatus,
-    clock: Clock,
-    haltEnabled: Boolean) extends SparkListener {
+private[history] class AppListingListener(log: FileStatus, clock: Clock) extends SparkListener {
 
   private val app = new MutableApplicationInfo()
   private val attempt = new MutableAttemptInfo(log.getPath().getName(), log.getLen())
 
-  private var gotEnvUpdate = false
-  private var halted = false
-
   override def onApplicationStart(event: SparkListenerApplicationStart): Unit = {
     app.id = event.appId.orNull
     app.name = event.appName
@@ -1035,8 +958,6 @@ private[history] class AppListingListener(
     attempt.startTime = new Date(event.time)
     attempt.lastUpdated = new Date(clock.getTimeMillis())
     attempt.sparkUser = event.sparkUser
-
-    checkProgress()
   }
 
   override def onApplicationEnd(event: SparkListenerApplicationEnd): Unit = {
@@ -1047,18 +968,11 @@ private[history] class AppListingListener(
   }
 
   override def onEnvironmentUpdate(event: SparkListenerEnvironmentUpdate): Unit = {
-    // Only parse the first env update, since any future changes don't have any effect on
-    // the ACLs set for the UI.
-    if (!gotEnvUpdate) {
-      val allProperties = event.environmentDetails("Spark Properties").toMap
-      attempt.viewAcls = allProperties.get("spark.ui.view.acls")
-      attempt.adminAcls = allProperties.get("spark.admin.acls")
-      attempt.viewAclsGroups = allProperties.get("spark.ui.view.acls.groups")
-      attempt.adminAclsGroups = allProperties.get("spark.admin.acls.groups")
-
-      gotEnvUpdate = true
-      checkProgress()
-    }
+    val allProperties = event.environmentDetails("Spark Properties").toMap
+    attempt.viewAcls = allProperties.get("spark.ui.view.acls")
+    attempt.adminAcls = allProperties.get("spark.admin.acls")
+    attempt.viewAclsGroups = allProperties.get("spark.ui.view.acls.groups")
+    attempt.adminAclsGroups = allProperties.get("spark.admin.acls.groups")
   }
 
   override def onOtherEvent(event: SparkListenerEvent): Unit = event match {
@@ -1075,17 +989,6 @@ private[history] class AppListingListener(
     }
   }
 
-  /**
-   * Throws a halt exception to stop replay if enough data to create the app listing has been
-   * read.
-   */
-  private def checkProgress(): Unit = {
-    if (haltEnabled && !halted && app.id != null && gotEnvUpdate) {
-      halted = true
-      throw new HaltReplayException()
-    }
-  }
-
   private class MutableApplicationInfo {
     var id: String = null
     var name: String = null
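
For context on the FsHistoryProvider hunks above: the revert re-introduces getNewLastScanTime(), which derives the scan timestamp from the filesystem hosting the event logs (by touching a hidden temp file and reading its modification time) rather than from the local clock, so comparisons against log-file modification times happen on the same clock. A minimal standalone sketch of that technique, assuming only the Hadoop FileSystem API and an illustrative log directory path:

import java.util.UUID

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}

// Sketch only, not the FsHistoryProvider code: read "now" from the filesystem
// hosting the event logs by creating a hidden probe file and taking its
// modification time. The directory below is an example path.
object FsClockSketch {
  def filesystemNow(fs: FileSystem, logDir: String): Long = {
    val probe = new Path(logDir, "." + UUID.randomUUID().toString)
    try {
      fs.create(probe).close()                    // empty marker file
      fs.getFileStatus(probe).getModificationTime // filesystem clock, not System.currentTimeMillis
    } finally {
      fs.delete(probe, true)                      // best-effort cleanup
    }
  }

  def main(args: Array[String]): Unit = {
    val fs = FileSystem.get(new Configuration())
    println(filesystemNow(fs, "/tmp/spark-events"))
  }
}

The leading dot keeps the probe out of the listing scan, which skips hidden files (see the startsWith(".") filter in checkForLogs above).
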
diff --git a/core/src/main/scala/org/apache/spark/deploy/history/HistoryPage.scala b/core/src/main/scala/org/apache/spark/deploy/history/HistoryPage.scala
index 32667ddf5c7ea..5d62a7d8bebb4 100644
--- a/core/src/main/scala/org/apache/spark/deploy/history/HistoryPage.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/history/HistoryPage.scala
@@ -37,8 +37,7 @@ private[history] class HistoryPage(parent: HistoryServer) extends WebUIPage("")
     val lastUpdatedTime = parent.getLastUpdatedTime()
     val providerConfig = parent.getProviderConfig()
     val content =
-      <script src={UIUtils.prependBaseUri(request, "/static/historypage-common.js")}></script> ++
-      <script src={UIUtils.prependBaseUri(request, "/static/utils.js")}></script>
+      <script src={UIUtils.prependBaseUri("/static/historypage-common.js")}></script>
       <div>
           <div class="container-fluid">
             <ul class="unstyled">
@@ -64,10 +63,10 @@ private[history] class HistoryPage(parent: HistoryServer) extends WebUIPage("")
 
             {
             if (allAppsSize > 0) {
-              <script src={UIUtils.prependBaseUri(
-                  request, "/static/dataTables.rowsGroup.js")}></script> ++
+              <script src={UIUtils.prependBaseUri("/static/dataTables.rowsGroup.js")}></script> ++
                 <div id="history-summary" class="row-fluid"></div> ++
-                <script src={UIUtils.prependBaseUri(request, "/static/historypage.js")}></script> ++
+                <script src={UIUtils.prependBaseUri("/static/utils.js")}></script> ++
+                <script src={UIUtils.prependBaseUri("/static/historypage.js")}></script> ++
                 <script>setAppLimit({parent.maxApplications})</script>
             } else if (requestedIncomplete) {
               <h4>No incomplete applications found!</h4>
@@ -78,7 +77,7 @@ private[history] class HistoryPage(parent: HistoryServer) extends WebUIPage("")
             }
             }
 
-            <a href={makePageLink(request, !requestedIncomplete)}>
+            <a href={makePageLink(!requestedIncomplete)}>
               {
               if (requestedIncomplete) {
                 "Back to completed applications"
@@ -89,11 +88,11 @@ private[history] class HistoryPage(parent: HistoryServer) extends WebUIPage("")
             </a>
           </div>
       </div>
-    UIUtils.basicSparkPage(request, content, "History Server", true)
+    UIUtils.basicSparkPage(content, "History Server", true)
   }
 
-  private def makePageLink(request: HttpServletRequest, showIncomplete: Boolean): String = {
-    UIUtils.prependBaseUri(request, "/?" + "showIncomplete=" + showIncomplete)
+  private def makePageLink(showIncomplete: Boolean): String = {
+    UIUtils.prependBaseUri("/?" + "showIncomplete=" + showIncomplete)
   }
 
   private def isApplicationCompleted(appInfo: ApplicationInfo): Boolean = {
diff --git a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala
index a9a4d5a4ec6a2..0ec4afad0308c 100644
--- a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala
@@ -87,7 +87,7 @@ class HistoryServer(
       if (!loadAppUi(appId, None) && (!attemptId.isDefined || !loadAppUi(appId, attemptId))) {
         val msg = <div class="row-fluid">Application {appId} not found.</div>
         res.setStatus(HttpServletResponse.SC_NOT_FOUND)
-        UIUtils.basicSparkPage(req, msg, "Not Found").foreach { n =>
+        UIUtils.basicSparkPage(msg, "Not Found").foreach { n =>
           res.getWriter().write(n.toString)
         }
         return
@@ -150,18 +150,14 @@ class HistoryServer(
       ui: SparkUI,
       completed: Boolean) {
     assert(serverInfo.isDefined, "HistoryServer must be bound before attaching SparkUIs")
-    handlers.synchronized {
-      ui.getHandlers.foreach(attachHandler)
-      addFilters(ui.getHandlers, conf)
-    }
+    ui.getHandlers.foreach(attachHandler)
+    addFilters(ui.getHandlers, conf)
   }
 
   /** Detach a reconstructed UI from this server. Only valid after bind(). */
   override def detachSparkUI(appId: String, attemptId: Option[String], ui: SparkUI): Unit = {
     assert(serverInfo.isDefined, "HistoryServer must be bound before detaching SparkUIs")
-    handlers.synchronized {
-      ui.getHandlers.foreach(detachHandler)
-    }
+    ui.getHandlers.foreach(detachHandler)
     provider.onUIDetached(appId, attemptId, ui)
   }
 
diff --git a/core/src/main/scala/org/apache/spark/deploy/history/config.scala b/core/src/main/scala/org/apache/spark/deploy/history/config.scala
index 25ba9edb9e014..efdbf672bb52f 100644
--- a/core/src/main/scala/org/apache/spark/deploy/history/config.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/history/config.scala
@@ -49,19 +49,4 @@ private[spark] object config {
     .intConf
     .createWithDefault(18080)
 
-  val FAST_IN_PROGRESS_PARSING =
-    ConfigBuilder("spark.history.fs.inProgressOptimization.enabled")
-      .doc("Enable optimized handling of in-progress logs. This option may leave finished " +
-        "applications that fail to rename their event logs listed as in-progress.")
-      .booleanConf
-      .createWithDefault(true)
-
-  val END_EVENT_REPARSE_CHUNK_SIZE =
-    ConfigBuilder("spark.history.fs.endEventReparseChunkSize")
-      .doc("How many bytes to parse at the end of log files looking for the end event. " +
-        "This is used to speed up generation of application listings by skipping unnecessary " +
-        "parts of event log files. It can be disabled by setting this config to 0.")
-      .bytesConf(ByteUnit.BYTE)
-      .createWithDefaultString("1m")
-
 }
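
The hunk above drops spark.history.fs.inProgressOptimization.enabled and spark.history.fs.endEventReparseChunkSize from the history-server config object, so the optimized in-progress parsing path they controlled goes away with this revert. A hedged sketch of how those two options were supplied on the pre-revert code, with illustrative values only (they would normally live in spark-defaults.conf):

import org.apache.spark.SparkConf

// Sketch, pre-revert behaviour only: both keys are removed by the hunk above
// and have no effect once this change lands. Values are examples.
object HistoryServerConfSketch {
  val conf: SparkConf = new SparkConf()
    .set("spark.history.fs.inProgressOptimization.enabled", "false") // turn off fast in-progress parsing
    .set("spark.history.fs.endEventReparseChunkSize", "0")           // 0 disables the end-event skip
}
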
diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala b/core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala
index fad4e46dc035d..f699c75085fe1 100644
--- a/core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala
@@ -40,7 +40,7 @@ private[ui] class ApplicationPage(parent: MasterWebUI) extends WebUIPage("app")
       .getOrElse(state.completedApps.find(_.id == appId).orNull)
     if (app == null) {
       val msg = <div class="row-fluid">No running application with ID {appId}</div>
-      return UIUtils.basicSparkPage(request, msg, "Not Found")
+      return UIUtils.basicSparkPage(msg, "Not Found")
     }
 
     val executorHeaders = Seq("ExecutorID", "Worker", "Cores", "Memory", "State", "Logs")
@@ -127,7 +127,7 @@ private[ui] class ApplicationPage(parent: MasterWebUI) extends WebUIPage("app")
           }
         </div>
       </div>;
-    UIUtils.basicSparkPage(request, content, "Application: " + app.desc.name)
+    UIUtils.basicSparkPage(content, "Application: " + app.desc.name)
   }
 
   private def executorRow(executor: ExecutorDesc): Seq[Node] = {
diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterPage.scala b/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterPage.scala
index b8afe203fbfa2..c629937606b51 100644
--- a/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterPage.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterPage.scala
@@ -215,7 +215,7 @@ private[ui] class MasterPage(parent: MasterWebUI) extends WebUIPage("") {
           }
         </div>;
 
-    UIUtils.basicSparkPage(request, content, "Spark Master at " + state.uri)
+    UIUtils.basicSparkPage(content, "Spark Master at " + state.uri)
   }
 
   private def workerRow(worker: WorkerInfo): Seq[Node] = {
diff --git a/core/src/main/scala/org/apache/spark/deploy/rest/RestSubmissionServer.scala b/core/src/main/scala/org/apache/spark/deploy/rest/RestSubmissionServer.scala
index 3d99d085408c6..e88195d95f270 100644
--- a/core/src/main/scala/org/apache/spark/deploy/rest/RestSubmissionServer.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/rest/RestSubmissionServer.scala
@@ -94,7 +94,6 @@ private[spark] abstract class RestSubmissionServer(
       new HttpConnectionFactory())
     connector.setHost(host)
     connector.setPort(startPort)
-    connector.setReuseAddress(!Utils.isWindows)
     server.addConnector(connector)
 
     val mainHandler = new ServletContextHandler
diff --git a/core/src/main/scala/org/apache/spark/deploy/security/HadoopDelegationTokenManager.scala b/core/src/main/scala/org/apache/spark/deploy/security/HadoopDelegationTokenManager.scala
index ab8d8d96a9b08..5151df00476f9 100644
--- a/core/src/main/scala/org/apache/spark/deploy/security/HadoopDelegationTokenManager.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/security/HadoopDelegationTokenManager.scala
@@ -32,7 +32,7 @@ import org.apache.spark.internal.Logging
  *
  * Also, each HadoopDelegationTokenProvider is controlled by
  * spark.security.credentials.{service}.enabled, and will not be loaded if this config is set to
- * false. For example, Hive's delegation token provider [[HiveDelegationTokenProvider]] can be
+ * false.  For example, Hive's delegation token provider [[HiveDelegationTokenProvider]] can be
  * enabled/disabled by the configuration spark.security.credentials.hive.enabled.
  *
  * @param sparkConf Spark configuration
@@ -52,7 +52,7 @@ private[spark] class HadoopDelegationTokenManager(
 
   // Maintain all the registered delegation token providers
   private val delegationTokenProviders = getDelegationTokenProviders
-  logDebug("Using the following builtin delegation token providers: " +
+  logDebug(s"Using the following delegation token providers: " +
     s"${delegationTokenProviders.keys.mkString(", ")}.")
 
   /** Construct a [[HadoopDelegationTokenManager]] for the default Hadoop filesystem */
diff --git a/core/src/main/scala/org/apache/spark/deploy/security/HiveDelegationTokenProvider.scala b/core/src/main/scala/org/apache/spark/deploy/security/HiveDelegationTokenProvider.scala
index 7249eb85ac7c7..ece5ce79c650d 100644
--- a/core/src/main/scala/org/apache/spark/deploy/security/HiveDelegationTokenProvider.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/security/HiveDelegationTokenProvider.scala
@@ -36,7 +36,7 @@ import org.apache.spark.internal.Logging
 import org.apache.spark.internal.config.KEYTAB
 import org.apache.spark.util.Utils
 
-private[spark] class HiveDelegationTokenProvider
+private[security] class HiveDelegationTokenProvider
     extends HadoopDelegationTokenProvider with Logging {
 
   override def serviceName: String = "hive"
@@ -124,9 +124,9 @@ private[spark] class HiveDelegationTokenProvider
     val currentUser = UserGroupInformation.getCurrentUser()
     val realUser = Option(currentUser.getRealUser()).getOrElse(currentUser)
 
-    // For some reason the Scala-generated anonymous class ends up causing an
-    // UndeclaredThrowableException, even if you annotate the method with @throws.
-    try {
+   // For some reason the Scala-generated anonymous class ends up causing an
+   // UndeclaredThrowableException, even if you annotate the method with @throws.
+   try {
       realUser.doAs(new PrivilegedExceptionAction[T]() {
         override def run(): T = fn
       })
diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/DriverWrapper.scala b/core/src/main/scala/org/apache/spark/deploy/worker/DriverWrapper.scala
index 8d6a2b80ef5f2..b19c9904d5982 100644
--- a/core/src/main/scala/org/apache/spark/deploy/worker/DriverWrapper.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/worker/DriverWrapper.scala
@@ -25,7 +25,7 @@ import org.apache.spark.{SecurityManager, SparkConf}
 import org.apache.spark.deploy.{DependencyUtils, SparkHadoopUtil, SparkSubmit}
 import org.apache.spark.internal.Logging
 import org.apache.spark.rpc.RpcEnv
-import org.apache.spark.util._
+import org.apache.spark.util.{ChildFirstURLClassLoader, MutableURLClassLoader, Utils}
 
 /**
  * Utility object for launching driver programs such that they share fate with the Worker process.
@@ -79,21 +79,16 @@ object DriverWrapper extends Logging {
     val secMgr = new SecurityManager(sparkConf)
     val hadoopConf = SparkHadoopUtil.newConfiguration(sparkConf)
 
-    val Seq(packagesExclusions, packages, repositories, ivyRepoPath, ivySettingsPath) =
-      Seq(
-        "spark.jars.excludes",
-        "spark.jars.packages",
-        "spark.jars.repositories",
-        "spark.jars.ivy",
-        "spark.jars.ivySettings"
-      ).map(sys.props.get(_).orNull)
+    val Seq(packagesExclusions, packages, repositories, ivyRepoPath) =
+      Seq("spark.jars.excludes", "spark.jars.packages", "spark.jars.repositories", "spark.jars.ivy")
+        .map(sys.props.get(_).orNull)
 
     val resolvedMavenCoordinates = DependencyUtils.resolveMavenDependencies(packagesExclusions,
-      packages, repositories, ivyRepoPath, Option(ivySettingsPath))
+      packages, repositories, ivyRepoPath)
     val jars = {
       val jarsProp = sys.props.get("spark.jars").orNull
       if (!StringUtils.isBlank(resolvedMavenCoordinates)) {
-        DependencyUtils.mergeFileLists(jarsProp, resolvedMavenCoordinates)
+        SparkSubmit.mergeFileLists(jarsProp, resolvedMavenCoordinates)
       } else {
         jarsProp
       }
diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala b/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala
index dc6a3076a5113..d4d8521cc8204 100644
--- a/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala
@@ -25,7 +25,7 @@ import scala.collection.JavaConverters._
 import com.google.common.io.Files
 
 import org.apache.spark.{SecurityManager, SparkConf}
-import org.apache.spark.deploy.{ApplicationDescription, Command, ExecutorState}
+import org.apache.spark.deploy.{ApplicationDescription, ExecutorState}
 import org.apache.spark.deploy.DeployMessages.ExecutorStateChanged
 import org.apache.spark.internal.Logging
 import org.apache.spark.rpc.RpcEndpointRef
@@ -142,11 +142,7 @@ private[deploy] class ExecutorRunner(
   private def fetchAndRunExecutor() {
     try {
       // Launch the process
-      val subsOpts = appDesc.command.javaOpts.map {
-        Utils.substituteAppNExecIds(_, appId, execId.toString)
-      }
-      val subsCommand = appDesc.command.copy(javaOpts = subsOpts)
-      val builder = CommandUtils.buildProcessBuilder(subsCommand, new SecurityManager(conf),
+      val builder = CommandUtils.buildProcessBuilder(appDesc.command, new SecurityManager(conf),
         memory, sparkHome.getAbsolutePath, substituteVariables)
       val command = builder.command()
       val formattedCommand = command.asScala.mkString("\"", "\" \"", "\"")
diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala b/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala
index ee1ca0bba5749..563b84934f264 100755
--- a/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala
@@ -23,7 +23,6 @@ import java.text.SimpleDateFormat
 import java.util.{Date, Locale, UUID}
 import java.util.concurrent._
 import java.util.concurrent.{Future => JFuture, ScheduledFuture => JScheduledFuture}
-import java.util.function.Supplier
 
 import scala.collection.mutable.{HashMap, HashSet, LinkedHashMap}
 import scala.concurrent.ExecutionContext
@@ -50,8 +49,7 @@ private[deploy] class Worker(
     endpointName: String,
     workDirPath: String = null,
     val conf: SparkConf,
-    val securityMgr: SecurityManager,
-    externalShuffleServiceSupplier: Supplier[ExternalShuffleService] = null)
+    val securityMgr: SecurityManager)
   extends ThreadSafeRpcEndpoint with Logging {
 
   private val host = rpcEnv.address.host
@@ -99,10 +97,6 @@ private[deploy] class Worker(
   private val APP_DATA_RETENTION_SECONDS =
     conf.getLong("spark.worker.cleanup.appDataTtl", 7 * 24 * 3600)
 
-  // Whether or not cleanup the non-shuffle files on executor exits.
-  private val CLEANUP_NON_SHUFFLE_FILES_ENABLED =
-    conf.getBoolean("spark.storage.cleanupFilesAfterExecutorExit", true)
-
   private val testing: Boolean = sys.props.contains("spark.testing")
   private var master: Option[RpcEndpointRef] = None
 
@@ -148,11 +142,7 @@ private[deploy] class Worker(
     WorkerWebUI.DEFAULT_RETAINED_DRIVERS)
 
   // The shuffle service is not actually started unless configured.
-  private val shuffleService = if (externalShuffleServiceSupplier != null) {
-    externalShuffleServiceSupplier.get()
-  } else {
-    new ExternalShuffleService(conf, securityMgr)
-  }
+  private val shuffleService = new ExternalShuffleService(conf, securityMgr)
 
   private val publicAddress = {
     val envVar = conf.getenv("SPARK_PUBLIC_DNS")
@@ -742,9 +732,6 @@ private[deploy] class Worker(
           trimFinishedExecutorsIfNecessary()
           coresUsed -= executor.cores
           memoryUsed -= executor.memory
-          if (CLEANUP_NON_SHUFFLE_FILES_ENABLED) {
-            shuffleService.executorRemoved(executorStateChanged.execId.toString, appId)
-          }
         case None =>
           logInfo("Unknown Executor " + fullId + " finished with state " + state +
             message.map(" message " + _).getOrElse("") +
diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/ui/LogPage.scala b/core/src/main/scala/org/apache/spark/deploy/worker/ui/LogPage.scala
index 4fca9342c0378..2f5a5642d3cab 100644
--- a/core/src/main/scala/org/apache/spark/deploy/worker/ui/LogPage.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/worker/ui/LogPage.scala
@@ -118,7 +118,7 @@ private[ui] class LogPage(parent: WorkerWebUI) extends WebUIPage("logPage") with
         <script>{Unparsed(jsOnload)}</script>
       </div>
 
-    UIUtils.basicSparkPage(request, content, logType + " log page for " + pageName)
+    UIUtils.basicSparkPage(content, logType + " log page for " + pageName)
   }
 
   /** Get the part of the log files given the offset and desired length of bytes */
diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/ui/WorkerPage.scala b/core/src/main/scala/org/apache/spark/deploy/worker/ui/WorkerPage.scala
index aa4e28d213e2b..8b98ae56fc108 100644
--- a/core/src/main/scala/org/apache/spark/deploy/worker/ui/WorkerPage.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/worker/ui/WorkerPage.scala
@@ -135,7 +135,7 @@ private[ui] class WorkerPage(parent: WorkerWebUI) extends WebUIPage("") {
           }
         </div>
       </div>;
-    UIUtils.basicSparkPage(request, content, "Spark Worker at %s:%s".format(
+    UIUtils.basicSparkPage(content, "Spark Worker at %s:%s".format(
       workerState.host, workerState.port))
   }
 
diff --git a/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala b/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala
index 48d3630abd1f9..9b62e4b1b7150 100644
--- a/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala
+++ b/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala
@@ -213,6 +213,13 @@ private[spark] object CoarseGrainedExecutorBackend extends Logging {
           driverConf.set(key, value)
         }
       }
+      if (driverConf.contains("spark.yarn.credentials.file")) {
+        logInfo("Will periodically update credentials from: " +
+          driverConf.get("spark.yarn.credentials.file"))
+        Utils.classForName("org.apache.spark.deploy.yarn.YarnSparkHadoopUtil")
+          .getMethod("startCredentialUpdater", classOf[SparkConf])
+          .invoke(null, driverConf)
+      }
 
       cfg.hadoopDelegationCreds.foreach { tokens =>
         SparkHadoopUtil.get.addDelegationTokens(tokens, driverConf)
@@ -227,6 +234,11 @@ private[spark] object CoarseGrainedExecutorBackend extends Logging {
         env.rpcEnv.setupEndpoint("WorkerWatcher", new WorkerWatcher(env.rpcEnv, url))
       }
       env.rpcEnv.awaitTermination()
+      if (driverConf.contains("spark.yarn.credentials.file")) {
+        Utils.classForName("org.apache.spark.deploy.yarn.YarnSparkHadoopUtil")
+          .getMethod("stopCredentialUpdater")
+          .invoke(null)
+      }
     }
   }
 
diff --git a/core/src/main/scala/org/apache/spark/executor/Executor.scala b/core/src/main/scala/org/apache/spark/executor/Executor.scala
index b1856ff0f3247..2c3a8ef74800b 100644
--- a/core/src/main/scala/org/apache/spark/executor/Executor.scala
+++ b/core/src/main/scala/org/apache/spark/executor/Executor.scala
@@ -35,7 +35,6 @@ import com.google.common.util.concurrent.ThreadFactoryBuilder
 import org.apache.spark._
 import org.apache.spark.deploy.SparkHadoopUtil
 import org.apache.spark.internal.Logging
-import org.apache.spark.internal.config._
 import org.apache.spark.memory.{SparkOutOfMemoryError, TaskMemoryManager}
 import org.apache.spark.rpc.RpcTimeout
 import org.apache.spark.scheduler.{DirectTaskResult, IndirectTaskResult, Task, TaskDescription}
@@ -142,7 +141,8 @@ private[spark] class Executor(
     conf.getSizeAsBytes("spark.task.maxDirectResultSize", 1L << 20),
     RpcUtils.maxMessageSizeBytes(conf))
 
-  private val maxResultSize = conf.get(MAX_RESULT_SIZE)
+  // Limit of bytes for total size of results (default is 1GB)
+  private val maxResultSize = Utils.getMaxResultSize(conf)
 
   // Maintains the list of running tasks.
   private val runningTasks = new ConcurrentHashMap[Long, TaskRunner]
@@ -287,28 +287,6 @@ private[spark] class Executor(
       notifyAll()
     }
 
-    /**
-     *  Utility function to:
-     *    1. Report executor runtime and JVM gc time if possible
-     *    2. Collect accumulator updates
-     *    3. Set the finished flag to true and clear current thread's interrupt status
-     */
-    private def collectAccumulatorsAndResetStatusOnFailure(taskStartTime: Long) = {
-      // Report executor runtime and JVM gc time
-      Option(task).foreach(t => {
-        t.metrics.setExecutorRunTime(System.currentTimeMillis() - taskStartTime)
-        t.metrics.setJvmGCTime(computeTotalGcTime() - startGCTime)
-      })
-
-      // Collect latest accumulator values to report back to the driver
-      val accums: Seq[AccumulatorV2[_, _]] =
-        Option(task).map(_.collectAccumulatorUpdates(taskFailed = true)).getOrElse(Seq.empty)
-      val accUpdates = accums.map(acc => acc.toInfo(Some(acc.value), None))
-
-      setTaskFinishedAndClearInterruptStatus()
-      (accums, accUpdates)
-    }
-
     override def run(): Unit = {
       threadId = Thread.currentThread.getId
       Thread.currentThread.setName(threadName)
@@ -322,7 +300,7 @@ private[spark] class Executor(
       val ser = env.closureSerializer.newInstance()
       logInfo(s"Running $taskName (TID $taskId)")
       execBackend.statusUpdate(taskId, TaskState.RUNNING, EMPTY_BYTE_BUFFER)
-      var taskStartTime: Long = 0
+      var taskStart: Long = 0
       var taskStartCpu: Long = 0
       startGCTime = computeTotalGcTime()
 
@@ -358,7 +336,7 @@ private[spark] class Executor(
         }
 
         // Run the actual task and measure its runtime.
-        taskStartTime = System.currentTimeMillis()
+        taskStart = System.currentTimeMillis()
         taskStartCpu = if (threadMXBean.isCurrentThreadCpuTimeSupported) {
           threadMXBean.getCurrentThreadCpuTime
         } else 0L
@@ -418,11 +396,11 @@ private[spark] class Executor(
         // Deserialization happens in two parts: first, we deserialize a Task object, which
         // includes the Partition. Second, Task.run() deserializes the RDD and function to be run.
         task.metrics.setExecutorDeserializeTime(
-          (taskStartTime - deserializeStartTime) + task.executorDeserializeTime)
+          (taskStart - deserializeStartTime) + task.executorDeserializeTime)
         task.metrics.setExecutorDeserializeCpuTime(
           (taskStartCpu - deserializeStartCpuTime) + task.executorDeserializeCpuTime)
         // We need to subtract Task.run()'s deserialization time to avoid double-counting
-        task.metrics.setExecutorRunTime((taskFinish - taskStartTime) - task.executorDeserializeTime)
+        task.metrics.setExecutorRunTime((taskFinish - taskStart) - task.executorDeserializeTime)
         task.metrics.setExecutorCpuTime(
           (taskFinishCpu - taskStartCpu) - task.executorDeserializeCpuTime)
         task.metrics.setJvmGCTime(computeTotalGcTime() - startGCTime)
@@ -502,22 +480,6 @@ private[spark] class Executor(
         execBackend.statusUpdate(taskId, TaskState.FINISHED, serializedResult)
 
       } catch {
-        case t: TaskKilledException =>
-          logInfo(s"Executor killed $taskName (TID $taskId), reason: ${t.reason}")
-
-          val (accums, accUpdates) = collectAccumulatorsAndResetStatusOnFailure(taskStartTime)
-          val serializedTK = ser.serialize(TaskKilled(t.reason, accUpdates, accums))
-          execBackend.statusUpdate(taskId, TaskState.KILLED, serializedTK)
-
-        case _: InterruptedException | NonFatal(_) if
-            task != null && task.reasonIfKilled.isDefined =>
-          val killReason = task.reasonIfKilled.getOrElse("unknown reason")
-          logInfo(s"Executor interrupted and killed $taskName (TID $taskId), reason: $killReason")
-
-          val (accums, accUpdates) = collectAccumulatorsAndResetStatusOnFailure(taskStartTime)
-          val serializedTK = ser.serialize(TaskKilled(killReason, accUpdates, accums))
-          execBackend.statusUpdate(taskId, TaskState.KILLED, serializedTK)
-
         case t: Throwable if hasFetchFailure && !Utils.isFatalError(t) =>
           val reason = task.context.fetchFailed.get.toTaskFailedReason
           if (!t.isInstanceOf[FetchFailedException]) {
@@ -532,6 +494,19 @@ private[spark] class Executor(
           setTaskFinishedAndClearInterruptStatus()
           execBackend.statusUpdate(taskId, TaskState.FAILED, ser.serialize(reason))
 
+        case t: TaskKilledException =>
+          logInfo(s"Executor killed $taskName (TID $taskId), reason: ${t.reason}")
+          setTaskFinishedAndClearInterruptStatus()
+          execBackend.statusUpdate(taskId, TaskState.KILLED, ser.serialize(TaskKilled(t.reason)))
+
+        case _: InterruptedException | NonFatal(_) if
+            task != null && task.reasonIfKilled.isDefined =>
+          val killReason = task.reasonIfKilled.getOrElse("unknown reason")
+          logInfo(s"Executor interrupted and killed $taskName (TID $taskId), reason: $killReason")
+          setTaskFinishedAndClearInterruptStatus()
+          execBackend.statusUpdate(
+            taskId, TaskState.KILLED, ser.serialize(TaskKilled(killReason)))
+
         case CausedBy(cDE: CommitDeniedException) =>
           val reason = cDE.toTaskCommitDeniedReason
           setTaskFinishedAndClearInterruptStatus()
@@ -549,7 +524,17 @@ private[spark] class Executor(
           // the task failure would not be ignored if the shutdown happened because of preemption,
           // instead of an app issue).
           if (!ShutdownHookManager.inShutdown()) {
-            val (accums, accUpdates) = collectAccumulatorsAndResetStatusOnFailure(taskStartTime)
+            // Collect latest accumulator values to report back to the driver
+            val accums: Seq[AccumulatorV2[_, _]] =
+              if (task != null) {
+                task.metrics.setExecutorRunTime(System.currentTimeMillis() - taskStart)
+                task.metrics.setJvmGCTime(computeTotalGcTime() - startGCTime)
+                task.collectAccumulatorUpdates(taskFailed = true)
+              } else {
+                Seq.empty
+              }
+
+            val accUpdates = accums.map(acc => acc.toInfo(Some(acc.value), None))
 
             val serializedTaskEndReason = {
               try {
diff --git a/core/src/main/scala/org/apache/spark/internal/config/ConfigBuilder.scala b/core/src/main/scala/org/apache/spark/internal/config/ConfigBuilder.scala
index f27aca03773a9..b0cd7110a3b47 100644
--- a/core/src/main/scala/org/apache/spark/internal/config/ConfigBuilder.scala
+++ b/core/src/main/scala/org/apache/spark/internal/config/ConfigBuilder.scala
@@ -23,7 +23,6 @@ import java.util.regex.PatternSyntaxException
 import scala.util.matching.Regex
 
 import org.apache.spark.network.util.{ByteUnit, JavaUtils}
-import org.apache.spark.util.Utils
 
 private object ConfigHelpers {
 
@@ -46,7 +45,7 @@ private object ConfigHelpers {
   }
 
   def stringToSeq[T](str: String, converter: String => T): Seq[T] = {
-    Utils.stringToSeq(str).map(converter)
+    str.split(",").map(_.trim()).filter(_.nonEmpty).map(converter)
   }
 
   def seqToString[T](v: Seq[T], stringConverter: T => String): String = {
diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala
index a54b091a64d50..bbfcfbaa7363c 100644
--- a/core/src/main/scala/org/apache/spark/internal/config/package.scala
+++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala
@@ -126,10 +126,6 @@ package object config {
   private[spark] val DYN_ALLOCATION_MAX_EXECUTORS =
     ConfigBuilder("spark.dynamicAllocation.maxExecutors").intConf.createWithDefault(Int.MaxValue)
 
-  private[spark] val DYN_ALLOCATION_EXECUTOR_ALLOCATION_RATIO =
-    ConfigBuilder("spark.dynamicAllocation.executorAllocationRatio")
-      .doubleConf.createWithDefault(1.0)
-
   private[spark] val LOCALITY_WAIT = ConfigBuilder("spark.locality.wait")
     .timeConf(TimeUnit.MILLISECONDS)
     .createWithDefaultString("3s")
@@ -305,12 +301,6 @@ package object config {
     .booleanConf
     .createWithDefault(false)
 
-  private[spark] val IGNORE_MISSING_FILES = ConfigBuilder("spark.files.ignoreMissingFiles")
-    .doc("Whether to ignore missing files. If true, the Spark jobs will continue to run when " +
-        "encountering missing files and the contents that have been read will still be returned.")
-    .booleanConf
-    .createWithDefault(false)
-
   private[spark] val APP_CALLER_CONTEXT = ConfigBuilder("spark.log.callerContext")
     .stringConf
     .createOptional
@@ -342,7 +332,7 @@ package object config {
         "a property key or value, the value is redacted from the environment UI and various logs " +
         "like YARN and event logs.")
       .regexConf
-      .createWithDefault("(?i)secret|password".r)
+      .createWithDefault("(?i)secret|password|url|user|username".r)
 
   private[spark] val STRING_REDACTION_PATTERN =
     ConfigBuilder("spark.redaction.string.regex")
@@ -352,11 +342,6 @@ package object config {
       .regexConf
       .createOptional
 
-  private[spark] val AUTH_SECRET_BIT_LENGTH =
-    ConfigBuilder("spark.authenticate.secretBitLength")
-      .intConf
-      .createWithDefault(256)
-
   private[spark] val NETWORK_AUTH_ENABLED =
     ConfigBuilder("spark.authenticate")
       .booleanConf
@@ -535,21 +520,4 @@ package object config {
       .checkValue(v => v > 0, "The threshold should be positive.")
       .createWithDefault(10000000)
 
-  private[spark] val MAX_RESULT_SIZE = ConfigBuilder("spark.driver.maxResultSize")
-    .doc("Size limit for results.")
-    .bytesConf(ByteUnit.BYTE)
-    .createWithDefaultString("1g")
-
-  private[spark] val CREDENTIALS_RENEWAL_INTERVAL_RATIO =
-    ConfigBuilder("spark.security.credentials.renewalRatio")
-      .doc("Ratio of the credential's expiration time when Spark should fetch new credentials.")
-      .doubleConf
-      .createWithDefault(0.75d)
-
-  private[spark] val CREDENTIALS_RENEWAL_RETRY_WAIT =
-    ConfigBuilder("spark.security.credentials.retryWait")
-      .doc("How long to wait before retrying to fetch new credentials after a failure.")
-      .timeConf(TimeUnit.SECONDS)
-      .createWithDefaultString("1h")
-
 }
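
The config hunk above also reverts the default of spark.redaction.regex from "(?i)secret|password" back to the broader "(?i)secret|password|url|user|username". A small sketch of how such a pattern decides which configuration keys get redacted; the key names below are illustrative, and the matching is a simplified stand-in for Spark's own redaction helper:

import scala.util.matching.Regex

// Sketch: apply a redaction regex to config keys the way the doc string above
// describes -- any key the pattern matches has its value hidden in the UI/logs.
object RedactionSketch {
  def redacts(pattern: Regex, key: String): Boolean = pattern.findFirstIn(key).isDefined

  def main(args: Array[String]): Unit = {
    val narrow = "(?i)secret|password".r
    val broad  = "(?i)secret|password|url|user|username".r
    Seq("spark.ssl.keyPassword", "spark.sql.jdbc.url", "spark.executor.memory").foreach { k =>
      println(s"$k -> narrow=${redacts(narrow, k)}, broad=${redacts(broad, k)}")
    }
  }
}

With the broader default restored, keys containing "url" or "user" (such as JDBC connection strings) are redacted as well.
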
diff --git a/core/src/main/scala/org/apache/spark/internal/io/FileCommitProtocol.scala b/core/src/main/scala/org/apache/spark/internal/io/FileCommitProtocol.scala
index e6e9c9e328853..6d0059b6a0272 100644
--- a/core/src/main/scala/org/apache/spark/internal/io/FileCommitProtocol.scala
+++ b/core/src/main/scala/org/apache/spark/internal/io/FileCommitProtocol.scala
@@ -20,7 +20,6 @@ package org.apache.spark.internal.io
 import org.apache.hadoop.fs._
 import org.apache.hadoop.mapreduce._
 
-import org.apache.spark.internal.Logging
 import org.apache.spark.util.Utils
 
 
@@ -133,7 +132,7 @@ abstract class FileCommitProtocol {
 }
 
 
-object FileCommitProtocol extends Logging {
+object FileCommitProtocol {
   class TaskCommitMessage(val obj: Any) extends Serializable
 
   object EmptyTaskCommitMessage extends TaskCommitMessage(null)
@@ -146,23 +145,15 @@ object FileCommitProtocol extends Logging {
       jobId: String,
       outputPath: String,
       dynamicPartitionOverwrite: Boolean = false): FileCommitProtocol = {
-
-    logDebug(s"Creating committer $className; job $jobId; output=$outputPath;" +
-      s" dynamic=$dynamicPartitionOverwrite")
     val clazz = Utils.classForName(className).asInstanceOf[Class[FileCommitProtocol]]
     // First try the constructor with arguments (jobId: String, outputPath: String,
     // dynamicPartitionOverwrite: Boolean).
     // If that doesn't exist, try the one with (jobId: string, outputPath: String).
     try {
       val ctor = clazz.getDeclaredConstructor(classOf[String], classOf[String], classOf[Boolean])
-      logDebug("Using (String, String, Boolean) constructor")
       ctor.newInstance(jobId, outputPath, dynamicPartitionOverwrite.asInstanceOf[java.lang.Boolean])
     } catch {
       case _: NoSuchMethodException =>
-        logDebug("Falling back to (String, String) constructor")
-        require(!dynamicPartitionOverwrite,
-          "Dynamic Partition Overwrite is enabled but" +
-            s" the committer ${className} does not have the appropriate constructor")
         val ctor = clazz.getDeclaredConstructor(classOf[String], classOf[String])
         ctor.newInstance(jobId, outputPath)
     }
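
The instantiate() change above removes the logging and the dynamicPartitionOverwrite guard but keeps the underlying reflection pattern: try the (String, String, Boolean) constructor first and fall back to (String, String) if it is not declared. A self-contained sketch of that pattern against a made-up class (nothing here is Spark API):

// Sketch of the constructor-fallback-by-reflection pattern used by
// FileCommitProtocol.instantiate, demonstrated on a hypothetical Greeter class.
class Greeter(name: String, greeting: String, shout: Boolean) {
  def this(name: String, greeting: String) = this(name, greeting, false)
  def say(): String = {
    val msg = s"$greeting, $name"
    if (shout) msg.toUpperCase else msg
  }
}

object CtorFallbackSketch {
  def instantiate(name: String, greeting: String, shout: Boolean): Greeter = {
    val clazz = classOf[Greeter]
    try {
      // Prefer the richer three-argument constructor when it exists...
      val ctor = clazz.getDeclaredConstructor(classOf[String], classOf[String], classOf[Boolean])
      ctor.newInstance(name, greeting, shout.asInstanceOf[java.lang.Boolean])
    } catch {
      case _: NoSuchMethodException =>
        // ...otherwise fall back to the two-argument one.
        clazz.getDeclaredConstructor(classOf[String], classOf[String]).newInstance(name, greeting)
    }
  }

  def main(args: Array[String]): Unit =
    println(instantiate("spark", "hello", shout = true).say())
}
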
diff --git a/core/src/main/scala/org/apache/spark/internal/io/HadoopMapReduceCommitProtocol.scala b/core/src/main/scala/org/apache/spark/internal/io/HadoopMapReduceCommitProtocol.scala
index 3e60c50ada59b..6d20ef1f98a3c 100644
--- a/core/src/main/scala/org/apache/spark/internal/io/HadoopMapReduceCommitProtocol.scala
+++ b/core/src/main/scala/org/apache/spark/internal/io/HadoopMapReduceCommitProtocol.scala
@@ -186,17 +186,7 @@ class HadoopMapReduceCommitProtocol(
         logDebug(s"Clean up default partition directories for overwriting: $partitionPaths")
         for (part <- partitionPaths) {
           val finalPartPath = new Path(path, part)
-          if (!fs.delete(finalPartPath, true) && !fs.exists(finalPartPath.getParent)) {
-            // According to the official hadoop FileSystem API spec, delete op should assume
-            // the destination is no longer present regardless of return value, thus we do not
-            // need to double check if finalPartPath exists before rename.
-            // Also in our case, based on the spec, delete returns false only when finalPartPath
-            // does not exist. When this happens, we need to take action if parent of finalPartPath
-            // also does not exist (e.g. the scenario described in SPARK-23815), because
-            // FileSystem API spec on rename op says the rename dest(finalPartPath) must have
-            // a parent that exists, otherwise we may get unexpected result on the rename.
-            fs.mkdirs(finalPartPath.getParent)
-          }
+          fs.delete(finalPartPath, true)
           fs.rename(new Path(stagingDir, part), finalPartPath)
         }
       }
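
The removed block above carried the reasoning (SPARK-23815) for recreating the parent of finalPartPath before renaming: delete() may return false when the path is already absent, and rename() requires the destination's parent to exist. A minimal sketch of that pre-revert pattern with illustrative paths, assuming only the Hadoop FileSystem API:

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}

// Sketch of the pre-revert overwrite step: delete the old partition directory,
// make sure its parent still exists, then rename the staged data into place.
object OverwriteRenameSketch {
  def overwrite(fs: FileSystem, staged: Path, finalPartPath: Path): Unit = {
    if (!fs.delete(finalPartPath, true) && !fs.exists(finalPartPath.getParent)) {
      // delete() returned false, i.e. the path was already gone; rename needs
      // an existing parent, so recreate it before moving the staged directory.
      fs.mkdirs(finalPartPath.getParent)
    }
    fs.rename(staged, finalPartPath)
  }

  def main(args: Array[String]): Unit = {
    val fs = FileSystem.get(new Configuration())
    overwrite(fs, new Path("/tmp/staging/p=1"), new Path("/tmp/table/p=1"))
  }
}
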
diff --git a/core/src/main/scala/org/apache/spark/launcher/LauncherBackend.scala b/core/src/main/scala/org/apache/spark/launcher/LauncherBackend.scala
index 1b049b786023a..aaae33ca4e6f3 100644
--- a/core/src/main/scala/org/apache/spark/launcher/LauncherBackend.scala
+++ b/core/src/main/scala/org/apache/spark/launcher/LauncherBackend.scala
@@ -67,13 +67,13 @@ private[spark] abstract class LauncherBackend {
   }
 
   def setAppId(appId: String): Unit = {
-    if (connection != null && isConnected) {
+    if (connection != null) {
       connection.send(new SetAppId(appId))
     }
   }
 
   def setState(state: SparkAppHandle.State): Unit = {
-    if (connection != null && isConnected && lastState != state) {
+    if (connection != null && lastState != state) {
       connection.send(new SetState(state))
       lastState = state
     }
@@ -114,10 +114,10 @@ private[spark] abstract class LauncherBackend {
 
     override def close(): Unit = {
       try {
-        _isConnected = false
         super.close()
       } finally {
         onDisconnected()
+        _isConnected = false
       }
     }
 
diff --git a/core/src/main/scala/org/apache/spark/rdd/AsyncRDDActions.scala b/core/src/main/scala/org/apache/spark/rdd/AsyncRDDActions.scala
index 13db4985b0b80..c9ed12f4e1bd4 100644
--- a/core/src/main/scala/org/apache/spark/rdd/AsyncRDDActions.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/AsyncRDDActions.scala
@@ -90,7 +90,7 @@ class AsyncRDDActions[T: ClassTag](self: RDD[T]) extends Serializable with Loggi
           // Otherwise, interpolate the number of partitions we need to try, but overestimate it
           // by 50%. We also cap the estimation in the end.
           if (results.size == 0) {
-            numPartsToTry = partsScanned * 4L
+            numPartsToTry = partsScanned * 4
           } else {
             // the left side of max is >=1 whenever partsScanned >= 2
             numPartsToTry = Math.max(1,
diff --git a/core/src/main/scala/org/apache/spark/rdd/CoalescedRDD.scala b/core/src/main/scala/org/apache/spark/rdd/CoalescedRDD.scala
index 94e7d0b38cba3..10451a324b0f4 100644
--- a/core/src/main/scala/org/apache/spark/rdd/CoalescedRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/CoalescedRDD.scala
@@ -266,17 +266,17 @@ private class DefaultPartitionCoalescer(val balanceSlack: Double = 0.10)
         numCreated += 1
       }
     }
+    tries = 0
     // if we don't have enough partition groups, create duplicates
     while (numCreated < targetLen) {
-      // Copy the preferred location from a random input partition.
-      // This helps in avoiding skew when the input partitions are clustered by preferred location.
-      val (nxt_replica, nxt_part) = partitionLocs.partsWithLocs(
-        rnd.nextInt(partitionLocs.partsWithLocs.length))
+      val (nxt_replica, nxt_part) = partitionLocs.partsWithLocs(tries)
+      tries += 1
       val pgroup = new PartitionGroup(Some(nxt_replica))
       groupArr += pgroup
       groupHash.getOrElseUpdate(nxt_replica, ArrayBuffer()) += pgroup
       addPartToPGroup(nxt_part, pgroup)
       numCreated += 1
+      if (tries >= partitionLocs.partsWithLocs.length) tries = 0
     }
   }
 
diff --git a/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala
index 44895abc7bd4d..2480559a41b7a 100644
--- a/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala
@@ -17,7 +17,7 @@
 
 package org.apache.spark.rdd
 
-import java.io.{FileNotFoundException, IOException}
+import java.io.IOException
 import java.text.SimpleDateFormat
 import java.util.{Date, Locale}
 
@@ -28,7 +28,6 @@ import org.apache.hadoop.conf.{Configurable, Configuration}
 import org.apache.hadoop.mapred._
 import org.apache.hadoop.mapred.lib.CombineFileSplit
 import org.apache.hadoop.mapreduce.TaskType
-import org.apache.hadoop.mapreduce.lib.input.FileInputFormat
 import org.apache.hadoop.util.ReflectionUtils
 
 import org.apache.spark._
@@ -135,8 +134,6 @@ class HadoopRDD[K, V](
 
   private val ignoreCorruptFiles = sparkContext.conf.get(IGNORE_CORRUPT_FILES)
 
-  private val ignoreMissingFiles = sparkContext.conf.get(IGNORE_MISSING_FILES)
-
   private val ignoreEmptySplits = sparkContext.conf.get(HADOOP_RDD_IGNORE_EMPTY_SPLITS)
 
   // Returns a JobConf that will be used on slaves to obtain input splits for Hadoop reads.
@@ -200,24 +197,17 @@ class HadoopRDD[K, V](
     val jobConf = getJobConf()
     // add the credentials here as this can be called before SparkContext initialized
     SparkHadoopUtil.get.addCredentials(jobConf)
-    try {
-      val allInputSplits = getInputFormat(jobConf).getSplits(jobConf, minPartitions)
-      val inputSplits = if (ignoreEmptySplits) {
-        allInputSplits.filter(_.getLength > 0)
-      } else {
-        allInputSplits
-      }
-      val array = new Array[Partition](inputSplits.size)
-      for (i <- 0 until inputSplits.size) {
-        array(i) = new HadoopPartition(id, i, inputSplits(i))
-      }
-      array
-    } catch {
-      case e: InvalidInputException if ignoreMissingFiles =>
-        logWarning(s"${jobConf.get(FileInputFormat.INPUT_DIR)} doesn't exist and no" +
-            s" partitions returned from this path.", e)
-        Array.empty[Partition]
+    val allInputSplits = getInputFormat(jobConf).getSplits(jobConf, minPartitions)
+    val inputSplits = if (ignoreEmptySplits) {
+      allInputSplits.filter(_.getLength > 0)
+    } else {
+      allInputSplits
     }
+    val array = new Array[Partition](inputSplits.size)
+    for (i <- 0 until inputSplits.size) {
+      array(i) = new HadoopPartition(id, i, inputSplits(i))
+    }
+    array
   }
 
   override def compute(theSplit: Partition, context: TaskContext): InterruptibleIterator[(K, V)] = {
@@ -266,12 +256,6 @@ class HadoopRDD[K, V](
         try {
           inputFormat.getRecordReader(split.inputSplit.value, jobConf, Reporter.NULL)
         } catch {
-          case e: FileNotFoundException if ignoreMissingFiles =>
-            logWarning(s"Skipped missing file: ${split.inputSplit}", e)
-            finished = true
-            null
-          // Throw FileNotFoundException even if `ignoreCorruptFiles` is true
-          case e: FileNotFoundException if !ignoreMissingFiles => throw e
           case e: IOException if ignoreCorruptFiles =>
             logWarning(s"Skipped the rest content in the corrupted file: ${split.inputSplit}", e)
             finished = true
@@ -292,11 +276,6 @@ class HadoopRDD[K, V](
         try {
           finished = !reader.next(key, value)
         } catch {
-          case e: FileNotFoundException if ignoreMissingFiles =>
-            logWarning(s"Skipped missing file: ${split.inputSplit}", e)
-            finished = true
-          // Throw FileNotFoundException even if `ignoreCorruptFiles` is true
-          case e: FileNotFoundException if !ignoreMissingFiles => throw e
           case e: IOException if ignoreCorruptFiles =>
             logWarning(s"Skipped the rest content in the corrupted file: ${split.inputSplit}", e)
             finished = true
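
The HadoopRDD hunks above strip out the handling keyed off spark.files.ignoreMissingFiles (also removed from the config object earlier in this diff): with that flag set, input files that disappeared between split computation and reading were skipped instead of failing the job. A hedged sketch of how the pre-revert flag was used, with an illustrative local path (on the reverted code the setting has no effect):

import org.apache.spark.{SparkConf, SparkContext}

// Sketch, pre-revert behaviour only: read a glob and tolerate files that were
// deleted after the splits were computed. The path below is an example.
object IgnoreMissingFilesSketch {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setMaster("local[*]")
      .setAppName("ignore-missing-files-sketch")
      .set("spark.files.ignoreMissingFiles", "true")
    val sc = new SparkContext(conf)
    try {
      println(sc.textFile("/tmp/data/*.log").count())
    } finally {
      sc.stop()
    }
  }
}
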
diff --git a/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala b/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala
index ff66a04859d10..e4dd1b6a82498 100644
--- a/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala
@@ -17,7 +17,7 @@
 
 package org.apache.spark.rdd
 
-import java.io.{FileNotFoundException, IOException}
+import java.io.IOException
 import java.text.SimpleDateFormat
 import java.util.{Date, Locale}
 
@@ -28,7 +28,7 @@ import org.apache.hadoop.conf.{Configurable, Configuration}
 import org.apache.hadoop.io.Writable
 import org.apache.hadoop.mapred.JobConf
 import org.apache.hadoop.mapreduce._
-import org.apache.hadoop.mapreduce.lib.input.{CombineFileSplit, FileInputFormat, FileSplit, InvalidInputException}
+import org.apache.hadoop.mapreduce.lib.input.{CombineFileSplit, FileSplit}
 import org.apache.hadoop.mapreduce.task.{JobContextImpl, TaskAttemptContextImpl}
 
 import org.apache.spark._
@@ -90,8 +90,6 @@ class NewHadoopRDD[K, V](
 
   private val ignoreCorruptFiles = sparkContext.conf.get(IGNORE_CORRUPT_FILES)
 
-  private val ignoreMissingFiles = sparkContext.conf.get(IGNORE_MISSING_FILES)
-
   private val ignoreEmptySplits = sparkContext.conf.get(HADOOP_RDD_IGNORE_EMPTY_SPLITS)
 
   def getConf: Configuration = {
@@ -126,25 +124,17 @@ class NewHadoopRDD[K, V](
         configurable.setConf(_conf)
       case _ =>
     }
-    try {
-      val allRowSplits = inputFormat.getSplits(new JobContextImpl(_conf, jobId)).asScala
-      val rawSplits = if (ignoreEmptySplits) {
-        allRowSplits.filter(_.getLength > 0)
-      } else {
-        allRowSplits
-      }
-      val result = new Array[Partition](rawSplits.size)
-      for (i <- 0 until rawSplits.size) {
-        result(i) =
-            new NewHadoopPartition(id, i, rawSplits(i).asInstanceOf[InputSplit with Writable])
-      }
-      result
-    } catch {
-      case e: InvalidInputException if ignoreMissingFiles =>
-        logWarning(s"${_conf.get(FileInputFormat.INPUT_DIR)} doesn't exist and no" +
-            s" partitions returned from this path.", e)
-        Array.empty[Partition]
+    val allRowSplits = inputFormat.getSplits(new JobContextImpl(_conf, jobId)).asScala
+    val rawSplits = if (ignoreEmptySplits) {
+      allRowSplits.filter(_.getLength > 0)
+    } else {
+      allRowSplits
     }
+    val result = new Array[Partition](rawSplits.size)
+    for (i <- 0 until rawSplits.size) {
+      result(i) = new NewHadoopPartition(id, i, rawSplits(i).asInstanceOf[InputSplit with Writable])
+    }
+    result
   }
 
   override def compute(theSplit: Partition, context: TaskContext): InterruptibleIterator[(K, V)] = {
@@ -199,12 +189,6 @@ class NewHadoopRDD[K, V](
           _reader.initialize(split.serializableHadoopSplit.value, hadoopAttemptContext)
           _reader
         } catch {
-          case e: FileNotFoundException if ignoreMissingFiles =>
-            logWarning(s"Skipped missing file: ${split.serializableHadoopSplit}", e)
-            finished = true
-            null
-          // Throw FileNotFoundException even if `ignoreCorruptFiles` is true
-          case e: FileNotFoundException if !ignoreMissingFiles => throw e
           case e: IOException if ignoreCorruptFiles =>
             logWarning(
               s"Skipped the rest content in the corrupted file: ${split.serializableHadoopSplit}",
@@ -229,11 +213,6 @@ class NewHadoopRDD[K, V](
           try {
             finished = !reader.nextKeyValue
           } catch {
-            case e: FileNotFoundException if ignoreMissingFiles =>
-              logWarning(s"Skipped missing file: ${split.serializableHadoopSplit}", e)
-              finished = true
-            // Throw FileNotFoundException even if `ignoreCorruptFiles` is true
-            case e: FileNotFoundException if !ignoreMissingFiles => throw e
             case e: IOException if ignoreCorruptFiles =>
               logWarning(
                 s"Skipped the rest content in the corrupted file: ${split.serializableHadoopSplit}",
diff --git a/core/src/main/scala/org/apache/spark/scheduler/AsyncEventQueue.scala b/core/src/main/scala/org/apache/spark/scheduler/AsyncEventQueue.scala
index e2b6df4600590..7e14938acd8e0 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/AsyncEventQueue.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/AsyncEventQueue.scala
@@ -34,11 +34,7 @@ import org.apache.spark.util.Utils
  * Delivery will only begin when the `start()` method is called. The `stop()` method should be
  * called when no more events need to be delivered.
  */
-private class AsyncEventQueue(
-    val name: String,
-    conf: SparkConf,
-    metrics: LiveListenerBusMetrics,
-    bus: LiveListenerBus)
+private class AsyncEventQueue(val name: String, conf: SparkConf, metrics: LiveListenerBusMetrics)
   extends SparkListenerBus
   with Logging {
 
@@ -85,18 +81,23 @@ private class AsyncEventQueue(
   }
 
   private def dispatch(): Unit = LiveListenerBus.withinListenerThread.withValue(true) {
-    var next: SparkListenerEvent = eventQueue.take()
-    while (next != POISON_PILL) {
-      val ctx = processingTime.time()
-      try {
-        super.postToAll(next)
-      } finally {
-        ctx.stop()
+    try {
+      var next: SparkListenerEvent = eventQueue.take()
+      while (next != POISON_PILL) {
+        val ctx = processingTime.time()
+        try {
+          super.postToAll(next)
+        } finally {
+          ctx.stop()
+        }
+        eventCount.decrementAndGet()
+        next = eventQueue.take()
       }
       eventCount.decrementAndGet()
-      next = eventQueue.take()
+    } catch {
+      case ie: InterruptedException =>
+        logInfo(s"Stopping listener queue $name.", ie)
     }
-    eventCount.decrementAndGet()
   }
 
   override protected def getTimer(listener: SparkListenerInterface): Option[Timer] = {
@@ -129,11 +130,7 @@ private class AsyncEventQueue(
       eventCount.incrementAndGet()
       eventQueue.put(POISON_PILL)
     }
-    // this thread might be trying to stop itself as part of error handling -- we can't join
-    // in that case.
-    if (Thread.currentThread() != dispatchThread) {
-      dispatchThread.join()
-    }
+    dispatchThread.join()
   }
 
   def post(event: SparkListenerEvent): Unit = {
@@ -169,7 +166,7 @@ private class AsyncEventQueue(
           val prevLastReportTimestamp = lastReportTimestamp
           lastReportTimestamp = System.currentTimeMillis()
           val previous = new java.util.Date(prevLastReportTimestamp)
-          logWarning(s"Dropped $droppedCount events from $name since $previous.")
+          logWarning(s"Dropped $droppedEvents events from $name since $previous.")
         }
       }
     }
@@ -190,12 +187,6 @@ private class AsyncEventQueue(
     true
   }
 
-  override def removeListenerOnError(listener: SparkListenerInterface): Unit = {
-    // the listener failed in an unrecoverably way, we want to remove it from the entire
-    // LiveListenerBus (potentially stopping a queue if it is empty)
-    bus.removeListener(listener)
-  }
-
 }
 
 private object AsyncEventQueue {
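
The AsyncEventQueue hunks above revert the poison-pill dispatch loop back to an interruption-based one and drop the guard against a thread joining itself in stop(). A self-contained sketch of the poison-pill variant (hypothetical names, plain JDK queue, not Spark's listener bus):

    import java.util.concurrent.LinkedBlockingQueue

    object PoisonPillQueueSketch {
      private val POISON_PILL = new Object
      private val queue = new LinkedBlockingQueue[AnyRef]()

      private val dispatcher = new Thread(new Runnable {
        override def run(): Unit = {
          var next = queue.take()
          while (next ne POISON_PILL) {
            println(s"dispatching $next")   // stand-in for postToAll(event)
            next = queue.take()
          }
        }
      })

      def main(args: Array[String]): Unit = {
        dispatcher.start()
        queue.put("event-1")
        queue.put("event-2")
        queue.put(POISON_PILL)              // stop(): unblocks take() and ends the loop cleanly
        // Guard against a listener stopping its own queue: never join the current thread.
        if (Thread.currentThread() ne dispatcher) {
          dispatcher.join()
        }
      }
    }
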
diff --git a/core/src/main/scala/org/apache/spark/scheduler/BlacklistTracker.scala b/core/src/main/scala/org/apache/spark/scheduler/BlacklistTracker.scala
index 30cf75d43ee09..cd8e61d6d0208 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/BlacklistTracker.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/BlacklistTracker.scala
@@ -152,8 +152,7 @@ private[scheduler] class BlacklistTracker (
         case Some(a) =>
           logInfo(s"Killing blacklisted executor id $exec " +
             s"since ${config.BLACKLIST_KILL_ENABLED.key} is set.")
-          a.killExecutors(Seq(exec), adjustTargetNumExecutors = false, countFailures = false,
-            force = true)
+          a.killExecutors(Seq(exec), true, true)
         case None =>
           logWarning(s"Not attempting to kill blacklisted executor id $exec " +
             s"since allocation client is not defined.")
@@ -210,7 +209,7 @@ private[scheduler] class BlacklistTracker (
         updateNextExpiryTime()
         killBlacklistedExecutor(exec)
 
-        val blacklistedExecsOnNode = nodeToBlacklistedExecs.getOrElseUpdate(host, HashSet[String]())
+        val blacklistedExecsOnNode = nodeToBlacklistedExecs.getOrElseUpdate(exec, HashSet[String]())
         blacklistedExecsOnNode += exec
       }
     }
diff --git a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala
index 041eade82d3ca..199937b8c27af 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala
@@ -39,7 +39,7 @@ import org.apache.spark.internal.Logging
 import org.apache.spark.internal.config
 import org.apache.spark.network.util.JavaUtils
 import org.apache.spark.partial.{ApproximateActionListener, ApproximateEvaluator, PartialResult}
-import org.apache.spark.rdd.{RDD, RDDCheckpointData}
+import org.apache.spark.rdd.RDD
 import org.apache.spark.rpc.RpcTimeout
 import org.apache.spark.storage._
 import org.apache.spark.storage.BlockManagerMessages.BlockManagerHeartbeat
@@ -206,7 +206,7 @@ class DAGScheduler(
   private val messageScheduler =
     ThreadUtils.newDaemonSingleThreadScheduledExecutor("dag-scheduler-message")
 
-  private[spark] val eventProcessLoop = new DAGSchedulerEventProcessLoop(this)
+  private[scheduler] val eventProcessLoop = new DAGSchedulerEventProcessLoop(this)
   taskScheduler.setDAGScheduler(this)
 
   /**
@@ -1016,24 +1016,15 @@ class DAGScheduler(
     // might modify state of objects referenced in their closures. This is necessary in Hadoop
     // where the JobConf/Configuration object is not thread-safe.
     var taskBinary: Broadcast[Array[Byte]] = null
-    var partitions: Array[Partition] = null
     try {
       // For ShuffleMapTask, serialize and broadcast (rdd, shuffleDep).
       // For ResultTask, serialize and broadcast (rdd, func).
-      var taskBinaryBytes: Array[Byte] = null
-      // taskBinaryBytes and partitions are both effected by the checkpoint status. We need
-      // this synchronization in case another concurrent job is checkpointing this RDD, so we get a
-      // consistent view of both variables.
-      RDDCheckpointData.synchronized {
-        taskBinaryBytes = stage match {
-          case stage: ShuffleMapStage =>
-            JavaUtils.bufferToArray(
-              closureSerializer.serialize((stage.rdd, stage.shuffleDep): AnyRef))
-          case stage: ResultStage =>
-            JavaUtils.bufferToArray(closureSerializer.serialize((stage.rdd, stage.func): AnyRef))
-        }
-
-        partitions = stage.rdd.partitions
+      val taskBinaryBytes: Array[Byte] = stage match {
+        case stage: ShuffleMapStage =>
+          JavaUtils.bufferToArray(
+            closureSerializer.serialize((stage.rdd, stage.shuffleDep): AnyRef))
+        case stage: ResultStage =>
+          JavaUtils.bufferToArray(closureSerializer.serialize((stage.rdd, stage.func): AnyRef))
       }
 
       taskBinary = sc.broadcast(taskBinaryBytes)
@@ -1058,7 +1049,7 @@ class DAGScheduler(
           stage.pendingPartitions.clear()
           partitionsToCompute.map { id =>
             val locs = taskIdToLocations(id)
-            val part = partitions(id)
+            val part = stage.rdd.partitions(id)
             stage.pendingPartitions += id
             new ShuffleMapTask(stage.id, stage.latestInfo.attemptNumber,
               taskBinary, part, locs, properties, serializedTaskMetrics, Option(jobId),
@@ -1068,7 +1059,7 @@ class DAGScheduler(
         case stage: ResultStage =>
           partitionsToCompute.map { id =>
             val p: Int = stage.partitions(id)
-            val part = partitions(p)
+            val part = stage.rdd.partitions(p)
             val locs = taskIdToLocations(id)
             new ResultTask(stage.id, stage.latestInfo.attemptNumber,
               taskBinary, part, locs, id, properties, serializedTaskMetrics,
@@ -1092,16 +1083,17 @@ class DAGScheduler(
       // the stage as completed here in case there are no tasks to run
       markStageAsFinished(stage, None)
 
-      stage match {
+      val debugString = stage match {
         case stage: ShuffleMapStage =>
-          logDebug(s"Stage ${stage} is actually done; " +
-              s"(available: ${stage.isAvailable}," +
-              s"available outputs: ${stage.numAvailableOutputs}," +
-              s"partitions: ${stage.numPartitions})")
-          markMapStageJobsAsFinished(stage)
+          s"Stage ${stage} is actually done; " +
+            s"(available: ${stage.isAvailable}," +
+            s"available outputs: ${stage.numAvailableOutputs}," +
+            s"partitions: ${stage.numPartitions})"
         case stage : ResultStage =>
-          logDebug(s"Stage ${stage} is actually done; (partitions: ${stage.numPartitions})")
+          s"Stage ${stage} is actually done; (partitions: ${stage.numPartitions})"
       }
+      logDebug(debugString)
+
       submitWaitingChildStages(stage)
     }
   }
@@ -1167,7 +1159,9 @@ class DAGScheduler(
    */
   private[scheduler] def handleTaskCompletion(event: CompletionEvent) {
     val task = event.task
+    val taskId = event.taskInfo.id
     val stageId = task.stageId
+    val taskType = Utils.getFormattedClassName(task)
 
     outputCommitCoordinator.taskCompleted(
       stageId,
@@ -1208,7 +1202,7 @@ class DAGScheduler(
           case _ =>
             updateAccumulators(event)
         }
-      case _: ExceptionFailure | _: TaskKilled => updateAccumulators(event)
+      case _: ExceptionFailure => updateAccumulators(event)
       case _ =>
     }
     postTaskEnd(event)
@@ -1304,7 +1298,13 @@ class DAGScheduler(
                   shuffleStage.findMissingPartitions().mkString(", "))
                 submitStage(shuffleStage)
               } else {
-                markMapStageJobsAsFinished(shuffleStage)
+                // Mark any map-stage jobs waiting on this stage as finished
+                if (shuffleStage.mapStageJobs.nonEmpty) {
+                  val stats = mapOutputTracker.getStatistics(shuffleStage.shuffleDep)
+                  for (job <- shuffleStage.mapStageJobs) {
+                    markMapStageJobAsFinished(job, stats)
+                  }
+                }
                 submitWaitingChildStages(shuffleStage)
               }
             }
@@ -1321,7 +1321,7 @@ class DAGScheduler(
               "tasks in ShuffleMapStages.")
         }
 
-      case FetchFailed(bmAddress, shuffleId, mapId, _, failureMessage) =>
+      case FetchFailed(bmAddress, shuffleId, mapId, reduceId, failureMessage) =>
         val failedStage = stageIdToStage(task.stageId)
         val mapStage = shuffleIdToMapStage(shuffleId)
 
@@ -1409,31 +1409,21 @@ class DAGScheduler(
           }
         }
 
-      case _: TaskCommitDenied =>
+      case commitDenied: TaskCommitDenied =>
         // Do nothing here, left up to the TaskScheduler to decide how to handle denied commits
 
-      case _: ExceptionFailure | _: TaskKilled =>
+      case exceptionFailure: ExceptionFailure =>
         // Nothing left to do, already handled above for accumulator updates.
 
       case TaskResultLost =>
         // Do nothing here; the TaskScheduler handles these failures and resubmits the task.
 
-      case _: ExecutorLostFailure | UnknownReason =>
+      case _: ExecutorLostFailure | _: TaskKilled | UnknownReason =>
         // Unrecognized failure - also do nothing. If the task fails repeatedly, the TaskScheduler
         // will abort the job.
     }
   }
 
-  private[scheduler] def markMapStageJobsAsFinished(shuffleStage: ShuffleMapStage): Unit = {
-    // Mark any map-stage jobs waiting on this stage as finished
-    if (shuffleStage.isAvailable && shuffleStage.mapStageJobs.nonEmpty) {
-      val stats = mapOutputTracker.getStatistics(shuffleStage.shuffleDep)
-      for (job <- shuffleStage.mapStageJobs) {
-        markMapStageJobAsFinished(job, stats)
-      }
-    }
-  }
-
   /**
    * Responds to an executor being lost. This is called inside the event loop, so it assumes it can
    * modify the scheduler's internal state. Use executorLost() to post a loss event from outside.
diff --git a/core/src/main/scala/org/apache/spark/scheduler/LiveListenerBus.scala b/core/src/main/scala/org/apache/spark/scheduler/LiveListenerBus.scala
index d135190d1e919..ba6387a8f08ad 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/LiveListenerBus.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/LiveListenerBus.scala
@@ -102,7 +102,7 @@ private[spark] class LiveListenerBus(conf: SparkConf) {
         queue.addListener(listener)
 
       case None =>
-        val newQueue = new AsyncEventQueue(queue, conf, metrics, this)
+        val newQueue = new AsyncEventQueue(queue, conf, metrics)
         newQueue.addListener(listener)
         if (started.get()) {
           newQueue.start(sparkContext)
diff --git a/core/src/main/scala/org/apache/spark/scheduler/ReplayListenerBus.scala b/core/src/main/scala/org/apache/spark/scheduler/ReplayListenerBus.scala
index 226c23733c870..c9cd662f5709d 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/ReplayListenerBus.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/ReplayListenerBus.scala
@@ -115,8 +115,6 @@ private[spark] class ReplayListenerBus extends SparkListenerBus with Logging {
         }
       }
     } catch {
-      case e: HaltReplayException =>
-        // Just stop replay.
       case _: EOFException if maybeTruncated =>
       case ioe: IOException =>
         throw ioe
@@ -126,17 +124,8 @@ private[spark] class ReplayListenerBus extends SparkListenerBus with Logging {
     }
   }
 
-  override protected def isIgnorableException(e: Throwable): Boolean = {
-    e.isInstanceOf[HaltReplayException]
-  }
-
 }
 
-/**
- * Exception that can be thrown by listeners to halt replay. This is handled by ReplayListenerBus
- * only, and will cause errors if thrown when using other bus implementations.
- */
-private[spark] class HaltReplayException extends RuntimeException
 
 private[spark] object ReplayListenerBus {
 
diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala
index 598b62f85a1fa..0c11806b3981b 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala
@@ -42,7 +42,7 @@ import org.apache.spark.util.{AccumulatorV2, ThreadUtils, Utils}
  * up to launch speculative tasks, etc.
  *
  * Clients should first call initialize() and start(), then submit task sets through the
- * submitTasks method.
+ * runTasks method.
  *
  * THREADING: [[SchedulerBackend]]s and task-submitting clients can call this class from multiple
  * threads, so it needs locks in public API methods to maintain its state. In addition, some
@@ -62,7 +62,7 @@ private[spark] class TaskSchedulerImpl(
     this(sc, sc.conf.get(config.MAX_TASK_FAILURES))
   }
 
-  // Lazily initializing blacklistTrackerOpt to avoid getting empty ExecutorAllocationClient,
+  // Lazily initializing blackListTrackOpt to avoid getting empty ExecutorAllocationClient,
   // because ExecutorAllocationClient is created after this TaskSchedulerImpl.
   private[scheduler] lazy val blacklistTrackerOpt = maybeCreateBlacklistTracker(sc)
 
@@ -228,7 +228,7 @@ private[spark] class TaskSchedulerImpl(
         // 1. The task set manager has been created and some tasks have been scheduled.
         //    In this case, send a kill signal to the executors to kill the task and then abort
         //    the stage.
-        // 2. The task set manager has been created but no tasks have been scheduled. In this case,
+        // 2. The task set manager has been created but no tasks has been scheduled. In this case,
         //    simply abort the stage.
         tsm.runningTasksSet.foreach { tid =>
             taskIdToExecutorId.get(tid).foreach(execId =>
@@ -689,20 +689,6 @@ private[spark] class TaskSchedulerImpl(
     }
   }
 
-  /**
-   * Marks the task has completed in all TaskSetManagers for the given stage.
-   *
-   * After stage failure and retry, there may be multiple TaskSetManagers for the stage.
-   * If an earlier attempt of a stage completes a task, we should ensure that the later attempts
-   * do not also submit those same tasks.  That also means that a task completion from an earlier
-   * attempt can lead to the entire stage getting marked as successful.
-   */
-  private[scheduler] def markPartitionCompletedInAllTaskSets(stageId: Int, partitionId: Int) = {
-    taskSetsByStageIdAndAttempt.getOrElse(stageId, Map()).values.foreach { tsm =>
-      tsm.markPartitionCompleted(partitionId)
-    }
-  }
-
 }
 
 
diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala
index a18c66596852a..886c2c99f1ff3 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala
@@ -64,7 +64,8 @@ private[spark] class TaskSetManager(
   val SPECULATION_QUANTILE = conf.getDouble("spark.speculation.quantile", 0.75)
   val SPECULATION_MULTIPLIER = conf.getDouble("spark.speculation.multiplier", 1.5)
 
-  val maxResultSize = conf.get(config.MAX_RESULT_SIZE)
+  // Limit of bytes for total size of results (default is 1GB)
+  val maxResultSize = Utils.getMaxResultSize(conf)
 
   val speculationEnabled = conf.getBoolean("spark.speculation", false)
 
@@ -73,8 +74,6 @@ private[spark] class TaskSetManager(
   val ser = env.closureSerializer.newInstance()
 
   val tasks = taskSet.tasks
-  private[scheduler] val partitionToIndex = tasks.zipWithIndex
-    .map { case (t, idx) => t.partitionId -> idx }.toMap
   val numTasks = tasks.length
   val copiesRunning = new Array[Int](numTasks)
 
@@ -155,7 +154,7 @@ private[spark] class TaskSetManager(
   private[scheduler] val speculatableTasks = new HashSet[Int]
 
   // Task index, start and finish time for each task attempt (indexed by task ID)
-  private[scheduler] val taskInfos = new HashMap[Long, TaskInfo]
+  private val taskInfos = new HashMap[Long, TaskInfo]
 
   // Use a MedianHeap to record durations of successful tasks so we know when to launch
   // speculative tasks. This is only used when speculation is enabled, to avoid the overhead
@@ -289,7 +288,7 @@ private[spark] class TaskSetManager(
     None
   }
 
-  /** Check whether a task once ran an attempt on a given host */
+  /** Check whether a task is currently running an attempt on a given host */
   private def hasAttemptOnHost(taskIndex: Int, host: String): Boolean = {
     taskAttempts(taskIndex).exists(_.host == host)
   }
@@ -756,9 +755,6 @@ private[spark] class TaskSetManager(
       logInfo("Ignoring task-finished event for " + info.id + " in stage " + taskSet.id +
         " because task " + index + " has already completed successfully")
     }
-    // There may be multiple tasksets for this stage -- we let all of them know that the partition
-    // was completed.  This may result in some of the tasksets getting completed.
-    sched.markPartitionCompletedInAllTaskSets(stageId, tasks(index).partitionId)
     // This method is called by "TaskSchedulerImpl.handleSuccessfulTask" which holds the
     // "TaskSchedulerImpl" lock until exiting. To avoid the SPARK-7655 issue, we should not
     // "deserialize" the value when holding a lock to avoid blocking other threads. So we call
@@ -769,19 +765,6 @@ private[spark] class TaskSetManager(
     maybeFinishTaskSet()
   }
 
-  private[scheduler] def markPartitionCompleted(partitionId: Int): Unit = {
-    partitionToIndex.get(partitionId).foreach { index =>
-      if (!successful(index)) {
-        tasksSuccessful += 1
-        successful(index) = true
-        if (tasksSuccessful == numTasks) {
-          isZombie = true
-        }
-        maybeFinishTaskSet()
-      }
-    }
-  }
-
   /**
    * Marks the task as failed, re-adds it to the list of pending tasks, and notifies the
    * DAG Scheduler.
@@ -851,19 +834,13 @@ private[spark] class TaskSetManager(
         }
         ef.exception
 
-      case tk: TaskKilled =>
-        // TaskKilled might have accumulator updates
-        accumUpdates = tk.accums
-        logWarning(failureReason)
-        None
-
       case e: ExecutorLostFailure if !e.exitCausedByApp =>
         logInfo(s"Task $tid failed because while it was being computed, its executor " +
           "exited for a reason unrelated to the task. Not counting this failure towards the " +
           "maximum number of failures for the task.")
         None
 
-      case e: TaskFailedReason =>  // TaskResultLost and others
+      case e: TaskFailedReason =>  // TaskResultLost, TaskKilled, and others
         logWarning(failureReason)
         None
     }
diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala
index d8794e8e551aa..4d75063fbf1c5 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala
@@ -147,8 +147,7 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp
 
       case KillExecutorsOnHost(host) =>
         scheduler.getExecutorsAliveOnHost(host).foreach { exec =>
-          killExecutors(exec.toSeq, adjustTargetNumExecutors = false, countFailures = false,
-            force = true)
+          killExecutors(exec.toSeq, replace = true, force = true)
         }
 
       case UpdateDelegationTokens(newDelegationTokens) =>
@@ -585,18 +584,18 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp
   /**
    * Request that the cluster manager kill the specified executors.
    *
+   * When asking the executor to be replaced, the executor loss is considered a failure, and
+   * killed tasks that are running on the executor will count towards the failure limits. If no
+   * replacement is being requested, then the tasks will not count towards the limit.
+   *
    * @param executorIds identifiers of executors to kill
-   * @param adjustTargetNumExecutors whether the target number of executors be adjusted down
-   *                                 after these executors have been killed
-   * @param countFailures if there are tasks running on the executors when they are killed, whether
-   *                      those failures be counted to task failure limits?
+   * @param replace whether to replace the killed executors with new ones, default false
    * @param force whether to force kill busy executors, default false
    * @return the ids of the executors acknowledged by the cluster manager to be removed.
    */
   final override def killExecutors(
       executorIds: Seq[String],
-      adjustTargetNumExecutors: Boolean,
-      countFailures: Boolean,
+      replace: Boolean,
       force: Boolean): Seq[String] = {
     logInfo(s"Requesting to kill executor(s) ${executorIds.mkString(", ")}")
 
@@ -611,7 +610,7 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp
       val executorsToKill = knownExecutors
         .filter { id => !executorsPendingToRemove.contains(id) }
         .filter { id => force || !scheduler.isExecutorBusy(id) }
-      executorsToKill.foreach { id => executorsPendingToRemove(id) = !countFailures }
+      executorsToKill.foreach { id => executorsPendingToRemove(id) = !replace }
 
       logInfo(s"Actual list of executor(s) to be killed is ${executorsToKill.mkString(", ")}")
 
@@ -619,13 +618,12 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp
       // with the cluster manager to avoid allocating new ones. When computing the new target,
       // take into account executors that are pending to be added or removed.
       val adjustTotalExecutors =
-        if (adjustTargetNumExecutors) {
+        if (!replace) {
           requestedTotalExecutors = math.max(requestedTotalExecutors - executorsToKill.size, 0)
           if (requestedTotalExecutors !=
               (numExistingExecutors + numPendingExecutors - executorsPendingToRemove.size)) {
             logDebug(
-              s"""killExecutors($executorIds, $adjustTargetNumExecutors, $countFailures, $force):
-                 |Executor counts do not match:
+              s"""killExecutors($executorIds, $replace, $force): Executor counts do not match:
                  |requestedTotalExecutors  = $requestedTotalExecutors
                  |numExistingExecutors     = $numExistingExecutors
                  |numPendingExecutors      = $numPendingExecutors
@@ -633,7 +631,7 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp
           }
           doRequestTotalExecutors(requestedTotalExecutors)
         } else {
-          numPendingExecutors += executorsToKill.size
+          numPendingExecutors += knownExecutors.size
           Future.successful(true)
         }
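
One readability point in the hunks above: the code being reverted had moved the kill-executor call sites to named Boolean arguments. A small sketch of why that matters when a method takes several flags of the same type (a stand-in that only reuses the flag names, not the backend's real implementation):

    object NamedFlagsSketch {
      def killExecutors(
          ids: Seq[String],
          adjustTargetNumExecutors: Boolean,
          countFailures: Boolean,
          force: Boolean): Seq[String] = {
        // The body just echoes what it was asked to do.
        println(s"kill=$ids adjust=$adjustTargetNumExecutors countFailures=$countFailures force=$force")
        ids
      }

      def main(args: Array[String]): Unit = {
        // Positional Booleans: nothing stops a caller from silently transposing the middle flags.
        killExecutors(Seq("exec-1"), false, false, true)
        // Named Booleans: intent is explicit and argument order no longer matters.
        killExecutors(Seq("exec-1"), force = true, countFailures = false, adjustTargetNumExecutors = false)
      }
    }
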
 
diff --git a/core/src/main/scala/org/apache/spark/security/SocketAuthHelper.scala b/core/src/main/scala/org/apache/spark/security/SocketAuthHelper.scala
deleted file mode 100644
index d15e7937b0523..0000000000000
--- a/core/src/main/scala/org/apache/spark/security/SocketAuthHelper.scala
+++ /dev/null
@@ -1,101 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.security
-
-import java.io.{DataInputStream, DataOutputStream, InputStream}
-import java.net.Socket
-import java.nio.charset.StandardCharsets.UTF_8
-
-import org.apache.spark.SparkConf
-import org.apache.spark.network.util.JavaUtils
-import org.apache.spark.util.Utils
-
-/**
- * A class that can be used to add a simple authentication protocol to socket-based communication.
- *
- * The protocol is simple: an auth secret is written to the socket, and the other side checks the
- * secret and writes either "ok" or "err" to the output. If authentication fails, the socket is
- * not expected to be valid anymore.
- *
- * There's no secrecy, so this relies on the sockets being either local or somehow encrypted.
- */
-private[spark] class SocketAuthHelper(conf: SparkConf) {
-
-  val secret = Utils.createSecret(conf)
-
-  /**
-   * Read the auth secret from the socket and compare to the expected value. Write the reply back
-   * to the socket.
-   *
-   * If authentication fails, this method will close the socket.
-   *
-   * @param s The client socket.
-   * @throws IllegalArgumentException If authentication fails.
-   */
-  def authClient(s: Socket): Unit = {
-    // Set the socket timeout while checking the auth secret. Reset it before returning.
-    val currentTimeout = s.getSoTimeout()
-    try {
-      s.setSoTimeout(10000)
-      val clientSecret = readUtf8(s)
-      if (secret == clientSecret) {
-        writeUtf8("ok", s)
-      } else {
-        writeUtf8("err", s)
-        JavaUtils.closeQuietly(s)
-      }
-    } finally {
-      s.setSoTimeout(currentTimeout)
-    }
-  }
-
-  /**
-   * Authenticate with a server by writing the auth secret and checking the server's reply.
-   *
-   * If authentication fails, this method will close the socket.
-   *
-   * @param s The socket connected to the server.
-   * @throws IllegalArgumentException If authentication fails.
-   */
-  def authToServer(s: Socket): Unit = {
-    writeUtf8(secret, s)
-
-    val reply = readUtf8(s)
-    if (reply != "ok") {
-      JavaUtils.closeQuietly(s)
-      throw new IllegalArgumentException("Authentication failed.")
-    }
-  }
-
-  protected def readUtf8(s: Socket): String = {
-    val din = new DataInputStream(s.getInputStream())
-    val len = din.readInt()
-    val bytes = new Array[Byte](len)
-    din.readFully(bytes)
-    new String(bytes, UTF_8)
-  }
-
-  protected def writeUtf8(str: String, s: Socket): Unit = {
-    val bytes = str.getBytes(UTF_8)
-    val dout = new DataOutputStream(s.getOutputStream())
-    dout.writeInt(bytes.length)
-    dout.write(bytes, 0, bytes.length)
-    dout.flush()
-  }
-
-}
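
The file deleted above implements a small length-prefixed UTF-8 handshake: the client writes the shared secret and the server replies "ok" or "err". A self-contained sketch of the same wire format over plain JDK sockets (hypothetical object name, hard-coded secret and loopback connection purely for illustration):

    import java.io.{DataInputStream, DataOutputStream}
    import java.net.{ServerSocket, Socket}
    import java.nio.charset.StandardCharsets.UTF_8

    object AuthHandshakeSketch {
      private def writeUtf8(s: String, out: DataOutputStream): Unit = {
        val bytes = s.getBytes(UTF_8)
        out.writeInt(bytes.length)
        out.write(bytes)
        out.flush()
      }

      private def readUtf8(in: DataInputStream): String = {
        val bytes = new Array[Byte](in.readInt())
        in.readFully(bytes)
        new String(bytes, UTF_8)
      }

      def main(args: Array[String]): Unit = {
        val secret = "s3cr3t"
        val server = new ServerSocket(0)   // ephemeral port
        val serverThread = new Thread(new Runnable {
          override def run(): Unit = {
            val s = server.accept()
            val in = new DataInputStream(s.getInputStream)
            val out = new DataOutputStream(s.getOutputStream)
            // "authClient" side: check the secret and answer ok/err.
            writeUtf8(if (readUtf8(in) == secret) "ok" else "err", out)
            s.close()
          }
        })
        serverThread.start()
        val client = new Socket("localhost", server.getLocalPort)
        // "authToServer" side: send the secret and read the verdict.
        writeUtf8(secret, new DataOutputStream(client.getOutputStream))
        println("server replied: " + readUtf8(new DataInputStream(client.getInputStream)))
        client.close()
        server.close()
        serverThread.join()
      }
    }
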
diff --git a/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala b/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala
index 72427dd6ce4d4..538ae05e4eea1 100644
--- a/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala
+++ b/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala
@@ -206,7 +206,6 @@ class KryoSerializer(conf: SparkConf)
         kryo.register(clazz)
       } catch {
         case NonFatal(_) => // do nothing
-        case _: NoClassDefFoundError if Utils.isTesting => // See SPARK-23422.
       }
     }
 
diff --git a/core/src/main/scala/org/apache/spark/shuffle/BlockStoreShuffleReader.scala b/core/src/main/scala/org/apache/spark/shuffle/BlockStoreShuffleReader.scala
index 4103dfb10175e..0562d45ff57c5 100644
--- a/core/src/main/scala/org/apache/spark/shuffle/BlockStoreShuffleReader.scala
+++ b/core/src/main/scala/org/apache/spark/shuffle/BlockStoreShuffleReader.scala
@@ -90,11 +90,12 @@ private[spark] class BlockStoreShuffleReader[K, C](
         dep.aggregator.get.combineValuesByKey(keyValuesIterator, context)
       }
     } else {
+      require(!dep.mapSideCombine, "Map-side combine without Aggregator specified!")
       interruptibleIter.asInstanceOf[Iterator[Product2[K, C]]]
     }
 
     // Sort the output if there is a sort ordering defined.
-    val resultIter = dep.keyOrdering match {
+    dep.keyOrdering match {
       case Some(keyOrd: Ordering[K]) =>
         // Create an ExternalSorter to sort the data.
         val sorter =
@@ -103,21 +104,9 @@ private[spark] class BlockStoreShuffleReader[K, C](
         context.taskMetrics().incMemoryBytesSpilled(sorter.memoryBytesSpilled)
         context.taskMetrics().incDiskBytesSpilled(sorter.diskBytesSpilled)
         context.taskMetrics().incPeakExecutionMemory(sorter.peakMemoryUsedBytes)
-        // Use completion callback to stop sorter if task was finished/cancelled.
-        context.addTaskCompletionListener(_ => {
-          sorter.stop()
-        })
         CompletionIterator[Product2[K, C], Iterator[Product2[K, C]]](sorter.iterator, sorter.stop())
       case None =>
         aggregatedIter
     }
-
-    resultIter match {
-      case _: InterruptibleIterator[Product2[K, C]] => resultIter
-      case _ =>
-        // Use another interruptible iterator here to support task cancellation as aggregator
-        // or(and) sorter may have consumed previous interruptible iterator.
-        new InterruptibleIterator[Product2[K, C]](context, resultIter)
-    }
   }
 }
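
The reader hunk above reverts two safeguards: stopping the sorter from a task-completion callback and re-wrapping the final iterator so it stays interruptible. A generic sketch of the same idea in plain Scala (hypothetical names, no Spark classes): an iterator wrapper that runs a cleanup exactly once when iteration ends and checks a cancel flag per element.

    import java.util.concurrent.atomic.AtomicBoolean

    class CancellableIterator[T](
        underlying: Iterator[T],
        cancelled: AtomicBoolean,
        cleanup: () => Unit) extends Iterator[T] {
      private var cleaned = false
      private def cleanOnce(): Unit = if (!cleaned) { cleaned = true; cleanup() }

      override def hasNext: Boolean = {
        if (cancelled.get()) { cleanOnce(); throw new RuntimeException("task cancelled") }
        val more = underlying.hasNext
        if (!more) cleanOnce()   // release sorter/spill resources as soon as we are exhausted
        more
      }
      override def next(): T = underlying.next()
    }

    object CancellableIteratorDemo {
      def main(args: Array[String]): Unit = {
        val cancelled = new AtomicBoolean(false)
        val it = new CancellableIterator(Iterator(1, 2, 3), cancelled, () => println("cleanup"))
        it.foreach(println)   // prints 1 2 3, then "cleanup"
      }
    }
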
diff --git a/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala b/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala
index d3f1c7ec1bbee..c5f3f6e2b42b6 100644
--- a/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala
+++ b/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala
@@ -84,7 +84,7 @@ private[spark] class IndexShuffleBlockResolver(
    */
   private def checkIndexAndDataFile(index: File, data: File, blocks: Int): Array[Long] = {
     // the index file should have `block + 1` longs as offset.
-    if (index.length() != (blocks + 1) * 8L) {
+    if (index.length() != (blocks + 1) * 8) {
       return null
     }
     val lengths = new Array[Long](blocks)
@@ -202,13 +202,13 @@ private[spark] class IndexShuffleBlockResolver(
     // class of issue from re-occurring in the future which is why they are left here even though
     // SPARK-22982 is fixed.
     val channel = Files.newByteChannel(indexFile.toPath)
-    channel.position(blockId.reduceId * 8L)
+    channel.position(blockId.reduceId * 8)
     val in = new DataInputStream(Channels.newInputStream(channel))
     try {
       val offset = in.readLong()
       val nextOffset = in.readLong()
       val actualPosition = channel.position()
-      val expectedPosition = blockId.reduceId * 8L + 16
+      val expectedPosition = blockId.reduceId * 8 + 16
       if (actualPosition != expectedPosition) {
         throw new Exception(s"SPARK-22982: Incorrect channel position after index file reads: " +
           s"expected $expectedPosition but actual position was $actualPosition.")
diff --git a/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala b/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala
index d9fad64f34c7c..bfb4dc698e325 100644
--- a/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala
+++ b/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala
@@ -188,9 +188,9 @@ private[spark] object SortShuffleManager extends Logging {
       log.debug(s"Can't use serialized shuffle for shuffle $shufId because the serializer, " +
         s"${dependency.serializer.getClass.getName}, does not support object relocation")
       false
-    } else if (dependency.mapSideCombine) {
-      log.debug(s"Can't use serialized shuffle for shuffle $shufId because we need to do " +
-        s"map-side aggregation")
+    } else if (dependency.aggregator.isDefined) {
+      log.debug(
+        s"Can't use serialized shuffle for shuffle $shufId because an aggregator is defined")
       false
     } else if (numPartitions > MAX_SHUFFLE_OUTPUT_PARTITIONS_FOR_SERIALIZED_MODE) {
       log.debug(s"Can't use serialized shuffle for shuffle $shufId because it has more than " +
diff --git a/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleWriter.scala b/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleWriter.scala
index 274399b9cc1f3..636b88e792bf3 100644
--- a/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleWriter.scala
+++ b/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleWriter.scala
@@ -50,6 +50,7 @@ private[spark] class SortShuffleWriter[K, V, C](
   /** Write a bunch of records to this task's output */
   override def write(records: Iterator[Product2[K, V]]): Unit = {
     sorter = if (dep.mapSideCombine) {
+      require(dep.aggregator.isDefined, "Map-side combine without Aggregator specified!")
       new ExternalSorter[K, V, C](
         context, dep.aggregator, Some(dep.partitioner), dep.keyOrdering, dep.serializer)
     } else {
@@ -106,6 +107,7 @@ private[spark] object SortShuffleWriter {
   def shouldBypassMergeSort(conf: SparkConf, dep: ShuffleDependency[_, _, _]): Boolean = {
     // We cannot bypass sorting if we need to do map-side aggregation.
     if (dep.mapSideCombine) {
+      require(dep.aggregator.isDefined, "Map-side combine without Aggregator specified!")
       false
     } else {
       val bypassMergeThreshold: Int = conf.getInt("spark.shuffle.sort.bypassMergeThreshold", 200)
diff --git a/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala b/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala
index 5ea161cd0d151..ab01cddfca5b0 100644
--- a/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala
+++ b/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala
@@ -213,13 +213,11 @@ private[spark] class AppStatusListener(
 
   override def onExecutorBlacklistedForStage(
       event: SparkListenerExecutorBlacklistedForStage): Unit = {
-    val now = System.nanoTime()
-
     Option(liveStages.get((event.stageId, event.stageAttemptId))).foreach { stage =>
-      setStageBlackListStatus(stage, now, event.executorId)
-    }
-    liveExecutors.get(event.executorId).foreach { exec =>
-      addBlackListedStageTo(exec, event.stageId, now)
+      val now = System.nanoTime()
+      val esummary = stage.executorSummary(event.executorId)
+      esummary.isBlacklisted = true
+      maybeUpdate(esummary, now)
     }
   }
 
@@ -228,27 +226,14 @@ private[spark] class AppStatusListener(
 
     // Implicitly blacklist every available executor for the stage associated with this node
     Option(liveStages.get((event.stageId, event.stageAttemptId))).foreach { stage =>
-      val executorIds = liveExecutors.values.filter(_.host == event.hostId).map(_.executorId).toSeq
-      setStageBlackListStatus(stage, now, executorIds: _*)
-    }
-    liveExecutors.values.filter(_.hostname == event.hostId).foreach { exec =>
-      addBlackListedStageTo(exec, event.stageId, now)
-    }
-  }
-
-  private def addBlackListedStageTo(exec: LiveExecutor, stageId: Int, now: Long): Unit = {
-    exec.blacklistedInStages += stageId
-    liveUpdate(exec, now)
-  }
-
-  private def setStageBlackListStatus(stage: LiveStage, now: Long, executorIds: String*): Unit = {
-    executorIds.foreach { executorId =>
-      val executorStageSummary = stage.executorSummary(executorId)
-      executorStageSummary.isBlacklisted = true
-      maybeUpdate(executorStageSummary, now)
+      liveExecutors.values.foreach { exec =>
+        if (exec.hostname == event.hostId) {
+          val esummary = stage.executorSummary(exec.executorId)
+          esummary.isBlacklisted = true
+          maybeUpdate(esummary, now)
+        }
+      }
     }
-    stage.blackListedExecutors ++= executorIds
-    maybeUpdate(stage, now)
   }
 
   override def onExecutorUnblacklisted(event: SparkListenerExecutorUnblacklisted): Unit = {
@@ -609,24 +594,12 @@ private[spark] class AppStatusListener(
 
       stage.executorSummaries.values.foreach(update(_, now))
       update(stage, now, last = true)
-
-      val executorIdsForStage = stage.blackListedExecutors
-      executorIdsForStage.foreach { executorId =>
-        liveExecutors.get(executorId).foreach { exec =>
-          removeBlackListedStageFrom(exec, event.stageInfo.stageId, now)
-        }
-      }
     }
 
     appSummary = new AppSummary(appSummary.numCompletedJobs, appSummary.numCompletedStages + 1)
     kvstore.write(appSummary)
   }
 
-  private def removeBlackListedStageFrom(exec: LiveExecutor, stageId: Int, now: Long) = {
-    exec.blacklistedInStages -= stageId
-    liveUpdate(exec, now)
-  }
-
   override def onBlockManagerAdded(event: SparkListenerBlockManagerAdded): Unit = {
     // This needs to set fields that are already set by onExecutorAdded because the driver is
     // considered an "executor" in the UI, but does not have a SparkListenerExecutorAdded event.
@@ -915,10 +888,7 @@ private[spark] class AppStatusListener(
       return
     }
 
-    // As the completion time of a skipped stage is always -1, we will remove skipped stages first.
-    // This is safe since the job itself contains enough information to render skipped stages in the
-    // UI.
-    val view = kvstore.view(classOf[StageDataWrapper]).index("completionTime")
+    val view = kvstore.view(classOf[StageDataWrapper]).index("completionTime").first(0L)
     val stages = KVUtils.viewToSeq(view, countToDelete.toInt) { s =>
       s.info.status != v1.StageStatus.ACTIVE && s.info.status != v1.StageStatus.PENDING
     }
diff --git a/core/src/main/scala/org/apache/spark/status/AppStatusStore.scala b/core/src/main/scala/org/apache/spark/status/AppStatusStore.scala
index 688f25a9fdea1..efc28538a33db 100644
--- a/core/src/main/scala/org/apache/spark/status/AppStatusStore.scala
+++ b/core/src/main/scala/org/apache/spark/status/AppStatusStore.scala
@@ -95,11 +95,7 @@ private[spark] class AppStatusStore(
   }
 
   def lastStageAttempt(stageId: Int): v1.StageData = {
-    val it = store.view(classOf[StageDataWrapper])
-      .index("stageId")
-      .reverse()
-      .first(stageId)
-      .last(stageId)
+    val it = store.view(classOf[StageDataWrapper]).index("stageId").reverse().first(stageId)
       .closeableIterator()
     try {
       if (it.hasNext()) {
diff --git a/core/src/main/scala/org/apache/spark/status/LiveEntity.scala b/core/src/main/scala/org/apache/spark/status/LiveEntity.scala
index 79e3f13b826ce..d5f9e19ffdcd0 100644
--- a/core/src/main/scala/org/apache/spark/status/LiveEntity.scala
+++ b/core/src/main/scala/org/apache/spark/status/LiveEntity.scala
@@ -20,7 +20,6 @@ package org.apache.spark.status
 import java.util.Date
 import java.util.concurrent.atomic.AtomicInteger
 
-import scala.collection.immutable.{HashSet, TreeSet}
 import scala.collection.mutable.HashMap
 
 import com.google.common.collect.Interners
@@ -255,7 +254,6 @@ private class LiveExecutor(val executorId: String, _addTime: Long) extends LiveE
   var totalShuffleRead = 0L
   var totalShuffleWrite = 0L
   var isBlacklisted = false
-  var blacklistedInStages: Set[Int] = TreeSet()
 
   var executorLogs = Map[String, String]()
 
@@ -301,8 +299,7 @@ private class LiveExecutor(val executorId: String, _addTime: Long) extends LiveE
       Option(removeTime),
       Option(removeReason),
       executorLogs,
-      memoryMetrics,
-      blacklistedInStages)
+      memoryMetrics)
     new ExecutorSummaryWrapper(info)
   }
 
@@ -374,8 +371,6 @@ private class LiveStage extends LiveEntity {
 
   val executorSummaries = new HashMap[String, LiveExecutorStageSummary]()
 
-  var blackListedExecutors = new HashSet[String]()
-
   // Used for cleanup of tasks after they reach the configured limit. Not written to the store.
   @volatile var cleaning = false
   var savedTasks = new AtomicInteger(0)
diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/ApiRootResource.scala b/core/src/main/scala/org/apache/spark/status/api/v1/ApiRootResource.scala
index d121068718b8a..ed9bdc6e1e3c2 100644
--- a/core/src/main/scala/org/apache/spark/status/api/v1/ApiRootResource.scala
+++ b/core/src/main/scala/org/apache/spark/status/api/v1/ApiRootResource.scala
@@ -49,7 +49,6 @@ private[v1] class ApiRootResource extends ApiRequestContext {
   @Path("applications/{appId}")
   def application(): Class[OneApplicationResource] = classOf[OneApplicationResource]
 
-  @GET
   @Path("version")
   def version(): VersionInfo = new VersionInfo(org.apache.spark.SPARK_VERSION)
 
@@ -158,14 +157,6 @@ private[v1] class NotFoundException(msg: String) extends WebApplicationException
       .build()
 )
 
-private[v1] class ServiceUnavailable(msg: String) extends WebApplicationException(
-  new ServiceUnavailableException(msg),
-  Response
-    .status(Response.Status.SERVICE_UNAVAILABLE)
-    .entity(ErrorWrapper(msg))
-    .build()
-)
-
 private[v1] class BadParameterException(msg: String) extends WebApplicationException(
   new IllegalArgumentException(msg),
   Response
diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/OneApplicationResource.scala b/core/src/main/scala/org/apache/spark/status/api/v1/OneApplicationResource.scala
index 974697890dd03..bd4df07e7afc6 100644
--- a/core/src/main/scala/org/apache/spark/status/api/v1/OneApplicationResource.scala
+++ b/core/src/main/scala/org/apache/spark/status/api/v1/OneApplicationResource.scala
@@ -19,13 +19,13 @@ package org.apache.spark.status.api.v1
 import java.io.OutputStream
 import java.util.{List => JList}
 import java.util.zip.ZipOutputStream
-import javax.ws.rs._
+import javax.ws.rs.{GET, Path, PathParam, Produces, QueryParam}
 import javax.ws.rs.core.{MediaType, Response, StreamingOutput}
 
 import scala.util.control.NonFatal
 
-import org.apache.spark.{JobExecutionStatus, SparkContext}
-import org.apache.spark.ui.UIUtils
+import org.apache.spark.JobExecutionStatus
+import org.apache.spark.ui.SparkUI
 
 @Produces(Array(MediaType.APPLICATION_JSON))
 private[v1] class AbstractApplicationResource extends BaseAppResource {
@@ -51,29 +51,6 @@ private[v1] class AbstractApplicationResource extends BaseAppResource {
   @Path("executors")
   def executorList(): Seq[ExecutorSummary] = withUI(_.store.executorList(true))
 
-  @GET
-  @Path("executors/{executorId}/threads")
-  def threadDump(@PathParam("executorId") execId: String): Array[ThreadStackTrace] = withUI { ui =>
-    if (execId != SparkContext.DRIVER_IDENTIFIER && !execId.forall(Character.isDigit)) {
-      throw new BadParameterException(
-        s"Invalid executorId: neither '${SparkContext.DRIVER_IDENTIFIER}' nor number.")
-    }
-
-    val safeSparkContext = ui.sc.getOrElse {
-      throw new ServiceUnavailable("Thread dumps not available through the history server.")
-    }
-
-    ui.store.asOption(ui.store.executorSummary(execId)) match {
-      case Some(executorSummary) if executorSummary.isActive =>
-          val safeThreadDump = safeSparkContext.getExecutorThreadDump(execId).getOrElse {
-            throw new NotFoundException("No thread dump is available.")
-          }
-          safeThreadDump
-      case Some(_) => throw new BadParameterException("Executor is not active.")
-      case _ => throw new NotFoundException("Executor does not exist.")
-    }
-  }
-
   @GET
   @Path("allexecutors")
   def allExecutorList(): Seq[ExecutorSummary] = withUI(_.store.executorList(false))
diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/api.scala b/core/src/main/scala/org/apache/spark/status/api/v1/api.scala
index 971d7e90fa7b8..550eac3952bbb 100644
--- a/core/src/main/scala/org/apache/spark/status/api/v1/api.scala
+++ b/core/src/main/scala/org/apache/spark/status/api/v1/api.scala
@@ -19,8 +19,6 @@ package org.apache.spark.status.api.v1
 import java.lang.{Long => JLong}
 import java.util.Date
 
-import scala.xml.{NodeSeq, Text}
-
 import com.fasterxml.jackson.annotation.JsonIgnoreProperties
 import com.fasterxml.jackson.databind.annotation.JsonDeserialize
 
@@ -97,8 +95,7 @@ class ExecutorSummary private[spark](
     val removeTime: Option[Date],
     val removeReason: Option[String],
     val executorLogs: Map[String, String],
-    val memoryMetrics: Option[MemoryMetrics],
-    val blacklistedInStages: Set[Int])
+    val memoryMetrics: Option[MemoryMetrics])
 
 class MemoryMetrics private[spark](
     val usedOnHeapStorageMemory: Long,
@@ -318,32 +315,3 @@ class RuntimeInfo private[spark](
     val javaVersion: String,
     val javaHome: String,
     val scalaVersion: String)
-
-case class StackTrace(elems: Seq[String]) {
-  override def toString: String = elems.mkString
-
-  def html: NodeSeq = {
-    val withNewLine = elems.foldLeft(NodeSeq.Empty) { (acc, elem) =>
-      if (acc.isEmpty) {
-        acc :+ Text(elem)
-      } else {
-        acc :+ <br /> :+ Text(elem)
-      }
-    }
-
-    withNewLine
-  }
-
-  def mkString(start: String, sep: String, end: String): String = {
-    elems.mkString(start, sep, end)
-  }
-}
-
-case class ThreadStackTrace(
-    val threadId: Long,
-    val threadName: String,
-    val threadState: Thread.State,
-    val stackTrace: StackTrace,
-    val blockedByThreadId: Option[Long],
-    val blockedByLock: String,
-    val holdingLocks: Seq[String])
diff --git a/core/src/main/scala/org/apache/spark/status/storeTypes.scala b/core/src/main/scala/org/apache/spark/status/storeTypes.scala
index 646cf25880e37..412644d3657b5 100644
--- a/core/src/main/scala/org/apache/spark/status/storeTypes.scala
+++ b/core/src/main/scala/org/apache/spark/status/storeTypes.scala
@@ -109,7 +109,6 @@ private[spark] object TaskIndexNames {
   final val DURATION = "dur"
   final val ERROR = "err"
   final val EXECUTOR = "exe"
-  final val HOST = "hst"
   final val EXEC_CPU_TIME = "ect"
   final val EXEC_RUN_TIME = "ert"
   final val GC_TIME = "gc"
@@ -166,7 +165,6 @@ private[spark] class TaskDataWrapper(
     val duration: Long,
     @KVIndexParam(value = TaskIndexNames.EXECUTOR, parent = TaskIndexNames.STAGE)
     val executorId: String,
-    @KVIndexParam(value = TaskIndexNames.HOST, parent = TaskIndexNames.STAGE)
     val host: String,
     @KVIndexParam(value = TaskIndexNames.STATUS, parent = TaskIndexNames.STAGE)
     val status: String,
diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerId.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerId.scala
index d4a59c33b974c..2c3da0ee85e06 100644
--- a/core/src/main/scala/org/apache/spark/storage/BlockManagerId.scala
+++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerId.scala
@@ -18,8 +18,7 @@
 package org.apache.spark.storage
 
 import java.io.{Externalizable, IOException, ObjectInput, ObjectOutput}
-
-import com.google.common.cache.{CacheBuilder, CacheLoader}
+import java.util.concurrent.ConcurrentHashMap
 
 import org.apache.spark.SparkContext
 import org.apache.spark.annotation.DeveloperApi
@@ -133,17 +132,10 @@ private[spark] object BlockManagerId {
     getCachedBlockManagerId(obj)
   }
 
-  /**
-   * The max cache size is hardcoded to 10000, since the size of a BlockManagerId
-   * object is about 48B, the total memory cost should be below 1MB which is feasible.
-   */
-  val blockManagerIdCache = CacheBuilder.newBuilder()
-    .maximumSize(10000)
-    .build(new CacheLoader[BlockManagerId, BlockManagerId]() {
-      override def load(id: BlockManagerId) = id
-    })
+  val blockManagerIdCache = new ConcurrentHashMap[BlockManagerId, BlockManagerId]()
 
   def getCachedBlockManagerId(id: BlockManagerId): BlockManagerId = {
+    blockManagerIdCache.putIfAbsent(id, id)
     blockManagerIdCache.get(id)
   }
 }
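
The hunk above swaps a size-bounded Guava LoadingCache back to an unbounded ConcurrentHashMap used for interning. A self-contained sketch of the bounded interning pattern, assuming Guava is on the classpath and using a hypothetical Endpoint case class in place of the real cached type:

    import com.google.common.cache.{CacheBuilder, CacheLoader, LoadingCache}

    case class Endpoint(executorId: String, host: String, port: Int)

    object InterningCacheSketch {
      // At most 10000 distinct entries stay cached; older ones are evicted instead of
      // accumulating forever, which is the property the unbounded map gives up.
      val cache: LoadingCache[Endpoint, Endpoint] = CacheBuilder.newBuilder()
        .maximumSize(10000)
        .build(new CacheLoader[Endpoint, Endpoint]() {
          override def load(id: Endpoint): Endpoint = id   // intern: the key is its own value
        })

      def getCached(id: Endpoint): Endpoint = cache.get(id)

      def main(args: Array[String]): Unit = {
        val a = getCached(Endpoint("1", "host-a", 7077))
        val b = getCached(Endpoint("1", "host-a", 7077))   // equal but distinct instance
        println(a eq b)                                    // true: the second lookup reuses the cached one
      }
    }
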
diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala
index 8e8f7d197c9ef..89a6a71a589a1 100644
--- a/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala
+++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala
@@ -164,8 +164,7 @@ class BlockManagerMasterEndpoint(
     val futures = blockManagerInfo.values.map { bm =>
       bm.slaveEndpoint.ask[Int](removeMsg).recover {
         case e: IOException =>
-          logWarning(s"Error trying to remove RDD $rddId from block manager ${bm.blockManagerId}",
-            e)
+          logWarning(s"Error trying to remove RDD $rddId", e)
           0 // zero blocks were removed
       }
     }.toSeq
@@ -193,16 +192,11 @@ class BlockManagerMasterEndpoint(
     val requiredBlockManagers = blockManagerInfo.values.filter { info =>
       removeFromDriver || !info.blockManagerId.isDriver
     }
-    val futures = requiredBlockManagers.map { bm =>
-      bm.slaveEndpoint.ask[Int](removeMsg).recover {
-        case e: IOException =>
-          logWarning(s"Error trying to remove broadcast $broadcastId from block manager " +
-            s"${bm.blockManagerId}", e)
-          0 // zero blocks were removed
-      }
-    }.toSeq
-
-    Future.sequence(futures)
+    Future.sequence(
+      requiredBlockManagers.map { bm =>
+        bm.slaveEndpoint.ask[Int](removeMsg)
+      }.toSeq
+    )
   }
 
   private def removeBlockManager(blockManagerId: BlockManagerId) {
diff --git a/core/src/main/scala/org/apache/spark/storage/BlockReplicationPolicy.scala b/core/src/main/scala/org/apache/spark/storage/BlockReplicationPolicy.scala
index 0bacc34cdfd90..353eac60df171 100644
--- a/core/src/main/scala/org/apache/spark/storage/BlockReplicationPolicy.scala
+++ b/core/src/main/scala/org/apache/spark/storage/BlockReplicationPolicy.scala
@@ -54,9 +54,10 @@ trait BlockReplicationPolicy {
 }
 
 object BlockReplicationUtils {
+  // scalastyle:off line.size.limit
   /**
    * Uses sampling algorithm by Robert Floyd. Finds a random sample in O(n) while
-   * minimizing space usage. Please see <a href="https://math.stackexchange.com/q/178690">
+   * minimizing space usage. Please see <a href="http://math.stackexchange.com/questions/178690/whats-the-proof-of-correctness-for-robert-floyds-algorithm-for-selecting-a-sin">
    * here</a>.
    *
    * @param n total number of indices
@@ -64,6 +65,7 @@ object BlockReplicationUtils {
    * @param r random number generator
    * @return list of m random unique indices
    */
+  // scalastyle:on line.size.limit
   private def getSampleIds(n: Int, m: Int, r: Random): List[Int] = {
     val indices = (n - m + 1 to n).foldLeft(mutable.LinkedHashSet.empty[Int]) {case (set, i) =>
       val t = r.nextInt(i) + 1
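
The hunk above only touches the comment and scalastyle directives around getSampleIds, but the method itself is a compact algorithm: Robert Floyd's sampling of m distinct values in O(m) draws. A stand-alone sketch of the same idea (hypothetical object; the trailing shift to 0-based indices is assumed for the demo):

    import scala.collection.mutable
    import scala.util.Random

    object FloydSampling {
      // Return m distinct 0-based indices drawn uniformly from 0 until n.
      def sample(n: Int, m: Int, r: Random): List[Int] = {
        val indices = (n - m + 1 to n).foldLeft(mutable.LinkedHashSet.empty[Int]) { case (set, i) =>
          val t = r.nextInt(i) + 1                        // uniform draw from 1..i
          if (set.contains(t)) set += i else set += t     // a collision falls back to i, preserving uniformity
        }
        indices.map(_ - 1).toList                         // shift 1..n down to 0..n-1
      }

      def main(args: Array[String]): Unit = {
        println(sample(20, 5, new Random(42)))            // 5 distinct values in 0..19
      }
    }
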
diff --git a/core/src/main/scala/org/apache/spark/storage/ShuffleBlockFetcherIterator.scala b/core/src/main/scala/org/apache/spark/storage/ShuffleBlockFetcherIterator.scala
index b31862323a895..98b5a735a4529 100644
--- a/core/src/main/scala/org/apache/spark/storage/ShuffleBlockFetcherIterator.scala
+++ b/core/src/main/scala/org/apache/spark/storage/ShuffleBlockFetcherIterator.scala
@@ -48,9 +48,7 @@ import org.apache.spark.util.io.ChunkedByteBufferOutputStream
  * @param blockManager [[BlockManager]] for reading local blocks
  * @param blocksByAddress list of blocks to fetch grouped by the [[BlockManagerId]].
  *                        For each block we also require the size (in bytes as a long field) in
- *                        order to throttle the memory usage. Note that zero-sized blocks are
- *                        already excluded, which happened in
- *                        [[MapOutputTracker.convertMapStatuses]].
+ *                        order to throttle the memory usage.
  * @param streamWrapper A function to wrap the returned input stream.
  * @param maxBytesInFlight max size (in bytes) of remote blocks to fetch at any given point.
  * @param maxReqsInFlight max number of remote requests to fetch blocks at any given point.
@@ -64,7 +62,7 @@ final class ShuffleBlockFetcherIterator(
     context: TaskContext,
     shuffleClient: ShuffleClient,
     blockManager: BlockManager,
-    blocksByAddress: Iterator[(BlockManagerId, Seq[(BlockId, Long)])],
+    blocksByAddress: Seq[(BlockManagerId, Seq[(BlockId, Long)])],
     streamWrapper: (BlockId, InputStream) => InputStream,
     maxBytesInFlight: Long,
     maxReqsInFlight: Int,
@@ -76,8 +74,8 @@ final class ShuffleBlockFetcherIterator(
   import ShuffleBlockFetcherIterator._
 
   /**
-   * Total number of blocks to fetch. This should be equal to the total number of blocks
-   * in [[blocksByAddress]] because we already filter out zero-sized blocks in [[blocksByAddress]].
+   * Total number of blocks to fetch. This can be smaller than the total number of blocks
+   * in [[blocksByAddress]] because we filter out zero-sized blocks in [[initialize]].
    *
    * This should equal localBlocks.size + remoteBlocks.size.
    */
@@ -92,7 +90,7 @@ final class ShuffleBlockFetcherIterator(
   private[this] val startTime = System.currentTimeMillis
 
   /** Local blocks to fetch, excluding zero-sized blocks. */
-  private[this] val localBlocks = scala.collection.mutable.LinkedHashSet[BlockId]()
+  private[this] val localBlocks = new ArrayBuffer[BlockId]()
 
   /** Remote blocks to fetch, excluding zero-sized blocks. */
   private[this] val remoteBlocks = new HashSet[BlockId]()
@@ -269,16 +267,13 @@ final class ShuffleBlockFetcherIterator(
     // at most maxBytesInFlight in order to limit the amount of data in flight.
     val remoteRequests = new ArrayBuffer[FetchRequest]
 
+    // Tracks total number of blocks (including zero sized blocks)
+    var totalBlocks = 0
     for ((address, blockInfos) <- blocksByAddress) {
+      totalBlocks += blockInfos.size
       if (address.executorId == blockManager.blockManagerId.executorId) {
-        blockInfos.find(_._2 <= 0) match {
-          case Some((blockId, size)) if size < 0 =>
-            throw new BlockException(blockId, "Negative block size " + size)
-          case Some((blockId, size)) if size == 0 =>
-            throw new BlockException(blockId, "Zero-sized blocks should be excluded.")
-          case None => // do nothing.
-        }
-        localBlocks ++= blockInfos.map(_._1)
+        // Filter out zero-sized blocks
+        localBlocks ++= blockInfos.filter(_._2 != 0).map(_._1)
         numBlocksToFetch += localBlocks.size
       } else {
         val iterator = blockInfos.iterator
@@ -286,15 +281,14 @@ final class ShuffleBlockFetcherIterator(
         var curBlocks = new ArrayBuffer[(BlockId, Long)]
         while (iterator.hasNext) {
           val (blockId, size) = iterator.next()
-          if (size < 0) {
-            throw new BlockException(blockId, "Negative block size " + size)
-          } else if (size == 0) {
-            throw new BlockException(blockId, "Zero-sized blocks should be excluded.")
-          } else {
+          // Skip empty blocks
+          if (size > 0) {
             curBlocks += ((blockId, size))
             remoteBlocks += blockId
             numBlocksToFetch += 1
             curRequestSize += size
+          } else if (size < 0) {
+            throw new BlockException(blockId, "Negative block size " + size)
           }
           if (curRequestSize >= targetRequestSize ||
               curBlocks.size >= maxBlocksInFlightPerAddress) {
@@ -312,8 +306,7 @@ final class ShuffleBlockFetcherIterator(
         }
       }
     }
-    logInfo(s"Getting $numBlocksToFetch non-empty blocks including ${localBlocks.size}" +
-        s" local blocks and ${remoteBlocks.size} remote blocks")
+    logInfo(s"Getting $numBlocksToFetch non-empty blocks out of $totalBlocks blocks")
     remoteRequests
   }
 
@@ -323,7 +316,6 @@ final class ShuffleBlockFetcherIterator(
    * track in-memory are the ManagedBuffer references themselves.
    */
   private[this] def fetchLocalBlocks() {
-    logDebug(s"Start fetching local blocks: ${localBlocks.mkString(", ")}")
     val iter = localBlocks.iterator
     while (iter.hasNext) {
       val blockId = iter.next()
@@ -332,8 +324,7 @@ final class ShuffleBlockFetcherIterator(
         shuffleMetrics.incLocalBlocksFetched(1)
         shuffleMetrics.incLocalBytesRead(buf.size)
         buf.retain()
-        results.put(new SuccessFetchResult(blockId, blockManager.blockManagerId,
-          buf.size(), buf, false))
+        results.put(new SuccessFetchResult(blockId, blockManager.blockManagerId, 0, buf, false))
       } catch {
         case e: Exception =>
           // If we see an exception, stop immediately.
@@ -406,33 +397,12 @@ final class ShuffleBlockFetcherIterator(
             }
             shuffleMetrics.incRemoteBlocksFetched(1)
           }
-          if (!localBlocks.contains(blockId)) {
-            bytesInFlight -= size
-          }
+          bytesInFlight -= size
           if (isNetworkReqDone) {
             reqsInFlight -= 1
             logDebug("Number of requests in flight " + reqsInFlight)
           }
 
-          if (buf.size == 0) {
-            // We will never legitimately receive a zero-size block. All blocks with zero records
-            // have zero size and all zero-size blocks have no records (and hence should never
-            // have been requested in the first place). This statement relies on behaviors of the
-            // shuffle writers, which are guaranteed by the following test cases:
-            //
-            // - BypassMergeSortShuffleWriterSuite: "write with some empty partitions"
-            // - UnsafeShuffleWriterSuite: "writeEmptyIterator"
-            // - DiskBlockObjectWriterSuite: "commit() and close() without ever opening or writing"
-            //
-            // There is not an explicit test for SortShuffleWriter, but the underlying APIs that it
-            // uses are shared by the UnsafeShuffleWriter (both writers use DiskBlockObjectWriter,
-            // which returns a zero-size from commitAndGet() in case no records were written
-            // since the last call).
-            val msg = s"Received a zero-size buffer for block $blockId from $address " +
-              s"(expectedApproxSize = $size, isNetworkReqDone=$isNetworkReqDone)"
-            throwFetchFailedException(blockId, address, new IOException(msg))
-          }
-
           val in = try {
             buf.createInputStream()
           } catch {
@@ -613,8 +583,8 @@ object ShuffleBlockFetcherIterator {
    * Result of a fetch from a remote block successfully.
    * @param blockId block id
    * @param address BlockManager that the block was fetched from.
-   * @param size estimated size of the block. Note that this is NOT the exact bytes.
-   *             Size of remote block is used to calculate bytesInFlight.
+   * @param size estimated size of the block, used to calculate bytesInFlight.
+   *             Note that this is NOT the exact bytes.
    * @param buf `ManagedBuffer` for the content.
    * @param isNetworkReqDone Is this the last network request for this host in this fetch request.
    */
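
Both versions of this iterator batch remote blocks the same way: accumulate (blockId, size) pairs for an address and cut a new fetch request once the accumulated size reaches the target or the per-address block cap is hit, skipping empty blocks along the way. A simplified sketch of that batching, detached from the iterator (FetchReq and the sample sizes below are made up for illustration):

    object BatchBlocks extends App {
      // Hypothetical stand-ins for BlockId / FetchRequest.
      final case class FetchReq(address: String, blocks: Seq[(String, Long)])

      // Group (blockId, size) pairs into requests of at most `targetRequestSize`
      // bytes and `maxBlocksPerRequest` blocks each.
      def batch(
          address: String,
          blockInfos: Seq[(String, Long)],
          targetRequestSize: Long,
          maxBlocksPerRequest: Int): Seq[FetchReq] = {
        val requests = Seq.newBuilder[FetchReq]
        var cur = Vector.empty[(String, Long)]
        var curSize = 0L
        blockInfos.filter(_._2 > 0).foreach { case (id, size) =>   // skip empty blocks
          cur :+= ((id, size))
          curSize += size
          if (curSize >= targetRequestSize || cur.size >= maxBlocksPerRequest) {
            requests += FetchReq(address, cur)
            cur = Vector.empty
            curSize = 0L
          }
        }
        if (cur.nonEmpty) requests += FetchReq(address, cur)
        requests.result()
      }

      batch("exec-2:7337", Seq(("b1", 10L), ("b2", 0L), ("b3", 25L), ("b4", 5L)),
        targetRequestSize = 30L, maxBlocksPerRequest = 2).foreach(println)
    }
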
diff --git a/core/src/main/scala/org/apache/spark/storage/StorageUtils.scala b/core/src/main/scala/org/apache/spark/storage/StorageUtils.scala
index adc406bb1c441..e9694fdbca2de 100644
--- a/core/src/main/scala/org/apache/spark/storage/StorageUtils.scala
+++ b/core/src/main/scala/org/apache/spark/storage/StorageUtils.scala
@@ -24,15 +24,19 @@ import scala.collection.mutable
 
 import sun.nio.ch.DirectBuffer
 
+import org.apache.spark.annotation.DeveloperApi
 import org.apache.spark.internal.Logging
 
 /**
+ * :: DeveloperApi ::
  * Storage information for each BlockManager.
  *
  * This class assumes BlockId and BlockStatus are immutable, such that the consumers of this
  * class cannot mutate the source of the information. Accesses are not thread-safe.
  */
-private[spark] class StorageStatus(
+@DeveloperApi
+@deprecated("This class may be removed or made private in a future release.", "2.2.0")
+class StorageStatus(
     val blockManagerId: BlockManagerId,
     val maxMemory: Long,
     val maxOnHeapMem: Option[Long],
@@ -40,6 +44,9 @@ private[spark] class StorageStatus(
 
   /**
    * Internal representation of the blocks stored in this block manager.
+   *
+   * We store RDD blocks and non-RDD blocks separately to allow quick retrievals of RDD blocks.
+   * These collections should only be mutated through the add/update/removeBlock methods.
    */
   private val _rddBlocks = new mutable.HashMap[Int, mutable.Map[BlockId, BlockStatus]]
   private val _nonRddBlocks = new mutable.HashMap[BlockId, BlockStatus]
@@ -80,6 +87,9 @@ private[spark] class StorageStatus(
    */
   def rddBlocks: Map[BlockId, BlockStatus] = _rddBlocks.flatMap { case (_, blocks) => blocks }
 
+  /** Return the blocks that belong to the given RDD stored in this block manager. */
+  def rddBlocksById(rddId: Int): Map[BlockId, BlockStatus] = _rddBlocks.getOrElse(rddId, Map.empty)
+
   /** Add the given block to this storage status. If it already exists, overwrite it. */
   private[spark] def addBlock(blockId: BlockId, blockStatus: BlockStatus): Unit = {
     updateStorageInfo(blockId, blockStatus)
@@ -91,6 +101,46 @@ private[spark] class StorageStatus(
     }
   }
 
+  /** Update the given block in this storage status. If it doesn't already exist, add it. */
+  private[spark] def updateBlock(blockId: BlockId, blockStatus: BlockStatus): Unit = {
+    addBlock(blockId, blockStatus)
+  }
+
+  /** Remove the given block from this storage status. */
+  private[spark] def removeBlock(blockId: BlockId): Option[BlockStatus] = {
+    updateStorageInfo(blockId, BlockStatus.empty)
+    blockId match {
+      case RDDBlockId(rddId, _) =>
+        // Actually remove the block, if it exists
+        if (_rddBlocks.contains(rddId)) {
+          val removed = _rddBlocks(rddId).remove(blockId)
+          // If the given RDD has no more blocks left, remove the RDD
+          if (_rddBlocks(rddId).isEmpty) {
+            _rddBlocks.remove(rddId)
+          }
+          removed
+        } else {
+          None
+        }
+      case _ =>
+        _nonRddBlocks.remove(blockId)
+    }
+  }
+
+  /**
+   * Return whether the given block is stored in this block manager in O(1) time.
+   *
+   * @note This is much faster than `this.blocks.contains`, which is O(blocks) time.
+   */
+  def containsBlock(blockId: BlockId): Boolean = {
+    blockId match {
+      case RDDBlockId(rddId, _) =>
+        _rddBlocks.get(rddId).exists(_.contains(blockId))
+      case _ =>
+        _nonRddBlocks.contains(blockId)
+    }
+  }
+
   /**
    * Return the given block stored in this block manager in O(1) time.
    *
@@ -105,12 +155,37 @@ private[spark] class StorageStatus(
     }
   }
 
+  /**
+   * Return the number of blocks stored in this block manager in O(RDDs) time.
+   *
+   * @note This is much faster than `this.blocks.size`, which is O(blocks) time.
+   */
+  def numBlocks: Int = _nonRddBlocks.size + numRddBlocks
+
+  /**
+   * Return the number of RDD blocks stored in this block manager in O(RDDs) time.
+   *
+   * @note This is much faster than `this.rddBlocks.size`, which is O(RDD blocks) time.
+   */
+  def numRddBlocks: Int = _rddBlocks.values.map(_.size).sum
+
+  /**
+   * Return the number of blocks that belong to the given RDD in O(1) time.
+   *
+   * @note This is much faster than `this.rddBlocksById(rddId).size`, which is
+   * O(blocks in this RDD) time.
+   */
+  def numRddBlocksById(rddId: Int): Int = _rddBlocks.get(rddId).map(_.size).getOrElse(0)
+
   /** Return the max memory can be used by this block manager. */
   def maxMem: Long = maxMemory
 
   /** Return the memory remaining in this block manager. */
   def memRemaining: Long = maxMem - memUsed
 
+  /** Return the memory used by caching RDDs */
+  def cacheSize: Long = onHeapCacheSize.getOrElse(0L) + offHeapCacheSize.getOrElse(0L)
+
   /** Return the memory used by this block manager. */
   def memUsed: Long = onHeapMemUsed.getOrElse(0L) + offHeapMemUsed.getOrElse(0L)
 
@@ -145,9 +220,15 @@ private[spark] class StorageStatus(
   /** Return the disk space used by this block manager. */
   def diskUsed: Long = _nonRddStorageInfo.diskUsage + _rddBlocks.keys.toSeq.map(diskUsedByRdd).sum
 
+  /** Return the memory used by the given RDD in this block manager in O(1) time. */
+  def memUsedByRdd(rddId: Int): Long = _rddStorageInfo.get(rddId).map(_.memoryUsage).getOrElse(0L)
+
   /** Return the disk space used by the given RDD in this block manager in O(1) time. */
   def diskUsedByRdd(rddId: Int): Long = _rddStorageInfo.get(rddId).map(_.diskUsage).getOrElse(0L)
 
+  /** Return the storage level, if any, used by the given RDD in this block manager. */
+  def rddStorageLevel(rddId: Int): Option[StorageLevel] = _rddStorageInfo.get(rddId).map(_.level)
+
   /**
    * Update the relevant storage info, taking into account any existing status for this block.
    */
@@ -214,4 +295,40 @@ private[spark] object StorageUtils extends Logging {
       cleaner.clean()
     }
   }
+
+  /**
+   * Update the given list of RDDInfo with the given list of storage statuses.
+   * This method overwrites the old values stored in the RDDInfo's.
+   */
+  def updateRddInfo(rddInfos: Seq[RDDInfo], statuses: Seq[StorageStatus]): Unit = {
+    rddInfos.foreach { rddInfo =>
+      val rddId = rddInfo.id
+      // Assume all blocks belonging to the same RDD have the same storage level
+      val storageLevel = statuses
+        .flatMap(_.rddStorageLevel(rddId)).headOption.getOrElse(StorageLevel.NONE)
+      val numCachedPartitions = statuses.map(_.numRddBlocksById(rddId)).sum
+      val memSize = statuses.map(_.memUsedByRdd(rddId)).sum
+      val diskSize = statuses.map(_.diskUsedByRdd(rddId)).sum
+
+      rddInfo.storageLevel = storageLevel
+      rddInfo.numCachedPartitions = numCachedPartitions
+      rddInfo.memSize = memSize
+      rddInfo.diskSize = diskSize
+    }
+  }
+
+  /**
+   * Return a mapping from block ID to its locations for each block that belongs to the given RDD.
+   */
+  def getRddBlockLocations(rddId: Int, statuses: Seq[StorageStatus]): Map[BlockId, Seq[String]] = {
+    val blockLocations = new mutable.HashMap[BlockId, mutable.ListBuffer[String]]
+    statuses.foreach { status =>
+      status.rddBlocksById(rddId).foreach { case (bid, _) =>
+        val location = status.blockManagerId.hostPort
+        blockLocations.getOrElseUpdate(bid, mutable.ListBuffer.empty) += location
+      }
+    }
+    blockLocations
+  }
+
 }
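
The restored StorageStatus methods work because RDD blocks are kept keyed by rddId, so per-RDD queries cost O(block managers) rather than O(all blocks); getRddBlockLocations then just merges each manager's blocks for one RDD into a block-to-locations map. The aggregation can be sketched with plain maps (Status below is a simplified stand-in for StorageStatus):

    import scala.collection.mutable

    object RddBlockLocations extends App {
      // Simplified stand-in: each "status" is a block manager's host:port plus
      // its RDD blocks keyed by rddId.
      final case class Status(hostPort: String, rddBlocks: Map[Int, Set[String]])

      // Map each block of `rddId` to the list of block managers holding a replica.
      def locations(rddId: Int, statuses: Seq[Status]): Map[String, Seq[String]] = {
        val acc = new mutable.HashMap[String, mutable.ListBuffer[String]]()
        statuses.foreach { s =>
          s.rddBlocks.getOrElse(rddId, Set.empty).foreach { blockId =>
            acc.getOrElseUpdate(blockId, mutable.ListBuffer.empty[String]) += s.hostPort
          }
        }
        acc.map { case (k, v) => k -> v.toList }.toMap
      }

      val statuses = Seq(
        Status("host-1:46381", Map(7 -> Set("rdd_7_0", "rdd_7_1"))),
        Status("host-2:46382", Map(7 -> Set("rdd_7_1"))))
      println(locations(7, statuses)) // rdd_7_1 has two locations
    }
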
diff --git a/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala b/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala
index d6a025a6f12da..0adeb4058b6e4 100644
--- a/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala
+++ b/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala
@@ -343,14 +343,12 @@ private[spark] object JettyUtils extends Logging {
           -1,
           connectionFactories: _*)
         connector.setPort(port)
-        connector.setHost(hostName)
-        connector.setReuseAddress(!Utils.isWindows)
+        connector.start()
 
         // Currently we only use "SelectChannelConnector"
         // Limit the max acceptor number to 8 so that we don't waste a lot of threads
         connector.setAcceptQueueSize(math.min(connector.getAcceptors, 8))
-
-        connector.start()
+        connector.setHost(hostName)
         // The number of selectors always equals to the number of acceptors
         minThreads += connector.getAcceptors * 2
 
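
The two orderings above differ in whether the connector is fully configured (port, host, reuse-address, accept queue) before start() is called. A minimal Jetty sketch of the configure-then-start order, outside of Spark; it assumes jetty-server on the classpath and uses an arbitrary port:

    import org.eclipse.jetty.server.{Server, ServerConnector}

    object JettyConnectorDemo extends App {
      val server = new Server()
      val connector = new ServerConnector(server)

      // Configure everything before start(): changing host/port after the
      // connector is started has no effect on the already-bound socket.
      connector.setPort(4040)
      connector.setHost("127.0.0.1")
      connector.setReuseAddress(true)
      connector.setAcceptQueueSize(8)

      server.addConnector(connector)
      server.start()
      println(s"Bound to ${connector.getHost}:${connector.getLocalPort}")
      server.stop()
    }
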
diff --git a/core/src/main/scala/org/apache/spark/ui/UIUtils.scala b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala
index 5d015b0531ef6..ba798df13c95d 100644
--- a/core/src/main/scala/org/apache/spark/ui/UIUtils.scala
+++ b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala
@@ -20,7 +20,6 @@ package org.apache.spark.ui
 import java.net.URLDecoder
 import java.text.SimpleDateFormat
 import java.util.{Date, Locale, TimeZone}
-import javax.servlet.http.HttpServletRequest
 
 import scala.util.control.NonFatal
 import scala.xml._
@@ -149,71 +148,60 @@ private[spark] object UIUtils extends Logging {
   }
 
   // Yarn has to go through a proxy so the base uri is provided and has to be on all links
-  def uiRoot(request: HttpServletRequest): String = {
-    // Knox uses X-Forwarded-Context to notify the application the base path
-    val knoxBasePath = Option(request.getHeader("X-Forwarded-Context"))
+  def uiRoot: String = {
     // SPARK-11484 - Use the proxyBase set by the AM, if not found then use env.
     sys.props.get("spark.ui.proxyBase")
       .orElse(sys.env.get("APPLICATION_WEB_PROXY_BASE"))
-      .orElse(knoxBasePath)
       .getOrElse("")
   }
 
-  def prependBaseUri(
-      request: HttpServletRequest,
-      basePath: String = "",
-      resource: String = ""): String = {
-    uiRoot(request) + basePath + resource
+  def prependBaseUri(basePath: String = "", resource: String = ""): String = {
+    uiRoot + basePath + resource
   }
 
-  def commonHeaderNodes(request: HttpServletRequest): Seq[Node] = {
+  def commonHeaderNodes: Seq[Node] = {
     <meta http-equiv="Content-type" content="text/html; charset=utf-8" />
-    <link rel="stylesheet"
-          href={prependBaseUri(request, "/static/bootstrap.min.css")} type="text/css"/>
-    <link rel="stylesheet" href={prependBaseUri(request, "/static/vis.min.css")} type="text/css"/>
-    <link rel="stylesheet" href={prependBaseUri(request, "/static/webui.css")} type="text/css"/>
-    <link rel="stylesheet"
-          href={prependBaseUri(request, "/static/timeline-view.css")} type="text/css"/>
-    <script src={prependBaseUri(request, "/static/sorttable.js")} ></script>
-    <script src={prependBaseUri(request, "/static/jquery-1.11.1.min.js")}></script>
-    <script src={prependBaseUri(request, "/static/vis.min.js")}></script>
-    <script src={prependBaseUri(request, "/static/bootstrap-tooltip.js")}></script>
-    <script src={prependBaseUri(request, "/static/initialize-tooltips.js")}></script>
-    <script src={prependBaseUri(request, "/static/table.js")}></script>
-    <script src={prependBaseUri(request, "/static/additional-metrics.js")}></script>
-    <script src={prependBaseUri(request, "/static/timeline-view.js")}></script>
-    <script src={prependBaseUri(request, "/static/log-view.js")}></script>
-    <script src={prependBaseUri(request, "/static/webui.js")}></script>
-    <script>setUIRoot('{UIUtils.uiRoot(request)}')</script>
+    <link rel="stylesheet" href={prependBaseUri("/static/bootstrap.min.css")} type="text/css"/>
+    <link rel="stylesheet" href={prependBaseUri("/static/vis.min.css")} type="text/css"/>
+    <link rel="stylesheet" href={prependBaseUri("/static/webui.css")} type="text/css"/>
+    <link rel="stylesheet" href={prependBaseUri("/static/timeline-view.css")} type="text/css"/>
+    <script src={prependBaseUri("/static/sorttable.js")} ></script>
+    <script src={prependBaseUri("/static/jquery-1.11.1.min.js")}></script>
+    <script src={prependBaseUri("/static/vis.min.js")}></script>
+    <script src={prependBaseUri("/static/bootstrap-tooltip.js")}></script>
+    <script src={prependBaseUri("/static/initialize-tooltips.js")}></script>
+    <script src={prependBaseUri("/static/table.js")}></script>
+    <script src={prependBaseUri("/static/additional-metrics.js")}></script>
+    <script src={prependBaseUri("/static/timeline-view.js")}></script>
+    <script src={prependBaseUri("/static/log-view.js")}></script>
+    <script src={prependBaseUri("/static/webui.js")}></script>
+    <script>setUIRoot('{UIUtils.uiRoot}')</script>
   }
 
-  def vizHeaderNodes(request: HttpServletRequest): Seq[Node] = {
-    <link rel="stylesheet"
-          href={prependBaseUri(request, "/static/spark-dag-viz.css")} type="text/css" />
-    <script src={prependBaseUri(request, "/static/d3.min.js")}></script>
-    <script src={prependBaseUri(request, "/static/dagre-d3.min.js")}></script>
-    <script src={prependBaseUri(request, "/static/graphlib-dot.min.js")}></script>
-    <script src={prependBaseUri(request, "/static/spark-dag-viz.js")}></script>
+  def vizHeaderNodes: Seq[Node] = {
+    <link rel="stylesheet" href={prependBaseUri("/static/spark-dag-viz.css")} type="text/css" />
+    <script src={prependBaseUri("/static/d3.min.js")}></script>
+    <script src={prependBaseUri("/static/dagre-d3.min.js")}></script>
+    <script src={prependBaseUri("/static/graphlib-dot.min.js")}></script>
+    <script src={prependBaseUri("/static/spark-dag-viz.js")}></script>
   }
 
-  def dataTablesHeaderNodes(request: HttpServletRequest): Seq[Node] = {
-    <link rel="stylesheet" href={prependBaseUri(request,
-      "/static/jquery.dataTables.1.10.4.min.css")} type="text/css"/>
+  def dataTablesHeaderNodes: Seq[Node] = {
     <link rel="stylesheet"
-          href={prependBaseUri(request, "/static/dataTables.bootstrap.css")} type="text/css"/>
+          href={prependBaseUri("/static/jquery.dataTables.1.10.4.min.css")} type="text/css"/>
     <link rel="stylesheet"
-          href={prependBaseUri(request, "/static/jsonFormatter.min.css")} type="text/css"/>
-    <script src={prependBaseUri(request, "/static/jquery.dataTables.1.10.4.min.js")}></script>
-    <script src={prependBaseUri(request, "/static/jquery.cookies.2.2.0.min.js")}></script>
-    <script src={prependBaseUri(request, "/static/jquery.blockUI.min.js")}></script>
-    <script src={prependBaseUri(request, "/static/dataTables.bootstrap.min.js")}></script>
-    <script src={prependBaseUri(request, "/static/jsonFormatter.min.js")}></script>
-    <script src={prependBaseUri(request, "/static/jquery.mustache.js")}></script>
+          href={prependBaseUri("/static/dataTables.bootstrap.css")} type="text/css"/>
+    <link rel="stylesheet" href={prependBaseUri("/static/jsonFormatter.min.css")} type="text/css"/>
+    <script src={prependBaseUri("/static/jquery.dataTables.1.10.4.min.js")}></script>
+    <script src={prependBaseUri("/static/jquery.cookies.2.2.0.min.js")}></script>
+    <script src={prependBaseUri("/static/jquery.blockUI.min.js")}></script>
+    <script src={prependBaseUri("/static/dataTables.bootstrap.min.js")}></script>
+    <script src={prependBaseUri("/static/jsonFormatter.min.js")}></script>
+    <script src={prependBaseUri("/static/jquery.mustache.js")}></script>
   }
 
   /** Returns a spark page with correctly formatted headers */
   def headerSparkPage(
-      request: HttpServletRequest,
       title: String,
       content: => Seq[Node],
       activeTab: SparkUITab,
@@ -226,26 +214,24 @@ private[spark] object UIUtils extends Logging {
     val shortAppName = if (appName.length < 36) appName else appName.take(32) + "..."
     val header = activeTab.headerTabs.map { tab =>
       <li class={if (tab == activeTab) "active" else ""}>
-        <a href={prependBaseUri(request, activeTab.basePath, "/" + tab.prefix + "/")}>{tab.name}</a>
+        <a href={prependBaseUri(activeTab.basePath, "/" + tab.prefix + "/")}>{tab.name}</a>
       </li>
     }
     val helpButton: Seq[Node] = helpText.map(tooltip(_, "bottom")).getOrElse(Seq.empty)
 
     <html>
       <head>
-        {commonHeaderNodes(request)}
-        {if (showVisualization) vizHeaderNodes(request) else Seq.empty}
-        {if (useDataTables) dataTablesHeaderNodes(request) else Seq.empty}
-        <link rel="shortcut icon"
-              href={prependBaseUri(request, "/static/spark-logo-77x50px-hd.png")}></link>
+        {commonHeaderNodes}
+        {if (showVisualization) vizHeaderNodes else Seq.empty}
+        {if (useDataTables) dataTablesHeaderNodes else Seq.empty}
         <title>{appName} - {title}</title>
       </head>
       <body>
         <div class="navbar navbar-static-top">
           <div class="navbar-inner">
             <div class="brand">
-              <a href={prependBaseUri(request, "/")} class="brand">
-                <img src={prependBaseUri(request, "/static/spark-logo-77x50px-hd.png")} />
+              <a href={prependBaseUri("/")} class="brand">
+                <img src={prependBaseUri("/static/spark-logo-77x50px-hd.png")} />
                 <span class="version">{activeTab.appSparkVersion}</span>
               </a>
             </div>
@@ -272,16 +258,13 @@ private[spark] object UIUtils extends Logging {
 
   /** Returns a page with the spark css/js and a simple format. Used for scheduler UI. */
   def basicSparkPage(
-      request: HttpServletRequest,
       content: => Seq[Node],
       title: String,
       useDataTables: Boolean = false): Seq[Node] = {
     <html>
       <head>
-        {commonHeaderNodes(request)}
-        {if (useDataTables) dataTablesHeaderNodes(request) else Seq.empty}
-        <link rel="shortcut icon"
-              href={prependBaseUri(request, "/static/spark-logo-77x50px-hd.png")}></link>
+        {commonHeaderNodes}
+        {if (useDataTables) dataTablesHeaderNodes else Seq.empty}
         <title>{title}</title>
       </head>
       <body>
@@ -289,8 +272,8 @@ private[spark] object UIUtils extends Logging {
           <div class="row-fluid">
             <div class="span12">
               <h3 style="vertical-align: middle; display: inline-block;">
-                <a style="text-decoration: none" href={prependBaseUri(request, "/")}>
-                  <img src={prependBaseUri(request, "/static/spark-logo-77x50px-hd.png")} />
+                <a style="text-decoration: none" href={prependBaseUri("/")}>
+                  <img src={prependBaseUri("/static/spark-logo-77x50px-hd.png")} />
                   <span class="version"
                         style="margin-right: 15px;">{org.apache.spark.SPARK_VERSION}</span>
                 </a>
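
Both uiRoot variants resolve the proxy base with an Option fall-through chain; the removed version additionally consults the request's X-Forwarded-Context header, which gateways such as Knox set. The chain is easy to isolate when the header lookup is passed in as a function, so the sketch needs no servlet types:

    object UiRootResolution extends App {
      // Resolve the UI base path: system property, then environment variable,
      // then an optional reverse-proxy header, then empty.
      def uiRoot(header: String => Option[String]): String =
        sys.props.get("spark.ui.proxyBase")
          .orElse(sys.env.get("APPLICATION_WEB_PROXY_BASE"))
          .orElse(header("X-Forwarded-Context"))
          .getOrElse("")

      val headers = Map("X-Forwarded-Context" -> "/gateway/sandbox")
      println(uiRoot(headers.get))  // "/gateway/sandbox" unless the prop/env are set
      println(uiRoot(_ => None))    // "" unless the prop/env are set
    }
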
diff --git a/core/src/main/scala/org/apache/spark/ui/env/EnvironmentPage.scala b/core/src/main/scala/org/apache/spark/ui/env/EnvironmentPage.scala
index 3d465a34e44aa..902eb92b854f2 100644
--- a/core/src/main/scala/org/apache/spark/ui/env/EnvironmentPage.scala
+++ b/core/src/main/scala/org/apache/spark/ui/env/EnvironmentPage.scala
@@ -94,7 +94,7 @@ private[ui] class EnvironmentPage(
         </div>
       </span>
 
-    UIUtils.headerSparkPage(request, "Environment", content, parent)
+    UIUtils.headerSparkPage("Environment", content, parent)
   }
 
   private def propertyHeader = Seq("Name", "Value")
diff --git a/core/src/main/scala/org/apache/spark/ui/exec/ExecutorThreadDumpPage.scala b/core/src/main/scala/org/apache/spark/ui/exec/ExecutorThreadDumpPage.scala
index f9713fb5b4a3c..f4686ea3cf91f 100644
--- a/core/src/main/scala/org/apache/spark/ui/exec/ExecutorThreadDumpPage.scala
+++ b/core/src/main/scala/org/apache/spark/ui/exec/ExecutorThreadDumpPage.scala
@@ -17,6 +17,7 @@
 
 package org.apache.spark.ui.exec
 
+import java.util.Locale
 import javax.servlet.http.HttpServletRequest
 
 import scala.xml.{Node, Text}
@@ -40,7 +41,17 @@ private[ui] class ExecutorThreadDumpPage(
     val maybeThreadDump = sc.get.getExecutorThreadDump(executorId)
 
     val content = maybeThreadDump.map { threadDump =>
-      val dumpRows = threadDump.map { thread =>
+      val dumpRows = threadDump.sortWith {
+        case (threadTrace1, threadTrace2) =>
+          val v1 = if (threadTrace1.threadName.contains("Executor task launch")) 1 else 0
+          val v2 = if (threadTrace2.threadName.contains("Executor task launch")) 1 else 0
+          if (v1 == v2) {
+            threadTrace1.threadName.toLowerCase(Locale.ROOT) <
+              threadTrace2.threadName.toLowerCase(Locale.ROOT)
+          } else {
+            v1 > v2
+          }
+      }.map { thread =>
         val threadId = thread.threadId
         val blockedBy = thread.blockedByThreadId match {
           case Some(_) =>
@@ -60,7 +71,7 @@ private[ui] class ExecutorThreadDumpPage(
           <td id={s"${threadId}_td_name"}>{thread.threadName}</td>
           <td id={s"${threadId}_td_state"}>{thread.threadState}</td>
           <td id={s"${threadId}_td_locking"}>{blockedBy}{heldLocks}</td>
-          <td id={s"${threadId}_td_stacktrace"} class="hidden">{thread.stackTrace.html}</td>
+          <td id={s"${threadId}_td_stacktrace"} class="hidden">{thread.stackTrace}</td>
         </tr>
       }
 
@@ -97,6 +108,6 @@ private[ui] class ExecutorThreadDumpPage(
       </table>
     </div>
     }.getOrElse(Text("Error fetching thread dump"))
-    UIUtils.headerSparkPage(request, s"Thread dump for executor $executorId", content, parent)
+    UIUtils.headerSparkPage(s"Thread dump for executor $executorId", content, parent)
   }
 }
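
The restored sortWith orders thread traces so that "Executor task launch" threads come first, with ties broken by case-insensitive thread name. The same ordering can be expressed with sortBy on a tuple key, shown here over plain thread names:

    import java.util.Locale

    object ThreadDumpOrdering extends App {
      val threadNames = Seq(
        "dispatcher-event-loop-1",
        "Executor task launch worker for task 12",
        "Signal Dispatcher",
        "Executor task launch worker for task 3")

      // Key: (0 for task-launch threads, 1 otherwise), then lower-cased name.
      val ordered = threadNames.sortBy { name =>
        (if (name.contains("Executor task launch")) 0 else 1, name.toLowerCase(Locale.ROOT))
      }

      ordered.foreach(println)
    }
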
diff --git a/core/src/main/scala/org/apache/spark/ui/exec/ExecutorsTab.scala b/core/src/main/scala/org/apache/spark/ui/exec/ExecutorsTab.scala
index d5a60f52cbb0f..843486f4a70d2 100644
--- a/core/src/main/scala/org/apache/spark/ui/exec/ExecutorsTab.scala
+++ b/core/src/main/scala/org/apache/spark/ui/exec/ExecutorsTab.scala
@@ -49,12 +49,12 @@ private[ui] class ExecutorsPage(
       <div>
         {
           <div id="active-executors" class="row-fluid"></div> ++
-          <script src={UIUtils.prependBaseUri(request, "/static/utils.js")}></script> ++
-          <script src={UIUtils.prependBaseUri(request, "/static/executorspage.js")}></script> ++
+          <script src={UIUtils.prependBaseUri("/static/utils.js")}></script> ++
+          <script src={UIUtils.prependBaseUri("/static/executorspage.js")}></script> ++
           <script>setThreadDumpEnabled({threadDumpEnabled})</script>
         }
       </div>
 
-    UIUtils.headerSparkPage(request, "Executors", content, parent, useDataTables = true)
+    UIUtils.headerSparkPage("Executors", content, parent, useDataTables = true)
   }
 }
diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala
index 178d2c8d1a10a..2b0f4acbac72a 100644
--- a/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala
+++ b/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala
@@ -206,9 +206,7 @@ private[ui] class AllJobsPage(parent: JobsTab, store: AppStatusStore) extends We
       jobs: Seq[v1.JobData],
       killEnabled: Boolean): Seq[Node] = {
     // stripXSS is called to remove suspicious characters used in XSS attacks
-    val allParameters = request.getParameterMap.asScala.toMap.map { case (k, v) =>
-      UIUtils.stripXSS(k) -> v.map(UIUtils.stripXSS).toSeq
-    }
+    val allParameters = request.getParameterMap.asScala.toMap.mapValues(_.map(UIUtils.stripXSS))
     val parameterOtherTable = allParameters.filterNot(_._1.startsWith(jobTag))
       .map(para => para._1 + "=" + para._2(0))
 
@@ -250,7 +248,7 @@ private[ui] class AllJobsPage(parent: JobsTab, store: AppStatusStore) extends We
         jobs,
         tableHeaderId,
         jobTag,
-        UIUtils.prependBaseUri(request, parent.basePath),
+        UIUtils.prependBaseUri(parent.basePath),
         "jobs", // subPath
         parameterOtherTable,
         killEnabled,
@@ -409,7 +407,7 @@ private[ui] class AllJobsPage(parent: JobsTab, store: AppStatusStore) extends We
     val helpText = """A job is triggered by an action, like count() or saveAsTextFile().""" +
       " Click on a job to see information about the stages of tasks inside it."
 
-    UIUtils.headerSparkPage(request, "Spark Jobs", content, parent, helpText = Some(helpText))
+    UIUtils.headerSparkPage("Spark Jobs", content, parent, helpText = Some(helpText))
   }
 
 }
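
The two parameter-sanitizing lines differ in scope: mapValues only sanitizes the values and returns a lazy view, while the explicit map also sanitizes the keys and builds a strict map. A small sketch of the difference, with stripTags standing in for UIUtils.stripXSS:

    object ParamSanitizing extends App {
      // Placeholder sanitizer standing in for UIUtils.stripXSS.
      def stripTags(s: String): String = s.replaceAll("[<>\"']", "")

      val params: Map[String, Array[String]] =
        Map("sort<script>" -> Array("duration\"", "id"))

      // Values only (keys pass through untouched).
      val valuesOnly = params.mapValues(_.map(stripTags).toSeq)

      // Keys and values, as in the removed variant.
      val both = params.map { case (k, v) => stripTags(k) -> v.map(stripTags).toSeq }

      println(valuesOnly.keys.mkString(", "))  // sort<script>
      println(both.keys.mkString(", "))        // sortscript
    }
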
diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/AllStagesPage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/AllStagesPage.scala
index f672ce0ec6a68..606dc1e180e5b 100644
--- a/core/src/main/scala/org/apache/spark/ui/jobs/AllStagesPage.scala
+++ b/core/src/main/scala/org/apache/spark/ui/jobs/AllStagesPage.scala
@@ -19,20 +19,42 @@ package org.apache.spark.ui.jobs
 
 import javax.servlet.http.HttpServletRequest
 
-import scala.xml.{Attribute, Elem, Node, NodeSeq, Null, Text}
+import scala.xml.{Node, NodeSeq}
 
 import org.apache.spark.scheduler.Schedulable
-import org.apache.spark.status.{AppSummary, PoolData}
-import org.apache.spark.status.api.v1.{StageData, StageStatus}
+import org.apache.spark.status.PoolData
+import org.apache.spark.status.api.v1._
 import org.apache.spark.ui.{UIUtils, WebUIPage}
 
 /** Page showing list of all ongoing and recently finished stages and pools */
 private[ui] class AllStagesPage(parent: StagesTab) extends WebUIPage("") {
   private val sc = parent.sc
-  private val subPath = "stages"
   private def isFairScheduler = parent.isFairScheduler
 
   def render(request: HttpServletRequest): Seq[Node] = {
+    val allStages = parent.store.stageList(null)
+
+    val activeStages = allStages.filter(_.status == StageStatus.ACTIVE)
+    val pendingStages = allStages.filter(_.status == StageStatus.PENDING)
+    val completedStages = allStages.filter(_.status == StageStatus.COMPLETE)
+    val failedStages = allStages.filter(_.status == StageStatus.FAILED).reverse
+
+    val numFailedStages = failedStages.size
+    val subPath = "stages"
+
+    val activeStagesTable =
+      new StageTableBase(parent.store, request, activeStages, "active", "activeStage",
+        parent.basePath, subPath, parent.isFairScheduler, parent.killEnabled, false)
+    val pendingStagesTable =
+      new StageTableBase(parent.store, request, pendingStages, "pending", "pendingStage",
+        parent.basePath, subPath, parent.isFairScheduler, false, false)
+    val completedStagesTable =
+      new StageTableBase(parent.store, request, completedStages, "completed", "completedStage",
+        parent.basePath, subPath, parent.isFairScheduler, false, false)
+    val failedStagesTable =
+      new StageTableBase(parent.store, request, failedStages, "failed", "failedStage",
+        parent.basePath, subPath, parent.isFairScheduler, false, true)
+
     // For now, pool information is only accessible in live UIs
     val pools = sc.map(_.getAllPools).getOrElse(Seq.empty[Schedulable]).map { pool =>
       val uiPool = parent.store.asOption(parent.store.pool(pool.name)).getOrElse(
@@ -41,121 +63,129 @@ private[ui] class AllStagesPage(parent: StagesTab) extends WebUIPage("") {
     }.toMap
     val poolTable = new PoolTable(pools, parent)
 
-    val allStatuses = Seq(StageStatus.ACTIVE, StageStatus.PENDING, StageStatus.COMPLETE,
-      StageStatus.SKIPPED, StageStatus.FAILED)
+    val shouldShowActiveStages = activeStages.nonEmpty
+    val shouldShowPendingStages = pendingStages.nonEmpty
+    val shouldShowCompletedStages = completedStages.nonEmpty
+    val shouldShowFailedStages = failedStages.nonEmpty
 
-    val allStages = parent.store.stageList(null)
     val appSummary = parent.store.appSummary()
-
-    val (summaries, tables) = allStatuses.map(
-      summaryAndTableForStatus(allStages, appSummary, _, request)).unzip
+    val completedStageNumStr = if (appSummary.numCompletedStages == completedStages.size) {
+      s"${appSummary.numCompletedStages}"
+    } else {
+      s"${appSummary.numCompletedStages}, only showing ${completedStages.size}"
+    }
 
     val summary: NodeSeq =
       <div>
         <ul class="unstyled">
-          {summaries.flatten}
+          {
+            if (shouldShowActiveStages) {
+              <li>
+                <a href="#active"><strong>Active Stages:</strong></a>
+                {activeStages.size}
+              </li>
+            }
+          }
+          {
+            if (shouldShowPendingStages) {
+              <li>
+                <a href="#pending"><strong>Pending Stages:</strong></a>
+                {pendingStages.size}
+              </li>
+            }
+          }
+          {
+            if (shouldShowCompletedStages) {
+              <li id="completed-summary">
+                <a href="#completed"><strong>Completed Stages:</strong></a>
+                {completedStageNumStr}
+              </li>
+            }
+          }
+          {
+            if (shouldShowFailedStages) {
+              <li>
+                <a href="#failed"><strong>Failed Stages:</strong></a>
+                {numFailedStages}
+              </li>
+            }
+          }
         </ul>
       </div>
 
-    val poolsDescription = if (sc.isDefined && isFairScheduler) {
-        <span class="collapse-aggregated-poolTable collapse-table"
-            onClick="collapseTable('collapse-aggregated-poolTable','aggregated-poolTable')">
+    var content = summary ++
+      {
+        if (sc.isDefined && isFairScheduler) {
+          <span class="collapse-aggregated-poolTable collapse-table"
+              onClick="collapseTable('collapse-aggregated-poolTable','aggregated-poolTable')">
+            <h4>
+              <span class="collapse-table-arrow arrow-open"></span>
+              <a>Fair Scheduler Pools ({pools.size})</a>
+            </h4>
+          </span> ++
+          <div class="aggregated-poolTable collapsible-table">
+            {poolTable.toNodeSeq}
+          </div>
+        } else {
+          Seq.empty[Node]
+        }
+      }
+    if (shouldShowActiveStages) {
+      content ++=
+        <span id="active" class="collapse-aggregated-allActiveStages collapse-table"
+            onClick="collapseTable('collapse-aggregated-allActiveStages',
+            'aggregated-allActiveStages')">
           <h4>
             <span class="collapse-table-arrow arrow-open"></span>
-            <a>Fair Scheduler Pools ({pools.size})</a>
+            <a>Active Stages ({activeStages.size})</a>
           </h4>
         </span> ++
-        <div class="aggregated-poolTable collapsible-table">
-          {poolTable.toNodeSeq(request)}
+        <div class="aggregated-allActiveStages collapsible-table">
+          {activeStagesTable.toNodeSeq}
         </div>
-      } else {
-        Seq.empty[Node]
-      }
-
-    val content = summary ++ poolsDescription ++ tables.flatten.flatten
-
-    UIUtils.headerSparkPage(request, "Stages for All Jobs", content, parent)
-  }
-
-  private def summaryAndTableForStatus(
-      allStages: Seq[StageData],
-      appSummary: AppSummary,
-      status: StageStatus,
-      request: HttpServletRequest): (Option[Elem], Option[NodeSeq]) = {
-    val stages = if (status == StageStatus.FAILED) {
-      allStages.filter(_.status == status).reverse
-    } else {
-      allStages.filter(_.status == status)
     }
-
-    if (stages.isEmpty) {
-      (None, None)
-    } else {
-      val killEnabled = status == StageStatus.ACTIVE && parent.killEnabled
-      val isFailedStage = status == StageStatus.FAILED
-
-      val stagesTable =
-        new StageTableBase(parent.store, request, stages, statusName(status), stageTag(status),
-          parent.basePath, subPath, parent.isFairScheduler, killEnabled, isFailedStage)
-      val stagesSize = stages.size
-      (Some(summary(appSummary, status, stagesSize)),
-        Some(table(appSummary, status, stagesTable, stagesSize)))
+    if (shouldShowPendingStages) {
+      content ++=
+        <span id="pending" class="collapse-aggregated-allPendingStages collapse-table"
+            onClick="collapseTable('collapse-aggregated-allPendingStages',
+            'aggregated-allPendingStages')">
+          <h4>
+            <span class="collapse-table-arrow arrow-open"></span>
+            <a>Pending Stages ({pendingStages.size})</a>
+          </h4>
+        </span> ++
+        <div class="aggregated-allPendingStages collapsible-table">
+          {pendingStagesTable.toNodeSeq}
+        </div>
     }
-  }
-
-  private def statusName(status: StageStatus): String = status match {
-    case StageStatus.ACTIVE => "active"
-    case StageStatus.COMPLETE => "completed"
-    case StageStatus.FAILED => "failed"
-    case StageStatus.PENDING => "pending"
-    case StageStatus.SKIPPED => "skipped"
-  }
-
-  private def stageTag(status: StageStatus): String = s"${statusName(status)}Stage"
-
-  private def headerDescription(status: StageStatus): String = statusName(status).capitalize
-
-  private def summaryContent(appSummary: AppSummary, status: StageStatus, size: Int): String = {
-    if (status == StageStatus.COMPLETE && appSummary.numCompletedStages != size) {
-      s"${appSummary.numCompletedStages}, only showing $size"
-    } else {
-      s"$size"
+    if (shouldShowCompletedStages) {
+      content ++=
+        <span id="completed" class="collapse-aggregated-allCompletedStages collapse-table"
+            onClick="collapseTable('collapse-aggregated-allCompletedStages',
+            'aggregated-allCompletedStages')">
+          <h4>
+            <span class="collapse-table-arrow arrow-open"></span>
+            <a>Completed Stages ({completedStageNumStr})</a>
+          </h4>
+        </span> ++
+        <div class="aggregated-allCompletedStages collapsible-table">
+          {completedStagesTable.toNodeSeq}
+        </div>
     }
-  }
-
-  private def summary(appSummary: AppSummary, status: StageStatus, size: Int): Elem = {
-    val summary =
-      <li>
-        <a href={s"#${statusName(status)}"}>
-          <strong>{headerDescription(status)} Stages:</strong>
-        </a>
-        {summaryContent(appSummary, status, size)}
-      </li>
-
-    if (status == StageStatus.COMPLETE) {
-      summary % Attribute(None, "id", Text("completed-summary"), Null)
-    } else {
-      summary
+    if (shouldShowFailedStages) {
+      content ++=
+        <span id ="failed" class="collapse-aggregated-allFailedStages collapse-table"
+            onClick="collapseTable('collapse-aggregated-allFailedStages',
+            'aggregated-allFailedStages')">
+          <h4>
+            <span class="collapse-table-arrow arrow-open"></span>
+            <a>Failed Stages ({numFailedStages})</a>
+          </h4>
+        </span> ++
+        <div class="aggregated-allFailedStages collapsible-table">
+          {failedStagesTable.toNodeSeq}
+        </div>
     }
-  }
-
-  private def table(
-      appSummary: AppSummary,
-      status: StageStatus,
-      stagesTable: StageTableBase,
-      size: Int): NodeSeq = {
-    val classSuffix = s"${statusName(status).capitalize}Stages"
-    <span id={statusName(status)}
-          class={s"collapse-aggregated-all$classSuffix collapse-table"}
-          onClick={s"collapseTable('collapse-aggregated-all$classSuffix'," +
-            s" 'aggregated-all$classSuffix')"}>
-      <h4>
-        <span class="collapse-table-arrow arrow-open"></span>
-        <a>{headerDescription(status)} Stages ({summaryContent(appSummary, status, size)})</a>
-      </h4>
-    </span> ++
-      <div class={s"aggregated-all$classSuffix collapsible-table"}>
-        {stagesTable.toNodeSeq}
-      </div>
+    UIUtils.headerSparkPage("Stages for All Jobs", content, parent)
   }
 }
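
The removed AllStagesPage builds the page generically, mapping every StageStatus to an optional (summary, table) pair and unzipping, while the restored code writes each status block out by hand. The generic pattern is sketched below with plain strings standing in for the HTML fragments:

    object PerStatusSections extends App {
      sealed trait Status
      case object Active extends Status
      case object Pending extends Status
      case object Complete extends Status
      case object Failed extends Status

      val stages: Map[Status, Int] = Map(Active -> 2, Complete -> 14) // no pending/failed

      // One optional (summary, table) pair per status; empty statuses yield None.
      def sectionFor(status: Status): (Option[String], Option[String]) =
        stages.get(status).filter(_ > 0) match {
          case Some(n) => (Some(s"$status Stages: $n"), Some(s"<table for $n $status stages>"))
          case None    => (None, None)
        }

      val (summaries, tables) =
        Seq(Active, Pending, Complete, Failed).map(sectionFor).unzip

      (summaries.flatten ++ tables.flatten).foreach(println)
    }
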
diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala
index 55444a2c0c9ab..46f2a76cc651b 100644
--- a/core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala
+++ b/core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala
@@ -195,7 +195,7 @@ private[ui] class JobPage(parent: JobsTab, store: AppStatusStore) extends WebUIP
           <p>No information to display for job {jobId}</p>
         </div>
       return UIUtils.headerSparkPage(
-        request, s"Details for Job $jobId", content, parent)
+        s"Details for Job $jobId", content, parent)
     }
     val isComplete = jobData.status != JobExecutionStatus.RUNNING
     val stages = jobData.stageIds.map { stageId =>
@@ -413,7 +413,6 @@ private[ui] class JobPage(parent: JobsTab, store: AppStatusStore) extends WebUIP
           {failedStagesTable.toNodeSeq}
         </div>
     }
-    UIUtils.headerSparkPage(
-      request, s"Details for Job $jobId", content, parent, showVisualization = true)
+    UIUtils.headerSparkPage(s"Details for Job $jobId", content, parent, showVisualization = true)
   }
 }
diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/PoolPage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/PoolPage.scala
index 22a40101e33df..a3e1f13782e30 100644
--- a/core/src/main/scala/org/apache/spark/ui/jobs/PoolPage.scala
+++ b/core/src/main/scala/org/apache/spark/ui/jobs/PoolPage.scala
@@ -49,7 +49,7 @@ private[ui] class PoolPage(parent: StagesTab) extends WebUIPage("pool") {
         "stages/pool", parent.isFairScheduler, parent.killEnabled, false)
 
     val poolTable = new PoolTable(Map(pool -> uiPool), parent)
-    var content = <h4>Summary </h4> ++ poolTable.toNodeSeq(request)
+    var content = <h4>Summary </h4> ++ poolTable.toNodeSeq
     if (activeStages.nonEmpty) {
       content ++=
         <span class="collapse-aggregated-poolActiveStages collapse-table"
@@ -65,6 +65,6 @@ private[ui] class PoolPage(parent: StagesTab) extends WebUIPage("pool") {
         </div>
     }
 
-    UIUtils.headerSparkPage(request, "Fair Scheduler Pool: " + poolName, content, parent)
+    UIUtils.headerSparkPage("Fair Scheduler Pool: " + poolName, content, parent)
   }
 }
diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/PoolTable.scala b/core/src/main/scala/org/apache/spark/ui/jobs/PoolTable.scala
index 96b5f72393070..5dfce858dec07 100644
--- a/core/src/main/scala/org/apache/spark/ui/jobs/PoolTable.scala
+++ b/core/src/main/scala/org/apache/spark/ui/jobs/PoolTable.scala
@@ -18,7 +18,6 @@
 package org.apache.spark.ui.jobs
 
 import java.net.URLEncoder
-import javax.servlet.http.HttpServletRequest
 
 import scala.xml.Node
 
@@ -29,7 +28,7 @@ import org.apache.spark.ui.UIUtils
 /** Table showing list of pools */
 private[ui] class PoolTable(pools: Map[Schedulable, PoolData], parent: StagesTab) {
 
-  def toNodeSeq(request: HttpServletRequest): Seq[Node] = {
+  def toNodeSeq: Seq[Node] = {
     <table class="table table-bordered table-striped table-condensed sortable table-fixed">
       <thead>
         <th>Pool Name</th>
@@ -40,15 +39,15 @@ private[ui] class PoolTable(pools: Map[Schedulable, PoolData], parent: StagesTab
         <th>SchedulingMode</th>
       </thead>
       <tbody>
-        {pools.map { case (s, p) => poolRow(request, s, p) }}
+        {pools.map { case (s, p) => poolRow(s, p) }}
       </tbody>
     </table>
   }
 
-  private def poolRow(request: HttpServletRequest, s: Schedulable, p: PoolData): Seq[Node] = {
+  private def poolRow(s: Schedulable, p: PoolData): Seq[Node] = {
     val activeStages = p.stageIds.size
     val href = "%s/stages/pool?poolname=%s"
-      .format(UIUtils.prependBaseUri(request, parent.basePath), URLEncoder.encode(p.name, "UTF-8"))
+      .format(UIUtils.prependBaseUri(parent.basePath), URLEncoder.encode(p.name, "UTF-8"))
     <tr>
       <td>
         <a href={href}>{p.name}</a>
diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala
index d4e6a7bc3effa..5c2b0c3a19996 100644
--- a/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala
+++ b/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala
@@ -112,19 +112,20 @@ private[ui] class StagePage(parent: StagesTab, store: AppStatusStore) extends We
           <div id="no-info">
             <p>No information to display for Stage {stageId} (Attempt {stageAttemptId})</p>
           </div>
-        return UIUtils.headerSparkPage(request, stageHeader, content, parent)
+        return UIUtils.headerSparkPage(stageHeader, content, parent)
       }
 
     val localitySummary = store.localitySummary(stageData.stageId, stageData.attemptId)
 
-    val totalTasks = taskCount(stageData)
+    val totalTasks = stageData.numActiveTasks + stageData.numCompleteTasks +
+      stageData.numFailedTasks + stageData.numKilledTasks
     if (totalTasks == 0) {
       val content =
         <div>
           <h4>Summary Metrics</h4> No tasks have started yet
           <h4>Tasks</h4> No tasks have started yet
         </div>
-      return UIUtils.headerSparkPage(request, stageHeader, content, parent)
+      return UIUtils.headerSparkPage(stageHeader, content, parent)
     }
 
     val storedTasks = store.taskCount(stageData.stageId, stageData.attemptId)
@@ -132,7 +133,7 @@ private[ui] class StagePage(parent: StagesTab, store: AppStatusStore) extends We
     val totalTasksNumStr = if (totalTasks == storedTasks) {
       s"$totalTasks"
     } else {
-      s"$storedTasks, showing ${totalTasks}"
+      s"$totalTasks, showing ${storedTasks}"
     }
 
     val summary =
@@ -281,7 +282,7 @@ private[ui] class StagePage(parent: StagesTab, store: AppStatusStore) extends We
     val (taskTable, taskTableHTML) = try {
       val _taskTable = new TaskPagedTable(
         stageData,
-        UIUtils.prependBaseUri(request, parent.basePath) +
+        UIUtils.prependBaseUri(parent.basePath) +
           s"/stages/stage?id=${stageId}&attempt=${stageAttemptId}",
         currentTime,
         pageSize = taskPageSize,
@@ -497,7 +498,7 @@ private[ui] class StagePage(parent: StagesTab, store: AppStatusStore) extends We
       <div class="aggregated-tasks collapsible-table">
         {taskTableHTML ++ jsForScrollingDownToTaskTable}
       </div>
-    UIUtils.headerSparkPage(request, stageHeader, content, parent, showVisualization = true)
+    UIUtils.headerSparkPage(stageHeader, content, parent, showVisualization = true)
   }
 
   def makeTimeline(tasks: Seq[TaskData], currentTime: Long): Seq[Node] = {
@@ -685,7 +686,7 @@ private[ui] class TaskDataSource(
 
   private var _tasksToShow: Seq[TaskData] = null
 
-  override def dataSize: Int = taskCount(stage)
+  override def dataSize: Int = stage.numTasks
 
   override def sliceData(from: Int, to: Int): Seq[TaskData] = {
     if (_tasksToShow == null) {
@@ -749,39 +750,37 @@ private[ui] class TaskPagedTable(
   }
 
   def headers: Seq[Node] = {
-    import ApiHelper._
-
     val taskHeadersAndCssClasses: Seq[(String, String)] =
       Seq(
-        (HEADER_TASK_INDEX, ""), (HEADER_ID, ""), (HEADER_ATTEMPT, ""), (HEADER_STATUS, ""),
-        (HEADER_LOCALITY, ""), (HEADER_EXECUTOR, ""), (HEADER_HOST, ""), (HEADER_LAUNCH_TIME, ""),
-        (HEADER_DURATION, ""), (HEADER_SCHEDULER_DELAY, TaskDetailsClassNames.SCHEDULER_DELAY),
-        (HEADER_DESER_TIME, TaskDetailsClassNames.TASK_DESERIALIZATION_TIME),
-        (HEADER_GC_TIME, ""),
-        (HEADER_SER_TIME, TaskDetailsClassNames.RESULT_SERIALIZATION_TIME),
-        (HEADER_GETTING_RESULT_TIME, TaskDetailsClassNames.GETTING_RESULT_TIME),
-        (HEADER_PEAK_MEM, TaskDetailsClassNames.PEAK_EXECUTION_MEMORY)) ++
-        {if (hasAccumulators(stage)) Seq((HEADER_ACCUMULATORS, "")) else Nil} ++
-        {if (hasInput(stage)) Seq((HEADER_INPUT_SIZE, "")) else Nil} ++
-        {if (hasOutput(stage)) Seq((HEADER_OUTPUT_SIZE, "")) else Nil} ++
+        ("Index", ""), ("ID", ""), ("Attempt", ""), ("Status", ""), ("Locality Level", ""),
+        ("Executor ID", ""), ("Host", ""), ("Launch Time", ""), ("Duration", ""),
+        ("Scheduler Delay", TaskDetailsClassNames.SCHEDULER_DELAY),
+        ("Task Deserialization Time", TaskDetailsClassNames.TASK_DESERIALIZATION_TIME),
+        ("GC Time", ""),
+        ("Result Serialization Time", TaskDetailsClassNames.RESULT_SERIALIZATION_TIME),
+        ("Getting Result Time", TaskDetailsClassNames.GETTING_RESULT_TIME),
+        ("Peak Execution Memory", TaskDetailsClassNames.PEAK_EXECUTION_MEMORY)) ++
+        {if (hasAccumulators(stage)) Seq(("Accumulators", "")) else Nil} ++
+        {if (hasInput(stage)) Seq(("Input Size / Records", "")) else Nil} ++
+        {if (hasOutput(stage)) Seq(("Output Size / Records", "")) else Nil} ++
         {if (hasShuffleRead(stage)) {
-          Seq((HEADER_SHUFFLE_READ_TIME, TaskDetailsClassNames.SHUFFLE_READ_BLOCKED_TIME),
-            (HEADER_SHUFFLE_TOTAL_READS, ""),
-            (HEADER_SHUFFLE_REMOTE_READS, TaskDetailsClassNames.SHUFFLE_READ_REMOTE_SIZE))
+          Seq(("Shuffle Read Blocked Time", TaskDetailsClassNames.SHUFFLE_READ_BLOCKED_TIME),
+            ("Shuffle Read Size / Records", ""),
+            ("Shuffle Remote Reads", TaskDetailsClassNames.SHUFFLE_READ_REMOTE_SIZE))
         } else {
           Nil
         }} ++
         {if (hasShuffleWrite(stage)) {
-          Seq((HEADER_SHUFFLE_WRITE_TIME, ""), (HEADER_SHUFFLE_WRITE_SIZE, ""))
+          Seq(("Write Time", ""), ("Shuffle Write Size / Records", ""))
         } else {
           Nil
         }} ++
         {if (hasBytesSpilled(stage)) {
-          Seq((HEADER_MEM_SPILL, ""), (HEADER_DISK_SPILL, ""))
+          Seq(("Shuffle Spill (Memory)", ""), ("Shuffle Spill (Disk)", ""))
         } else {
           Nil
         }} ++
-        Seq((HEADER_ERROR, ""))
+        Seq(("Errors", ""))
 
     if (!taskHeadersAndCssClasses.map(_._1).contains(sortColumn)) {
       throw new IllegalArgumentException(s"Unknown column: $sortColumn")
@@ -962,62 +961,35 @@ private[ui] class TaskPagedTable(
   }
 }
 
-private[ui] object ApiHelper {
-
-  val HEADER_ID = "ID"
-  val HEADER_TASK_INDEX = "Index"
-  val HEADER_ATTEMPT = "Attempt"
-  val HEADER_STATUS = "Status"
-  val HEADER_LOCALITY = "Locality Level"
-  val HEADER_EXECUTOR = "Executor ID"
-  val HEADER_HOST = "Host"
-  val HEADER_LAUNCH_TIME = "Launch Time"
-  val HEADER_DURATION = "Duration"
-  val HEADER_SCHEDULER_DELAY = "Scheduler Delay"
-  val HEADER_DESER_TIME = "Task Deserialization Time"
-  val HEADER_GC_TIME = "GC Time"
-  val HEADER_SER_TIME = "Result Serialization Time"
-  val HEADER_GETTING_RESULT_TIME = "Getting Result Time"
-  val HEADER_PEAK_MEM = "Peak Execution Memory"
-  val HEADER_ACCUMULATORS = "Accumulators"
-  val HEADER_INPUT_SIZE = "Input Size / Records"
-  val HEADER_OUTPUT_SIZE = "Output Size / Records"
-  val HEADER_SHUFFLE_READ_TIME = "Shuffle Read Blocked Time"
-  val HEADER_SHUFFLE_TOTAL_READS = "Shuffle Read Size / Records"
-  val HEADER_SHUFFLE_REMOTE_READS = "Shuffle Remote Reads"
-  val HEADER_SHUFFLE_WRITE_TIME = "Write Time"
-  val HEADER_SHUFFLE_WRITE_SIZE = "Shuffle Write Size / Records"
-  val HEADER_MEM_SPILL = "Shuffle Spill (Memory)"
-  val HEADER_DISK_SPILL = "Shuffle Spill (Disk)"
-  val HEADER_ERROR = "Errors"
-
-  private[ui] val COLUMN_TO_INDEX = Map(
-    HEADER_ID -> null.asInstanceOf[String],
-    HEADER_TASK_INDEX -> TaskIndexNames.TASK_INDEX,
-    HEADER_ATTEMPT -> TaskIndexNames.ATTEMPT,
-    HEADER_STATUS -> TaskIndexNames.STATUS,
-    HEADER_LOCALITY -> TaskIndexNames.LOCALITY,
-    HEADER_EXECUTOR -> TaskIndexNames.EXECUTOR,
-    HEADER_HOST -> TaskIndexNames.HOST,
-    HEADER_LAUNCH_TIME -> TaskIndexNames.LAUNCH_TIME,
-    HEADER_DURATION -> TaskIndexNames.DURATION,
-    HEADER_SCHEDULER_DELAY -> TaskIndexNames.SCHEDULER_DELAY,
-    HEADER_DESER_TIME -> TaskIndexNames.DESER_TIME,
-    HEADER_GC_TIME -> TaskIndexNames.GC_TIME,
-    HEADER_SER_TIME -> TaskIndexNames.SER_TIME,
-    HEADER_GETTING_RESULT_TIME -> TaskIndexNames.GETTING_RESULT_TIME,
-    HEADER_PEAK_MEM -> TaskIndexNames.PEAK_MEM,
-    HEADER_ACCUMULATORS -> TaskIndexNames.ACCUMULATORS,
-    HEADER_INPUT_SIZE -> TaskIndexNames.INPUT_SIZE,
-    HEADER_OUTPUT_SIZE -> TaskIndexNames.OUTPUT_SIZE,
-    HEADER_SHUFFLE_READ_TIME -> TaskIndexNames.SHUFFLE_READ_TIME,
-    HEADER_SHUFFLE_TOTAL_READS -> TaskIndexNames.SHUFFLE_TOTAL_READS,
-    HEADER_SHUFFLE_REMOTE_READS -> TaskIndexNames.SHUFFLE_REMOTE_READS,
-    HEADER_SHUFFLE_WRITE_TIME -> TaskIndexNames.SHUFFLE_WRITE_TIME,
-    HEADER_SHUFFLE_WRITE_SIZE -> TaskIndexNames.SHUFFLE_WRITE_SIZE,
-    HEADER_MEM_SPILL -> TaskIndexNames.MEM_SPILL,
-    HEADER_DISK_SPILL -> TaskIndexNames.DISK_SPILL,
-    HEADER_ERROR -> TaskIndexNames.ERROR)
+private object ApiHelper {
+
+
+  private val COLUMN_TO_INDEX = Map(
+    "ID" -> null.asInstanceOf[String],
+    "Index" -> TaskIndexNames.TASK_INDEX,
+    "Attempt" -> TaskIndexNames.ATTEMPT,
+    "Status" -> TaskIndexNames.STATUS,
+    "Locality Level" -> TaskIndexNames.LOCALITY,
+    "Executor ID / Host" -> TaskIndexNames.EXECUTOR,
+    "Launch Time" -> TaskIndexNames.LAUNCH_TIME,
+    "Duration" -> TaskIndexNames.DURATION,
+    "Scheduler Delay" -> TaskIndexNames.SCHEDULER_DELAY,
+    "Task Deserialization Time" -> TaskIndexNames.DESER_TIME,
+    "GC Time" -> TaskIndexNames.GC_TIME,
+    "Result Serialization Time" -> TaskIndexNames.SER_TIME,
+    "Getting Result Time" -> TaskIndexNames.GETTING_RESULT_TIME,
+    "Peak Execution Memory" -> TaskIndexNames.PEAK_MEM,
+    "Accumulators" -> TaskIndexNames.ACCUMULATORS,
+    "Input Size / Records" -> TaskIndexNames.INPUT_SIZE,
+    "Output Size / Records" -> TaskIndexNames.OUTPUT_SIZE,
+    "Shuffle Read Blocked Time" -> TaskIndexNames.SHUFFLE_READ_TIME,
+    "Shuffle Read Size / Records" -> TaskIndexNames.SHUFFLE_TOTAL_READS,
+    "Shuffle Remote Reads" -> TaskIndexNames.SHUFFLE_REMOTE_READS,
+    "Write Time" -> TaskIndexNames.SHUFFLE_WRITE_TIME,
+    "Shuffle Write Size / Records" -> TaskIndexNames.SHUFFLE_WRITE_SIZE,
+    "Shuffle Spill (Memory)" -> TaskIndexNames.MEM_SPILL,
+    "Shuffle Spill (Disk)" -> TaskIndexNames.DISK_SPILL,
+    "Errors" -> TaskIndexNames.ERROR)
 
   def hasAccumulators(stageData: StageData): Boolean = {
     stageData.accumulatorUpdates.exists { acc => acc.name != null && acc.value != null }
@@ -1047,13 +1019,8 @@ private[ui] object ApiHelper {
   }
 
   def lastStageNameAndDescription(store: AppStatusStore, job: JobData): (String, String) = {
-    val stage = store.asOption(store.stageAttempt(job.stageIds.max, 0))
+    val stage = store.asOption(store.lastStageAttempt(job.stageIds.max))
     (stage.map(_.name).getOrElse(""), stage.flatMap(_.description).getOrElse(job.name))
   }
 
-  def taskCount(stageData: StageData): Int = {
-    stageData.numActiveTasks + stageData.numCompleteTasks + stageData.numFailedTasks +
-      stageData.numKilledTasks
-  }
-
 }
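
For context on the validation visible at the top of this hunk: the header constants being removed exist so the task table and its sort handling share a single spelling per column, and any requested sort column outside that set is rejected. A minimal standalone sketch of the same pattern (object and column names here are illustrative, not Spark's):

    object TaskTableColumns {
      val HEADER_ID = "ID"
      val HEADER_DURATION = "Duration"
      val HEADER_ERROR = "Errors"

      // Map each visible header to the (possibly null) index used for sorting.
      val COLUMN_TO_INDEX: Map[String, String] = Map(
        HEADER_ID -> null.asInstanceOf[String],
        HEADER_DURATION -> "duration",
        HEADER_ERROR -> "error")

      def indexFor(sortColumn: String): Option[String] = {
        if (!COLUMN_TO_INDEX.contains(sortColumn)) {
          throw new IllegalArgumentException(s"Unknown column: $sortColumn")
        }
        Option(COLUMN_TO_INDEX(sortColumn))
      }
    }
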
diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/StageTable.scala b/core/src/main/scala/org/apache/spark/ui/jobs/StageTable.scala
index 56e4d6838a99a..18a4926f2f6c0 100644
--- a/core/src/main/scala/org/apache/spark/ui/jobs/StageTable.scala
+++ b/core/src/main/scala/org/apache/spark/ui/jobs/StageTable.scala
@@ -43,9 +43,7 @@ private[ui] class StageTableBase(
     killEnabled: Boolean,
     isFailedStage: Boolean) {
   // stripXSS is called to remove suspicious characters used in XSS attacks
-  val allParameters = request.getParameterMap.asScala.toMap.map { case (k, v) =>
-    UIUtils.stripXSS(k) -> v.map(UIUtils.stripXSS).toSeq
-  }
+  val allParameters = request.getParameterMap.asScala.toMap.mapValues(_.map(UIUtils.stripXSS))
   val parameterOtherTable = allParameters.filterNot(_._1.startsWith(stageTag))
     .map(para => para._1 + "=" + para._2(0))
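
Both sides of the hunk above sanitize every request parameter before it is echoed back into the page; the removed form copies keys and values eagerly, while the restored mapValues form rewrites only the values (and, in these Scala versions, does so as a lazy view). A rough sketch of the eager form, with a placeholder sanitizer standing in for UIUtils.stripXSS (the real stripping logic may differ):

    // Hypothetical sanitizer; UIUtils.stripXSS plays this role in Spark.
    def stripXSS(s: String): String =
      if (s == null) null else s.replaceAll("[<>\"'%;()&+]", "")

    def sanitizeParams(raw: Map[String, Array[String]]): Map[String, Seq[String]] =
      raw.map { case (k, v) => stripXSS(k) -> v.map(stripXSS).toSeq }

    // sanitizeParams(Map("task.sort" -> Array("<script>ID</script>")))
    //   => Map("task.sort" -> Seq("scriptID/script"))
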
 
@@ -94,8 +92,7 @@ private[ui] class StageTableBase(
       stageSortColumn,
       stageSortDesc,
       isFailedStage,
-      parameterOtherTable,
-      request
+      parameterOtherTable
     ).table(page)
   } catch {
     case e @ (_ : IllegalArgumentException | _ : IndexOutOfBoundsException) =>
@@ -150,8 +147,7 @@ private[ui] class StagePagedTable(
     sortColumn: String,
     desc: Boolean,
     isFailedStage: Boolean,
-    parameterOtherTable: Iterable[String],
-    request: HttpServletRequest) extends PagedTable[StageTableRowData] {
+    parameterOtherTable: Iterable[String]) extends PagedTable[StageTableRowData] {
 
   override def tableId: String = stageTag + "-table"
 
@@ -165,7 +161,7 @@ private[ui] class StagePagedTable(
 
   override def pageNumberFormField: String = stageTag + ".page"
 
-  val parameterPath = UIUtils.prependBaseUri(request, basePath) + s"/$subPath/?" +
+  val parameterPath = UIUtils.prependBaseUri(basePath) + s"/$subPath/?" +
     parameterOtherTable.mkString("&")
 
   override val dataSource = new StageDataSource(
@@ -292,7 +288,7 @@ private[ui] class StagePagedTable(
         {if (isFairScheduler) {
           <td>
             <a href={"%s/stages/pool?poolname=%s"
-              .format(UIUtils.prependBaseUri(request, basePath), data.schedulingPool)}>
+              .format(UIUtils.prependBaseUri(basePath), data.schedulingPool)}>
               {data.schedulingPool}
             </a>
           </td>
@@ -350,7 +346,7 @@ private[ui] class StagePagedTable(
   }
 
   private def makeDescription(s: v1.StageData, descriptionOption: Option[String]): Seq[Node] = {
-    val basePathUri = UIUtils.prependBaseUri(request, basePath)
+    val basePathUri = UIUtils.prependBaseUri(basePath)
 
     val killLink = if (killEnabled) {
       val confirm =
diff --git a/core/src/main/scala/org/apache/spark/ui/storage/RDDPage.scala b/core/src/main/scala/org/apache/spark/ui/storage/RDDPage.scala
index 238cd31433660..02cee7f8c5b33 100644
--- a/core/src/main/scala/org/apache/spark/ui/storage/RDDPage.scala
+++ b/core/src/main/scala/org/apache/spark/ui/storage/RDDPage.scala
@@ -23,7 +23,7 @@ import javax.servlet.http.HttpServletRequest
 import scala.xml.{Node, Unparsed}
 
 import org.apache.spark.status.AppStatusStore
-import org.apache.spark.status.api.v1.{ExecutorSummary, RDDDataDistribution, RDDPartitionInfo}
+import org.apache.spark.status.api.v1.{RDDDataDistribution, RDDPartitionInfo}
 import org.apache.spark.ui._
 import org.apache.spark.util.Utils
 
@@ -53,7 +53,7 @@ private[ui] class RDDPage(parent: SparkUITab, store: AppStatusStore) extends Web
     } catch {
       case _: NoSuchElementException =>
         // Rather than crashing, render an "RDD Not Found" page
-        return UIUtils.headerSparkPage(request, "RDD Not Found", Seq.empty[Node], parent)
+        return UIUtils.headerSparkPage("RDD Not Found", Seq.empty[Node], parent)
     }
 
     // Worker table
@@ -72,12 +72,11 @@ private[ui] class RDDPage(parent: SparkUITab, store: AppStatusStore) extends Web
     }
     val blockTableHTML = try {
       val _blockTable = new BlockPagedTable(
-        UIUtils.prependBaseUri(request, parent.basePath) + s"/storage/rdd/?id=${rddId}",
+        UIUtils.prependBaseUri(parent.basePath) + s"/storage/rdd/?id=${rddId}",
         rddStorageInfo.partitions.get,
         blockPageSize,
         blockSortColumn,
-        blockSortDesc,
-        store.executorList(true))
+        blockSortDesc)
       _blockTable.table(page)
     } catch {
       case e @ (_ : IllegalArgumentException | _ : IndexOutOfBoundsException) =>
@@ -145,8 +144,7 @@ private[ui] class RDDPage(parent: SparkUITab, store: AppStatusStore) extends Web
         {blockTableHTML ++ jsForScrollingDownToBlockTable}
       </div>;
 
-    UIUtils.headerSparkPage(
-      request, "RDD Storage Info for " + rddStorageInfo.name, content, parent)
+    UIUtils.headerSparkPage("RDD Storage Info for " + rddStorageInfo.name, content, parent)
   }
 
   /** Header fields for the worker table */
@@ -184,8 +182,7 @@ private[ui] class BlockDataSource(
     rddPartitions: Seq[RDDPartitionInfo],
     pageSize: Int,
     sortColumn: String,
-    desc: Boolean,
-    executorIdToAddress: Map[String, String]) extends PagedDataSource[BlockTableRowData](pageSize) {
+    desc: Boolean) extends PagedDataSource[BlockTableRowData](pageSize) {
 
   private val data = rddPartitions.map(blockRow).sorted(ordering(sortColumn, desc))
 
@@ -201,10 +198,7 @@ private[ui] class BlockDataSource(
       rddPartition.storageLevel,
       rddPartition.memoryUsed,
       rddPartition.diskUsed,
-      rddPartition.executors
-        .map { id => executorIdToAddress.get(id).getOrElse(id) }
-        .sorted
-        .mkString(" "))
+      rddPartition.executors.mkString(" "))
   }
 
   /**
@@ -232,8 +226,7 @@ private[ui] class BlockPagedTable(
     rddPartitions: Seq[RDDPartitionInfo],
     pageSize: Int,
     sortColumn: String,
-    desc: Boolean,
-    executorSummaries: Seq[ExecutorSummary]) extends PagedTable[BlockTableRowData] {
+    desc: Boolean) extends PagedTable[BlockTableRowData] {
 
   override def tableId: String = "rdd-storage-by-block-table"
 
@@ -250,8 +243,7 @@ private[ui] class BlockPagedTable(
     rddPartitions,
     pageSize,
     sortColumn,
-    desc,
-    executorSummaries.map { ex => (ex.id, ex.hostPort) }.toMap)
+    desc)
 
   override def pageLink(page: Int): String = {
     val encodedSortColumn = URLEncoder.encode(sortColumn, "UTF-8")
diff --git a/core/src/main/scala/org/apache/spark/ui/storage/StoragePage.scala b/core/src/main/scala/org/apache/spark/ui/storage/StoragePage.scala
index 3eb546e336e99..68d946574a37b 100644
--- a/core/src/main/scala/org/apache/spark/ui/storage/StoragePage.scala
+++ b/core/src/main/scala/org/apache/spark/ui/storage/StoragePage.scala
@@ -31,14 +31,11 @@ import org.apache.spark.util.Utils
 private[ui] class StoragePage(parent: SparkUITab, store: AppStatusStore) extends WebUIPage("") {
 
   def render(request: HttpServletRequest): Seq[Node] = {
-    val content = rddTable(request, store.rddList()) ++
-      receiverBlockTables(store.streamBlocksList())
-    UIUtils.headerSparkPage(request, "Storage", content, parent)
+    val content = rddTable(store.rddList()) ++ receiverBlockTables(store.streamBlocksList())
+    UIUtils.headerSparkPage("Storage", content, parent)
   }
 
-  private[storage] def rddTable(
-      request: HttpServletRequest,
-      rdds: Seq[v1.RDDStorageInfo]): Seq[Node] = {
+  private[storage] def rddTable(rdds: Seq[v1.RDDStorageInfo]): Seq[Node] = {
     if (rdds.isEmpty) {
       // Don't show the rdd table if there is no RDD persisted.
       Nil
@@ -52,11 +49,7 @@ private[ui] class StoragePage(parent: SparkUITab, store: AppStatusStore) extends
           </h4>
         </span>
         <div class="aggregated-rdds collapsible-table">
-          {UIUtils.listingTable(
-            rddHeader,
-            rddRow(request, _: v1.RDDStorageInfo),
-            rdds,
-            id = Some("storage-by-rdd-table"))}
+          {UIUtils.listingTable(rddHeader, rddRow, rdds, id = Some("storage-by-rdd-table"))}
         </div>
       </div>
     }
@@ -73,13 +66,12 @@ private[ui] class StoragePage(parent: SparkUITab, store: AppStatusStore) extends
     "Size on Disk")
 
   /** Render an HTML row representing an RDD */
-  private def rddRow(request: HttpServletRequest, rdd: v1.RDDStorageInfo): Seq[Node] = {
+  private def rddRow(rdd: v1.RDDStorageInfo): Seq[Node] = {
     // scalastyle:off
     <tr>
       <td>{rdd.id}</td>
       <td>
-        <a href={"%s/storage/rdd?id=%s".format(
-          UIUtils.prependBaseUri(request, parent.basePath), rdd.id)}>
+        <a href={"%s/storage/rdd?id=%s".format(UIUtils.prependBaseUri(parent.basePath), rdd.id)}>
           {rdd.name}
         </a>
       </td>
diff --git a/core/src/main/scala/org/apache/spark/util/AccumulatorV2.scala b/core/src/main/scala/org/apache/spark/util/AccumulatorV2.scala
index 3b469a69437b9..f4a736d6d439a 100644
--- a/core/src/main/scala/org/apache/spark/util/AccumulatorV2.scala
+++ b/core/src/main/scala/org/apache/spark/util/AccumulatorV2.scala
@@ -24,7 +24,6 @@ import java.util.concurrent.ConcurrentHashMap
 import java.util.concurrent.atomic.AtomicLong
 
 import org.apache.spark.{InternalAccumulator, SparkContext, TaskContext}
-import org.apache.spark.internal.Logging
 import org.apache.spark.scheduler.AccumulableInfo
 
 private[spark] case class AccumulatorMetadata(
@@ -212,7 +211,7 @@ abstract class AccumulatorV2[IN, OUT] extends Serializable {
 /**
  * An internal class used to track accumulators by Spark itself.
  */
-private[spark] object AccumulatorContext extends Logging {
+private[spark] object AccumulatorContext {
 
   /**
    * This global map holds the original accumulator objects that are created on the driver.
@@ -259,16 +258,13 @@ private[spark] object AccumulatorContext extends Logging {
    * Returns the [[AccumulatorV2]] registered with the given ID, if any.
    */
   def get(id: Long): Option[AccumulatorV2[_, _]] = {
-    val ref = originals.get(id)
-    if (ref eq null) {
-      None
-    } else {
-      // Since we are storing weak references, warn when the underlying data is not valid.
+    Option(originals.get(id)).map { ref =>
+      // Since we are storing weak references, we must check whether the underlying data is valid.
       val acc = ref.get
       if (acc eq null) {
-        logWarning(s"Attempted to access garbage collected accumulator $id")
+        throw new IllegalStateException(s"Attempted to access garbage collected accumulator $id")
       }
-      Option(acc)
+      acc
     }
   }
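
The get(id) change above swaps a warn-and-return-None lookup for one that throws once a registered accumulator has been garbage collected; both behaviours sit on the same weak-reference registry. A small illustrative sketch of that registry shape (not Spark's API):

    import java.lang.ref.WeakReference
    import java.util.concurrent.ConcurrentHashMap

    class WeakRegistry[T <: AnyRef] {
      private val originals = new ConcurrentHashMap[Long, WeakReference[T]]()

      def register(id: Long, value: T): Unit =
        originals.putIfAbsent(id, new WeakReference[T](value))

      // Returns None if the id is unknown *or* the referent was collected.
      def get(id: Long): Option[T] = {
        val ref = originals.get(id)
        if (ref eq null) None else Option(ref.get)
      }
    }
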
 
@@ -294,8 +290,7 @@ class LongAccumulator extends AccumulatorV2[jl.Long, jl.Long] {
   private var _count = 0L
 
   /**
-   * Returns false if this accumulator has had any values added to it or the sum is non-zero.
-   *
+   * Adds v to the accumulator, i.e. increment sum by v and count by 1.
    * @since 2.0.0
    */
   override def isZero: Boolean = _sum == 0L && _count == 0
@@ -373,9 +368,6 @@ class DoubleAccumulator extends AccumulatorV2[jl.Double, jl.Double] {
   private var _sum = 0.0
   private var _count = 0L
 
-  /**
-   * Returns false if this accumulator has had any values added to it or the sum is non-zero.
-   */
   override def isZero: Boolean = _sum == 0.0 && _count == 0
 
   override def copy(): DoubleAccumulator = {
@@ -449,9 +441,6 @@ class DoubleAccumulator extends AccumulatorV2[jl.Double, jl.Double] {
 class CollectionAccumulator[T] extends AccumulatorV2[T, java.util.List[T]] {
   private val _list: java.util.List[T] = Collections.synchronizedList(new ArrayList[T]())
 
-  /**
-   * Returns false if this accumulator instance has any values in it.
-   */
   override def isZero: Boolean = _list.isEmpty
 
   override def copyAndReset(): CollectionAccumulator[T] = new CollectionAccumulator
@@ -490,9 +479,7 @@ class LegacyAccumulatorWrapper[R, T](
     param: org.apache.spark.AccumulableParam[R, T]) extends AccumulatorV2[T, R] {
   private[spark] var _value = initialValue  // Current value on driver
 
-  @transient private lazy val _zero = param.zero(initialValue)
-
-  override def isZero: Boolean = _value.asInstanceOf[AnyRef].eq(_zero.asInstanceOf[AnyRef])
+  override def isZero: Boolean = _value == param.zero(initialValue)
 
   override def copy(): LegacyAccumulatorWrapper[R, T] = {
     val acc = new LegacyAccumulatorWrapper(initialValue, param)
@@ -501,7 +488,7 @@ class LegacyAccumulatorWrapper[R, T](
   }
 
   override def reset(): Unit = {
-    _value = _zero
+    _value = param.zero(initialValue)
   }
 
   override def add(v: T): Unit = _value = param.addAccumulator(_value, v)
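
The isZero/reset change above is really about when param.zero(initialValue) gets evaluated: the removed code caches it once (lazily) and compares by reference, while the restored code recomputes it on every call and relies on ==. A toy illustration with a made-up param whose zero is a fresh allocation with no equals override:

    // Hypothetical zero value with identity equality only.
    class Buffer
    class BufferParam { def zero(init: Buffer): Buffer = new Buffer }

    val param = new BufferParam
    val initial = new Buffer

    lazy val cachedZero = param.zero(initial)
    val value: Buffer = cachedZero

    println(value eq cachedZero)           // true: same cached object
    println(value == param.zero(initial))  // false: a fresh Buffer each call
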
diff --git a/core/src/main/scala/org/apache/spark/util/CommandLineUtils.scala b/core/src/main/scala/org/apache/spark/util/CommandLineUtils.scala
index 4b6602b50aa1c..d73901686b705 100644
--- a/core/src/main/scala/org/apache/spark/util/CommandLineUtils.scala
+++ b/core/src/main/scala/org/apache/spark/util/CommandLineUtils.scala
@@ -33,14 +33,24 @@ private[spark] trait CommandLineUtils {
   private[spark] var printStream: PrintStream = System.err
 
   // scalastyle:off println
-  private[spark] def printMessage(str: String): Unit = printStream.println(str)
-  // scalastyle:on println
+
+  private[spark] def printWarning(str: String): Unit = printStream.println("Warning: " + str)
 
   private[spark] def printErrorAndExit(str: String): Unit = {
-    printMessage("Error: " + str)
-    printMessage("Run with --help for usage help or --verbose for debug output")
+    printStream.println("Error: " + str)
+    printStream.println("Run with --help for usage help or --verbose for debug output")
     exitFn(1)
   }
 
+  // scalastyle:on println
+
+  private[spark] def parseSparkConfProperty(pair: String): (String, String) = {
+    pair.split("=", 2).toSeq match {
+      case Seq(k, v) => (k, v)
+      case _ => printErrorAndExit(s"Spark config without '=': $pair")
+        throw new SparkException(s"Spark config without '=': $pair")
+    }
+  }
+
   def main(args: Array[String]): Unit
 }
diff --git a/core/src/main/scala/org/apache/spark/util/CompletionIterator.scala b/core/src/main/scala/org/apache/spark/util/CompletionIterator.scala
index 21acaa95c5645..31d230d0fec8e 100644
--- a/core/src/main/scala/org/apache/spark/util/CompletionIterator.scala
+++ b/core/src/main/scala/org/apache/spark/util/CompletionIterator.scala
@@ -22,7 +22,9 @@ package org.apache.spark.util
  * through all the elements.
  */
 private[spark]
+// scalastyle:off
 abstract class CompletionIterator[ +A, +I <: Iterator[A]](sub: I) extends Iterator[A] {
+// scalastyle:on
 
   private[this] var completed = false
   def next(): A = sub.next()
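
CompletionIterator's contract is to run a completion callback exactly once, after the wrapped iterator is exhausted. A minimal sketch of the idea (a simplified stand-in, not Spark's implementation):

    class OnComplete[A](sub: Iterator[A], completion: () => Unit) extends Iterator[A] {
      private var completed = false
      def next(): A = sub.next()
      def hasNext: Boolean = {
        val r = sub.hasNext
        if (!r && !completed) {
          completed = true
          completion()  // e.g. release a reader or free a buffer
        }
        r
      }
    }

    // new OnComplete(Iterator(1, 2, 3), () => println("done")).foreach(println)
    // prints 1, 2, 3 and then "done"
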
diff --git a/core/src/main/scala/org/apache/spark/util/EventLoop.scala b/core/src/main/scala/org/apache/spark/util/EventLoop.scala
index 651ea4996f6cb..3ea9139e11027 100644
--- a/core/src/main/scala/org/apache/spark/util/EventLoop.scala
+++ b/core/src/main/scala/org/apache/spark/util/EventLoop.scala
@@ -37,8 +37,7 @@ private[spark] abstract class EventLoop[E](name: String) extends Logging {
 
   private val stopped = new AtomicBoolean(false)
 
-  // Exposed for testing.
-  private[spark] val eventThread = new Thread(name) {
+  private val eventThread = new Thread(name) {
     setDaemon(true)
 
     override def run(): Unit = {
diff --git a/core/src/main/scala/org/apache/spark/util/JsonProtocol.scala b/core/src/main/scala/org/apache/spark/util/JsonProtocol.scala
index 50c6461373dee..ff83301d631c4 100644
--- a/core/src/main/scala/org/apache/spark/util/JsonProtocol.scala
+++ b/core/src/main/scala/org/apache/spark/util/JsonProtocol.scala
@@ -48,7 +48,7 @@ import org.apache.spark.storage._
  * To ensure that we provide these guarantees, follow these rules when modifying these methods:
  *
  *  - Never delete any JSON fields.
- *  - Any new JSON fields should be optional; use `jsonOption` when reading these fields
+ *  - Any new JSON fields should be optional; use `Utils.jsonOption` when reading these fields
  *    in `*FromJson` methods.
  */
 private[spark] object JsonProtocol {
@@ -407,10 +407,8 @@ private[spark] object JsonProtocol {
         ("Exit Caused By App" -> exitCausedByApp) ~
         ("Loss Reason" -> reason.map(_.toString))
       case taskKilled: TaskKilled =>
-        val accumUpdates = JArray(taskKilled.accumUpdates.map(accumulableInfoToJson).toList)
-        ("Kill Reason" -> taskKilled.reason) ~
-        ("Accumulator Updates" -> accumUpdates)
-      case _ => emptyJson
+        ("Kill Reason" -> taskKilled.reason)
+      case _ => Utils.emptyJson
     }
     ("Reason" -> reason) ~ json
   }
@@ -424,7 +422,7 @@ private[spark] object JsonProtocol {
   def jobResultToJson(jobResult: JobResult): JValue = {
     val result = Utils.getFormattedClassName(jobResult)
     val json = jobResult match {
-      case JobSucceeded => emptyJson
+      case JobSucceeded => Utils.emptyJson
       case jobFailed: JobFailed =>
         JObject("Exception" -> exceptionToJson(jobFailed.exception))
     }
@@ -575,7 +573,7 @@ private[spark] object JsonProtocol {
   def taskStartFromJson(json: JValue): SparkListenerTaskStart = {
     val stageId = (json \ "Stage ID").extract[Int]
     val stageAttemptId =
-      jsonOption(json \ "Stage Attempt ID").map(_.extract[Int]).getOrElse(0)
+      Utils.jsonOption(json \ "Stage Attempt ID").map(_.extract[Int]).getOrElse(0)
     val taskInfo = taskInfoFromJson(json \ "Task Info")
     SparkListenerTaskStart(stageId, stageAttemptId, taskInfo)
   }
@@ -588,7 +586,7 @@ private[spark] object JsonProtocol {
   def taskEndFromJson(json: JValue): SparkListenerTaskEnd = {
     val stageId = (json \ "Stage ID").extract[Int]
     val stageAttemptId =
-      jsonOption(json \ "Stage Attempt ID").map(_.extract[Int]).getOrElse(0)
+      Utils.jsonOption(json \ "Stage Attempt ID").map(_.extract[Int]).getOrElse(0)
     val taskType = (json \ "Task Type").extract[String]
     val taskEndReason = taskEndReasonFromJson(json \ "Task End Reason")
     val taskInfo = taskInfoFromJson(json \ "Task Info")
@@ -599,11 +597,11 @@ private[spark] object JsonProtocol {
   def jobStartFromJson(json: JValue): SparkListenerJobStart = {
     val jobId = (json \ "Job ID").extract[Int]
     val submissionTime =
-      jsonOption(json \ "Submission Time").map(_.extract[Long]).getOrElse(-1L)
+      Utils.jsonOption(json \ "Submission Time").map(_.extract[Long]).getOrElse(-1L)
     val stageIds = (json \ "Stage IDs").extract[List[JValue]].map(_.extract[Int])
     val properties = propertiesFromJson(json \ "Properties")
     // The "Stage Infos" field was added in Spark 1.2.0
-    val stageInfos = jsonOption(json \ "Stage Infos")
+    val stageInfos = Utils.jsonOption(json \ "Stage Infos")
       .map(_.extract[Seq[JValue]].map(stageInfoFromJson)).getOrElse {
         stageIds.map { id =>
           new StageInfo(id, 0, "unknown", 0, Seq.empty, Seq.empty, "unknown")
@@ -615,7 +613,7 @@ private[spark] object JsonProtocol {
   def jobEndFromJson(json: JValue): SparkListenerJobEnd = {
     val jobId = (json \ "Job ID").extract[Int]
     val completionTime =
-      jsonOption(json \ "Completion Time").map(_.extract[Long]).getOrElse(-1L)
+      Utils.jsonOption(json \ "Completion Time").map(_.extract[Long]).getOrElse(-1L)
     val jobResult = jobResultFromJson(json \ "Job Result")
     SparkListenerJobEnd(jobId, completionTime, jobResult)
   }
@@ -632,15 +630,15 @@ private[spark] object JsonProtocol {
   def blockManagerAddedFromJson(json: JValue): SparkListenerBlockManagerAdded = {
     val blockManagerId = blockManagerIdFromJson(json \ "Block Manager ID")
     val maxMem = (json \ "Maximum Memory").extract[Long]
-    val time = jsonOption(json \ "Timestamp").map(_.extract[Long]).getOrElse(-1L)
-    val maxOnHeapMem = jsonOption(json \ "Maximum Onheap Memory").map(_.extract[Long])
-    val maxOffHeapMem = jsonOption(json \ "Maximum Offheap Memory").map(_.extract[Long])
+    val time = Utils.jsonOption(json \ "Timestamp").map(_.extract[Long]).getOrElse(-1L)
+    val maxOnHeapMem = Utils.jsonOption(json \ "Maximum Onheap Memory").map(_.extract[Long])
+    val maxOffHeapMem = Utils.jsonOption(json \ "Maximum Offheap Memory").map(_.extract[Long])
     SparkListenerBlockManagerAdded(time, blockManagerId, maxMem, maxOnHeapMem, maxOffHeapMem)
   }
 
   def blockManagerRemovedFromJson(json: JValue): SparkListenerBlockManagerRemoved = {
     val blockManagerId = blockManagerIdFromJson(json \ "Block Manager ID")
-    val time = jsonOption(json \ "Timestamp").map(_.extract[Long]).getOrElse(-1L)
+    val time = Utils.jsonOption(json \ "Timestamp").map(_.extract[Long]).getOrElse(-1L)
     SparkListenerBlockManagerRemoved(time, blockManagerId)
   }
 
@@ -650,11 +648,11 @@ private[spark] object JsonProtocol {
 
   def applicationStartFromJson(json: JValue): SparkListenerApplicationStart = {
     val appName = (json \ "App Name").extract[String]
-    val appId = jsonOption(json \ "App ID").map(_.extract[String])
+    val appId = Utils.jsonOption(json \ "App ID").map(_.extract[String])
     val time = (json \ "Timestamp").extract[Long]
     val sparkUser = (json \ "User").extract[String]
-    val appAttemptId = jsonOption(json \ "App Attempt ID").map(_.extract[String])
-    val driverLogs = jsonOption(json \ "Driver Logs").map(mapFromJson)
+    val appAttemptId = Utils.jsonOption(json \ "App Attempt ID").map(_.extract[String])
+    val driverLogs = Utils.jsonOption(json \ "Driver Logs").map(mapFromJson)
     SparkListenerApplicationStart(appName, appId, time, sparkUser, appAttemptId, driverLogs)
   }
 
@@ -705,19 +703,19 @@ private[spark] object JsonProtocol {
 
   def stageInfoFromJson(json: JValue): StageInfo = {
     val stageId = (json \ "Stage ID").extract[Int]
-    val attemptId = jsonOption(json \ "Stage Attempt ID").map(_.extract[Int]).getOrElse(0)
+    val attemptId = Utils.jsonOption(json \ "Stage Attempt ID").map(_.extract[Int]).getOrElse(0)
     val stageName = (json \ "Stage Name").extract[String]
     val numTasks = (json \ "Number of Tasks").extract[Int]
     val rddInfos = (json \ "RDD Info").extract[List[JValue]].map(rddInfoFromJson)
-    val parentIds = jsonOption(json \ "Parent IDs")
+    val parentIds = Utils.jsonOption(json \ "Parent IDs")
       .map { l => l.extract[List[JValue]].map(_.extract[Int]) }
       .getOrElse(Seq.empty)
-    val details = jsonOption(json \ "Details").map(_.extract[String]).getOrElse("")
-    val submissionTime = jsonOption(json \ "Submission Time").map(_.extract[Long])
-    val completionTime = jsonOption(json \ "Completion Time").map(_.extract[Long])
-    val failureReason = jsonOption(json \ "Failure Reason").map(_.extract[String])
+    val details = Utils.jsonOption(json \ "Details").map(_.extract[String]).getOrElse("")
+    val submissionTime = Utils.jsonOption(json \ "Submission Time").map(_.extract[Long])
+    val completionTime = Utils.jsonOption(json \ "Completion Time").map(_.extract[Long])
+    val failureReason = Utils.jsonOption(json \ "Failure Reason").map(_.extract[String])
     val accumulatedValues = {
-      jsonOption(json \ "Accumulables").map(_.extract[List[JValue]]) match {
+      Utils.jsonOption(json \ "Accumulables").map(_.extract[List[JValue]]) match {
         case Some(values) => values.map(accumulableInfoFromJson)
         case None => Seq.empty[AccumulableInfo]
       }
@@ -737,17 +735,17 @@ private[spark] object JsonProtocol {
   def taskInfoFromJson(json: JValue): TaskInfo = {
     val taskId = (json \ "Task ID").extract[Long]
     val index = (json \ "Index").extract[Int]
-    val attempt = jsonOption(json \ "Attempt").map(_.extract[Int]).getOrElse(1)
+    val attempt = Utils.jsonOption(json \ "Attempt").map(_.extract[Int]).getOrElse(1)
     val launchTime = (json \ "Launch Time").extract[Long]
     val executorId = (json \ "Executor ID").extract[String].intern()
     val host = (json \ "Host").extract[String].intern()
     val taskLocality = TaskLocality.withName((json \ "Locality").extract[String])
-    val speculative = jsonOption(json \ "Speculative").exists(_.extract[Boolean])
+    val speculative = Utils.jsonOption(json \ "Speculative").exists(_.extract[Boolean])
     val gettingResultTime = (json \ "Getting Result Time").extract[Long]
     val finishTime = (json \ "Finish Time").extract[Long]
     val failed = (json \ "Failed").extract[Boolean]
-    val killed = jsonOption(json \ "Killed").exists(_.extract[Boolean])
-    val accumulables = jsonOption(json \ "Accumulables").map(_.extract[Seq[JValue]]) match {
+    val killed = Utils.jsonOption(json \ "Killed").exists(_.extract[Boolean])
+    val accumulables = Utils.jsonOption(json \ "Accumulables").map(_.extract[Seq[JValue]]) match {
       case Some(values) => values.map(accumulableInfoFromJson)
       case None => Seq.empty[AccumulableInfo]
     }
@@ -764,13 +762,13 @@ private[spark] object JsonProtocol {
 
   def accumulableInfoFromJson(json: JValue): AccumulableInfo = {
     val id = (json \ "ID").extract[Long]
-    val name = jsonOption(json \ "Name").map(_.extract[String])
-    val update = jsonOption(json \ "Update").map { v => accumValueFromJson(name, v) }
-    val value = jsonOption(json \ "Value").map { v => accumValueFromJson(name, v) }
-    val internal = jsonOption(json \ "Internal").exists(_.extract[Boolean])
+    val name = Utils.jsonOption(json \ "Name").map(_.extract[String])
+    val update = Utils.jsonOption(json \ "Update").map { v => accumValueFromJson(name, v) }
+    val value = Utils.jsonOption(json \ "Value").map { v => accumValueFromJson(name, v) }
+    val internal = Utils.jsonOption(json \ "Internal").exists(_.extract[Boolean])
     val countFailedValues =
-      jsonOption(json \ "Count Failed Values").exists(_.extract[Boolean])
-    val metadata = jsonOption(json \ "Metadata").map(_.extract[String])
+      Utils.jsonOption(json \ "Count Failed Values").exists(_.extract[Boolean])
+    val metadata = Utils.jsonOption(json \ "Metadata").map(_.extract[String])
     new AccumulableInfo(id, name, update, value, internal, countFailedValues, metadata)
   }
 
@@ -823,49 +821,49 @@ private[spark] object JsonProtocol {
     metrics.incDiskBytesSpilled((json \ "Disk Bytes Spilled").extract[Long])
 
     // Shuffle read metrics
-    jsonOption(json \ "Shuffle Read Metrics").foreach { readJson =>
+    Utils.jsonOption(json \ "Shuffle Read Metrics").foreach { readJson =>
       val readMetrics = metrics.createTempShuffleReadMetrics()
       readMetrics.incRemoteBlocksFetched((readJson \ "Remote Blocks Fetched").extract[Int])
       readMetrics.incLocalBlocksFetched((readJson \ "Local Blocks Fetched").extract[Int])
       readMetrics.incRemoteBytesRead((readJson \ "Remote Bytes Read").extract[Long])
-      jsonOption(readJson \ "Remote Bytes Read To Disk")
+      Utils.jsonOption(readJson \ "Remote Bytes Read To Disk")
         .foreach { v => readMetrics.incRemoteBytesReadToDisk(v.extract[Long])}
       readMetrics.incLocalBytesRead(
-        jsonOption(readJson \ "Local Bytes Read").map(_.extract[Long]).getOrElse(0L))
+        Utils.jsonOption(readJson \ "Local Bytes Read").map(_.extract[Long]).getOrElse(0L))
       readMetrics.incFetchWaitTime((readJson \ "Fetch Wait Time").extract[Long])
       readMetrics.incRecordsRead(
-        jsonOption(readJson \ "Total Records Read").map(_.extract[Long]).getOrElse(0L))
+        Utils.jsonOption(readJson \ "Total Records Read").map(_.extract[Long]).getOrElse(0L))
       metrics.mergeShuffleReadMetrics()
     }
 
     // Shuffle write metrics
     // TODO: Drop the redundant "Shuffle" since it's inconsistent with related classes.
-    jsonOption(json \ "Shuffle Write Metrics").foreach { writeJson =>
+    Utils.jsonOption(json \ "Shuffle Write Metrics").foreach { writeJson =>
       val writeMetrics = metrics.shuffleWriteMetrics
       writeMetrics.incBytesWritten((writeJson \ "Shuffle Bytes Written").extract[Long])
       writeMetrics.incRecordsWritten(
-        jsonOption(writeJson \ "Shuffle Records Written").map(_.extract[Long]).getOrElse(0L))
+        Utils.jsonOption(writeJson \ "Shuffle Records Written").map(_.extract[Long]).getOrElse(0L))
       writeMetrics.incWriteTime((writeJson \ "Shuffle Write Time").extract[Long])
     }
 
     // Output metrics
-    jsonOption(json \ "Output Metrics").foreach { outJson =>
+    Utils.jsonOption(json \ "Output Metrics").foreach { outJson =>
       val outputMetrics = metrics.outputMetrics
       outputMetrics.setBytesWritten((outJson \ "Bytes Written").extract[Long])
       outputMetrics.setRecordsWritten(
-        jsonOption(outJson \ "Records Written").map(_.extract[Long]).getOrElse(0L))
+        Utils.jsonOption(outJson \ "Records Written").map(_.extract[Long]).getOrElse(0L))
     }
 
     // Input metrics
-    jsonOption(json \ "Input Metrics").foreach { inJson =>
+    Utils.jsonOption(json \ "Input Metrics").foreach { inJson =>
       val inputMetrics = metrics.inputMetrics
       inputMetrics.incBytesRead((inJson \ "Bytes Read").extract[Long])
       inputMetrics.incRecordsRead(
-        jsonOption(inJson \ "Records Read").map(_.extract[Long]).getOrElse(0L))
+        Utils.jsonOption(inJson \ "Records Read").map(_.extract[Long]).getOrElse(0L))
     }
 
     // Updated blocks
-    jsonOption(json \ "Updated Blocks").foreach { blocksJson =>
+    Utils.jsonOption(json \ "Updated Blocks").foreach { blocksJson =>
       metrics.setUpdatedBlockStatuses(blocksJson.extract[List[JValue]].map { blockJson =>
         val id = BlockId((blockJson \ "Block ID").extract[String])
         val status = blockStatusFromJson(blockJson \ "Status")
@@ -899,7 +897,7 @@ private[spark] object JsonProtocol {
         val shuffleId = (json \ "Shuffle ID").extract[Int]
         val mapId = (json \ "Map ID").extract[Int]
         val reduceId = (json \ "Reduce ID").extract[Int]
-        val message = jsonOption(json \ "Message").map(_.extract[String])
+        val message = Utils.jsonOption(json \ "Message").map(_.extract[String])
         new FetchFailed(blockManagerAddress, shuffleId, mapId, reduceId,
           message.getOrElse("Unknown reason"))
       case `exceptionFailure` =>
@@ -907,9 +905,9 @@ private[spark] object JsonProtocol {
         val description = (json \ "Description").extract[String]
         val stackTrace = stackTraceFromJson(json \ "Stack Trace")
         val fullStackTrace =
-          jsonOption(json \ "Full Stack Trace").map(_.extract[String]).orNull
+          Utils.jsonOption(json \ "Full Stack Trace").map(_.extract[String]).orNull
         // Fallback on getting accumulator updates from TaskMetrics, which was logged in Spark 1.x
-        val accumUpdates = jsonOption(json \ "Accumulator Updates")
+        val accumUpdates = Utils.jsonOption(json \ "Accumulator Updates")
           .map(_.extract[List[JValue]].map(accumulableInfoFromJson))
           .getOrElse(taskMetricsFromJson(json \ "Metrics").accumulators().map(acc => {
             acc.toInfo(Some(acc.value), None)
@@ -917,24 +915,21 @@ private[spark] object JsonProtocol {
         ExceptionFailure(className, description, stackTrace, fullStackTrace, None, accumUpdates)
       case `taskResultLost` => TaskResultLost
       case `taskKilled` =>
-        val killReason = jsonOption(json \ "Kill Reason")
+        val killReason = Utils.jsonOption(json \ "Kill Reason")
           .map(_.extract[String]).getOrElse("unknown reason")
-        val accumUpdates = jsonOption(json \ "Accumulator Updates")
-          .map(_.extract[List[JValue]].map(accumulableInfoFromJson))
-          .getOrElse(Seq[AccumulableInfo]())
-        TaskKilled(killReason, accumUpdates)
+        TaskKilled(killReason)
       case `taskCommitDenied` =>
         // Unfortunately, the `TaskCommitDenied` message was introduced in 1.3.0 but the JSON
         // de/serialization logic was not added until 1.5.1. To provide backward compatibility
         // for reading those logs, we need to provide default values for all the fields.
-        val jobId = jsonOption(json \ "Job ID").map(_.extract[Int]).getOrElse(-1)
-        val partitionId = jsonOption(json \ "Partition ID").map(_.extract[Int]).getOrElse(-1)
-        val attemptNo = jsonOption(json \ "Attempt Number").map(_.extract[Int]).getOrElse(-1)
+        val jobId = Utils.jsonOption(json \ "Job ID").map(_.extract[Int]).getOrElse(-1)
+        val partitionId = Utils.jsonOption(json \ "Partition ID").map(_.extract[Int]).getOrElse(-1)
+        val attemptNo = Utils.jsonOption(json \ "Attempt Number").map(_.extract[Int]).getOrElse(-1)
         TaskCommitDenied(jobId, partitionId, attemptNo)
       case `executorLostFailure` =>
-        val exitCausedByApp = jsonOption(json \ "Exit Caused By App").map(_.extract[Boolean])
-        val executorId = jsonOption(json \ "Executor ID").map(_.extract[String])
-        val reason = jsonOption(json \ "Loss Reason").map(_.extract[String])
+        val exitCausedByApp = Utils.jsonOption(json \ "Exit Caused By App").map(_.extract[Boolean])
+        val executorId = Utils.jsonOption(json \ "Executor ID").map(_.extract[String])
+        val reason = Utils.jsonOption(json \ "Loss Reason").map(_.extract[String])
         ExecutorLostFailure(
           executorId.getOrElse("Unknown"),
           exitCausedByApp.getOrElse(true),
@@ -973,11 +968,11 @@ private[spark] object JsonProtocol {
   def rddInfoFromJson(json: JValue): RDDInfo = {
     val rddId = (json \ "RDD ID").extract[Int]
     val name = (json \ "Name").extract[String]
-    val scope = jsonOption(json \ "Scope")
+    val scope = Utils.jsonOption(json \ "Scope")
       .map(_.extract[String])
       .map(RDDOperationScope.fromJson)
-    val callsite = jsonOption(json \ "Callsite").map(_.extract[String]).getOrElse("")
-    val parentIds = jsonOption(json \ "Parent IDs")
+    val callsite = Utils.jsonOption(json \ "Callsite").map(_.extract[String]).getOrElse("")
+    val parentIds = Utils.jsonOption(json \ "Parent IDs")
       .map { l => l.extract[List[JValue]].map(_.extract[Int]) }
       .getOrElse(Seq.empty)
     val storageLevel = storageLevelFromJson(json \ "Storage Level")
@@ -1034,7 +1029,7 @@ private[spark] object JsonProtocol {
   }
 
   def propertiesFromJson(json: JValue): Properties = {
-    jsonOption(json).map { value =>
+    Utils.jsonOption(json).map { value =>
       val properties = new Properties
       mapFromJson(json).foreach { case (k, v) => properties.setProperty(k, v) }
       properties
@@ -1063,14 +1058,4 @@ private[spark] object JsonProtocol {
     e
   }
 
-  /** Return an option that translates JNothing to None */
-  private def jsonOption(json: JValue): Option[JValue] = {
-    json match {
-      case JNothing => None
-      case value: JValue => Some(value)
-    }
-  }
-
-  private def emptyJson: JObject = JObject(List[JField]())
-
 }
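
Every Utils.jsonOption(json \ "Field") call above is the same backward-compatibility move: a field missing from an older event log yields JNothing, which becomes None so a default can be supplied. A self-contained json4s sketch of the pattern (assumes the json4s-jackson module, which Spark already depends on):

    import org.json4s._
    import org.json4s.jackson.JsonMethods.parse

    implicit val formats: Formats = DefaultFormats

    def jsonOption(json: JValue): Option[JValue] = json match {
      case JNothing => None
      case value: JValue => Some(value)
    }

    val event = parse("""{"Stage ID": 3}""")
    val stageId = (event \ "Stage ID").extract[Int]                      // 3
    val attempt = jsonOption(event \ "Stage Attempt ID")
      .map(_.extract[Int]).getOrElse(0)                                  // 0: field absent
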
diff --git a/core/src/main/scala/org/apache/spark/util/ListenerBus.scala b/core/src/main/scala/org/apache/spark/util/ListenerBus.scala
index d4474a90b26f1..76a56298aaebc 100644
--- a/core/src/main/scala/org/apache/spark/util/ListenerBus.scala
+++ b/core/src/main/scala/org/apache/spark/util/ListenerBus.scala
@@ -60,15 +60,6 @@ private[spark] trait ListenerBus[L <: AnyRef, E] extends Logging {
     }
   }
 
-  /**
-   * This can be overriden by subclasses if there is any extra cleanup to do when removing a
-   * listener.  In particular AsyncEventQueues can clean up queues in the LiveListenerBus.
-   */
-  def removeListenerOnError(listener: L): Unit = {
-    removeListener(listener)
-  }
-
-
   /**
    * Post the event to all registered listeners. The `postToAll` caller should guarantee calling
    * `postToAll` in the same thread for all events.
@@ -89,17 +80,8 @@ private[spark] trait ListenerBus[L <: AnyRef, E] extends Logging {
       }
       try {
         doPostEvent(listener, event)
-        if (Thread.interrupted()) {
-          // We want to throw the InterruptedException right away so we can associate the interrupt
-          // with this listener, as opposed to waiting for a queue.take() etc. to detect it.
-          throw new InterruptedException()
-        }
       } catch {
-        case ie: InterruptedException =>
-          logError(s"Interrupted while posting to ${Utils.getFormattedClassName(listener)}.  " +
-            s"Removing that listener.", ie)
-          removeListenerOnError(listener)
-        case NonFatal(e) if !isIgnorableException(e) =>
+        case NonFatal(e) =>
           logError(s"Listener ${Utils.getFormattedClassName(listener)} threw an exception", e)
       } finally {
         if (maybeTimerContext != null) {
@@ -115,9 +97,6 @@ private[spark] trait ListenerBus[L <: AnyRef, E] extends Logging {
    */
   protected def doPostEvent(listener: L, event: E): Unit
 
-  /** Allows bus implementations to prevent error logging for certain exceptions. */
-  protected def isIgnorableException(e: Throwable): Boolean = false
-
   private[spark] def findListenersByClass[T <: L : ClassTag](): Seq[T] = {
     val c = implicitly[ClassTag[T]].runtimeClass
     listeners.asScala.filter(_.getClass == c).map(_.asInstanceOf[T]).toSeq
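
With the interrupt handling removed, the bus is back to its simpler guarantee: a listener that throws is logged and skipped so the remaining listeners still receive the event. The shape of that loop in isolation (the listener trait and println logging are placeholders):

    import scala.util.control.NonFatal

    trait Listener { def onEvent(event: String): Unit }

    def postToAll(listeners: Seq[Listener], event: String): Unit =
      listeners.foreach { l =>
        try {
          l.onEvent(event)
        } catch {
          // One failing listener must not prevent delivery to the others.
          case NonFatal(e) => println(s"Listener ${l.getClass.getName} threw: $e")
        }
      }
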
diff --git a/core/src/main/scala/org/apache/spark/util/ShutdownHookManager.scala b/core/src/main/scala/org/apache/spark/util/ShutdownHookManager.scala
index b702838fa257f..4001fac3c3d5a 100644
--- a/core/src/main/scala/org/apache/spark/util/ShutdownHookManager.scala
+++ b/core/src/main/scala/org/apache/spark/util/ShutdownHookManager.scala
@@ -143,7 +143,7 @@ private[spark] object ShutdownHookManager extends Logging {
   }
 
   /**
-   * Adds a shutdown hook with the given priority. Hooks with higher priority values run
+   * Adds a shutdown hook with the given priority. Hooks with lower priority values run
    * first.
    *
    * @param hook The code to run during shutdown.
diff --git a/core/src/main/scala/org/apache/spark/util/SparkFatalException.scala b/core/src/main/scala/org/apache/spark/util/SparkFatalException.scala
deleted file mode 100644
index 1aa2009fa9b5b..0000000000000
--- a/core/src/main/scala/org/apache/spark/util/SparkFatalException.scala
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.spark.util
-
-/**
- * SPARK-24294: To bypass scala bug: https://github.com/scala/bug/issues/9554, we catch
- * fatal throwable in {@link scala.concurrent.Future}'s body, and re-throw
- * SparkFatalException, which wraps the fatal throwable inside.
- * Note that SparkFatalException should only be thrown from a {@link scala.concurrent.Future},
- * which is run by using ThreadUtils.awaitResult. ThreadUtils.awaitResult will catch
- * it and re-throw the original exception/error.
- */
-private[spark] final class SparkFatalException(val throwable: Throwable) extends Exception
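
The deleted wrapper exists because a fatal throwable raised inside a Future body can otherwise be lost (scala/bug#9554); the convention is to catch it inside the Future, wrap it, and unwrap it on the awaiting side, as ThreadUtils.awaitResult did before this revert. A hedged sketch of that round trip with a stand-in wrapper class:

    import scala.concurrent.{Await, ExecutionContext, Future}
    import scala.concurrent.duration._
    import scala.util.control.NonFatal

    // Stand-in for the wrapper class removed above.
    final class FatalWrapper(val throwable: Throwable) extends Exception

    implicit val ec: ExecutionContext = ExecutionContext.global

    val f = Future {
      try {
        throw new OutOfMemoryError("boom")   // deliberately constructed for the demo
      } catch {
        case NonFatal(e) => throw e
        case t: Throwable => throw new FatalWrapper(t)  // fatal: wrap instead of losing it
      }
    }

    try {
      Await.result(f, 10.seconds)
    } catch {
      case e: FatalWrapper => println(s"unwrapped: ${e.throwable}")  // the original error
    }
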
diff --git a/core/src/main/scala/org/apache/spark/util/SparkUncaughtExceptionHandler.scala b/core/src/main/scala/org/apache/spark/util/SparkUncaughtExceptionHandler.scala
index 1b34fbde38cd6..e0f5af5250e7f 100644
--- a/core/src/main/scala/org/apache/spark/util/SparkUncaughtExceptionHandler.scala
+++ b/core/src/main/scala/org/apache/spark/util/SparkUncaughtExceptionHandler.scala
@@ -39,15 +39,10 @@ private[spark] class SparkUncaughtExceptionHandler(val exitOnUncaughtException:
       // We may have been called from a shutdown hook. If so, we must not call System.exit().
       // (If we do, we will deadlock.)
       if (!ShutdownHookManager.inShutdown()) {
-        exception match {
-          case _: OutOfMemoryError =>
-            System.exit(SparkExitCode.OOM)
-          case e: SparkFatalException if e.throwable.isInstanceOf[OutOfMemoryError] =>
-            // SPARK-24294: This is defensive code, in case that SparkFatalException is
-            // misused and uncaught.
-            System.exit(SparkExitCode.OOM)
-          case _ if exitOnUncaughtException =>
-            System.exit(SparkExitCode.UNCAUGHT_EXCEPTION)
+        if (exception.isInstanceOf[OutOfMemoryError]) {
+          System.exit(SparkExitCode.OOM)
+        } else if (exitOnUncaughtException) {
+          System.exit(SparkExitCode.UNCAUGHT_EXCEPTION)
         }
       }
     } catch {
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/TestSparkSessionSuite.scala b/core/src/main/scala/org/apache/spark/util/ThreadStackTrace.scala
similarity index 70%
rename from sql/core/src/test/scala/org/apache/spark/sql/test/TestSparkSessionSuite.scala
rename to core/src/main/scala/org/apache/spark/util/ThreadStackTrace.scala
index 4019c6888da98..b1217980faf1f 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/test/TestSparkSessionSuite.scala
+++ b/core/src/main/scala/org/apache/spark/util/ThreadStackTrace.scala
@@ -15,15 +15,17 @@
  * limitations under the License.
  */
 
-package org.apache.spark.sql.test
+package org.apache.spark.util
 
-import org.apache.spark.SparkFunSuite
-import org.apache.spark.sql.SparkSession
+/**
+ * Used for shipping per-thread stacktraces from the executors to driver.
+ */
+private[spark] case class ThreadStackTrace(
+  threadId: Long,
+  threadName: String,
+  threadState: Thread.State,
+  stackTrace: String,
+  blockedByThreadId: Option[Long],
+  blockedByLock: String,
+  holdingLocks: Seq[String])
 
-class TestSparkSessionSuite extends SparkFunSuite {
-  test("default session is set in constructor") {
-    val session = new TestSparkSession()
-    assert(SparkSession.getDefaultSession.contains(session))
-    session.stop()
-  }
-}

diff --git a/core/src/main/scala/org/apache/spark/util/ThreadUtils.scala b/core/src/main/scala/org/apache/spark/util/ThreadUtils.scala
index 165a15c73e7ca..81aaf79db0c13 100644
--- a/core/src/main/scala/org/apache/spark/util/ThreadUtils.scala
+++ b/core/src/main/scala/org/apache/spark/util/ThreadUtils.scala
@@ -200,8 +200,6 @@ private[spark] object ThreadUtils {
       val awaitPermission = null.asInstanceOf[scala.concurrent.CanAwait]
       awaitable.result(atMost)(awaitPermission)
     } catch {
-      case e: SparkFatalException =>
-        throw e.throwable
       // TimeoutException is thrown in the current thread, so no need to wrap the exception.
       case NonFatal(t) if !t.isInstanceOf[TimeoutException] =>
         throw new SparkException("Exception thrown in awaitResult: ", t)
diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala
index f9191a59c1655..5853302973140 100644
--- a/core/src/main/scala/org/apache/spark/util/Utils.scala
+++ b/core/src/main/scala/org/apache/spark/util/Utils.scala
@@ -18,7 +18,6 @@
 package org.apache.spark.util
 
 import java.io._
-import java.lang.{Byte => JByte}
 import java.lang.management.{LockInfo, ManagementFactory, MonitorInfo, ThreadInfo}
 import java.lang.reflect.InvocationTargetException
 import java.math.{MathContext, RoundingMode}
@@ -26,12 +25,12 @@ import java.net._
 import java.nio.ByteBuffer
 import java.nio.channels.{Channels, FileChannel}
 import java.nio.charset.StandardCharsets
-import java.nio.file.Files
-import java.security.SecureRandom
+import java.nio.file.{Files, Paths}
 import java.util.{Locale, Properties, Random, UUID}
 import java.util.concurrent._
 import java.util.concurrent.atomic.AtomicBoolean
 import java.util.zip.GZIPInputStream
+import javax.net.ssl.HttpsURLConnection
 
 import scala.annotation.tailrec
 import scala.collection.JavaConverters._
@@ -45,7 +44,6 @@ import scala.util.matching.Regex
 
 import _root_.io.netty.channel.unix.Errors.NativeIoException
 import com.google.common.cache.{CacheBuilder, CacheLoader, LoadingCache}
-import com.google.common.hash.HashCodes
 import com.google.common.io.{ByteStreams, Files => GFiles}
 import com.google.common.net.InetAddresses
 import org.apache.commons.lang3.SystemUtils
@@ -53,7 +51,9 @@ import org.apache.hadoop.conf.Configuration
 import org.apache.hadoop.fs.{FileSystem, FileUtil, Path}
 import org.apache.hadoop.security.UserGroupInformation
 import org.apache.hadoop.yarn.conf.YarnConfiguration
+import org.apache.log4j.PropertyConfigurator
 import org.eclipse.jetty.util.MultiException
+import org.json4s._
 import org.slf4j.Logger
 
 import org.apache.spark._
@@ -63,7 +63,6 @@ import org.apache.spark.internal.config._
 import org.apache.spark.launcher.SparkLauncher
 import org.apache.spark.network.util.JavaUtils
 import org.apache.spark.serializer.{DeserializationStream, SerializationStream, SerializerInstance}
-import org.apache.spark.status.api.v1.{StackTrace, ThreadStackTrace}
 
 /** CallSite represents a place in user code. It can have a short and a long form. */
 private[spark] case class CallSite(shortForm: String, longForm: String)
@@ -673,6 +672,7 @@ private[spark] object Utils extends Logging {
           logDebug("fetchFile not using security")
           uc = new URL(url).openConnection()
         }
+        Utils.setupSecureURLConnection(uc, securityMgr)
 
         val timeoutMs =
           conf.getTimeAsSeconds("spark.files.fetchTimeout", "60s").toInt * 1000
@@ -810,15 +810,15 @@ private[spark] object Utils extends Logging {
       conf.getenv("SPARK_EXECUTOR_DIRS").split(File.pathSeparator)
     } else if (conf.getenv("SPARK_LOCAL_DIRS") != null) {
       conf.getenv("SPARK_LOCAL_DIRS").split(",")
-    } else if (conf.getenv("MESOS_SANDBOX") != null && !shuffleServiceEnabled) {
+    } else if (conf.getenv("MESOS_DIRECTORY") != null && !shuffleServiceEnabled) {
       // Mesos already creates a directory per Mesos task. Spark should use that directory
       // instead so all temporary files are automatically cleaned up when the Mesos task ends.
       // Note that we don't want this if the shuffle service is enabled because we want to
       // continue to serve shuffle files after the executors that wrote them have already exited.
-      Array(conf.getenv("MESOS_SANDBOX"))
+      Array(conf.getenv("MESOS_DIRECTORY"))
     } else {
-      if (conf.getenv("MESOS_SANDBOX") != null && shuffleServiceEnabled) {
-        logInfo("MESOS_SANDBOX available but not using provided Mesos sandbox because " +
+      if (conf.getenv("MESOS_DIRECTORY") != null && shuffleServiceEnabled) {
+        logInfo("MESOS_DIRECTORY available but not using provided Mesos sandbox because " +
           "spark.shuffle.service.enabled is enabled.")
       }
       // In non-Yarn mode (or for the driver in yarn-client mode), we cannot trust the user
@@ -1017,18 +1017,70 @@ private[spark] object Utils extends Logging {
     " " + (System.currentTimeMillis - startTimeMs) + " ms"
   }
 
+  private def listFilesSafely(file: File): Seq[File] = {
+    if (file.exists()) {
+      val files = file.listFiles()
+      if (files == null) {
+        throw new IOException("Failed to list files for dir: " + file)
+      }
+      files
+    } else {
+      List()
+    }
+  }
+
+  /**
+   * Lists files recursively.
+   */
+  def recursiveList(f: File): Array[File] = {
+    require(f.isDirectory)
+    val current = f.listFiles
+    current ++ current.filter(_.isDirectory).flatMap(recursiveList)
+  }
+
   /**
    * Delete a file or directory and its contents recursively.
    * Don't follow directories if they are symlinks.
    * Throws an exception if deletion is unsuccessful.
    */
-  def deleteRecursively(file: File): Unit = {
+  def deleteRecursively(file: File) {
     if (file != null) {
-      JavaUtils.deleteRecursively(file)
-      ShutdownHookManager.removeShutdownDeleteDir(file)
+      try {
+        if (file.isDirectory && !isSymlink(file)) {
+          var savedIOException: IOException = null
+          for (child <- listFilesSafely(file)) {
+            try {
+              deleteRecursively(child)
+            } catch {
+              // In case of multiple exceptions, only last one will be thrown
+              case ioe: IOException => savedIOException = ioe
+            }
+          }
+          if (savedIOException != null) {
+            throw savedIOException
+          }
+          ShutdownHookManager.removeShutdownDeleteDir(file)
+        }
+      } finally {
+        if (file.delete()) {
+          logTrace(s"${file.getAbsolutePath} has been deleted")
+        } else {
+          // Delete can also fail if the file simply did not exist
+          if (file.exists()) {
+            throw new IOException("Failed to delete: " + file.getAbsolutePath)
+          }
+        }
+      }
     }
   }
 
+  /**
+   * Check to see if file is a symbolic link.
+   */
+  def isSymlink(file: File): Boolean = {
+    return Files.isSymbolicLink(Paths.get(file.toURI))
+  }
+
   /**
    * Determines if a directory contains any files newer than cutoff seconds.
    *
@@ -1776,7 +1828,7 @@ private[spark] object Utils extends Logging {
    * [[scala.collection.Iterator#size]] because it uses a for loop, which is slightly slower
    * in the current version of Scala.
    */
-  def getIteratorSize(iterator: Iterator[_]): Long = {
+  def getIteratorSize[T](iterator: Iterator[T]): Long = {
     var count = 0L
     while (iterator.hasNext) {
       count += 1L
@@ -1823,6 +1875,17 @@ private[spark] object Utils extends Logging {
     obj.getClass.getSimpleName.replace("$", "")
   }
 
+  /** Return an option that translates JNothing to None */
+  def jsonOption(json: JValue): Option[JValue] = {
+    json match {
+      case JNothing => None
+      case value: JValue => Some(value)
+    }
+  }
+
+  /** Return an empty JSON object */
+  def emptyJson: JsonAST.JObject = JObject(List[JField]())
+
   /**
    * Return a Hadoop FileSystem with the scheme encoded in the given path.
    */
@@ -1837,6 +1900,15 @@ private[spark] object Utils extends Logging {
     getHadoopFileSystem(new URI(path), conf)
   }
 
+  /**
+   * Return the absolute path of a file in the given directory.
+   */
+  def getFilePath(dir: File, fileName: String): Path = {
+    assert(dir.isDirectory)
+    val path = new File(dir, fileName).getAbsolutePath
+    new Path(path)
+  }
+
   /**
    * Whether the underlying operating system is Windows.
    */
@@ -1859,6 +1931,13 @@ private[spark] object Utils extends Logging {
     sys.env.contains("SPARK_TESTING") || sys.props.contains("spark.testing")
   }
 
+  /**
+   * Strip the directory from a path name
+   */
+  def stripDirectory(path: String): String = {
+    new File(path).getName
+  }
+
   /**
    * Terminates a process waiting for at most the specified duration.
    *
@@ -2089,22 +2168,7 @@ private[spark] object Utils extends Logging {
     // We need to filter out null values here because dumpAllThreads() may return null array
     // elements for threads that are dead / don't exist.
     val threadInfos = ManagementFactory.getThreadMXBean.dumpAllThreads(true, true).filter(_ != null)
-    threadInfos.sortWith { case (threadTrace1, threadTrace2) =>
-        val v1 = if (threadTrace1.getThreadName.contains("Executor task launch")) 1 else 0
-        val v2 = if (threadTrace2.getThreadName.contains("Executor task launch")) 1 else 0
-        if (v1 == v2) {
-          val name1 = threadTrace1.getThreadName().toLowerCase(Locale.ROOT)
-          val name2 = threadTrace2.getThreadName().toLowerCase(Locale.ROOT)
-          val nameCmpRes = name1.compareTo(name2)
-          if (nameCmpRes == 0) {
-            threadTrace1.getThreadId < threadTrace2.getThreadId
-          } else {
-            nameCmpRes < 0
-          }
-        } else {
-          v1 > v2
-        }
-    }.map(threadInfoToThreadStackTrace)
+    threadInfos.sortBy(_.getThreadId).map(threadInfoToThreadStackTrace)
   }
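
The comparator being removed puts "Executor task launch" threads first, then orders the rest case-insensitively by name with the thread id as a tie-breaker; the restored code sorts by id alone. The removed ordering, reduced to (name, id) pairs:

    import java.util.Locale

    def sortThreads(threads: Seq[(String, Long)]): Seq[(String, Long)] =
      threads.sortWith { case ((name1, id1), (name2, id2)) =>
        val v1 = if (name1.contains("Executor task launch")) 1 else 0
        val v2 = if (name2.contains("Executor task launch")) 1 else 0
        if (v1 == v2) {
          val cmp = name1.toLowerCase(Locale.ROOT).compareTo(name2.toLowerCase(Locale.ROOT))
          if (cmp == 0) id1 < id2 else cmp < 0
        } else {
          v1 > v2
        }
      }

    // sortThreads(Seq(("dispatcher", 7), ("Executor task launch worker-0", 42)))
    //   => Seq(("Executor task launch worker-0", 42), ("dispatcher", 7))
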
 
   def getThreadDumpForThread(threadId: Long): Option[ThreadStackTrace] = {
@@ -2120,14 +2184,14 @@ private[spark] object Utils extends Logging {
 
   private def threadInfoToThreadStackTrace(threadInfo: ThreadInfo): ThreadStackTrace = {
     val monitors = threadInfo.getLockedMonitors.map(m => m.getLockedStackFrame -> m).toMap
-    val stackTrace = StackTrace(threadInfo.getStackTrace.map { frame =>
+    val stackTrace = threadInfo.getStackTrace.map { frame =>
       monitors.get(frame) match {
         case Some(monitor) =>
           monitor.getLockedStackFrame.toString + s" => holding ${monitor.lockString}"
         case None =>
           frame.toString
       }
-    })
+    }.mkString("\n")
 
     // use a set to dedup re-entrant locks that are held at multiple places
     val heldLocks =
@@ -2269,6 +2333,50 @@ private[spark] object Utils extends Logging {
     org.apache.log4j.Logger.getRootLogger().setLevel(l)
   }
 
+  /**
+   * Configure log4j properties used for test suites.
+   */
+  def configTestLog4j(level: String): Unit = {
+    val pro = new Properties()
+    pro.put("log4j.rootLogger", s"$level, console")
+    pro.put("log4j.appender.console", "org.apache.log4j.ConsoleAppender")
+    pro.put("log4j.appender.console.target", "System.err")
+    pro.put("log4j.appender.console.layout", "org.apache.log4j.PatternLayout")
+    pro.put("log4j.appender.console.layout.ConversionPattern",
+      "%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n")
+    PropertyConfigurator.configure(pro)
+  }
+
+  /**
+   * If the given URL connection is HttpsURLConnection, it sets the SSL socket factory and
+   * the host verifier from the given security manager.
+   */
+  def setupSecureURLConnection(urlConnection: URLConnection, sm: SecurityManager): URLConnection = {
+    urlConnection match {
+      case https: HttpsURLConnection =>
+        sm.sslSocketFactory.foreach(https.setSSLSocketFactory)
+        sm.hostnameVerifier.foreach(https.setHostnameVerifier)
+        https
+      case connection => connection
+    }
+  }
+
+  def invoke(
+      clazz: Class[_],
+      obj: AnyRef,
+      methodName: String,
+      args: (Class[_], AnyRef)*): AnyRef = {
+    val (types, values) = args.unzip
+    val method = clazz.getDeclaredMethod(methodName, types: _*)
+    method.setAccessible(true)
+    method.invoke(obj, values.toSeq: _*)
+  }
+
+  // Limit of bytes for total size of results (default is 1GB)
+  def getMaxResultSize(conf: SparkConf): Long = {
+    memoryStringToMb(conf.get("spark.driver.maxResultSize", "1g")).toLong << 20
+  }
+
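
getMaxResultSize converts the configured size to megabytes and then shifts left by 20 to get bytes, so the default "1g" becomes 1024 MB, i.e. 1024 << 20 = 1073741824 bytes. As a quick check:

    val mb = 1024L              // memoryStringToMb("1g")
    val bytes = mb << 20        // 1073741824, i.e. 1 GiB in bytes
    assert(bytes == 1024L * 1024 * 1024)
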
   /**
    * Return the current system LD_LIBRARY_PATH name
    */
@@ -2304,20 +2412,16 @@ private[spark] object Utils extends Logging {
   }
 
   /**
-   * Return the value of a config either through the SparkConf or the Hadoop configuration.
-   * We check whether the key is set in the SparkConf before looking at any Hadoop configuration.
-   * If the key is set in SparkConf, no matter whether it is running on YARN or not,
-   * gets the value from SparkConf.
-   * Only when the key is not set in SparkConf and running on YARN,
-   * gets the value from Hadoop configuration.
+   * Return the value of a config either through the SparkConf or the Hadoop configuration
+   * if this is Yarn mode. In the latter case, this defaults to the value set through SparkConf
+   * if the key is not set in the Hadoop configuration.
    */
   def getSparkOrYarnConfig(conf: SparkConf, key: String, default: String): String = {
-    if (conf.contains(key)) {
-      conf.get(key, default)
-    } else if (conf.get(SparkLauncher.SPARK_MASTER, null) == "yarn") {
-      new YarnConfiguration(SparkHadoopUtil.get.newConfiguration(conf)).get(key, default)
+    val sparkValue = conf.get(key, default)
+    if (conf.get(SparkLauncher.SPARK_MASTER, null) == "yarn") {
+      new YarnConfiguration(SparkHadoopUtil.get.newConfiguration(conf)).get(key, sparkValue)
     } else {
-      default
+      sparkValue
     }
   }
 
@@ -2505,6 +2609,16 @@ private[spark] object Utils extends Logging {
     SignalUtils.registerLogger(log)
   }
 
+  /**
+   * Unions two comma-separated lists of files and filters out empty strings.
+   */
+  def unionFileLists(leftList: Option[String], rightList: Option[String]): Set[String] = {
+    var allFiles = Set.empty[String]
+    leftList.foreach { value => allFiles ++= value.split(",") }
+    rightList.foreach { value => allFiles ++= value.split(",") }
+    allFiles.filter { _.nonEmpty }
+  }
+
   /**
    * Return the jar files pointed by the "spark.jars" property. Spark internally will distribute
    * these jars through file server. In the YARN mode, it will return an empty list, since YARN
@@ -2691,30 +2805,6 @@ private[spark] object Utils extends Logging {
 
     s"k8s://$resolvedURL"
   }
-
-  /**
-   * Replaces all the {{EXECUTOR_ID}} occurrences with the Executor Id
-   * and {{APP_ID}} occurrences with the App Id.
-   */
-  def substituteAppNExecIds(opt: String, appId: String, execId: String): String = {
-    opt.replace("{{APP_ID}}", appId).replace("{{EXECUTOR_ID}}", execId)
-  }
-
-  /**
-   * Replaces all the {{APP_ID}} occurrences with the App Id.
-   */
-  def substituteAppId(opt: String, appId: String): String = {
-    opt.replace("{{APP_ID}}", appId)
-  }
-
-  def createSecret(conf: SparkConf): String = {
-    val bits = conf.get(AUTH_SECRET_BIT_LENGTH)
-    val rnd = new SecureRandom()
-    val secretBytes = new Array[Byte](bits / JByte.SIZE)
-    rnd.nextBytes(secretBytes)
-    HashCodes.fromBytes(secretBytes).toString()
-  }
-
 }
 
 private[util] object CallerContext extends Logging {
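For reference, the invoke helper restored above is a thin wrapper around standard Java reflection. Below is a minimal standalone sketch of the same pattern, shown against an illustrative target (String.concat) rather than any Spark class; InvokeSketch itself is made up for the example.

    object InvokeSketch {
      // Same shape as the restored Utils.invoke: look up a method by name and parameter
      // types, make it accessible, and call it with the supplied argument values.
      def invoke(clazz: Class[_], obj: AnyRef, methodName: String,
          args: (Class[_], AnyRef)*): AnyRef = {
        val (types, values) = args.unzip
        val method = clazz.getDeclaredMethod(methodName, types: _*)
        method.setAccessible(true)
        method.invoke(obj, values.toSeq: _*)
      }

      def main(args: Array[String]): Unit = {
        // Calls "spark".concat("-utils") reflectively and prints "spark-utils".
        println(invoke(classOf[String], "spark", "concat", (classOf[String], "-utils")))
      }
    }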
diff --git a/core/src/main/scala/org/apache/spark/util/collection/Spillable.scala b/core/src/main/scala/org/apache/spark/util/collection/Spillable.scala
index 81457b53cd814..8183f825592c0 100644
--- a/core/src/main/scala/org/apache/spark/util/collection/Spillable.scala
+++ b/core/src/main/scala/org/apache/spark/util/collection/Spillable.scala
@@ -19,7 +19,6 @@ package org.apache.spark.util.collection
 
 import org.apache.spark.SparkEnv
 import org.apache.spark.internal.Logging
-import org.apache.spark.internal.config._
 import org.apache.spark.memory.{MemoryConsumer, MemoryMode, TaskMemoryManager}
 
 /**
@@ -42,7 +41,7 @@ private[spark] abstract class Spillable[C](taskMemoryManager: TaskMemoryManager)
   protected def forceSpill(): Boolean
 
   // Number of elements read from input since last spill
-  protected def elementsRead: Int = _elementsRead
+  protected def elementsRead: Long = _elementsRead
 
   // Called by subclasses every time a record is read
   // It's used for checking spilling frequency
@@ -55,15 +54,15 @@ private[spark] abstract class Spillable[C](taskMemoryManager: TaskMemoryManager)
 
   // Force this collection to spill when there are this many elements in memory
   // For testing only
-  private[this] val numElementsForceSpillThreshold: Int =
-    SparkEnv.get.conf.get(SHUFFLE_SPILL_NUM_ELEMENTS_FORCE_SPILL_THRESHOLD)
+  private[this] val numElementsForceSpillThreshold: Long =
+    SparkEnv.get.conf.getLong("spark.shuffle.spill.numElementsForceSpillThreshold", Long.MaxValue)
 
   // Threshold for this collection's size in bytes before we start tracking its memory usage
   // To avoid a large number of small spills, initialize this to a value orders of magnitude > 0
   @volatile private[this] var myMemoryThreshold = initialMemoryThreshold
 
   // Number of elements read from input since last spill
-  private[this] var _elementsRead = 0
+  private[this] var _elementsRead = 0L
 
   // Number of bytes spilled in total
   @volatile private[this] var _memoryBytesSpilled = 0L
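The Spillable hunk above restores the Long element counter and reads the force-spill threshold straight from the string config key, defaulting to Long.MaxValue. A minimal sketch of that bookkeeping follows; any names beyond those visible in the hunk are assumptions, not Spark's fields.

    class SpillCounterSketch(numElementsForceSpillThreshold: Long = Long.MaxValue) {
      // Number of elements read from input since the last spill; a Long so the
      // threshold can default to Long.MaxValue, i.e. "never force a spill".
      private var _elementsRead = 0L

      def elementsRead: Long = _elementsRead
      def addElementsRead(): Unit = { _elementsRead += 1 }

      // True once enough records have been read to force a spill (used in tests).
      def shouldForceSpill: Boolean = _elementsRead >= numElementsForceSpillThreshold
      def resetAfterSpill(): Unit = { _elementsRead = 0L }
    }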
diff --git a/core/src/main/scala/org/apache/spark/util/io/ChunkedByteBuffer.scala b/core/src/main/scala/org/apache/spark/util/io/ChunkedByteBuffer.scala
index 700ce56466c35..7367af7888bd8 100644
--- a/core/src/main/scala/org/apache/spark/util/io/ChunkedByteBuffer.scala
+++ b/core/src/main/scala/org/apache/spark/util/io/ChunkedByteBuffer.scala
@@ -63,19 +63,10 @@ private[spark] class ChunkedByteBuffer(var chunks: Array[ByteBuffer]) {
    */
   def writeFully(channel: WritableByteChannel): Unit = {
     for (bytes <- getChunks()) {
-      val originalLimit = bytes.limit()
-      while (bytes.hasRemaining) {
-        // If `bytes` is an on-heap ByteBuffer, the Java NIO API will copy it to a temporary direct
-        // ByteBuffer when writing it out. This temporary direct ByteBuffer is cached per thread.
-        // Its size has no limit and can keep growing if it sees a larger input ByteBuffer. This may
-        // cause significant native memory leak, if a large direct ByteBuffer is allocated and
-        // cached, as it's never released until thread exits. Here we write the `bytes` with
-        // fixed-size slices to limit the size of the cached direct ByteBuffer.
-        // Please refer to http://www.evanjones.ca/java-bytebuffer-leak.html for more details.
+      while (bytes.remaining() > 0) {
         val ioSize = Math.min(bytes.remaining(), bufferWriteChunkSize)
         bytes.limit(bytes.position() + ioSize)
         channel.write(bytes)
-        bytes.limit(originalLimit)
       }
     }
   }
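The comment dropped from writeFully above documents why writes are bounded: the JDK caches a per-thread temporary direct ByteBuffer sized to the largest heap buffer it has seen, so unbounded writes can leak native memory. A standalone sketch of the slice-and-write pattern described by the removed lines (chunkSize is an arbitrary cap here, not Spark's bufferWriteChunkSize setting):

    import java.nio.ByteBuffer
    import java.nio.channels.WritableByteChannel

    object BoundedWriteSketch {
      // Write `bytes` to `channel` in slices of at most `chunkSize` bytes, restoring
      // the original limit after each write so the buffer's bounds are preserved.
      def writeInSlices(bytes: ByteBuffer, channel: WritableByteChannel, chunkSize: Int): Unit = {
        val originalLimit = bytes.limit()
        while (bytes.hasRemaining) {
          val ioSize = math.min(bytes.remaining(), chunkSize)
          bytes.limit(bytes.position() + ioSize)
          channel.write(bytes)
          bytes.limit(originalLimit)
        }
      }
    }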
diff --git a/core/src/test/java/org/apache/spark/io/GenericFileInputStreamSuite.java b/core/src/test/java/org/apache/spark/io/GenericFileInputStreamSuite.java
index 22db3592ecc96..3440e1aea2f46 100644
--- a/core/src/test/java/org/apache/spark/io/GenericFileInputStreamSuite.java
+++ b/core/src/test/java/org/apache/spark/io/GenericFileInputStreamSuite.java
@@ -37,7 +37,7 @@
 
   protected File inputFile;
 
-  protected InputStream[] inputStreams;
+  protected InputStream inputStream;
 
   @Before
   public void setUp() throws IOException {
@@ -54,91 +54,77 @@ public void tearDown() {
 
   @Test
   public void testReadOneByte() throws IOException {
-    for (InputStream inputStream: inputStreams) {
-      for (int i = 0; i < randomBytes.length; i++) {
-        assertEquals(randomBytes[i], (byte) inputStream.read());
-      }
+    for (int i = 0; i < randomBytes.length; i++) {
+      assertEquals(randomBytes[i], (byte) inputStream.read());
     }
   }
 
   @Test
   public void testReadMultipleBytes() throws IOException {
-    for (InputStream inputStream: inputStreams) {
-      byte[] readBytes = new byte[8 * 1024];
-      int i = 0;
-      while (i < randomBytes.length) {
-        int read = inputStream.read(readBytes, 0, 8 * 1024);
-        for (int j = 0; j < read; j++) {
-          assertEquals(randomBytes[i], readBytes[j]);
-          i++;
-        }
+    byte[] readBytes = new byte[8 * 1024];
+    int i = 0;
+    while (i < randomBytes.length) {
+      int read = inputStream.read(readBytes, 0, 8 * 1024);
+      for (int j = 0; j < read; j++) {
+        assertEquals(randomBytes[i], readBytes[j]);
+        i++;
       }
     }
   }
 
   @Test
   public void testBytesSkipped() throws IOException {
-    for (InputStream inputStream: inputStreams) {
-      assertEquals(1024, inputStream.skip(1024));
-      for (int i = 1024; i < randomBytes.length; i++) {
-        assertEquals(randomBytes[i], (byte) inputStream.read());
-      }
+    assertEquals(1024, inputStream.skip(1024));
+    for (int i = 1024; i < randomBytes.length; i++) {
+      assertEquals(randomBytes[i], (byte) inputStream.read());
     }
   }
 
   @Test
   public void testBytesSkippedAfterRead() throws IOException {
-    for (InputStream inputStream: inputStreams) {
-      for (int i = 0; i < 1024; i++) {
-        assertEquals(randomBytes[i], (byte) inputStream.read());
-      }
-      assertEquals(1024, inputStream.skip(1024));
-      for (int i = 2048; i < randomBytes.length; i++) {
-        assertEquals(randomBytes[i], (byte) inputStream.read());
-      }
+    for (int i = 0; i < 1024; i++) {
+      assertEquals(randomBytes[i], (byte) inputStream.read());
+    }
+    assertEquals(1024, inputStream.skip(1024));
+    for (int i = 2048; i < randomBytes.length; i++) {
+      assertEquals(randomBytes[i], (byte) inputStream.read());
     }
   }
 
   @Test
   public void testNegativeBytesSkippedAfterRead() throws IOException {
-    for (InputStream inputStream: inputStreams) {
-      for (int i = 0; i < 1024; i++) {
-        assertEquals(randomBytes[i], (byte) inputStream.read());
-      }
-      // Skipping negative bytes should essentially be a no-op
-      assertEquals(0, inputStream.skip(-1));
-      assertEquals(0, inputStream.skip(-1024));
-      assertEquals(0, inputStream.skip(Long.MIN_VALUE));
-      assertEquals(1024, inputStream.skip(1024));
-      for (int i = 2048; i < randomBytes.length; i++) {
-        assertEquals(randomBytes[i], (byte) inputStream.read());
-      }
+    for (int i = 0; i < 1024; i++) {
+      assertEquals(randomBytes[i], (byte) inputStream.read());
+    }
+    // Skipping negative bytes should essentially be a no-op
+    assertEquals(0, inputStream.skip(-1));
+    assertEquals(0, inputStream.skip(-1024));
+    assertEquals(0, inputStream.skip(Long.MIN_VALUE));
+    assertEquals(1024, inputStream.skip(1024));
+    for (int i = 2048; i < randomBytes.length; i++) {
+      assertEquals(randomBytes[i], (byte) inputStream.read());
     }
   }
 
   @Test
   public void testSkipFromFileChannel() throws IOException {
-    for (InputStream inputStream: inputStreams) {
-      // Since the buffer is smaller than the skipped bytes, this guarantees
-      // we skip from the underlying file channel.
-      assertEquals(1024, inputStream.skip(1024));
-      for (int i = 1024; i < 2048; i++) {
-        assertEquals(randomBytes[i], (byte) inputStream.read());
-      }
-      assertEquals(256, inputStream.skip(256));
-      assertEquals(256, inputStream.skip(256));
-      assertEquals(512, inputStream.skip(512));
-      for (int i = 3072; i < randomBytes.length; i++) {
-        assertEquals(randomBytes[i], (byte) inputStream.read());
-      }
+    // Since the buffer is smaller than the skipped bytes, this guarantees
+    // we skip from the underlying file channel.
+    assertEquals(1024, inputStream.skip(1024));
+    for (int i = 1024; i < 2048; i++) {
+      assertEquals(randomBytes[i], (byte) inputStream.read());
+    }
+    assertEquals(256, inputStream.skip(256));
+    assertEquals(256, inputStream.skip(256));
+    assertEquals(512, inputStream.skip(512));
+    for (int i = 3072; i < randomBytes.length; i++) {
+      assertEquals(randomBytes[i], (byte) inputStream.read());
     }
   }
 
   @Test
   public void testBytesSkippedAfterEOF() throws IOException {
-    for (InputStream inputStream: inputStreams) {
-      assertEquals(randomBytes.length, inputStream.skip(randomBytes.length + 1));
-      assertEquals(-1, inputStream.read());
-    }
+    assertEquals(randomBytes.length, inputStream.skip(randomBytes.length + 1));
+    assertEquals(-1, inputStream.read());
   }
 }
diff --git a/core/src/test/java/org/apache/spark/io/NioBufferedInputStreamSuite.java b/core/src/test/java/org/apache/spark/io/NioBufferedInputStreamSuite.java
index a320f8662f707..211b33a1a9fb0 100644
--- a/core/src/test/java/org/apache/spark/io/NioBufferedInputStreamSuite.java
+++ b/core/src/test/java/org/apache/spark/io/NioBufferedInputStreamSuite.java
@@ -18,7 +18,6 @@
 
 import org.junit.Before;
 
-import java.io.InputStream;
 import java.io.IOException;
 
 /**
@@ -29,9 +28,6 @@
   @Before
   public void setUp() throws IOException {
     super.setUp();
-    inputStreams = new InputStream[] {
-      new NioBufferedFileInputStream(inputFile), // default
-      new NioBufferedFileInputStream(inputFile, 123) // small, unaligned buffer
-    };
+    inputStream = new NioBufferedFileInputStream(inputFile);
   }
 }
diff --git a/core/src/test/java/org/apache/spark/io/ReadAheadInputStreamSuite.java b/core/src/test/java/org/apache/spark/io/ReadAheadInputStreamSuite.java
index bfa1e0b908824..918ddc4517ec4 100644
--- a/core/src/test/java/org/apache/spark/io/ReadAheadInputStreamSuite.java
+++ b/core/src/test/java/org/apache/spark/io/ReadAheadInputStreamSuite.java
@@ -19,27 +19,16 @@
 import org.junit.Before;
 
 import java.io.IOException;
-import java.io.InputStream;
 
 /**
- * Tests functionality of {@link ReadAheadInputStreamSuite}
+ * Tests functionality of {@link NioBufferedFileInputStream}
  */
 public class ReadAheadInputStreamSuite extends GenericFileInputStreamSuite {
 
   @Before
   public void setUp() throws IOException {
     super.setUp();
-    inputStreams = new InputStream[] {
-      // Tests equal and aligned buffers of wrapped an outer stream.
-      new ReadAheadInputStream(new NioBufferedFileInputStream(inputFile, 8 * 1024), 8 * 1024),
-      // Tests aligned buffers, wrapped bigger than outer.
-      new ReadAheadInputStream(new NioBufferedFileInputStream(inputFile, 3 * 1024), 2 * 1024),
-      // Tests aligned buffers, wrapped smaller than outer.
-      new ReadAheadInputStream(new NioBufferedFileInputStream(inputFile, 2 * 1024), 3 * 1024),
-      // Tests unaligned buffers, wrapped bigger than outer.
-      new ReadAheadInputStream(new NioBufferedFileInputStream(inputFile, 321), 123),
-      // Tests unaligned buffers, wrapped smaller than outer.
-      new ReadAheadInputStream(new NioBufferedFileInputStream(inputFile, 123), 321)
-    };
+    inputStream = new ReadAheadInputStream(
+        new NioBufferedFileInputStream(inputFile), 8 * 1024, 4 * 1024);
   }
 }
diff --git a/core/src/test/java/org/apache/spark/launcher/SparkLauncherSuite.java b/core/src/test/java/org/apache/spark/launcher/SparkLauncherSuite.java
index 6a1a38c1a54f4..2225591a4ff75 100644
--- a/core/src/test/java/org/apache/spark/launcher/SparkLauncherSuite.java
+++ b/core/src/test/java/org/apache/spark/launcher/SparkLauncherSuite.java
@@ -109,7 +109,7 @@ public void testChildProcLauncher() throws Exception {
       .addSparkArg(opts.CONF,
         String.format("%s=-Dfoo=ShouldBeOverriddenBelow", SparkLauncher.DRIVER_EXTRA_JAVA_OPTIONS))
       .setConf(SparkLauncher.DRIVER_EXTRA_JAVA_OPTIONS,
-        "-Dfoo=bar -Dtest.appender=console")
+        "-Dfoo=bar -Dtest.appender=childproc")
       .setConf(SparkLauncher.DRIVER_EXTRA_CLASSPATH, System.getProperty("java.class.path"))
       .addSparkArg(opts.CLASS, "ShouldBeOverriddenBelow")
       .setMainClass(SparkLauncherTestApp.class.getName())
@@ -192,41 +192,6 @@ private void inProcessLauncherTestImpl() throws Exception {
     }
   }
 
-  @Test
-  public void testInProcessLauncherDoesNotKillJvm() throws Exception {
-    SparkSubmitOptionParser opts = new SparkSubmitOptionParser();
-    List<String[]> wrongArgs = Arrays.asList(
-      new String[] { "--unknown" },
-      new String[] { opts.DEPLOY_MODE, "invalid" });
-
-    for (String[] args : wrongArgs) {
-      InProcessLauncher launcher = new InProcessLauncher()
-        .setAppResource(SparkLauncher.NO_RESOURCE);
-      switch (args.length) {
-        case 2:
-          launcher.addSparkArg(args[0], args[1]);
-          break;
-
-        case 1:
-          launcher.addSparkArg(args[0]);
-          break;
-
-        default:
-          fail("FIXME: invalid test.");
-      }
-
-      SparkAppHandle handle = launcher.startApplication();
-      waitFor(handle);
-      assertEquals(SparkAppHandle.State.FAILED, handle.getState());
-    }
-
-    // Run --version, which is useless as a use case, but should succeed and not exit the JVM.
-    // The expected state is "LOST" since "--version" doesn't report state back to the handle.
-    SparkAppHandle handle = new InProcessLauncher().addSparkArg(opts.VERSION).startApplication();
-    waitFor(handle);
-    assertEquals(SparkAppHandle.State.LOST, handle.getState());
-  }
-
   public static class SparkLauncherTestApp {
 
     public static void main(String[] args) throws Exception {
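The removed testInProcessLauncherDoesNotKillJvm above drives InProcessLauncher end to end and waits for a final handle state. A hedged usage sketch of that API surface follows; the polling loop stands in for the suite's waitFor helper, which is not reproduced here.

    import org.apache.spark.launcher.{InProcessLauncher, SparkAppHandle, SparkLauncher}

    object InProcessLaunchSketch {
      def main(args: Array[String]): Unit = {
        // Launch with --version only; per the removed comment, the handle ends up
        // LOST because --version never reports state back.
        val handle: SparkAppHandle = new InProcessLauncher()
          .setAppResource(SparkLauncher.NO_RESOURCE)
          .addSparkArg("--version")
          .startApplication()
        while (!handle.getState.isFinal) {
          Thread.sleep(100)
        }
        println(s"final state: ${handle.getState}")
      }
    }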
diff --git a/core/src/test/java/org/apache/spark/memory/TaskMemoryManagerSuite.java b/core/src/test/java/org/apache/spark/memory/TaskMemoryManagerSuite.java
index d7d2d0b012bd3..a0664b30d6cc2 100644
--- a/core/src/test/java/org/apache/spark/memory/TaskMemoryManagerSuite.java
+++ b/core/src/test/java/org/apache/spark/memory/TaskMemoryManagerSuite.java
@@ -76,7 +76,7 @@ public void freeingPageSetsPageNumberToSpecialConstant() {
     final MemoryConsumer c = new TestMemoryConsumer(manager, MemoryMode.ON_HEAP);
     final MemoryBlock dataPage = manager.allocatePage(256, c);
     c.freePage(dataPage);
-    Assert.assertEquals(MemoryBlock.FREED_IN_ALLOCATOR_PAGE_NUMBER, dataPage.getPageNumber());
+    Assert.assertEquals(MemoryBlock.FREED_IN_ALLOCATOR_PAGE_NUMBER, dataPage.pageNumber);
   }
 
   @Test(expected = AssertionError.class)
diff --git a/core/src/test/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriterSuite.java b/core/src/test/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriterSuite.java
index 0d5c5ea7903e9..24a55df84a240 100644
--- a/core/src/test/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriterSuite.java
+++ b/core/src/test/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriterSuite.java
@@ -95,7 +95,7 @@ public void tearDown() {
   @SuppressWarnings("unchecked")
   public void setUp() throws IOException {
     MockitoAnnotations.initMocks(this);
-    tempDir = Utils.createTempDir(null, "test");
+    tempDir = Utils.createTempDir("test", "test");
     mergedOutputFile = File.createTempFile("mergedoutput", "", tempDir);
     partitionSizesInMergedFile = null;
     spillFilesCreated.clear();
diff --git a/core/src/test/resources/HistoryServerExpectations/executor_list_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/executor_list_json_expectation.json
index 7bb8fe8fd8f98..942e6d8f04363 100644
--- a/core/src/test/resources/HistoryServerExpectations/executor_list_json_expectation.json
+++ b/core/src/test/resources/HistoryServerExpectations/executor_list_json_expectation.json
@@ -19,6 +19,5 @@
   "isBlacklisted" : false,
   "maxMemory" : 278302556,
   "addTime" : "2015-02-03T16:43:00.906GMT",
-  "executorLogs" : { },
-  "blacklistedInStages" : [ ]
+  "executorLogs" : { }
 } ]
diff --git a/core/src/test/resources/HistoryServerExpectations/executor_memory_usage_expectation.json b/core/src/test/resources/HistoryServerExpectations/executor_memory_usage_expectation.json
index dd5b1dcb7372b..ed33c90dd39ba 100644
--- a/core/src/test/resources/HistoryServerExpectations/executor_memory_usage_expectation.json
+++ b/core/src/test/resources/HistoryServerExpectations/executor_memory_usage_expectation.json
@@ -25,8 +25,7 @@
     "usedOffHeapStorageMemory" : 0,
     "totalOnHeapStorageMemory" : 384093388,
     "totalOffHeapStorageMemory" : 524288000
-  },
-  "blacklistedInStages" : [ ]
+  }
 }, {
   "id" : "3",
   "hostPort" : "172.22.0.167:51485",
@@ -57,8 +56,7 @@
     "usedOffHeapStorageMemory" : 0,
     "totalOnHeapStorageMemory" : 384093388,
     "totalOffHeapStorageMemory" : 524288000
-  },
-  "blacklistedInStages" : [ ]
+  }
 } ,{
   "id" : "2",
   "hostPort" : "172.22.0.167:51487",
@@ -89,8 +87,7 @@
     "usedOffHeapStorageMemory" : 0,
     "totalOnHeapStorageMemory" : 384093388,
     "totalOffHeapStorageMemory" : 524288000
-  },
-  "blacklistedInStages" : [ ]
+  }
 }, {
   "id" : "1",
   "hostPort" : "172.22.0.167:51490",
@@ -121,8 +118,7 @@
     "usedOffHeapStorageMemory": 0,
     "totalOnHeapStorageMemory": 384093388,
     "totalOffHeapStorageMemory": 524288000
-  },
-  "blacklistedInStages" : [ ]
+  }
 }, {
   "id" : "0",
   "hostPort" : "172.22.0.167:51491",
@@ -153,6 +149,5 @@
     "usedOffHeapStorageMemory" : 0,
     "totalOnHeapStorageMemory" : 384093388,
     "totalOffHeapStorageMemory" : 524288000
-  },
-  "blacklistedInStages" : [ ]
+  }
 } ]
diff --git a/core/src/test/resources/HistoryServerExpectations/executor_node_blacklisting_expectation.json b/core/src/test/resources/HistoryServerExpectations/executor_node_blacklisting_expectation.json
index 3e55d3d9d7eb9..73519f1d9e2e4 100644
--- a/core/src/test/resources/HistoryServerExpectations/executor_node_blacklisting_expectation.json
+++ b/core/src/test/resources/HistoryServerExpectations/executor_node_blacklisting_expectation.json
@@ -25,8 +25,7 @@
     "usedOffHeapStorageMemory" : 0,
     "totalOnHeapStorageMemory" : 384093388,
     "totalOffHeapStorageMemory" : 524288000
-  },
-  "blacklistedInStages" : [ ]
+  }
 }, {
   "id" : "3",
   "hostPort" : "172.22.0.167:51485",
@@ -57,8 +56,7 @@
     "usedOffHeapStorageMemory" : 0,
     "totalOnHeapStorageMemory" : 384093388,
     "totalOffHeapStorageMemory" : 524288000
-  },
-  "blacklistedInStages" : [ ]
+  }
 }, {
   "id" : "2",
   "hostPort" : "172.22.0.167:51487",
@@ -89,8 +87,7 @@
     "usedOffHeapStorageMemory" : 0,
     "totalOnHeapStorageMemory" : 384093388,
     "totalOffHeapStorageMemory" : 524288000
-  },
-  "blacklistedInStages" : [ ]
+  }
 }, {
   "id" : "1",
   "hostPort" : "172.22.0.167:51490",
@@ -121,8 +118,7 @@
     "usedOffHeapStorageMemory": 0,
     "totalOnHeapStorageMemory": 384093388,
     "totalOffHeapStorageMemory": 524288000
-  },
-  "blacklistedInStages" : [ ]
+  }
 }, {
   "id" : "0",
   "hostPort" : "172.22.0.167:51491",
@@ -153,6 +149,5 @@
     "usedOffHeapStorageMemory": 0,
     "totalOnHeapStorageMemory": 384093388,
     "totalOffHeapStorageMemory": 524288000
-  },
-  "blacklistedInStages" : [ ]
+  }
 } ]
diff --git a/core/src/test/resources/HistoryServerExpectations/executor_node_blacklisting_unblacklisting_expectation.json b/core/src/test/resources/HistoryServerExpectations/executor_node_blacklisting_unblacklisting_expectation.json
index e87f3e78f2dc8..6931fead3d2ff 100644
--- a/core/src/test/resources/HistoryServerExpectations/executor_node_blacklisting_unblacklisting_expectation.json
+++ b/core/src/test/resources/HistoryServerExpectations/executor_node_blacklisting_unblacklisting_expectation.json
@@ -19,8 +19,7 @@
   "isBlacklisted" : false,
   "maxMemory" : 384093388,
   "addTime" : "2016-11-15T23:20:38.836GMT",
-  "executorLogs" : { },
-  "blacklistedInStages" : [ ]
+  "executorLogs" : { }
 }, {
   "id" : "3",
   "hostPort" : "172.22.0.111:64543",
@@ -45,8 +44,7 @@
   "executorLogs" : {
     "stdout" : "http://172.22.0.111:64521/logPage/?appId=app-20161115172038-0000&executorId=3&logType=stdout",
     "stderr" : "http://172.22.0.111:64521/logPage/?appId=app-20161115172038-0000&executorId=3&logType=stderr"
-  },
-  "blacklistedInStages" : [ ]
+  }
 }, {
   "id" : "2",
   "hostPort" : "172.22.0.111:64539",
@@ -71,8 +69,7 @@
   "executorLogs" : {
     "stdout" : "http://172.22.0.111:64519/logPage/?appId=app-20161115172038-0000&executorId=2&logType=stdout",
     "stderr" : "http://172.22.0.111:64519/logPage/?appId=app-20161115172038-0000&executorId=2&logType=stderr"
-  },
-  "blacklistedInStages" : [ ]
+  }
 }, {
   "id" : "1",
   "hostPort" : "172.22.0.111:64541",
@@ -97,8 +94,7 @@
   "executorLogs" : {
     "stdout" : "http://172.22.0.111:64518/logPage/?appId=app-20161115172038-0000&executorId=1&logType=stdout",
     "stderr" : "http://172.22.0.111:64518/logPage/?appId=app-20161115172038-0000&executorId=1&logType=stderr"
-  },
-  "blacklistedInStages" : [ ]
+  }
 }, {
   "id" : "0",
   "hostPort" : "172.22.0.111:64540",
@@ -123,6 +119,5 @@
   "executorLogs" : {
     "stdout" : "http://172.22.0.111:64517/logPage/?appId=app-20161115172038-0000&executorId=0&logType=stdout",
     "stderr" : "http://172.22.0.111:64517/logPage/?appId=app-20161115172038-0000&executorId=0&logType=stderr"
-  },
-  "blacklistedInStages" : [ ]
+  }
 } ]
diff --git a/core/src/test/scala/org/apache/spark/AccumulatorSuite.scala b/core/src/test/scala/org/apache/spark/AccumulatorSuite.scala
index 5d0ffd92647bc..3990ee1ec326d 100644
--- a/core/src/test/scala/org/apache/spark/AccumulatorSuite.scala
+++ b/core/src/test/scala/org/apache/spark/AccumulatorSuite.scala
@@ -209,8 +209,10 @@ class AccumulatorSuite extends SparkFunSuite with Matchers with LocalSparkContex
     System.gc()
     assert(ref.get.isEmpty)
 
-    // Getting a garbage collected accum should return None.
-    assert(AccumulatorContext.get(accId).isEmpty)
+    // Getting a garbage-collected accum should throw an error
+    intercept[IllegalStateException] {
+      AccumulatorContext.get(accId)
+    }
 
     // Getting a normal accumulator. Note: this has to be separate because referencing an
     // accumulator above in an `assert` would keep it from being garbage collected.
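The assertion changed above toggles between two behaviors for a garbage-collected accumulator: returning None versus raising IllegalStateException. Below is a toy weak-reference registry illustrating both variants; the map layout and names are assumptions, not AccumulatorContext's actual internals.

    import java.lang.ref.WeakReference
    import scala.collection.concurrent.TrieMap

    object WeakRegistrySketch {
      private val originals = TrieMap.empty[Long, WeakReference[AnyRef]]

      def register(id: Long, value: AnyRef): Unit = {
        originals.put(id, new WeakReference[AnyRef](value))
      }

      // Variant from the replaced assertion: a collected value simply maps to None.
      def getOrNone(id: Long): Option[AnyRef] =
        originals.get(id).flatMap(ref => Option(ref.get()))

      // Variant restored by this revert: a collected value is treated as an illegal state.
      def getOrThrow(id: Long): Option[AnyRef] =
        originals.get(id).map { ref =>
          val value = ref.get()
          if (value == null) {
            throw new IllegalStateException(s"Attempted to access garbage-collected value $id")
          }
          value
        }
    }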
diff --git a/core/src/test/scala/org/apache/spark/DistributedSuite.scala b/core/src/test/scala/org/apache/spark/DistributedSuite.scala
index 28ea0c6f0bdba..e09d5f59817b9 100644
--- a/core/src/test/scala/org/apache/spark/DistributedSuite.scala
+++ b/core/src/test/scala/org/apache/spark/DistributedSuite.scala
@@ -160,8 +160,11 @@ class DistributedSuite extends SparkFunSuite with Matchers with LocalSparkContex
     val data = sc.parallelize(1 to 1000, 10)
     val cachedData = data.persist(storageLevel)
     assert(cachedData.count === 1000)
-    assert(sc.getRDDStorageInfo.filter(_.id == cachedData.id).map(_.numCachedPartitions).sum ===
-      data.getNumPartitions)
+    assert(sc.getExecutorStorageStatus.map(_.rddBlocksById(cachedData.id).size).sum ===
+      storageLevel.replication * data.getNumPartitions)
+    assert(cachedData.count === 1000)
+    assert(cachedData.count === 1000)
+
     // Get all the locations of the first partition and try to fetch the partitions
     // from those locations.
     val blockIds = data.partitions.indices.map(index => RDDBlockId(data.id, index)).toArray
diff --git a/core/src/test/scala/org/apache/spark/DriverSuite.scala b/core/src/test/scala/org/apache/spark/DriverSuite.scala
index 896cd2e80aaef..962945e5b6bb1 100644
--- a/core/src/test/scala/org/apache/spark/DriverSuite.scala
+++ b/core/src/test/scala/org/apache/spark/DriverSuite.scala
@@ -51,7 +51,7 @@ class DriverSuite extends SparkFunSuite with TimeLimits {
  */
 object DriverWithoutCleanup {
   def main(args: Array[String]) {
-    TestUtils.configTestLog4j("INFO")
+    Utils.configTestLog4j("INFO")
     val conf = new SparkConf
     val sc = new SparkContext(args(0), "DriverWithoutCleanup", conf)
     sc.parallelize(1 to 100, 4).count()
diff --git a/core/src/test/scala/org/apache/spark/ExecutorAllocationManagerSuite.scala b/core/src/test/scala/org/apache/spark/ExecutorAllocationManagerSuite.scala
index 3cfb0a9feb32b..a0cae5a9e011c 100644
--- a/core/src/test/scala/org/apache/spark/ExecutorAllocationManagerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/ExecutorAllocationManagerSuite.scala
@@ -19,8 +19,6 @@ package org.apache.spark
 
 import scala.collection.mutable
 
-import org.mockito.Matchers.{any, eq => meq}
-import org.mockito.Mockito.{mock, never, verify, when}
 import org.scalatest.{BeforeAndAfter, PrivateMethodTester}
 
 import org.apache.spark.executor.TaskMetrics
@@ -28,7 +26,6 @@ import org.apache.spark.scheduler._
 import org.apache.spark.scheduler.ExternalClusterManager
 import org.apache.spark.scheduler.cluster.ExecutorInfo
 import org.apache.spark.scheduler.local.LocalSchedulerBackend
-import org.apache.spark.storage.BlockManagerMaster
 import org.apache.spark.util.ManualClock
 
 /**
@@ -145,39 +142,6 @@ class ExecutorAllocationManagerSuite
     assert(numExecutorsToAdd(manager) === 1)
   }
 
-  def testAllocationRatio(cores: Int, divisor: Double, expected: Int): Unit = {
-    val conf = new SparkConf()
-      .setMaster("myDummyLocalExternalClusterManager")
-      .setAppName("test-executor-allocation-manager")
-      .set("spark.dynamicAllocation.enabled", "true")
-      .set("spark.dynamicAllocation.testing", "true")
-      .set("spark.dynamicAllocation.maxExecutors", "15")
-      .set("spark.dynamicAllocation.minExecutors", "3")
-      .set("spark.dynamicAllocation.executorAllocationRatio", divisor.toString)
-      .set("spark.executor.cores", cores.toString)
-    val sc = new SparkContext(conf)
-    contexts += sc
-    var manager = sc.executorAllocationManager.get
-    post(sc.listenerBus, SparkListenerStageSubmitted(createStageInfo(0, 20)))
-    for (i <- 0 to 5) {
-      addExecutors(manager)
-    }
-    assert(numExecutorsTarget(manager) === expected)
-    sc.stop()
-  }
-
-  test("executionAllocationRatio is correctly handled") {
-    testAllocationRatio(1, 0.5, 10)
-    testAllocationRatio(1, 1.0/3.0, 7)
-    testAllocationRatio(2, 1.0/3.0, 4)
-    testAllocationRatio(1, 0.385, 8)
-
-    // max/min executors capping
-    testAllocationRatio(1, 1.0, 15) // should be 20 but capped by max
-    testAllocationRatio(4, 1.0/3.0, 3)  // should be 2 but elevated by min
-  }
-
-
   test("add executors capped by num pending tasks") {
     sc = createSparkContext(0, 10, 0)
     val manager = sc.executorAllocationManager.get
@@ -1086,66 +1050,6 @@ class ExecutorAllocationManagerSuite
     assert(removeTimes(manager) === Map.empty)
   }
 
-  test("SPARK-23365 Don't update target num executors when killing idle executors") {
-    val minExecutors = 1
-    val initialExecutors = 1
-    val maxExecutors = 2
-    val conf = new SparkConf()
-      .set("spark.dynamicAllocation.enabled", "true")
-      .set("spark.shuffle.service.enabled", "true")
-      .set("spark.dynamicAllocation.minExecutors", minExecutors.toString)
-      .set("spark.dynamicAllocation.maxExecutors", maxExecutors.toString)
-      .set("spark.dynamicAllocation.initialExecutors", initialExecutors.toString)
-      .set("spark.dynamicAllocation.schedulerBacklogTimeout", "1000ms")
-      .set("spark.dynamicAllocation.sustainedSchedulerBacklogTimeout", "1000ms")
-      .set("spark.dynamicAllocation.executorIdleTimeout", s"3000ms")
-    val mockAllocationClient = mock(classOf[ExecutorAllocationClient])
-    val mockBMM = mock(classOf[BlockManagerMaster])
-    val manager = new ExecutorAllocationManager(
-      mockAllocationClient, mock(classOf[LiveListenerBus]), conf, mockBMM)
-    val clock = new ManualClock()
-    manager.setClock(clock)
-
-    when(mockAllocationClient.requestTotalExecutors(meq(2), any(), any())).thenReturn(true)
-    // test setup -- job with 2 tasks, scale up to two executors
-    assert(numExecutorsTarget(manager) === 1)
-    manager.listener.onExecutorAdded(SparkListenerExecutorAdded(
-      clock.getTimeMillis(), "executor-1", new ExecutorInfo("host1", 1, Map.empty)))
-    manager.listener.onStageSubmitted(SparkListenerStageSubmitted(createStageInfo(0, 2)))
-    clock.advance(1000)
-    manager invokePrivate _updateAndSyncNumExecutorsTarget(clock.getTimeMillis())
-    assert(numExecutorsTarget(manager) === 2)
-    val taskInfo0 = createTaskInfo(0, 0, "executor-1")
-    manager.listener.onTaskStart(SparkListenerTaskStart(0, 0, taskInfo0))
-    manager.listener.onExecutorAdded(SparkListenerExecutorAdded(
-      clock.getTimeMillis(), "executor-2", new ExecutorInfo("host1", 1, Map.empty)))
-    val taskInfo1 = createTaskInfo(1, 1, "executor-2")
-    manager.listener.onTaskStart(SparkListenerTaskStart(0, 0, taskInfo1))
-    assert(numExecutorsTarget(manager) === 2)
-
-    // have one task finish -- we should adjust the target number of executors down
-    // but we should *not* kill any executors yet
-    manager.listener.onTaskEnd(SparkListenerTaskEnd(0, 0, null, Success, taskInfo0, null))
-    assert(maxNumExecutorsNeeded(manager) === 1)
-    assert(numExecutorsTarget(manager) === 2)
-    clock.advance(1000)
-    manager invokePrivate _updateAndSyncNumExecutorsTarget(clock.getTimeMillis())
-    assert(numExecutorsTarget(manager) === 1)
-    verify(mockAllocationClient, never).killExecutors(any(), any(), any(), any())
-
-    // now we cross the idle timeout for executor-1, so we kill it.  the really important
-    // thing here is that we do *not* ask the executor allocation client to adjust the target
-    // number of executors down
-    when(mockAllocationClient.killExecutors(Seq("executor-1"), false, false, false))
-      .thenReturn(Seq("executor-1"))
-    clock.advance(3000)
-    schedule(manager)
-    assert(maxNumExecutorsNeeded(manager) === 1)
-    assert(numExecutorsTarget(manager) === 1)
-    // here's the important verify -- we did kill the executors, but did not adjust the target count
-    verify(mockAllocationClient).killExecutors(Seq("executor-1"), false, false, false)
-  }
-
   private def createSparkContext(
       minExecutors: Int = 1,
       maxExecutors: Int = 5,
@@ -1364,8 +1268,7 @@ private class DummyLocalSchedulerBackend (sc: SparkContext, sb: SchedulerBackend
 
   override def killExecutors(
       executorIds: Seq[String],
-      adjustTargetNumExecutors: Boolean,
-      countFailures: Boolean,
+      replace: Boolean,
       force: Boolean): Seq[String] = executorIds
 
   override def start(): Unit = sb.start()
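The removed executionAllocationRatio test above pins down target executor counts for several (cores, ratio) pairs with 20 pending tasks and a 3..15 executor range. Those expectations are consistent with a ceil-then-clamp rule; the sketch below is inferred from the removed assertions rather than taken from the manager's code.

    object AllocationRatioSketch {
      // target = ceil(pendingTasks * ratio / coresPerExecutor), clamped to [min, max].
      def targetExecutors(pendingTasks: Int, coresPerExecutor: Int, ratio: Double,
          minExecutors: Int, maxExecutors: Int): Int = {
        val raw = math.ceil(pendingTasks * ratio / coresPerExecutor).toInt
        math.min(maxExecutors, math.max(minExecutors, raw))
      }

      def main(args: Array[String]): Unit = {
        println(targetExecutors(20, 1, 0.5, 3, 15))        // 10
        println(targetExecutors(20, 2, 1.0 / 3.0, 3, 15))  // 4
        println(targetExecutors(20, 1, 1.0, 3, 15))        // 15, capped by the max
        println(targetExecutors(20, 4, 1.0 / 3.0, 3, 15))  // 3, elevated by the min
      }
    }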
diff --git a/core/src/test/scala/org/apache/spark/FileSuite.scala b/core/src/test/scala/org/apache/spark/FileSuite.scala
index a441b9c8ab97a..55a9122cf9026 100644
--- a/core/src/test/scala/org/apache/spark/FileSuite.scala
+++ b/core/src/test/scala/org/apache/spark/FileSuite.scala
@@ -23,7 +23,6 @@ import java.util.zip.GZIPOutputStream
 
 import scala.io.Source
 
-import org.apache.hadoop.conf.Configuration
 import org.apache.hadoop.fs.Path
 import org.apache.hadoop.io._
 import org.apache.hadoop.io.compress.DefaultCodec
@@ -33,7 +32,7 @@ import org.apache.hadoop.mapreduce.lib.input.{FileSplit => NewFileSplit, TextInp
 import org.apache.hadoop.mapreduce.lib.output.{TextOutputFormat => NewTextOutputFormat}
 
 import org.apache.spark.internal.config._
-import org.apache.spark.rdd.{HadoopRDD, NewHadoopRDD, RDD}
+import org.apache.spark.rdd.{HadoopRDD, NewHadoopRDD}
 import org.apache.spark.storage.StorageLevel
 import org.apache.spark.util.Utils
 
@@ -597,70 +596,4 @@ class FileSuite extends SparkFunSuite with LocalSparkContext {
       actualPartitionNum = 5,
       expectedPartitionNum = 2)
   }
-
-  test("spark.files.ignoreMissingFiles should work both HadoopRDD and NewHadoopRDD") {
-    // "file not found" can happen both when getPartitions or compute in HadoopRDD/NewHadoopRDD,
-    // We test both cases here.
-
-    val deletedPath = new Path(tempDir.getAbsolutePath, "test-data-1")
-    val fs = deletedPath.getFileSystem(new Configuration())
-    fs.delete(deletedPath, true)
-    intercept[FileNotFoundException](fs.open(deletedPath))
-
-    def collectRDDAndDeleteFileBeforeCompute(newApi: Boolean): Array[_] = {
-      val dataPath = new Path(tempDir.getAbsolutePath, "test-data-2")
-      val writer = new OutputStreamWriter(new FileOutputStream(new File(dataPath.toString)))
-      writer.write("hello\n")
-      writer.write("world\n")
-      writer.close()
-      val rdd = if (newApi) {
-        sc.newAPIHadoopFile(dataPath.toString, classOf[NewTextInputFormat],
-          classOf[LongWritable], classOf[Text])
-      } else {
-        sc.textFile(dataPath.toString)
-      }
-      rdd.partitions
-      fs.delete(dataPath, true)
-      // The exception happens when the record reader is initialized in HadoopRDD/NewHadoopRDD.compute
-      // because the partitions' info is already cached.
-      rdd.collect()
-    }
-
-    // collect HadoopRDD and NewHadoopRDD when spark.files.ignoreMissingFiles=false by default.
-    sc = new SparkContext("local", "test")
-    intercept[org.apache.hadoop.mapred.InvalidInputException] {
-      // Exception happens when HadoopRDD.getPartitions
-      sc.textFile(deletedPath.toString).collect()
-    }
-
-    var e = intercept[SparkException] {
-      collectRDDAndDeleteFileBeforeCompute(false)
-    }
-    assert(e.getCause.isInstanceOf[java.io.FileNotFoundException])
-
-    intercept[org.apache.hadoop.mapreduce.lib.input.InvalidInputException] {
-      // Exception happens when NewHadoopRDD.getPartitions
-      sc.newAPIHadoopFile(deletedPath.toString, classOf[NewTextInputFormat],
-        classOf[LongWritable], classOf[Text]).collect
-    }
-
-    e = intercept[SparkException] {
-      collectRDDAndDeleteFileBeforeCompute(true)
-    }
-    assert(e.getCause.isInstanceOf[java.io.FileNotFoundException])
-
-    sc.stop()
-
-    // collect HadoopRDD and NewHadoopRDD when spark.files.ignoreMissingFiles=true.
-    val conf = new SparkConf().set(IGNORE_MISSING_FILES, true)
-    sc = new SparkContext("local", "test", conf)
-    assert(sc.textFile(deletedPath.toString).collect().isEmpty)
-
-    assert(collectRDDAndDeleteFileBeforeCompute(false).isEmpty)
-
-    assert(sc.newAPIHadoopFile(deletedPath.toString, classOf[NewTextInputFormat],
-      classOf[LongWritable], classOf[Text]).collect().isEmpty)
-
-    assert(collectRDDAndDeleteFileBeforeCompute(true).isEmpty)
-  }
 }
diff --git a/core/src/test/scala/org/apache/spark/InternalAccumulatorSuite.scala b/core/src/test/scala/org/apache/spark/InternalAccumulatorSuite.scala
index 62824a5bec9d1..8d7be77f51fe9 100644
--- a/core/src/test/scala/org/apache/spark/InternalAccumulatorSuite.scala
+++ b/core/src/test/scala/org/apache/spark/InternalAccumulatorSuite.scala
@@ -135,7 +135,7 @@ class InternalAccumulatorSuite extends SparkFunSuite with LocalSparkContext {
       // This job runs 2 stages, and we're in the second stage. Therefore, any task attempt
       // ID that's < 2 * numPartitions belongs to the first attempt of this stage.
       val taskContext = TaskContext.get()
-      val isFirstStageAttempt = taskContext.taskAttemptId() < numPartitions * 2L
+      val isFirstStageAttempt = taskContext.taskAttemptId() < numPartitions * 2
       if (isFirstStageAttempt) {
         throw new FetchFailedException(
           SparkEnv.get.blockManager.blockManagerId,
diff --git a/core/src/test/scala/org/apache/spark/JobCancellationSuite.scala b/core/src/test/scala/org/apache/spark/JobCancellationSuite.scala
index 61da4138896cd..8a77aea75a992 100644
--- a/core/src/test/scala/org/apache/spark/JobCancellationSuite.scala
+++ b/core/src/test/scala/org/apache/spark/JobCancellationSuite.scala
@@ -18,7 +18,6 @@
 package org.apache.spark
 
 import java.util.concurrent.Semaphore
-import java.util.concurrent.atomic.AtomicInteger
 
 import scala.concurrent.ExecutionContext.Implicits.global
 import scala.concurrent.Future
@@ -27,7 +26,7 @@ import scala.concurrent.duration._
 import org.scalatest.BeforeAndAfter
 import org.scalatest.Matchers
 
-import org.apache.spark.scheduler.{SparkListener, SparkListenerStageCompleted, SparkListenerTaskEnd, SparkListenerTaskStart}
+import org.apache.spark.scheduler.{SparkListener, SparkListenerTaskStart}
 import org.apache.spark.util.ThreadUtils
 
 /**
@@ -41,10 +40,6 @@ class JobCancellationSuite extends SparkFunSuite with Matchers with BeforeAndAft
   override def afterEach() {
     try {
       resetSparkContext()
-      JobCancellationSuite.taskStartedSemaphore.drainPermits()
-      JobCancellationSuite.taskCancelledSemaphore.drainPermits()
-      JobCancellationSuite.twoJobsSharingStageSemaphore.drainPermits()
-      JobCancellationSuite.executionOfInterruptibleCounter.set(0)
     } finally {
       super.afterEach()
     }
@@ -325,67 +320,6 @@ class JobCancellationSuite extends SparkFunSuite with Matchers with BeforeAndAft
     f2.get()
   }
 
-  test("interruptible iterator of shuffle reader") {
-    // In this test case, we create a Spark job of two stages. The second stage is cancelled during
-    // execution and a counter is used to make sure that the corresponding tasks are indeed
-    // cancelled.
-    import JobCancellationSuite._
-    sc = new SparkContext("local[2]", "test interruptible iterator")
-
-    // Increase the number of elements to be processed to avoid this test being flaky.
-    val numElements = 10000
-    val taskCompletedSem = new Semaphore(0)
-
-    sc.addSparkListener(new SparkListener {
-      override def onStageCompleted(stageCompleted: SparkListenerStageCompleted): Unit = {
-        // release taskCancelledSemaphore when cancelTasks event has been posted
-        if (stageCompleted.stageInfo.stageId == 1) {
-          taskCancelledSemaphore.release(numElements)
-        }
-      }
-
-      override def onTaskEnd(taskEnd: SparkListenerTaskEnd): Unit = {
-        if (taskEnd.stageId == 1) { // make sure tasks are completed
-          taskCompletedSem.release()
-        }
-      }
-    })
-
-    // Explicitly disable interrupt task thread on cancelling tasks, so the task thread can only be
-    // interrupted by `InterruptibleIterator`.
-    sc.setLocalProperty(SparkContext.SPARK_JOB_INTERRUPT_ON_CANCEL, "false")
-
-    val f = sc.parallelize(1 to numElements).map { i => (i, i) }
-      .repartitionAndSortWithinPartitions(new HashPartitioner(1))
-      .mapPartitions { iter =>
-        taskStartedSemaphore.release()
-        iter
-      }.foreachAsync { x =>
-        // Block this code from being executed until the job gets cancelled. In this case, if the
-        // source iterator is interruptible, the max number of increments should be under
-        // `numElements`.
-        taskCancelledSemaphore.acquire()
-        executionOfInterruptibleCounter.getAndIncrement()
-    }
-
-    taskStartedSemaphore.acquire()
-    // Job is cancelled when:
-    // 1. task in reduce stage has been started, guaranteed by previous line.
-    // 2. task in reduce stage is blocked as taskCancelledSemaphore is not released until
-    //    JobCancelled event is posted.
-    // After the job is cancelled, tasks in the reduce stage will be cancelled asynchronously, so
-    // part of the input should not get processed (it's very unlikely that Spark can process
-    // 10000 elements between the time JobCancelled is posted and the task is really killed).
-    f.cancel()
-
-    val e = intercept[SparkException](f.get()).getCause
-    assert(e.getMessage.contains("cancelled") || e.getMessage.contains("killed"))
-
-    // Make sure tasks are indeed completed.
-    taskCompletedSem.acquire()
-    assert(executionOfInterruptibleCounter.get() < numElements)
- }
-
   def testCount() {
     // Cancel before launching any tasks
     {
@@ -447,9 +381,7 @@ class JobCancellationSuite extends SparkFunSuite with Matchers with BeforeAndAft
 
 
 object JobCancellationSuite {
-  // To avoid any headaches, reset these global variables in the companion class's afterEach block
   val taskStartedSemaphore = new Semaphore(0)
   val taskCancelledSemaphore = new Semaphore(0)
   val twoJobsSharingStageSemaphore = new Semaphore(0)
-  val executionOfInterruptibleCounter = new AtomicInteger(0)
 }
diff --git a/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala b/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala
index 21f481d477242..50b8ea754d8d9 100644
--- a/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala
@@ -147,7 +147,7 @@ class MapOutputTrackerSuite extends SparkFunSuite {
     masterTracker.registerMapOutput(10, 0, MapStatus(
       BlockManagerId("a", "hostA", 1000), Array(1000L)))
     slaveTracker.updateEpoch(masterTracker.getEpoch)
-    assert(slaveTracker.getMapSizesByExecutorId(10, 0).toSeq ===
+    assert(slaveTracker.getMapSizesByExecutorId(10, 0) ===
       Seq((BlockManagerId("a", "hostA", 1000), ArrayBuffer((ShuffleBlockId(10, 0, 0), size1000)))))
     assert(0 == masterTracker.getNumCachedSerializedBroadcast)
 
@@ -298,33 +298,4 @@ class MapOutputTrackerSuite extends SparkFunSuite {
     }
   }
 
-  test("zero-sized blocks should be excluded when getMapSizesByExecutorId") {
-    val rpcEnv = createRpcEnv("test")
-    val tracker = newTrackerMaster()
-    tracker.trackerEndpoint = rpcEnv.setupEndpoint(MapOutputTracker.ENDPOINT_NAME,
-      new MapOutputTrackerMasterEndpoint(rpcEnv, tracker, conf))
-    tracker.registerShuffle(10, 2)
-
-    val size0 = MapStatus.decompressSize(MapStatus.compressSize(0L))
-    val size1000 = MapStatus.decompressSize(MapStatus.compressSize(1000L))
-    val size10000 = MapStatus.decompressSize(MapStatus.compressSize(10000L))
-    tracker.registerMapOutput(10, 0, MapStatus(BlockManagerId("a", "hostA", 1000),
-      Array(size0, size1000, size0, size10000)))
-    tracker.registerMapOutput(10, 1, MapStatus(BlockManagerId("b", "hostB", 1000),
-      Array(size10000, size0, size1000, size0)))
-    assert(tracker.containsShuffle(10))
-    assert(tracker.getMapSizesByExecutorId(10, 0, 4).toSeq ===
-        Seq(
-          (BlockManagerId("a", "hostA", 1000),
-              Seq((ShuffleBlockId(10, 0, 1), size1000), (ShuffleBlockId(10, 0, 3), size10000))),
-          (BlockManagerId("b", "hostB", 1000),
-              Seq((ShuffleBlockId(10, 1, 0), size10000), (ShuffleBlockId(10, 1, 2), size1000)))
-        )
-    )
-
-    tracker.unregisterShuffle(10)
-    tracker.stop()
-    rpcEnv.shutdown()
-  }
-
 }
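The removed zero-sized-blocks test above expects empty shuffle blocks to be dropped from the per-executor listing while non-empty ones keep their reduce indices. A toy sketch of that filtering step, with the block-id and tracker types simplified away:

    object NonEmptyBlocksSketch {
      // Keep only (reduceId, size) pairs whose size is non-zero, matching the removed
      // assertions that list blocks 1 and 3 for host A and blocks 0 and 2 for host B.
      def nonEmptyBlocks(sizes: Seq[Long]): Seq[(Int, Long)] =
        sizes.zipWithIndex.collect { case (size, reduceId) if size > 0 => (reduceId, size) }

      def main(args: Array[String]): Unit = {
        println(nonEmptyBlocks(Seq(0L, 1000L, 0L, 10000L)))   // List((1,1000), (3,10000))
        println(nonEmptyBlocks(Seq(10000L, 0L, 1000L, 0L)))   // List((0,10000), (2,1000))
      }
    }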
diff --git a/core/src/test/scala/org/apache/spark/SSLSampleConfigs.scala b/core/src/test/scala/org/apache/spark/SSLSampleConfigs.scala
new file mode 100644
index 0000000000000..33270bec6247c
--- /dev/null
+++ b/core/src/test/scala/org/apache/spark/SSLSampleConfigs.scala
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark
+
+import java.io.File
+
+object SSLSampleConfigs {
+  val keyStorePath = new File(this.getClass.getResource("/keystore").toURI).getAbsolutePath
+  val untrustedKeyStorePath = new File(
+    this.getClass.getResource("/untrusted-keystore").toURI).getAbsolutePath
+  val trustStorePath = new File(this.getClass.getResource("/truststore").toURI).getAbsolutePath
+
+  val enabledAlgorithms =
+    // A reasonable set of TLSv1.2 Oracle security provider suites
+    "TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384, " +
+    "TLS_RSA_WITH_AES_256_CBC_SHA256, " +
+    "TLS_DHE_RSA_WITH_AES_256_CBC_SHA256, " +
+    "TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256, " +
+    "TLS_DHE_RSA_WITH_AES_128_CBC_SHA256, " +
+    // and their equivalent names in the IBM Security provider
+    "SSL_ECDHE_RSA_WITH_AES_256_CBC_SHA384, " +
+    "SSL_RSA_WITH_AES_256_CBC_SHA256, " +
+    "SSL_DHE_RSA_WITH_AES_256_CBC_SHA256, " +
+    "SSL_ECDHE_RSA_WITH_AES_128_CBC_SHA256, " +
+    "SSL_DHE_RSA_WITH_AES_128_CBC_SHA256"
+
+  def sparkSSLConfig(): SparkConf = {
+    val conf = new SparkConf(loadDefaults = false)
+    conf.set("spark.ssl.enabled", "true")
+    conf.set("spark.ssl.keyStore", keyStorePath)
+    conf.set("spark.ssl.keyStorePassword", "password")
+    conf.set("spark.ssl.keyPassword", "password")
+    conf.set("spark.ssl.trustStore", trustStorePath)
+    conf.set("spark.ssl.trustStorePassword", "password")
+    conf.set("spark.ssl.enabledAlgorithms", enabledAlgorithms)
+    conf.set("spark.ssl.protocol", "TLSv1.2")
+    conf
+  }
+
+  def sparkSSLConfigUntrusted(): SparkConf = {
+    val conf = new SparkConf(loadDefaults = false)
+    conf.set("spark.ssl.enabled", "true")
+    conf.set("spark.ssl.keyStore", untrustedKeyStorePath)
+    conf.set("spark.ssl.keyStorePassword", "password")
+    conf.set("spark.ssl.keyPassword", "password")
+    conf.set("spark.ssl.trustStore", trustStorePath)
+    conf.set("spark.ssl.trustStorePassword", "password")
+    conf.set("spark.ssl.enabledAlgorithms", enabledAlgorithms)
+    conf.set("spark.ssl.protocol", "TLSv1.2")
+    conf
+  }
+
+}
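The restored SSLSampleConfigs object above is consumed by the SSL tests further down. A hedged usage sketch follows; it assumes the keystore/truststore test resources that the object resolves at class-load time are on the classpath.

    import org.apache.spark.{SSLSampleConfigs, SparkConf}

    object SslSampleConfigSketch {
      def main(args: Array[String]): Unit = {
        // Build the sample SSL-enabled configuration and read a couple of values back.
        val conf: SparkConf = SSLSampleConfigs.sparkSSLConfig()
        println(conf.get("spark.ssl.enabled"))    // true
        println(conf.get("spark.ssl.protocol"))   // TLSv1.2
      }
    }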
diff --git a/core/src/test/scala/org/apache/spark/SecurityManagerSuite.scala b/core/src/test/scala/org/apache/spark/SecurityManagerSuite.scala
index e357299770a2e..cf59265dd646d 100644
--- a/core/src/test/scala/org/apache/spark/SecurityManagerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/SecurityManagerSuite.scala
@@ -370,6 +370,51 @@ class SecurityManagerSuite extends SparkFunSuite with ResetSystemProperties {
     assert(securityManager.checkModifyPermissions("user1") === false)
   }
 
+  test("ssl on setup") {
+    val conf = SSLSampleConfigs.sparkSSLConfig()
+    val expectedAlgorithms = Set(
+    "TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384",
+    "TLS_RSA_WITH_AES_256_CBC_SHA256",
+    "TLS_DHE_RSA_WITH_AES_256_CBC_SHA256",
+    "TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256",
+    "TLS_DHE_RSA_WITH_AES_128_CBC_SHA256",
+    "SSL_ECDHE_RSA_WITH_AES_256_CBC_SHA384",
+    "SSL_RSA_WITH_AES_256_CBC_SHA256",
+    "SSL_DHE_RSA_WITH_AES_256_CBC_SHA256",
+    "SSL_ECDHE_RSA_WITH_AES_128_CBC_SHA256",
+    "SSL_DHE_RSA_WITH_AES_128_CBC_SHA256")
+
+    val securityManager = new SecurityManager(conf)
+
+    assert(securityManager.fileServerSSLOptions.enabled === true)
+
+    assert(securityManager.sslSocketFactory.isDefined === true)
+    assert(securityManager.hostnameVerifier.isDefined === true)
+
+    assert(securityManager.fileServerSSLOptions.trustStore.isDefined === true)
+    assert(securityManager.fileServerSSLOptions.trustStore.get.getName === "truststore")
+    assert(securityManager.fileServerSSLOptions.keyStore.isDefined === true)
+    assert(securityManager.fileServerSSLOptions.keyStore.get.getName === "keystore")
+    assert(securityManager.fileServerSSLOptions.trustStorePassword === Some("password"))
+    assert(securityManager.fileServerSSLOptions.keyStorePassword === Some("password"))
+    assert(securityManager.fileServerSSLOptions.keyPassword === Some("password"))
+    assert(securityManager.fileServerSSLOptions.protocol === Some("TLSv1.2"))
+    assert(securityManager.fileServerSSLOptions.enabledAlgorithms === expectedAlgorithms)
+  }
+
+  test("ssl off setup") {
+    val file = File.createTempFile("SSLOptionsSuite", "conf", Utils.createTempDir())
+
+    System.setProperty("spark.ssl.configFile", file.getAbsolutePath)
+    val conf = new SparkConf()
+
+    val securityManager = new SecurityManager(conf)
+
+    assert(securityManager.fileServerSSLOptions.enabled === false)
+    assert(securityManager.sslSocketFactory.isDefined === false)
+    assert(securityManager.hostnameVerifier.isDefined === false)
+  }
+
   test("missing secret authentication key") {
     val conf = new SparkConf().set("spark.authenticate", "true")
     val mgr = new SecurityManager(conf)
@@ -395,41 +440,23 @@ class SecurityManagerSuite extends SparkFunSuite with ResetSystemProperties {
     assert(keyFromEnv === new SecurityManager(conf2).getSecretKey())
   }
 
-  test("secret key generation") {
-    Seq(
-      ("yarn", true),
-      ("local", true),
-      ("local[*]", true),
-      ("local[1, 2]", true),
-      ("local-cluster[2, 1, 1024]", false),
-      ("invalid", false)
-    ).foreach { case (master, shouldGenerateSecret) =>
-      val conf = new SparkConf()
-        .set(NETWORK_AUTH_ENABLED, true)
-        .set(SparkLauncher.SPARK_MASTER, master)
-      val mgr = new SecurityManager(conf)
-
-      UserGroupInformation.createUserForTesting("authTest", Array()).doAs(
-        new PrivilegedExceptionAction[Unit]() {
-          override def run(): Unit = {
-            if (shouldGenerateSecret) {
-              mgr.initializeAuth()
-              val creds = UserGroupInformation.getCurrentUser().getCredentials()
-              val secret = creds.getSecretKey(SecurityManager.SECRET_LOOKUP_KEY)
-              assert(secret != null)
-              assert(new String(secret, UTF_8) === mgr.getSecretKey())
-            } else {
-              intercept[IllegalArgumentException] {
-                mgr.initializeAuth()
-              }
-              intercept[IllegalArgumentException] {
-                mgr.getSecretKey()
-              }
-            }
-          }
+  test("secret key generation in yarn mode") {
+    val conf = new SparkConf()
+      .set(NETWORK_AUTH_ENABLED, true)
+      .set(SparkLauncher.SPARK_MASTER, "yarn")
+    val mgr = new SecurityManager(conf)
+
+    UserGroupInformation.createUserForTesting("authTest", Array()).doAs(
+      new PrivilegedExceptionAction[Unit]() {
+        override def run(): Unit = {
+          mgr.initializeAuth()
+          val creds = UserGroupInformation.getCurrentUser().getCredentials()
+          val secret = creds.getSecretKey(SecurityManager.SECRET_LOOKUP_KEY)
+          assert(secret != null)
+          assert(new String(secret, UTF_8) === mgr.getSecretKey())
         }
-      )
-    }
+      }
+    )
   }
 
 }
diff --git a/core/src/test/scala/org/apache/spark/SparkConfSuite.scala b/core/src/test/scala/org/apache/spark/SparkConfSuite.scala
index 0d06b02e74e34..bff808eb540ac 100644
--- a/core/src/test/scala/org/apache/spark/SparkConfSuite.scala
+++ b/core/src/test/scala/org/apache/spark/SparkConfSuite.scala
@@ -339,38 +339,6 @@ class SparkConfSuite extends SparkFunSuite with LocalSparkContext with ResetSyst
     }
   }
 
-  val defaultIllegalValue = "SomeIllegalValue"
-  val illegalValueTests : Map[String, (SparkConf, String) => Any] = Map(
-    "getTimeAsSeconds" -> (_.getTimeAsSeconds(_)),
-    "getTimeAsSeconds with default" -> (_.getTimeAsSeconds(_, defaultIllegalValue)),
-    "getTimeAsMs" -> (_.getTimeAsMs(_)),
-    "getTimeAsMs with default" -> (_.getTimeAsMs(_, defaultIllegalValue)),
-    "getSizeAsBytes" -> (_.getSizeAsBytes(_)),
-    "getSizeAsBytes with default string" -> (_.getSizeAsBytes(_, defaultIllegalValue)),
-    "getSizeAsBytes with default long" -> (_.getSizeAsBytes(_, 0L)),
-    "getSizeAsKb" -> (_.getSizeAsKb(_)),
-    "getSizeAsKb with default" -> (_.getSizeAsKb(_, defaultIllegalValue)),
-    "getSizeAsMb" -> (_.getSizeAsMb(_)),
-    "getSizeAsMb with default" -> (_.getSizeAsMb(_, defaultIllegalValue)),
-    "getSizeAsGb" -> (_.getSizeAsGb(_)),
-    "getSizeAsGb with default" -> (_.getSizeAsGb(_, defaultIllegalValue)),
-    "getInt" -> (_.getInt(_, 0)),
-    "getLong" -> (_.getLong(_, 0L)),
-    "getDouble" -> (_.getDouble(_, 0.0)),
-    "getBoolean" -> (_.getBoolean(_, false))
-  )
-
-  illegalValueTests.foreach { case (name, getValue) =>
-    test(s"SPARK-24337: $name throws an useful error message with key name") {
-      val key = "SomeKey"
-      val conf = new SparkConf()
-      conf.set(key, "SomeInvalidValue")
-      val thrown = intercept[IllegalArgumentException] {
-        getValue(conf, key)
-      }
-      assert(thrown.getMessage.contains(key))
-    }
-  }
 }
 
 class Class1 {}
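The removed SPARK-24337 tests above check a single property: when a config value cannot be parsed, the resulting IllegalArgumentException message must mention the key. A generic sketch of that error-wrapping pattern follows; the parser below is a stand-in, not SparkConf's actual time/size parsing.

    object ConfErrorSketch {
      // Wrap the parse failure so the offending key name appears in the message,
      // which is what the removed assertions look for.
      def getLong(settings: Map[String, String], key: String, default: Long): Long =
        settings.get(key).map { raw =>
          try raw.trim.toLong catch {
            case e: NumberFormatException =>
              throw new IllegalArgumentException(
                s"Illegal value for config key $key: ${e.getMessage}", e)
          }
        }.getOrElse(default)

      def main(args: Array[String]): Unit = {
        val settings = Map("SomeKey" -> "SomeInvalidValue")
        try getLong(settings, "SomeKey", 0L) catch {
          case e: IllegalArgumentException => println(e.getMessage)  // mentions SomeKey
        }
      }
    }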
diff --git a/core/src/test/scala/org/apache/spark/SparkContextSuite.scala b/core/src/test/scala/org/apache/spark/SparkContextSuite.scala
index ce9f2be1c02dd..b30bd74812b36 100644
--- a/core/src/test/scala/org/apache/spark/SparkContextSuite.scala
+++ b/core/src/test/scala/org/apache/spark/SparkContextSuite.scala
@@ -20,7 +20,7 @@ package org.apache.spark
 import java.io.File
 import java.net.{MalformedURLException, URI}
 import java.nio.charset.StandardCharsets
-import java.util.concurrent.{CountDownLatch, Semaphore, TimeUnit}
+import java.util.concurrent.{Semaphore, TimeUnit}
 
 import scala.concurrent.duration._
 
@@ -498,36 +498,45 @@ class SparkContextSuite extends SparkFunSuite with LocalSparkContext with Eventu
 
   test("Cancelling stages/jobs with custom reasons.") {
     sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local"))
-    sc.setLocalProperty(SparkContext.SPARK_JOB_INTERRUPT_ON_CANCEL, "true")
     val REASON = "You shall not pass"
+    val slices = 10
 
-    for (cancelWhat <- Seq("stage", "job")) {
-      // This countdown latch is used to make sure the stage or job is cancelled in the listener
-      val latch = new CountDownLatch(1)
-
-      val listener = cancelWhat match {
-        case "stage" =>
-          new SparkListener {
-            override def onTaskStart(taskStart: SparkListenerTaskStart): Unit = {
-              sc.cancelStage(taskStart.stageId, REASON)
-              latch.countDown()
-            }
+    val listener = new SparkListener {
+      override def onTaskStart(taskStart: SparkListenerTaskStart): Unit = {
+        if (SparkContextSuite.cancelStage) {
+          eventually(timeout(10.seconds)) {
+            assert(SparkContextSuite.isTaskStarted)
           }
-        case "job" =>
-          new SparkListener {
-            override def onJobStart(jobStart: SparkListenerJobStart): Unit = {
-              sc.cancelJob(jobStart.jobId, REASON)
-              latch.countDown()
-            }
+          sc.cancelStage(taskStart.stageId, REASON)
+          SparkContextSuite.cancelStage = false
+          SparkContextSuite.semaphore.release(slices)
+        }
+      }
+
+      override def onJobStart(jobStart: SparkListenerJobStart): Unit = {
+        if (SparkContextSuite.cancelJob) {
+          eventually(timeout(10.seconds)) {
+            assert(SparkContextSuite.isTaskStarted)
           }
+          sc.cancelJob(jobStart.jobId, REASON)
+          SparkContextSuite.cancelJob = false
+          SparkContextSuite.semaphore.release(slices)
+        }
       }
-      sc.addSparkListener(listener)
+    }
+    sc.addSparkListener(listener)
+
+    for (cancelWhat <- Seq("stage", "job")) {
+      SparkContextSuite.semaphore.drainPermits()
+      SparkContextSuite.isTaskStarted = false
+      SparkContextSuite.cancelStage = (cancelWhat == "stage")
+      SparkContextSuite.cancelJob = (cancelWhat == "job")
 
       val ex = intercept[SparkException] {
-        sc.range(0, 10000L, numSlices = 10).mapPartitions { x =>
-          x.synchronized {
-            x.wait()
-          }
+        sc.range(0, 10000L, numSlices = slices).mapPartitions { x =>
+          SparkContextSuite.isTaskStarted = true
+          // Block waiting for the listener to cancel the stage or job.
+          SparkContextSuite.semaphore.acquire()
           x
         }.count()
       }
@@ -541,11 +550,9 @@ class SparkContextSuite extends SparkFunSuite with LocalSparkContext with Eventu
           fail("Expected the cause to be SparkException, got " + cause.toString() + " instead.")
       }
 
-      latch.await(20, TimeUnit.SECONDS)
       eventually(timeout(20.seconds)) {
         assert(sc.statusTracker.getExecutorInfos.map(_.numRunningTasks()).sum == 0)
       }
-      sc.removeSparkListener(listener)
     }
   }
 
@@ -630,6 +637,8 @@ class SparkContextSuite extends SparkFunSuite with LocalSparkContext with Eventu
 }
 
 object SparkContextSuite {
+  @volatile var cancelJob = false
+  @volatile var cancelStage = false
   @volatile var isTaskStarted = false
   @volatile var taskKilled = false
   @volatile var taskSucceeded = false
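
On the coordination pattern being reverted in SparkContextSuite above: the removed variant parks the task on a monitor and has the listener signal "cancel issued" through a CountDownLatch, instead of the shared Semaphore plus volatile flags that the restored code uses. A self-contained, plain-JVM sketch of that latch hand-off; `worker` and `canceller` are hypothetical stand-ins, with no Spark involved:

// Plain-JVM sketch of the latch hand-off used by the removed test; `worker` and
// `canceller` are hypothetical stand-ins for the Spark task and the SparkListener.
import java.util.concurrent.{CountDownLatch, TimeUnit}

object LatchHandOffSketch {
  def main(args: Array[String]): Unit = {
    val cancelIssued = new CountDownLatch(1)
    val lock = new Object

    val worker = new Thread(() => lock.synchronized {
      // Park here, like the task's `x.wait()`, until the cancel arrives.
      try lock.wait() catch { case _: InterruptedException => () }
    })
    worker.start()

    val canceller = new Thread(() => {
      worker.interrupt()       // stands in for sc.cancelStage(...) / sc.cancelJob(...)
      cancelIssued.countDown() // tell the driver side the cancel has been requested
    })
    canceller.start()

    // Like `latch.await(20, TimeUnit.SECONDS)` in the removed test: do not tear the
    // listener down until the cancel has actually been issued.
    assert(cancelIssued.await(20, TimeUnit.SECONDS), "cancel was never issued")
    worker.join(5000)
    println(s"worker still alive after cancel: ${worker.isAlive}")
  }
}
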
diff --git a/core/src/test/scala/org/apache/spark/SparkFunSuite.scala b/core/src/test/scala/org/apache/spark/SparkFunSuite.scala
index 31289026b0027..3af9d82393bc4 100644
--- a/core/src/test/scala/org/apache/spark/SparkFunSuite.scala
+++ b/core/src/test/scala/org/apache/spark/SparkFunSuite.scala
@@ -59,7 +59,6 @@ abstract class SparkFunSuite
   protected val enableAutoThreadAudit = true
 
   protected override def beforeAll(): Unit = {
-    System.setProperty("spark.testing", "true")
     if (enableAutoThreadAudit) {
       doThreadPreAudit()
     }
diff --git a/core/src/test/scala/org/apache/spark/deploy/RPackageUtilsSuite.scala b/core/src/test/scala/org/apache/spark/deploy/RPackageUtilsSuite.scala
index ef947eb074647..32dd3ecc2f027 100644
--- a/core/src/test/scala/org/apache/spark/deploy/RPackageUtilsSuite.scala
+++ b/core/src/test/scala/org/apache/spark/deploy/RPackageUtilsSuite.scala
@@ -66,6 +66,7 @@ class RPackageUtilsSuite
 
   override def beforeEach(): Unit = {
     super.beforeEach()
+    System.setProperty("spark.testing", "true")
     lineBuffer.clear()
   }
 
diff --git a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala
index 545c8d0423dc3..27dd435332348 100644
--- a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala
+++ b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala
@@ -20,7 +20,7 @@ package org.apache.spark.deploy
 import java.io._
 import java.net.URI
 import java.nio.charset.StandardCharsets
-import java.nio.file.{Files, Paths}
+import java.nio.file.Files
 
 import scala.collection.mutable
 import scala.collection.mutable.ArrayBuffer
@@ -35,14 +35,12 @@ import org.scalatest.concurrent.{Signaler, ThreadSignaler, TimeLimits}
 import org.scalatest.time.SpanSugar._
 
 import org.apache.spark._
-import org.apache.spark.TestUtils
 import org.apache.spark.TestUtils.JavaSourceFromString
 import org.apache.spark.api.r.RUtils
 import org.apache.spark.deploy.SparkSubmit._
 import org.apache.spark.deploy.SparkSubmitUtils.MavenCoordinate
 import org.apache.spark.internal.Logging
 import org.apache.spark.internal.config._
-import org.apache.spark.launcher.SparkLauncher
 import org.apache.spark.scheduler.EventLoggingListener
 import org.apache.spark.util.{CommandLineUtils, ResetSystemProperties, Utils}
 
@@ -107,13 +105,9 @@ class SparkSubmitSuite
   // Necessary to make ScalaTest 3.x interrupt a thread on the JVM like ScalaTest 2.2.x
   implicit val defaultSignaler: Signaler = ThreadSignaler
 
-  private val emptyIvySettings = File.createTempFile("ivy", ".xml")
-  FileUtils.write(emptyIvySettings, "<ivysettings />", StandardCharsets.UTF_8)
-
-  private val submit = new SparkSubmit()
-
   override def beforeEach() {
     super.beforeEach()
+    System.setProperty("spark.testing", "true")
   }
 
   // scalastyle:off println
@@ -131,16 +125,13 @@ class SparkSubmitSuite
   }
 
   test("handle binary specified but not class") {
-    val jar = TestUtils.createJarWithClasses(Seq("SparkSubmitClassA"))
-    testPrematureExit(Array(jar.toString()), "No main class")
+    testPrematureExit(Array("foo.jar"), "No main class")
   }
 
   test("handles arguments with --key=val") {
     val clArgs = Seq(
       "--jars=one.jar,two.jar,three.jar",
-      "--name=myApp",
-      "--class=org.FooBar",
-      SparkLauncher.NO_RESOURCE)
+      "--name=myApp")
     val appArgs = new SparkSubmitArguments(clArgs)
     appArgs.jars should include regex (".*one.jar,.*two.jar,.*three.jar")
     appArgs.name should be ("myApp")
@@ -180,26 +171,6 @@ class SparkSubmitSuite
     appArgs.toString should include ("thequeue")
   }
 
-  test("SPARK-24241: do not fail fast if executor num is 0 when dynamic allocation is enabled") {
-    val clArgs1 = Seq(
-      "--name", "myApp",
-      "--class", "Foo",
-      "--num-executors", "0",
-      "--conf", "spark.dynamicAllocation.enabled=true",
-      "thejar.jar")
-    new SparkSubmitArguments(clArgs1)
-
-    val clArgs2 = Seq(
-      "--name", "myApp",
-      "--class", "Foo",
-      "--num-executors", "0",
-      "--conf", "spark.dynamicAllocation.enabled=false",
-      "thejar.jar")
-
-    val e = intercept[SparkException](new SparkSubmitArguments(clArgs2))
-    assert(e.getMessage.contains("Number of executors must be a positive number"))
-  }
-
   test("specify deploy mode through configuration") {
     val clArgs = Seq(
       "--master", "yarn",
@@ -208,7 +179,7 @@ class SparkSubmitSuite
       "thejar.jar"
     )
     val appArgs = new SparkSubmitArguments(clArgs)
-    val (_, _, conf, _) = submit.prepareSubmitEnvironment(appArgs)
+    val (_, _, conf, _) = prepareSubmitEnvironment(appArgs)
 
     appArgs.deployMode should be ("client")
     conf.get("spark.submit.deployMode") should be ("client")
@@ -218,11 +189,11 @@ class SparkSubmitSuite
       "--master", "yarn",
       "--deploy-mode", "cluster",
       "--conf", "spark.submit.deployMode=client",
-      "--class", "org.SomeClass",
+      "-class", "org.SomeClass",
       "thejar.jar"
     )
     val appArgs1 = new SparkSubmitArguments(clArgs1)
-    val (_, _, conf1, _) = submit.prepareSubmitEnvironment(appArgs1)
+    val (_, _, conf1, _) = prepareSubmitEnvironment(appArgs1)
 
     appArgs1.deployMode should be ("cluster")
     conf1.get("spark.submit.deployMode") should be ("cluster")
@@ -236,7 +207,7 @@ class SparkSubmitSuite
     val appArgs2 = new SparkSubmitArguments(clArgs2)
     appArgs2.deployMode should be (null)
 
-    val (_, _, conf2, _) = submit.prepareSubmitEnvironment(appArgs2)
+    val (_, _, conf2, _) = prepareSubmitEnvironment(appArgs2)
     appArgs2.deployMode should be ("client")
     conf2.get("spark.submit.deployMode") should be ("client")
   }
@@ -259,7 +230,7 @@ class SparkSubmitSuite
       "thejar.jar",
       "arg1", "arg2")
     val appArgs = new SparkSubmitArguments(clArgs)
-    val (childArgs, classpath, conf, mainClass) = submit.prepareSubmitEnvironment(appArgs)
+    val (childArgs, classpath, conf, mainClass) = prepareSubmitEnvironment(appArgs)
     val childArgsStr = childArgs.mkString(" ")
     childArgsStr should include ("--class org.SomeClass")
     childArgsStr should include ("--arg arg1 --arg arg2")
@@ -302,7 +273,7 @@ class SparkSubmitSuite
       "thejar.jar",
       "arg1", "arg2")
     val appArgs = new SparkSubmitArguments(clArgs)
-    val (childArgs, classpath, conf, mainClass) = submit.prepareSubmitEnvironment(appArgs)
+    val (childArgs, classpath, conf, mainClass) = prepareSubmitEnvironment(appArgs)
     childArgs.mkString(" ") should be ("arg1 arg2")
     mainClass should be ("org.SomeClass")
     classpath should have length (4)
@@ -348,7 +319,7 @@ class SparkSubmitSuite
       "arg1", "arg2")
     val appArgs = new SparkSubmitArguments(clArgs)
     appArgs.useRest = useRest
-    val (childArgs, classpath, conf, mainClass) = submit.prepareSubmitEnvironment(appArgs)
+    val (childArgs, classpath, conf, mainClass) = prepareSubmitEnvironment(appArgs)
     val childArgsStr = childArgs.mkString(" ")
     if (useRest) {
       childArgsStr should endWith ("thejar.jar org.SomeClass arg1 arg2")
@@ -385,7 +356,7 @@ class SparkSubmitSuite
       "thejar.jar",
       "arg1", "arg2")
     val appArgs = new SparkSubmitArguments(clArgs)
-    val (childArgs, classpath, conf, mainClass) = submit.prepareSubmitEnvironment(appArgs)
+    val (childArgs, classpath, conf, mainClass) = prepareSubmitEnvironment(appArgs)
     childArgs.mkString(" ") should be ("arg1 arg2")
     mainClass should be ("org.SomeClass")
     classpath should have length (1)
@@ -407,7 +378,7 @@ class SparkSubmitSuite
       "thejar.jar",
       "arg1", "arg2")
     val appArgs = new SparkSubmitArguments(clArgs)
-    val (childArgs, classpath, conf, mainClass) = submit.prepareSubmitEnvironment(appArgs)
+    val (childArgs, classpath, conf, mainClass) = prepareSubmitEnvironment(appArgs)
     childArgs.mkString(" ") should be ("arg1 arg2")
     mainClass should be ("org.SomeClass")
     classpath should have length (1)
@@ -429,7 +400,7 @@ class SparkSubmitSuite
       "/home/thejar.jar",
       "arg1")
     val appArgs = new SparkSubmitArguments(clArgs)
-    val (childArgs, classpath, conf, mainClass) = submit.prepareSubmitEnvironment(appArgs)
+    val (childArgs, classpath, conf, mainClass) = prepareSubmitEnvironment(appArgs)
 
     val childArgsMap = childArgs.grouped(2).map(a => a(0) -> a(1)).toMap
     childArgsMap.get("--primary-java-resource") should be (Some("file:/home/thejar.jar"))
@@ -454,7 +425,7 @@ class SparkSubmitSuite
       "thejar.jar",
       "arg1", "arg2")
     val appArgs = new SparkSubmitArguments(clArgs)
-    val (_, _, conf, mainClass) = submit.prepareSubmitEnvironment(appArgs)
+    val (_, _, conf, mainClass) = prepareSubmitEnvironment(appArgs)
     conf.get("spark.executor.memory") should be ("5g")
     conf.get("spark.master") should be ("yarn")
     conf.get("spark.submit.deployMode") should be ("cluster")
@@ -467,12 +438,12 @@ class SparkSubmitSuite
 
     val clArgs1 = Seq("--class", "org.apache.spark.repl.Main", "spark-shell")
     val appArgs1 = new SparkSubmitArguments(clArgs1)
-    val (_, _, conf1, _) = submit.prepareSubmitEnvironment(appArgs1)
+    val (_, _, conf1, _) = prepareSubmitEnvironment(appArgs1)
     conf1.get(UI_SHOW_CONSOLE_PROGRESS) should be (true)
 
     val clArgs2 = Seq("--class", "org.SomeClass", "thejar.jar")
     val appArgs2 = new SparkSubmitArguments(clArgs2)
-    val (_, _, conf2, _) = submit.prepareSubmitEnvironment(appArgs2)
+    val (_, _, conf2, _) = prepareSubmitEnvironment(appArgs2)
     assert(!conf2.contains(UI_SHOW_CONSOLE_PROGRESS))
   }
 
@@ -549,7 +520,6 @@ class SparkSubmitSuite
         "--repositories", repo,
         "--conf", "spark.ui.enabled=false",
         "--conf", "spark.master.rest.enabled=false",
-        "--conf", s"spark.jars.ivySettings=${emptyIvySettings.getAbsolutePath()}",
         unusedJar.toString,
         "my.great.lib.MyLib", "my.great.dep.MyLib")
       runSparkSubmit(args)
@@ -560,6 +530,7 @@ class SparkSubmitSuite
     val unusedJar = TestUtils.createJarWithClasses(Seq.empty)
     val main = MavenCoordinate("my.great.lib", "mylib", "0.1")
     val dep = MavenCoordinate("my.great.dep", "mylib", "0.1")
+    // Test using "spark.jars.packages" and "spark.jars.repositories" configurations.
     IvyTestUtils.withRepository(main, Some(dep.toString), None) { repo =>
       val args = Seq(
         "--class", JarCreationTest.getClass.getName.stripSuffix("$"),
@@ -569,7 +540,6 @@ class SparkSubmitSuite
         "--conf", s"spark.jars.repositories=$repo",
         "--conf", "spark.ui.enabled=false",
         "--conf", "spark.master.rest.enabled=false",
-        "--conf", s"spark.jars.ivySettings=${emptyIvySettings.getAbsolutePath()}",
         unusedJar.toString,
         "my.great.lib.MyLib", "my.great.dep.MyLib")
       runSparkSubmit(args)
@@ -580,6 +550,7 @@ class SparkSubmitSuite
   // See https://gist.github.com/shivaram/3a2fecce60768a603dac for a error log
   ignore("correctly builds R packages included in a jar with --packages") {
     assume(RUtils.isRInstalled, "R isn't installed on this machine.")
+    // Check if the SparkR package is installed
     assume(RUtils.isSparkRInstalled, "SparkR is not installed in this build.")
     val main = MavenCoordinate("my.great.lib", "mylib", "0.1")
     val sparkHome = sys.props.getOrElse("spark.test.home", fail("spark.test.home is not set!"))
@@ -592,7 +563,6 @@ class SparkSubmitSuite
         "--master", "local-cluster[2,1,1024]",
         "--packages", main.toString,
         "--repositories", repo,
-        "--conf", s"spark.jars.ivySettings=${emptyIvySettings.getAbsolutePath()}",
         "--verbose",
         "--conf", "spark.ui.enabled=false",
         rScriptDir)
@@ -603,6 +573,7 @@ class SparkSubmitSuite
   test("include an external JAR in SparkR") {
     assume(RUtils.isRInstalled, "R isn't installed on this machine.")
     val sparkHome = sys.props.getOrElse("spark.test.home", fail("spark.test.home is not set!"))
+    // Check if the SparkR package is installed
     assume(RUtils.isSparkRInstalled, "SparkR is not installed in this build.")
     val rScriptDir =
       Seq(sparkHome, "R", "pkg", "tests", "fulltests", "jarTest.R").mkString(File.separator)
@@ -635,13 +606,10 @@ class SparkSubmitSuite
   }
 
   test("resolves command line argument paths correctly") {
-    val dir = Utils.createTempDir()
-    val archive = Paths.get(dir.toPath.toString, "single.zip")
-    Files.createFile(archive)
-    val jars = "/jar1,/jar2"
-    val files = "local:/file1,file2"
-    val archives = s"file:/archive1,${dir.toPath.toAbsolutePath.toString}/*.zip#archive3"
-    val pyFiles = "py-file1,py-file2"
+    val jars = "/jar1,/jar2"                 // --jars
+    val files = "local:/file1,file2"          // --files
+    val archives = "file:/archive1,archive2" // --archives
+    val pyFiles = "py-file1,py-file2"        // --py-files
 
     // Test jars and files
     val clArgs = Seq(
@@ -651,7 +619,7 @@ class SparkSubmitSuite
       "--files", files,
       "thejar.jar")
     val appArgs = new SparkSubmitArguments(clArgs)
-    val (_, _, conf, _) = submit.prepareSubmitEnvironment(appArgs)
+    val (_, _, conf, _) = SparkSubmit.prepareSubmitEnvironment(appArgs)
     appArgs.jars should be (Utils.resolveURIs(jars))
     appArgs.files should be (Utils.resolveURIs(files))
     conf.get("spark.jars") should be (Utils.resolveURIs(jars + ",thejar.jar"))
@@ -666,12 +634,11 @@ class SparkSubmitSuite
       "thejar.jar"
     )
     val appArgs2 = new SparkSubmitArguments(clArgs2)
-    val (_, _, conf2, _) = submit.prepareSubmitEnvironment(appArgs2)
+    val (_, _, conf2, _) = SparkSubmit.prepareSubmitEnvironment(appArgs2)
     appArgs2.files should be (Utils.resolveURIs(files))
-    appArgs2.archives should fullyMatch regex ("file:/archive1,file:.*#archive3")
+    appArgs2.archives should be (Utils.resolveURIs(archives))
     conf2.get("spark.yarn.dist.files") should be (Utils.resolveURIs(files))
-    conf2.get("spark.yarn.dist.archives") should fullyMatch regex
-      ("file:/archive1,file:.*#archive3")
+    conf2.get("spark.yarn.dist.archives") should be (Utils.resolveURIs(archives))
 
     // Test python files
     val clArgs3 = Seq(
@@ -682,7 +649,7 @@ class SparkSubmitSuite
       "mister.py"
     )
     val appArgs3 = new SparkSubmitArguments(clArgs3)
-    val (_, _, conf3, _) = submit.prepareSubmitEnvironment(appArgs3)
+    val (_, _, conf3, _) = SparkSubmit.prepareSubmitEnvironment(appArgs3)
     appArgs3.pyFiles should be (Utils.resolveURIs(pyFiles))
     conf3.get("spark.submit.pyFiles") should be (
       PythonRunner.formatPaths(Utils.resolveURIs(pyFiles)).mkString(","))
@@ -690,29 +657,6 @@ class SparkSubmitSuite
     conf3.get(PYSPARK_PYTHON.key) should be ("python3.5")
   }
 
-  test("ambiguous archive mapping results in error message") {
-    val dir = Utils.createTempDir()
-    val archive1 = Paths.get(dir.toPath.toString, "first.zip")
-    val archive2 = Paths.get(dir.toPath.toString, "second.zip")
-    Files.createFile(archive1)
-    Files.createFile(archive2)
-    val jars = "/jar1,/jar2"
-    val files = "local:/file1,file2"
-    val archives = s"file:/archive1,${dir.toPath.toAbsolutePath.toString}/*.zip#archive3"
-    val pyFiles = "py-file1,py-file2"
-
-    // Test files and archives (Yarn)
-    val clArgs2 = Seq(
-      "--master", "yarn",
-      "--class", "org.SomeClass",
-      "--files", files,
-      "--archives", archives,
-      "thejar.jar"
-    )
-
-    testPrematureExit(clArgs2.toArray, "resolves ambiguously to multiple files")
-  }
-
   test("resolves config paths correctly") {
     val jars = "/jar1,/jar2" // spark.jars
     val files = "local:/file1,file2" // spark.files / spark.yarn.dist.files
@@ -734,7 +678,7 @@ class SparkSubmitSuite
       "thejar.jar"
     )
     val appArgs = new SparkSubmitArguments(clArgs)
-    val (_, _, conf, _) = submit.prepareSubmitEnvironment(appArgs)
+    val (_, _, conf, _) = SparkSubmit.prepareSubmitEnvironment(appArgs)
     conf.get("spark.jars") should be(Utils.resolveURIs(jars + ",thejar.jar"))
     conf.get("spark.files") should be(Utils.resolveURIs(files))
 
@@ -751,7 +695,7 @@ class SparkSubmitSuite
       "thejar.jar"
     )
     val appArgs2 = new SparkSubmitArguments(clArgs2)
-    val (_, _, conf2, _) = submit.prepareSubmitEnvironment(appArgs2)
+    val (_, _, conf2, _) = SparkSubmit.prepareSubmitEnvironment(appArgs2)
     conf2.get("spark.yarn.dist.files") should be(Utils.resolveURIs(files))
     conf2.get("spark.yarn.dist.archives") should be(Utils.resolveURIs(archives))
 
@@ -766,18 +710,14 @@ class SparkSubmitSuite
       "mister.py"
     )
     val appArgs3 = new SparkSubmitArguments(clArgs3)
-    val (_, _, conf3, _) = submit.prepareSubmitEnvironment(appArgs3)
+    val (_, _, conf3, _) = SparkSubmit.prepareSubmitEnvironment(appArgs3)
     conf3.get("spark.submit.pyFiles") should be(
       PythonRunner.formatPaths(Utils.resolveURIs(pyFiles)).mkString(","))
 
     // Test remote python files
-    val hadoopConf = new Configuration()
-    updateConfWithFakeS3Fs(hadoopConf)
     val f4 = File.createTempFile("test-submit-remote-python-files", "", tmpDir)
-    val pyFile1 = File.createTempFile("file1", ".py", tmpDir)
-    val pyFile2 = File.createTempFile("file2", ".py", tmpDir)
     val writer4 = new PrintWriter(f4)
-    val remotePyFiles = s"s3a://${pyFile1.getAbsolutePath},s3a://${pyFile2.getAbsolutePath}"
+    val remotePyFiles = "hdfs:///tmp/file1.py,hdfs:///tmp/file2.py"
     writer4.println("spark.submit.pyFiles " + remotePyFiles)
     writer4.close()
     val clArgs4 = Seq(
@@ -787,7 +727,7 @@ class SparkSubmitSuite
       "hdfs:///tmp/mister.py"
     )
     val appArgs4 = new SparkSubmitArguments(clArgs4)
-    val (_, _, conf4, _) = submit.prepareSubmitEnvironment(appArgs4, conf = Some(hadoopConf))
+    val (_, _, conf4, _) = SparkSubmit.prepareSubmitEnvironment(appArgs4)
     // Should not format python path for yarn cluster mode
     conf4.get("spark.submit.pyFiles") should be(Utils.resolveURIs(remotePyFiles))
   }
@@ -808,20 +748,32 @@ class SparkSubmitSuite
   }
 
   test("SPARK_CONF_DIR overrides spark-defaults.conf") {
-    forConfDir(Map("spark.executor.memory" -> "3g")) { path =>
+    forConfDir(Map("spark.executor.memory" -> "2.3g")) { path =>
       val unusedJar = TestUtils.createJarWithClasses(Seq.empty)
       val args = Seq(
         "--class", SimpleApplicationTest.getClass.getName.stripSuffix("$"),
         "--name", "testApp",
         "--master", "local",
         unusedJar.toString)
-      val appArgs = new SparkSubmitArguments(args, env = Map("SPARK_CONF_DIR" -> path))
+      val appArgs = new SparkSubmitArguments(args, Map("SPARK_CONF_DIR" -> path))
       assert(appArgs.propertiesFile != null)
       assert(appArgs.propertiesFile.startsWith(path))
-      appArgs.executorMemory should be ("3g")
+      appArgs.executorMemory should be ("2.3g")
     }
   }
 
+  test("comma separated list of files are unioned correctly") {
+    val left = Option("/tmp/a.jar,/tmp/b.jar")
+    val right = Option("/tmp/c.jar,/tmp/a.jar")
+    val emptyString = Option("")
+    Utils.unionFileLists(left, right) should be (Set("/tmp/a.jar", "/tmp/b.jar", "/tmp/c.jar"))
+    Utils.unionFileLists(emptyString, emptyString) should be (Set.empty)
+    Utils.unionFileLists(Option("/tmp/a.jar"), emptyString) should be (Set("/tmp/a.jar"))
+    Utils.unionFileLists(emptyString, Option("/tmp/a.jar")) should be (Set("/tmp/a.jar"))
+    Utils.unionFileLists(None, Option("/tmp/a.jar")) should be (Set("/tmp/a.jar"))
+    Utils.unionFileLists(Option("/tmp/a.jar"), None) should be (Set("/tmp/a.jar"))
+  }
+
   test("support glob path") {
     val tmpJarDir = Utils.createTempDir()
     val jar1 = TestUtils.createJarWithFiles(Map("test.resource" -> "1"), tmpJarDir)
@@ -839,9 +791,6 @@ class SparkSubmitSuite
     val archive1 = File.createTempFile("archive1", ".zip", tmpArchiveDir)
     val archive2 = File.createTempFile("archive2", ".zip", tmpArchiveDir)
 
-    val tempPyFile = File.createTempFile("tmpApp", ".py")
-    tempPyFile.deleteOnExit()
-
     val args = Seq(
       "--class", UserClasspathFirstTest.getClass.getName.stripPrefix("$"),
       "--name", "testApp",
@@ -851,10 +800,10 @@ class SparkSubmitSuite
       "--files", s"${tmpFileDir.getAbsolutePath}/tmpFile*",
       "--py-files", s"${tmpPyFileDir.getAbsolutePath}/tmpPy*",
       "--archives", s"${tmpArchiveDir.getAbsolutePath}/*.zip",
-      tempPyFile.toURI().toString())
+      jar2.toString)
 
     val appArgs = new SparkSubmitArguments(args)
-    val (_, _, conf, _) = submit.prepareSubmitEnvironment(appArgs)
+    val (_, _, conf, _) = SparkSubmit.prepareSubmitEnvironment(appArgs)
     conf.get("spark.yarn.dist.jars").split(",").toSet should be
       (Set(jar1.toURI.toString, jar2.toURI.toString))
     conf.get("spark.yarn.dist.files").split(",").toSet should be
@@ -980,7 +929,7 @@ class SparkSubmitSuite
       )
 
     val appArgs = new SparkSubmitArguments(args)
-    val (_, _, conf, _) = submit.prepareSubmitEnvironment(appArgs, conf = Some(hadoopConf))
+    val (_, _, conf, _) = SparkSubmit.prepareSubmitEnvironment(appArgs, Some(hadoopConf))
 
     // All the resources should still be remote paths, so that YARN client will not upload again.
     conf.get("spark.yarn.dist.jars") should be (tmpJarPath)
@@ -995,28 +944,25 @@ class SparkSubmitSuite
   }
 
   test("download remote resource if it is not supported by yarn service") {
-    testRemoteResources(enableHttpFs = false, blacklistHttpFs = false)
+    testRemoteResources(isHttpSchemeBlacklisted = false, supportMockHttpFs = false)
   }
 
   test("avoid downloading remote resource if it is supported by yarn service") {
-    testRemoteResources(enableHttpFs = true, blacklistHttpFs = false)
+    testRemoteResources(isHttpSchemeBlacklisted = false, supportMockHttpFs = true)
   }
 
   test("force download from blacklisted schemes") {
-    testRemoteResources(enableHttpFs = true, blacklistHttpFs = true)
+    testRemoteResources(isHttpSchemeBlacklisted = true, supportMockHttpFs = true)
   }
 
-  private def testRemoteResources(
-      enableHttpFs: Boolean,
-      blacklistHttpFs: Boolean): Unit = {
+  private def testRemoteResources(isHttpSchemeBlacklisted: Boolean,
+      supportMockHttpFs: Boolean): Unit = {
     val hadoopConf = new Configuration()
     updateConfWithFakeS3Fs(hadoopConf)
-    if (enableHttpFs) {
+    if (supportMockHttpFs) {
       hadoopConf.set("fs.http.impl", classOf[TestFileSystem].getCanonicalName)
-    } else {
-      hadoopConf.set("fs.http.impl", getClass().getName() + ".DoesNotExist")
+      hadoopConf.set("fs.http.impl.disable.cache", "true")
     }
-    hadoopConf.set("fs.http.impl.disable.cache", "true")
 
     val tmpDir = Utils.createTempDir()
     val mainResource = File.createTempFile("tmpPy", ".py", tmpDir)
@@ -1025,29 +971,30 @@ class SparkSubmitSuite
     val tmpHttpJar = TestUtils.createJarWithFiles(Map("test.resource" -> "USER"), tmpDir)
     val tmpHttpJarPath = s"http://${new File(tmpHttpJar.toURI).getAbsolutePath}"
 
-    val forceDownloadArgs = if (blacklistHttpFs) {
-      Seq("--conf", "spark.yarn.dist.forceDownloadSchemes=http")
-    } else {
-      Nil
-    }
-
     val args = Seq(
       "--class", UserClasspathFirstTest.getClass.getName.stripPrefix("$"),
       "--name", "testApp",
       "--master", "yarn",
       "--deploy-mode", "client",
-      "--jars", s"$tmpS3JarPath,$tmpHttpJarPath"
-    ) ++ forceDownloadArgs ++ Seq(s"s3a://$mainResource")
+      "--jars", s"$tmpS3JarPath,$tmpHttpJarPath",
+      s"s3a://$mainResource"
+    ) ++ (
+      if (isHttpSchemeBlacklisted) {
+        Seq("--conf", "spark.yarn.dist.forceDownloadSchemes=http,https")
+      } else {
+        Nil
+      }
+    )
 
     val appArgs = new SparkSubmitArguments(args)
-    val (_, _, conf, _) = submit.prepareSubmitEnvironment(appArgs, conf = Some(hadoopConf))
+    val (_, _, conf, _) = SparkSubmit.prepareSubmitEnvironment(appArgs, Some(hadoopConf))
 
     val jars = conf.get("spark.yarn.dist.jars").split(",").toSet
 
     // The URI of remote S3 resource should still be remote.
     assert(jars.contains(tmpS3JarPath))
 
-    if (enableHttpFs && !blacklistHttpFs) {
+    if (supportMockHttpFs) {
       // If Http FS is supported by yarn service, the URI of remote http resource should
       // still be remote.
       assert(jars.contains(tmpHttpJarPath))
@@ -1091,50 +1038,11 @@ class SparkSubmitSuite
       "hello")
 
     val exception = intercept[SparkException] {
-      submit.doSubmit(args)
+      SparkSubmit.main(args)
     }
 
     assert(exception.getMessage() === "hello")
   }
-
-  test("support --py-files/spark.submit.pyFiles in non pyspark application") {
-    val hadoopConf = new Configuration()
-    updateConfWithFakeS3Fs(hadoopConf)
-
-    val tmpDir = Utils.createTempDir()
-    val pyFile = File.createTempFile("tmpPy", ".egg", tmpDir)
-
-    val args = Seq(
-      "--class", UserClasspathFirstTest.getClass.getName.stripPrefix("$"),
-      "--name", "testApp",
-      "--master", "yarn",
-      "--deploy-mode", "client",
-      "--py-files", s"s3a://${pyFile.getAbsolutePath}",
-      "spark-internal"
-    )
-
-    val appArgs = new SparkSubmitArguments(args)
-    val (_, _, conf, _) = submit.prepareSubmitEnvironment(appArgs, conf = Some(hadoopConf))
-
-    conf.get(PY_FILES.key) should be (s"s3a://${pyFile.getAbsolutePath}")
-    conf.get("spark.submit.pyFiles") should (startWith("/"))
-
-    // Verify "spark.submit.pyFiles"
-    val args1 = Seq(
-      "--class", UserClasspathFirstTest.getClass.getName.stripPrefix("$"),
-      "--name", "testApp",
-      "--master", "yarn",
-      "--deploy-mode", "client",
-      "--conf", s"spark.submit.pyFiles=s3a://${pyFile.getAbsolutePath}",
-      "spark-internal"
-    )
-
-    val appArgs1 = new SparkSubmitArguments(args1)
-    val (_, _, conf1, _) = submit.prepareSubmitEnvironment(appArgs1, conf = Some(hadoopConf))
-
-    conf1.get(PY_FILES.key) should be (s"s3a://${pyFile.getAbsolutePath}")
-    conf1.get("spark.submit.pyFiles") should (startWith("/"))
-  }
 }
 
 object SparkSubmitSuite extends SparkFunSuite with TimeLimits {

  (This diff was longer than 20,000 lines, and has been truncated...)
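
One recurring change in the (truncated) SparkSubmitSuite hunks above is swapping calls on a `submit` instance back to the companion-object form `SparkSubmit.prepareSubmitEnvironment(...)`, i.e. undoing a refactor that exposed the submit logic through an instance. A toy sketch of that difference in call sites; `Submit` and `prepareEnv` are hypothetical names, not the real SparkSubmit API:

// Toy illustration of the instance-vs-companion-object call sites seen in the diff.
// `Submit` and `prepareEnv` are hypothetical; they are not the real SparkSubmit API.
class Submit {
  // Instance form: a test can instantiate (or subclass) this and call through it.
  def prepareEnv(args: Seq[String]): Map[String, String] =
    Map("app.name" -> args.headOption.getOrElse("unknown"))
}

object Submit {
  // Companion-object form: callers go through a single static-like entry point.
  def prepareEnv(args: Seq[String]): Map[String, String] = new Submit().prepareEnv(args)
}

object CallSiteSketch {
  def main(args: Array[String]): Unit = {
    val cliArgs = Seq("myApp", "--master", "local")

    // Style the revert removes: hold an instance and call through it.
    val submit = new Submit()
    val confViaInstance = submit.prepareEnv(cliArgs)

    // Style the revert restores: call the companion object directly.
    val confViaObject = Submit.prepareEnv(cliArgs)

    assert(confViaInstance == confViaObject)
    println(confViaObject)
  }
}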


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org