Posted to reviews@spark.apache.org by GitBox <gi...@apache.org> on 2018/12/22 01:42:27 UTC

[GitHub] Ngone51 closed pull request #23366: [SPARK-26269][YARN][BRANCH-2.4] YarnAllocator should have the same blacklist behaviour as YARN to maximize use of cluster resources
URL: https://github.com/apache/spark/pull/23366

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

diff --git a/.gitignore b/.gitignore
index 19db7ac277944..e4c44d0590d59 100644
--- a/.gitignore
+++ b/.gitignore
@@ -77,7 +77,6 @@ target/
 unit-tests.log
 work/
 docs/.jekyll-metadata
-*.crc
 
 # For Hive
 TempStatsStore/
diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index 05b94adeeb93b..0000000000000
--- a/.travis.yml
+++ /dev/null
@@ -1,50 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Spark provides this Travis CI configuration file to help contributors
-# check Scala/Java style conformance and JDK7/8 compilation easily
-# during their preparing pull requests.
-#   - Scalastyle is executed during `maven install` implicitly.
-#   - Java Checkstyle is executed by `lint-java`.
-# See the related discussion here.
-# https://github.com/apache/spark/pull/12980
-
-# 1. Choose OS (Ubuntu 14.04.3 LTS Server Edition 64bit, ~2 CORE, 7.5GB RAM)
-sudo: required
-dist: trusty
-
-# 2. Choose language and target JDKs for parallel builds.
-language: java
-jdk:
-  - oraclejdk8
-
-# 3. Setup cache directory for SBT and Maven.
-cache:
-  directories:
-  - $HOME/.sbt
-  - $HOME/.m2
-
-# 4. Turn off notifications.
-notifications:
-  email: false
-
-# 5. Run maven install before running lint-java.
-install:
-  - export MAVEN_SKIP_RC=1
-  - build/mvn -T 4 -q -DskipTests -Pkubernetes -Pmesos -Pyarn -Pkinesis-asl -Phive -Phive-thriftserver install
-
-# 6. Run lint-java.
-script:
-  - dev/lint-java
diff --git a/R/WINDOWS.md b/R/WINDOWS.md
index da668a69b8679..33a4c850cfdac 100644
--- a/R/WINDOWS.md
+++ b/R/WINDOWS.md
@@ -3,7 +3,7 @@
 To build SparkR on Windows, the following steps are required
 
 1. Install R (>= 3.1) and [Rtools](http://cran.r-project.org/bin/windows/Rtools/). Make sure to
-include Rtools and R in `PATH`.
+include Rtools and R in `PATH`. Note that support for R prior to version 3.4 is deprecated as of Spark 3.0.0.
 
 2. Install
 [JDK8](http://www.oracle.com/technetwork/java/javase/downloads/jdk8-downloads-2133151.html) and set
diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION
index f52d785e05cdd..736da46eaa8d3 100644
--- a/R/pkg/DESCRIPTION
+++ b/R/pkg/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: SparkR
 Type: Package
-Version: 2.4.0
+Version: 3.0.0
 Title: R Frontend for Apache Spark
 Description: Provides an R Frontend for Apache Spark.
 Authors@R: c(person("Shivaram", "Venkataraman", role = c("aut", "cre"),
@@ -15,7 +15,7 @@ URL: http://www.apache.org/ http://spark.apache.org/
 BugReports: http://spark.apache.org/contributing.html
 SystemRequirements: Java (== 8)
 Depends:
-    R (>= 3.0),
+    R (>= 3.1),
     methods
 Suggests:
     knitr,
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index 96ff389faf4a0..1f8ba0bcf1cf5 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -28,9 +28,8 @@ importFrom("utils", "download.file", "object.size", "packageVersion", "tail", "u
 
 # S3 methods exported
 export("sparkR.session")
-export("sparkR.init")
-export("sparkR.stop")
 export("sparkR.session.stop")
+export("sparkR.stop")
 export("sparkR.conf")
 export("sparkR.version")
 export("sparkR.uiWebUrl")
@@ -42,9 +41,6 @@ export("sparkR.callJStatic")
 
 export("install.spark")
 
-export("sparkRSQL.init",
-       "sparkRHive.init")
-
 # MLlib integration
 exportMethods("glm",
               "spark.glm",
@@ -70,7 +66,8 @@ exportMethods("glm",
               "spark.svmLinear",
               "spark.fpGrowth",
               "spark.freqItemsets",
-              "spark.associationRules")
+              "spark.associationRules",
+              "spark.findFrequentSequentialPatterns")
 
 # Job group lifecycle management methods
 export("setJobGroup",
@@ -150,7 +147,6 @@ exportMethods("arrange",
               "printSchema",
               "randomSplit",
               "rbind",
-              "registerTempTable",
               "rename",
               "repartition",
               "repartitionByRange",
@@ -158,7 +154,6 @@ exportMethods("arrange",
               "sample",
               "sample_frac",
               "sampleBy",
-              "saveAsParquetFile",
               "saveAsTable",
               "saveDF",
               "schema",
@@ -200,6 +195,7 @@ exportMethods("%<=>%",
               "acos",
               "add_months",
               "alias",
+              "approx_count_distinct",
               "approxCountDistinct",
               "approxQuantile",
               "array_contains",
@@ -258,6 +254,7 @@ exportMethods("%<=>%",
               "dayofweek",
               "dayofyear",
               "decode",
+              "degrees",
               "dense_rank",
               "desc",
               "element_at",
@@ -274,6 +271,7 @@ exportMethods("%<=>%",
               "floor",
               "format_number",
               "format_string",
+              "from_csv",
               "from_json",
               "from_unixtime",
               "from_utc_timestamp",
@@ -339,6 +337,7 @@ exportMethods("%<=>%",
               "posexplode",
               "posexplode_outer",
               "quarter",
+              "radians",
               "rand",
               "randn",
               "rank",
@@ -352,6 +351,8 @@ exportMethods("%<=>%",
               "row_number",
               "rpad",
               "rtrim",
+              "schema_of_csv",
+              "schema_of_json",
               "second",
               "sha1",
               "sha2",
@@ -385,6 +386,7 @@ exportMethods("%<=>%",
               "tanh",
               "toDegrees",
               "toRadians",
+              "to_csv",
               "to_date",
               "to_json",
               "to_timestamp",
@@ -413,18 +415,14 @@ export("as.DataFrame",
        "cacheTable",
        "clearCache",
        "createDataFrame",
-       "createExternalTable",
        "createTable",
        "currentDatabase",
-       "dropTempTable",
        "dropTempView",
-       "jsonFile",
        "listColumns",
        "listDatabases",
        "listFunctions",
        "listTables",
        "loadDF",
-       "parquetFile",
        "read.df",
        "read.jdbc",
        "read.json",
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 4f2d4c7c002d4..24ed449f2a7d1 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -226,7 +226,9 @@ setMethod("showDF",
 
 #' show
 #'
-#' Print class and type information of a Spark object.
+#' If eager evaluation is enabled and the Spark object is a SparkDataFrame, evaluate the
+#' SparkDataFrame and print top rows of the SparkDataFrame, otherwise, print the class
+#' and type information of the Spark object.
 #'
 #' @param object a Spark object. Can be a SparkDataFrame, Column, GroupedData, WindowSpec.
 #'
@@ -244,11 +246,33 @@ setMethod("showDF",
 #' @note show(SparkDataFrame) since 1.4.0
 setMethod("show", "SparkDataFrame",
           function(object) {
-            cols <- lapply(dtypes(object), function(l) {
-              paste(l, collapse = ":")
-            })
-            s <- paste(cols, collapse = ", ")
-            cat(paste(class(object), "[", s, "]\n", sep = ""))
+            allConf <- sparkR.conf()
+            prop <- allConf[["spark.sql.repl.eagerEval.enabled"]]
+            if (!is.null(prop) && identical(prop, "true")) {
+              argsList <- list()
+              argsList$x <- object
+              prop <- allConf[["spark.sql.repl.eagerEval.maxNumRows"]]
+              if (!is.null(prop)) {
+                numRows <- as.integer(prop)
+                if (numRows > 0) {
+                  argsList$numRows <- numRows
+                }
+              }
+              prop <- allConf[["spark.sql.repl.eagerEval.truncate"]]
+              if (!is.null(prop)) {
+                truncate <- as.integer(prop)
+                if (truncate > 0) {
+                  argsList$truncate <- truncate
+                }
+              }
+              do.call(showDF, argsList)
+            } else {
+              cols <- lapply(dtypes(object), function(l) {
+                paste(l, collapse = ":")
+              })
+              s <- paste(cols, collapse = ", ")
+              cat(paste(class(object), "[", s, "]\n", sep = ""))
+            }
           })
 
 #' DataTypes
@@ -497,33 +521,6 @@ setMethod("createOrReplaceTempView",
               invisible(callJMethod(x@sdf, "createOrReplaceTempView", viewName))
           })
 
-#' (Deprecated) Register Temporary Table
-#'
-#' Registers a SparkDataFrame as a Temporary Table in the SparkSession
-#' @param x A SparkDataFrame
-#' @param tableName A character vector containing the name of the table
-#'
-#' @family SparkDataFrame functions
-#' @seealso \link{createOrReplaceTempView}
-#' @rdname registerTempTable-deprecated
-#' @name registerTempTable
-#' @aliases registerTempTable,SparkDataFrame,character-method
-#' @examples
-#'\dontrun{
-#' sparkR.session()
-#' path <- "path/to/file.json"
-#' df <- read.json(path)
-#' registerTempTable(df, "json_df")
-#' new_df <- sql("SELECT * FROM json_df")
-#'}
-#' @note registerTempTable since 1.4.0
-setMethod("registerTempTable",
-          signature(x = "SparkDataFrame", tableName = "character"),
-          function(x, tableName) {
-              .Deprecated("createOrReplaceTempView")
-              invisible(callJMethod(x@sdf, "createOrReplaceTempView", tableName))
-          })
-
 #' insertInto
 #'
 #' Insert the contents of a SparkDataFrame into a table registered in the current SparkSession.
@@ -769,6 +766,13 @@ setMethod("repartition",
 #'  \item{2.} {Return a new SparkDataFrame range partitioned by the given column(s),
 #'                      using \code{spark.sql.shuffle.partitions} as number of partitions.}
 #'}
+#' At least one partition-by expression must be specified.
+#' When no explicit sort order is specified, "ascending nulls first" is assumed.
+#'
+#' Note that due to performance reasons this method uses sampling to estimate the ranges.
+#' Hence, the output may not be consistent, since sampling can return different values.
+#' The sample size can be controlled by the config
+#' \code{spark.sql.execution.rangeExchange.sampleSizePerPartition}.
 #'
 #' @param x a SparkDataFrame.
 #' @param numPartitions the number of partitions to use.
@@ -823,7 +827,6 @@ setMethod("repartitionByRange",
 #' toJSON
 #'
 #' Converts a SparkDataFrame into a SparkDataFrame of JSON string.
-#'
 #' Each row is turned into a JSON document with columns as different fields.
 #' The returned SparkDataFrame has a single character column with the name \code{value}
 #'
@@ -933,7 +936,6 @@ setMethod("write.orc",
 #' path <- "path/to/file.json"
 #' df <- read.json(path)
 #' write.parquet(df, "/tmp/sparkr-tmp1/")
-#' saveAsParquetFile(df, "/tmp/sparkr-tmp2/")
 #'}
 #' @note write.parquet since 1.6.0
 setMethod("write.parquet",
@@ -944,17 +946,6 @@ setMethod("write.parquet",
             invisible(handledCallJMethod(write, "parquet", path))
           })
 
-#' @rdname write.parquet
-#' @name saveAsParquetFile
-#' @aliases saveAsParquetFile,SparkDataFrame,character-method
-#' @note saveAsParquetFile since 1.4.0
-setMethod("saveAsParquetFile",
-          signature(x = "SparkDataFrame", path = "character"),
-          function(x, path) {
-            .Deprecated("write.parquet")
-            write.parquet(x, path)
-          })
-
 #' Save the content of SparkDataFrame in a text file at the specified path.
 #'
 #' Save the content of the SparkDataFrame in a text file at the specified path.
@@ -2739,15 +2730,29 @@ setMethod("union",
             dataFrame(unioned)
           })
 
-#' unionAll is deprecated - use union instead
-#' @rdname union
-#' @name unionAll
+#' Return a new SparkDataFrame containing the union of rows.
+#'
+#' This is an alias for \code{union}.
+#'
+#' @param x a SparkDataFrame.
+#' @param y a SparkDataFrame.
+#' @return A SparkDataFrame containing the result of the unionAll operation.
+#' @family SparkDataFrame functions
 #' @aliases unionAll,SparkDataFrame,SparkDataFrame-method
+#' @rdname unionAll
+#' @name unionAll
+#' @seealso \link{union}
+#' @examples
+#'\dontrun{
+#' sparkR.session()
+#' df1 <- read.json(path)
+#' df2 <- read.json(path2)
+#' unionAllDF <- unionAll(df1, df2)
+#' }
 #' @note unionAll since 1.4.0
 setMethod("unionAll",
           signature(x = "SparkDataFrame", y = "SparkDataFrame"),
           function(x, y) {
-            .Deprecated("union")
             union(x, y)
           })
 
@@ -2955,6 +2960,9 @@ setMethod("exceptAll",
 #' @param source a name for external data source.
 #' @param mode one of 'append', 'overwrite', 'error', 'errorifexists', 'ignore'
 #'             save mode (it is 'error' by default)
+#' @param partitionBy a name or a list of names of columns to partition the output by on the file
+#'                    system. If specified, the output is laid out on the file system similar
+#'                    to Hive's partitioning scheme.
 #' @param ... additional argument(s) passed to the method.
 #'
 #' @family SparkDataFrame functions
@@ -2966,13 +2974,13 @@ setMethod("exceptAll",
 #' sparkR.session()
 #' path <- "path/to/file.json"
 #' df <- read.json(path)
-#' write.df(df, "myfile", "parquet", "overwrite")
+#' write.df(df, "myfile", "parquet", "overwrite", partitionBy = c("col1", "col2"))
 #' saveDF(df, parquetPath2, "parquet", mode = "append", mergeSchema = TRUE)
 #' }
 #' @note write.df since 1.4.0
 setMethod("write.df",
           signature(df = "SparkDataFrame"),
-          function(df, path = NULL, source = NULL, mode = "error", ...) {
+          function(df, path = NULL, source = NULL, mode = "error", partitionBy = NULL, ...) {
             if (!is.null(path) && !is.character(path)) {
               stop("path should be character, NULL or omitted.")
             }
@@ -2986,8 +2994,18 @@ setMethod("write.df",
             if (is.null(source)) {
               source <- getDefaultSqlSource()
             }
+            cols <- NULL
+            if (!is.null(partitionBy)) {
+              if (!all(sapply(partitionBy, function(c) is.character(c)))) {
+                stop("All partitionBy column names should be characters.")
+              }
+              cols <- as.list(partitionBy)
+            }
             write <- callJMethod(df@sdf, "write")
             write <- callJMethod(write, "format", source)
+            if (!is.null(cols)) {
+              write <- callJMethod(write, "partitionBy", cols)
+            }
             write <- setWriteOptions(write, path = path, mode = mode, ...)
             write <- handledCallJMethod(write, "save")
           })
@@ -3986,7 +4004,17 @@ setMethod("hint",
           signature(x = "SparkDataFrame", name = "character"),
           function(x, name, ...) {
             parameters <- list(...)
-            stopifnot(all(sapply(parameters, is.character)))
+            if (!all(sapply(parameters, function(y) {
+              if (is.character(y) || is.numeric(y)) {
+                TRUE
+              } else if (is.list(y)) {
+                all(sapply(y, function(z) { is.character(z) || is.numeric(z) }))
+              } else {
+                FALSE
+              }
+            }))) {
+              stop("sql hint should be character, numeric, or list with character or numeric.")
+            }
             jdf <- callJMethod(x@sdf, "hint", name, parameters)
             dataFrame(jdf)
           })
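The DataFrame.R changes above make show() honour the eager-evaluation settings: when spark.sql.repl.eagerEval.enabled is "true", show() delegates to showDF() with the configured row and truncation limits instead of printing only the class and schema. A minimal sketch of how a user might exercise this from a SparkR session (the configuration values and the mtcars data are illustrative only):

    library(SparkR)
    sparkR.session(sparkConfig = list(
      spark.sql.repl.eagerEval.enabled = "true",   # switch show() onto the showDF() path
      spark.sql.repl.eagerEval.maxNumRows = "10",  # read back as an integer row cap
      spark.sql.repl.eagerEval.truncate = "20"     # truncate long cell values
    ))
    df <- createDataFrame(mtcars)
    show(df)   # prints the top rows, roughly showDF(df, numRows = 10, truncate = 20)
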
diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R
index c819a7d14ae98..afcdd6faa849d 100644
--- a/R/pkg/R/SQLContext.R
+++ b/R/pkg/R/SQLContext.R
@@ -37,37 +37,6 @@ getInternalType <- function(x) {
          stop(paste("Unsupported type for SparkDataFrame:", class(x))))
 }
 
-#' Temporary function to reroute old S3 Method call to new
-#' This function is specifically implemented to remove SQLContext from the parameter list.
-#' It determines the target to route the call by checking the parent of this callsite (say 'func').
-#' The target should be called 'func.default'.
-#' We need to check the class of x to ensure it is SQLContext/HiveContext before dispatching.
-#' @param newFuncSig name of the function the user should call instead in the deprecation message
-#' @param x the first parameter of the original call
-#' @param ... the rest of parameter to pass along
-#' @return whatever the target returns
-#' @noRd
-dispatchFunc <- function(newFuncSig, x, ...) {
-  # When called with SparkR::createDataFrame, sys.call()[[1]] returns c(::, SparkR, createDataFrame)
-  callsite <- as.character(sys.call(sys.parent())[[1]])
-  funcName <- callsite[[length(callsite)]]
-  f <- get(paste0(funcName, ".default"))
-  # Strip sqlContext from list of parameters and then pass the rest along.
-  contextNames <- c("org.apache.spark.sql.SQLContext",
-                    "org.apache.spark.sql.hive.HiveContext",
-                    "org.apache.spark.sql.hive.test.TestHiveContext",
-                    "org.apache.spark.sql.SparkSession")
-  if (missing(x) && length(list(...)) == 0) {
-    f()
-  } else if (class(x) == "jobj" &&
-            any(grepl(paste(contextNames, collapse = "|"), getClassName.jobj(x)))) {
-    .Deprecated(newFuncSig, old = paste0(funcName, "(sqlContext...)"))
-    f(...)
-  } else {
-    f(x, ...)
-  }
-}
-
 #' return the SparkSession
 #' @noRd
 getSparkSession <- function() {
@@ -198,11 +167,10 @@ getDefaultSqlSource <- function() {
 #' df4 <- createDataFrame(cars, numPartitions = 2)
 #' }
 #' @name createDataFrame
-#' @method createDataFrame default
 #' @note createDataFrame since 1.4.0
 # TODO(davies): support sampling and infer type from NA
-createDataFrame.default <- function(data, schema = NULL, samplingRatio = 1.0,
-                                    numPartitions = NULL) {
+createDataFrame <- function(data, schema = NULL, samplingRatio = 1.0,
+                            numPartitions = NULL) {
   sparkSession <- getSparkSession()
 
   if (is.data.frame(data)) {
@@ -285,31 +253,18 @@ createDataFrame.default <- function(data, schema = NULL, samplingRatio = 1.0,
   dataFrame(sdf)
 }
 
-createDataFrame <- function(x, ...) {
-  dispatchFunc("createDataFrame(data, schema = NULL)", x, ...)
-}
-
 #' @rdname createDataFrame
 #' @aliases createDataFrame
-#' @method as.DataFrame default
 #' @note as.DataFrame since 1.6.0
-as.DataFrame.default <- function(data, schema = NULL, samplingRatio = 1.0, numPartitions = NULL) {
+as.DataFrame <- function(data, schema = NULL, samplingRatio = 1.0, numPartitions = NULL) {
   createDataFrame(data, schema, samplingRatio, numPartitions)
 }
 
-#' @param ... additional argument(s).
-#' @rdname createDataFrame
-#' @aliases as.DataFrame
-as.DataFrame <- function(data, ...) {
-  dispatchFunc("as.DataFrame(data, schema = NULL)", data, ...)
-}
-
 #' toDF
 #'
 #' Converts an RDD to a SparkDataFrame by infer the types.
 #'
 #' @param x An RDD
-#'
 #' @rdname SparkDataFrame
 #' @noRd
 #' @examples
@@ -343,12 +298,10 @@ setMethod("toDF", signature(x = "RDD"),
 #' path <- "path/to/file.json"
 #' df <- read.json(path)
 #' df <- read.json(path, multiLine = TRUE)
-#' df <- jsonFile(path)
 #' }
 #' @name read.json
-#' @method read.json default
 #' @note read.json since 1.6.0
-read.json.default <- function(path, ...) {
+read.json <- function(path, ...) {
   sparkSession <- getSparkSession()
   options <- varargsToStrEnv(...)
   # Allow the user to have a more flexible definition of the text file path
@@ -359,55 +312,6 @@ read.json.default <- function(path, ...) {
   dataFrame(sdf)
 }
 
-read.json <- function(x, ...) {
-  dispatchFunc("read.json(path)", x, ...)
-}
-
-#' @rdname read.json
-#' @name jsonFile
-#' @method jsonFile default
-#' @note jsonFile since 1.4.0
-jsonFile.default <- function(path) {
-  .Deprecated("read.json")
-  read.json(path)
-}
-
-jsonFile <- function(x, ...) {
-  dispatchFunc("jsonFile(path)", x, ...)
-}
-
-#' JSON RDD
-#'
-#' Loads an RDD storing one JSON object per string as a SparkDataFrame.
-#'
-#' @param sqlContext SQLContext to use
-#' @param rdd An RDD of JSON string
-#' @param schema A StructType object to use as schema
-#' @param samplingRatio The ratio of simpling used to infer the schema
-#' @return A SparkDataFrame
-#' @noRd
-#' @examples
-#'\dontrun{
-#' sparkR.session()
-#' rdd <- texFile(sc, "path/to/json")
-#' df <- jsonRDD(sqlContext, rdd)
-#'}
-
-# TODO: remove - this method is no longer exported
-# TODO: support schema
-jsonRDD <- function(sqlContext, rdd, schema = NULL, samplingRatio = 1.0) {
-  .Deprecated("read.json")
-  rdd <- serializeToString(rdd)
-  if (is.null(schema)) {
-    read <- callJMethod(sqlContext, "read")
-    # samplingRatio is deprecated
-    sdf <- callJMethod(read, "json", callJMethod(getJRDD(rdd), "rdd"))
-    dataFrame(sdf)
-  } else {
-    stop("not implemented")
-  }
-}
-
 #' Create a SparkDataFrame from an ORC file.
 #'
 #' Loads an ORC file, returning the result as a SparkDataFrame.
@@ -434,12 +338,12 @@ read.orc <- function(path, ...) {
 #' Loads a Parquet file, returning the result as a SparkDataFrame.
 #'
 #' @param path path of file to read. A vector of multiple paths is allowed.
+#' @param ... additional data source specific named properties.
 #' @return SparkDataFrame
 #' @rdname read.parquet
 #' @name read.parquet
-#' @method read.parquet default
 #' @note read.parquet since 1.6.0
-read.parquet.default <- function(path, ...) {
+read.parquet <- function(path, ...) {
   sparkSession <- getSparkSession()
   options <- varargsToStrEnv(...)
   # Allow the user to have a more flexible definition of the Parquet file path
@@ -450,24 +354,6 @@ read.parquet.default <- function(path, ...) {
   dataFrame(sdf)
 }
 
-read.parquet <- function(x, ...) {
-  dispatchFunc("read.parquet(...)", x, ...)
-}
-
-#' @param ... argument(s) passed to the method.
-#' @rdname read.parquet
-#' @name parquetFile
-#' @method parquetFile default
-#' @note parquetFile since 1.4.0
-parquetFile.default <- function(...) {
-  .Deprecated("read.parquet")
-  read.parquet(unlist(list(...)))
-}
-
-parquetFile <- function(x, ...) {
-  dispatchFunc("parquetFile(...)", x, ...)
-}
-
 #' Create a SparkDataFrame from a text file.
 #'
 #' Loads text files and returns a SparkDataFrame whose schema starts with
@@ -487,9 +373,8 @@ parquetFile <- function(x, ...) {
 #' df <- read.text(path)
 #' }
 #' @name read.text
-#' @method read.text default
 #' @note read.text since 1.6.1
-read.text.default <- function(path, ...) {
+read.text <- function(path, ...) {
   sparkSession <- getSparkSession()
   options <- varargsToStrEnv(...)
   # Allow the user to have a more flexible definition of the text file path
@@ -500,10 +385,6 @@ read.text.default <- function(path, ...) {
   dataFrame(sdf)
 }
 
-read.text <- function(x, ...) {
-  dispatchFunc("read.text(path)", x, ...)
-}
-
 #' SQL Query
 #'
 #' Executes a SQL query using Spark, returning the result as a SparkDataFrame.
@@ -520,18 +401,13 @@ read.text <- function(x, ...) {
 #' new_df <- sql("SELECT * FROM table")
 #' }
 #' @name sql
-#' @method sql default
 #' @note sql since 1.4.0
-sql.default <- function(sqlQuery) {
+sql <- function(sqlQuery) {
   sparkSession <- getSparkSession()
   sdf <- callJMethod(sparkSession, "sql", sqlQuery)
   dataFrame(sdf)
 }
 
-sql <- function(x, ...) {
-  dispatchFunc("sql(sqlQuery)", x, ...)
-}
-
 #' Create a SparkDataFrame from a SparkSQL table or view
 #'
 #' Returns the specified table or view as a SparkDataFrame. The table or view must already exist or
@@ -590,9 +466,8 @@ tableToDF <- function(tableName) {
 #' df4 <- read.df(mapTypeJsonPath, "json", stringSchema, multiLine = TRUE)
 #' }
 #' @name read.df
-#' @method read.df default
 #' @note read.df since 1.4.0
-read.df.default <- function(path = NULL, source = NULL, schema = NULL, na.strings = "NA", ...) {
+read.df <- function(path = NULL, source = NULL, schema = NULL, na.strings = "NA", ...) {
   if (!is.null(path) && !is.character(path)) {
     stop("path should be character, NULL or omitted.")
   }
@@ -627,22 +502,13 @@ read.df.default <- function(path = NULL, source = NULL, schema = NULL, na.string
   dataFrame(sdf)
 }
 
-read.df <- function(x = NULL, ...) {
-  dispatchFunc("read.df(path = NULL, source = NULL, schema = NULL, ...)", x, ...)
-}
-
 #' @rdname read.df
 #' @name loadDF
-#' @method loadDF default
 #' @note loadDF since 1.6.0
-loadDF.default <- function(path = NULL, source = NULL, schema = NULL, ...) {
+loadDF <- function(path = NULL, source = NULL, schema = NULL, ...) {
   read.df(path, source, schema, ...)
 }
 
-loadDF <- function(x = NULL, ...) {
-  dispatchFunc("loadDF(path = NULL, source = NULL, schema = NULL, ...)", x, ...)
-}
-
 #' Create a SparkDataFrame representing the database table accessible via JDBC URL
 #'
 #' Additional JDBC database connection properties can be set (...)
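With the dispatchFunc shim and the *.default variants removed above, the SQLContext.R readers are plain functions again: they take the path (or query) directly and no longer accept a leading SQLContext argument. A brief sketch of the resulting call style (file paths are placeholders):

    library(SparkR)
    sparkR.session()
    people <- read.json("path/to/people.json")               # the deprecated read.json(sqlContext, path) form is gone
    events <- read.df("path/to/events", source = "parquet")  # likewise for read.df(sqlContext, ...)
    top10  <- sql("SELECT * FROM range(10)")                 # plain function, no context argument
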
diff --git a/R/pkg/R/catalog.R b/R/pkg/R/catalog.R
index baf4d861fcf86..7641f8a7a0432 100644
--- a/R/pkg/R/catalog.R
+++ b/R/pkg/R/catalog.R
@@ -17,40 +17,6 @@
 
 # catalog.R: SparkSession catalog functions
 
-#' (Deprecated) Create an external table
-#'
-#' Creates an external table based on the dataset in a data source,
-#' Returns a SparkDataFrame associated with the external table.
-#'
-#' The data source is specified by the \code{source} and a set of options(...).
-#' If \code{source} is not specified, the default data source configured by
-#' "spark.sql.sources.default" will be used.
-#'
-#' @param tableName a name of the table.
-#' @param path the path of files to load.
-#' @param source the name of external data source.
-#' @param schema the schema of the data required for some data sources.
-#' @param ... additional argument(s) passed to the method.
-#' @return A SparkDataFrame.
-#' @rdname createExternalTable-deprecated
-#' @seealso \link{createTable}
-#' @examples
-#'\dontrun{
-#' sparkR.session()
-#' df <- createExternalTable("myjson", path="path/to/json", source="json", schema)
-#' }
-#' @name createExternalTable
-#' @method createExternalTable default
-#' @note createExternalTable since 1.4.0
-createExternalTable.default <- function(tableName, path = NULL, source = NULL, schema = NULL, ...) {
-  .Deprecated("createTable", old = "createExternalTable")
-  createTable(tableName, path, source, schema, ...)
-}
-
-createExternalTable <- function(x, ...) {
-  dispatchFunc("createExternalTable(tableName, path = NULL, source = NULL, ...)", x, ...)
-}
-
 #' Creates a table based on the dataset in a data source
 #'
 #' Creates a table based on the dataset in a data source. Returns a SparkDataFrame associated with
@@ -69,7 +35,6 @@ createExternalTable <- function(x, ...) {
 #' @param ... additional named parameters as options for the data source.
 #' @return A SparkDataFrame.
 #' @rdname createTable
-#' @seealso \link{createExternalTable}
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -117,18 +82,13 @@ createTable <- function(tableName, path = NULL, source = NULL, schema = NULL, ..
 #' cacheTable("table")
 #' }
 #' @name cacheTable
-#' @method cacheTable default
 #' @note cacheTable since 1.4.0
-cacheTable.default <- function(tableName) {
+cacheTable <- function(tableName) {
   sparkSession <- getSparkSession()
   catalog <- callJMethod(sparkSession, "catalog")
   invisible(handledCallJMethod(catalog, "cacheTable", tableName))
 }
 
-cacheTable <- function(x, ...) {
-  dispatchFunc("cacheTable(tableName)", x, ...)
-}
-
 #' Uncache Table
 #'
 #' Removes the specified table from the in-memory cache.
@@ -146,18 +106,13 @@ cacheTable <- function(x, ...) {
 #' uncacheTable("table")
 #' }
 #' @name uncacheTable
-#' @method uncacheTable default
 #' @note uncacheTable since 1.4.0
-uncacheTable.default <- function(tableName) {
+uncacheTable <- function(tableName) {
   sparkSession <- getSparkSession()
   catalog <- callJMethod(sparkSession, "catalog")
   invisible(handledCallJMethod(catalog, "uncacheTable", tableName))
 }
 
-uncacheTable <- function(x, ...) {
-  dispatchFunc("uncacheTable(tableName)", x, ...)
-}
-
 #' Clear Cache
 #'
 #' Removes all cached tables from the in-memory cache.
@@ -168,48 +123,13 @@ uncacheTable <- function(x, ...) {
 #' clearCache()
 #' }
 #' @name clearCache
-#' @method clearCache default
 #' @note clearCache since 1.4.0
-clearCache.default <- function() {
+clearCache <- function() {
   sparkSession <- getSparkSession()
   catalog <- callJMethod(sparkSession, "catalog")
   invisible(callJMethod(catalog, "clearCache"))
 }
 
-clearCache <- function() {
-  dispatchFunc("clearCache()")
-}
-
-#' (Deprecated) Drop Temporary Table
-#'
-#' Drops the temporary table with the given table name in the catalog.
-#' If the table has been cached/persisted before, it's also unpersisted.
-#'
-#' @param tableName The name of the SparkSQL table to be dropped.
-#' @seealso \link{dropTempView}
-#' @rdname dropTempTable-deprecated
-#' @examples
-#' \dontrun{
-#' sparkR.session()
-#' df <- read.df(path, "parquet")
-#' createOrReplaceTempView(df, "table")
-#' dropTempTable("table")
-#' }
-#' @name dropTempTable
-#' @method dropTempTable default
-#' @note dropTempTable since 1.4.0
-dropTempTable.default <- function(tableName) {
-  .Deprecated("dropTempView", old = "dropTempTable")
-  if (class(tableName) != "character") {
-    stop("tableName must be a string.")
-  }
-  dropTempView(tableName)
-}
-
-dropTempTable <- function(x, ...) {
-  dispatchFunc("dropTempView(viewName)", x, ...)
-}
-
 #' Drops the temporary view with the given view name in the catalog.
 #'
 #' Drops the temporary view with the given view name in the catalog.
@@ -250,17 +170,12 @@ dropTempView <- function(viewName) {
 #' tables("hive")
 #' }
 #' @name tables
-#' @method tables default
 #' @note tables since 1.4.0
-tables.default <- function(databaseName = NULL) {
+tables <- function(databaseName = NULL) {
   # rename column to match previous output schema
   withColumnRenamed(listTables(databaseName), "name", "tableName")
 }
 
-tables <- function(x, ...) {
-  dispatchFunc("tables(databaseName = NULL)", x, ...)
-}
-
 #' Table Names
 #'
 #' Returns the names of tables in the given database as an array.
@@ -274,9 +189,8 @@ tables <- function(x, ...) {
 #' tableNames("hive")
 #' }
 #' @name tableNames
-#' @method tableNames default
 #' @note tableNames since 1.4.0
-tableNames.default <- function(databaseName = NULL) {
+tableNames <- function(databaseName = NULL) {
   sparkSession <- getSparkSession()
   callJStatic("org.apache.spark.sql.api.r.SQLUtils",
               "getTableNames",
@@ -284,10 +198,6 @@ tableNames.default <- function(databaseName = NULL) {
               databaseName)
 }
 
-tableNames <- function(x, ...) {
-  dispatchFunc("tableNames(databaseName = NULL)", x, ...)
-}
-
 #' Returns the current default database
 #'
 #' Returns the current default database.
diff --git a/R/pkg/R/context.R b/R/pkg/R/context.R
index f168ca76b6007..e99136723f65b 100644
--- a/R/pkg/R/context.R
+++ b/R/pkg/R/context.R
@@ -167,18 +167,30 @@ parallelize <- function(sc, coll, numSlices = 1) {
   # 2-tuples of raws
   serializedSlices <- lapply(slices, serialize, connection = NULL)
 
-  # The PRC backend cannot handle arguments larger than 2GB (INT_MAX)
+  # The RPC backend cannot handle arguments larger than 2GB (INT_MAX)
   # If serialized data is safely less than that threshold we send it over the PRC channel.
   # Otherwise, we write it to a file and send the file name
   if (objectSize < sizeLimit) {
     jrdd <- callJStatic("org.apache.spark.api.r.RRDD", "createRDDFromArray", sc, serializedSlices)
   } else {
-    fileName <- writeToTempFile(serializedSlices)
-    jrdd <- tryCatch(callJStatic(
-        "org.apache.spark.api.r.RRDD", "createRDDFromFile", sc, fileName, as.integer(numSlices)),
-      finally = {
-        file.remove(fileName)
-    })
+    if (callJStatic("org.apache.spark.api.r.RUtils", "getEncryptionEnabled", sc)) {
+      # the length of slices here is the parallelism to use in the jvm's sc.parallelize()
+      parallelism <- as.integer(numSlices)
+      jserver <- newJObject("org.apache.spark.api.r.RParallelizeServer", sc, parallelism)
+      authSecret <- callJMethod(jserver, "secret")
+      port <- callJMethod(jserver, "port")
+      conn <- socketConnection(port = port, blocking = TRUE, open = "wb", timeout = 1500)
+      doServerAuth(conn, authSecret)
+      writeToConnection(serializedSlices, conn)
+      jrdd <- callJMethod(jserver, "getResult")
+    } else {
+      fileName <- writeToTempFile(serializedSlices)
+      jrdd <- tryCatch(callJStatic(
+          "org.apache.spark.api.r.RRDD", "createRDDFromFile", sc, fileName, as.integer(numSlices)),
+        finally = {
+          file.remove(fileName)
+      })
+    }
   }
 
   RDD(jrdd, "byte")
@@ -194,14 +206,21 @@ getMaxAllocationLimit <- function(sc) {
   ))
 }
 
+writeToConnection <- function(serializedSlices, conn) {
+  tryCatch({
+    for (slice in serializedSlices) {
+      writeBin(as.integer(length(slice)), conn, endian = "big")
+      writeBin(slice, conn, endian = "big")
+    }
+  }, finally = {
+    close(conn)
+  })
+}
+
 writeToTempFile <- function(serializedSlices) {
   fileName <- tempfile()
   conn <- file(fileName, "wb")
-  for (slice in serializedSlices) {
-    writeBin(as.integer(length(slice)), conn, endian = "big")
-    writeBin(slice, conn, endian = "big")
-  }
-  close(conn)
+  writeToConnection(serializedSlices, conn)
   fileName
 }
 
diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index 572dee50127b8..f568a931ae1fe 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -112,7 +112,7 @@ NULL
 #' df <- createDataFrame(cbind(model = rownames(mtcars), mtcars))
 #' tmp <- mutate(df, v1 = log(df$mpg), v2 = cbrt(df$disp),
 #'                   v3 = bround(df$wt, 1), v4 = bin(df$cyl),
-#'                   v5 = hex(df$wt), v6 = toDegrees(df$gear),
+#'                   v5 = hex(df$wt), v6 = degrees(df$gear),
 #'                   v7 = atan2(df$cyl, df$am), v8 = hypot(df$cyl, df$am),
 #'                   v9 = pmod(df$hp, df$cyl), v10 = shiftLeft(df$disp, 1),
 #'                   v11 = conv(df$hp, 10, 16), v12 = sign(df$vs - 0.5),
@@ -187,7 +187,9 @@ NULL
 #'          \itemize{
 #'          \item \code{to_json}: it is the column containing the struct, array of the structs,
 #'              the map or array of maps.
+#'          \item \code{to_csv}: it is the column containing the struct.
 #'          \item \code{from_json}: it is the column containing the JSON string.
+#'          \item \code{from_csv}: it is the column containing the CSV string.
 #'          }
 #' @param y Column to compute on.
 #' @param value A value to compute on.
@@ -196,10 +198,25 @@ NULL
 #'          \item \code{array_position}: a value to locate in the given array.
 #'          \item \code{array_remove}: a value to remove in the given array.
 #'          }
-#' @param ... additional argument(s). In \code{to_json} and \code{from_json}, this contains
-#'            additional named properties to control how it is converted, accepts the same
-#'            options as the JSON data source.  In \code{arrays_zip}, this contains additional
-#'            Columns of arrays to be merged.
+#' @param schema
+#'          \itemize{
+#'          \item \code{from_json}: a structType object to use as the schema to use
+#'              when parsing the JSON string. Since Spark 2.3, the DDL-formatted string is
+#'              also supported for the schema.
+#'          \item \code{from_csv}: a DDL-formatted string
+#'          }
+#' @param ... additional argument(s).
+#'          \itemize{
+#'          \item \code{to_json}, \code{from_json} and \code{schema_of_json}: this contains
+#'              additional named properties to control how it is converted and accepts the
+#'              same options as the JSON data source.
+#'          \item \code{to_json}: it supports the "pretty" option which enables pretty
+#'              JSON generation.
+#'          \item \code{to_csv}, \code{from_csv} and \code{schema_of_csv}: this contains
+#'              additional named properties to control how it is converted and accepts the
+#'              same options as the CSV data source.
+#'          \item \code{arrays_zip}, this contains additional Columns of arrays to be merged.
+#'          }
 #' @name column_collection_functions
 #' @rdname column_collection_functions
 #' @family collection functions
@@ -310,23 +327,37 @@ setMethod("acos",
           })
 
 #' @details
-#' \code{approxCountDistinct}: Returns the approximate number of distinct items in a group.
+#' \code{approx_count_distinct}: Returns the approximate number of distinct items in a group.
 #'
 #' @rdname column_aggregate_functions
-#' @aliases approxCountDistinct approxCountDistinct,Column-method
+#' @aliases approx_count_distinct approx_count_distinct,Column-method
 #' @examples
 #'
 #' \dontrun{
-#' head(select(df, approxCountDistinct(df$gear)))
-#' head(select(df, approxCountDistinct(df$gear, 0.02)))
+#' head(select(df, approx_count_distinct(df$gear)))
+#' head(select(df, approx_count_distinct(df$gear, 0.02)))
 #' head(select(df, countDistinct(df$gear, df$cyl)))
 #' head(select(df, n_distinct(df$gear)))
 #' head(distinct(select(df, "gear")))}
+#' @note approx_count_distinct(Column) since 3.0.0
+setMethod("approx_count_distinct",
+          signature(x = "Column"),
+          function(x) {
+            jc <- callJStatic("org.apache.spark.sql.functions", "approx_count_distinct", x@jc)
+            column(jc)
+          })
+
+#' @details
+#' \code{approxCountDistinct}: Returns the approximate number of distinct items in a group.
+#'
+#' @rdname column_aggregate_functions
+#' @aliases approxCountDistinct approxCountDistinct,Column-method
 #' @note approxCountDistinct(Column) since 1.4.0
 setMethod("approxCountDistinct",
           signature(x = "Column"),
           function(x) {
-            jc <- callJStatic("org.apache.spark.sql.functions", "approxCountDistinct", x@jc)
+            .Deprecated("approx_count_distinct")
+            jc <- callJStatic("org.apache.spark.sql.functions", "approx_count_distinct", x@jc)
             column(jc)
           })
 
@@ -1641,7 +1672,22 @@ setMethod("tanh",
 setMethod("toDegrees",
           signature(x = "Column"),
           function(x) {
-            jc <- callJStatic("org.apache.spark.sql.functions", "toDegrees", x@jc)
+            .Deprecated("degrees")
+            jc <- callJStatic("org.apache.spark.sql.functions", "degrees", x@jc)
+            column(jc)
+          })
+
+#' @details
+#' \code{degrees}: Converts an angle measured in radians to an approximately equivalent angle
+#' measured in degrees.
+#'
+#' @rdname column_math_functions
+#' @aliases degrees degrees,Column-method
+#' @note degrees since 3.0.0
+setMethod("degrees",
+          signature(x = "Column"),
+          function(x) {
+            jc <- callJStatic("org.apache.spark.sql.functions", "degrees", x@jc)
             column(jc)
           })
 
@@ -1655,7 +1701,22 @@ setMethod("toDegrees",
 setMethod("toRadians",
           signature(x = "Column"),
           function(x) {
-            jc <- callJStatic("org.apache.spark.sql.functions", "toRadians", x@jc)
+            .Deprecated("radians")
+            jc <- callJStatic("org.apache.spark.sql.functions", "radians", x@jc)
+            column(jc)
+          })
+
+#' @details
+#' \code{radians}: Converts an angle measured in degrees to an approximately equivalent angle
+#' measured in radians.
+#'
+#' @rdname column_math_functions
+#' @aliases radians radians,Column-method
+#' @note radians since 3.0.0
+setMethod("radians",
+          signature(x = "Column"),
+          function(x) {
+            jc <- callJStatic("org.apache.spark.sql.functions", "radians", x@jc)
             column(jc)
           })
 
@@ -1717,12 +1778,16 @@ setMethod("to_date",
 #' df2 <- mutate(df2, people_json = to_json(df2$people))
 #'
 #' # Converts a map into a JSON object
-#' df2 <- sql("SELECT map('name', 'Bob')) as people")
+#' df2 <- sql("SELECT map('name', 'Bob') as people")
 #' df2 <- mutate(df2, people_json = to_json(df2$people))
 #'
 #' # Converts an array of maps into a JSON array
 #' df2 <- sql("SELECT array(map('name', 'Bob'), map('name', 'Alice')) as people")
-#' df2 <- mutate(df2, people_json = to_json(df2$people))}
+#' df2 <- mutate(df2, people_json = to_json(df2$people))
+#'
+#' # Converts a map into a pretty JSON object
+#' df2 <- sql("SELECT map('name', 'Bob') as people")
+#' df2 <- mutate(df2, people_json = to_json(df2$people, pretty = TRUE))}
 #' @note to_json since 2.2.0
 setMethod("to_json", signature(x = "Column"),
           function(x, ...) {
@@ -1731,6 +1796,26 @@ setMethod("to_json", signature(x = "Column"),
             column(jc)
           })
 
+#' @details
+#' \code{to_csv}: Converts a column containing a \code{structType} into a Column of CSV string.
+#' Resolving the Column can fail if an unsupported type is encountered.
+#'
+#' @rdname column_collection_functions
+#' @aliases to_csv to_csv,Column-method
+#' @examples
+#'
+#' \dontrun{
+#' # Converts a struct into a CSV string
+#' df2 <- sql("SELECT named_struct('date', cast('2000-01-01' as date)) as d")
+#' select(df2, to_csv(df2$d, dateFormat = 'dd/MM/yyyy'))}
+#' @note to_csv since 3.0.0
+setMethod("to_csv", signature(x = "Column"),
+          function(x, ...) {
+            options <- varargsToStrEnv(...)
+            jc <- callJStatic("org.apache.spark.sql.functions", "to_csv", x@jc, options)
+            column(jc)
+          })
+
 #' @details
 #' \code{to_timestamp}: Converts the column into a TimestampType. You may optionally specify
 #' a format according to the rules in:
@@ -2035,13 +2120,24 @@ setMethod("pmod", signature(y = "Column"),
 
 #' @param rsd maximum estimation error allowed (default = 0.05).
 #'
+#' @rdname column_aggregate_functions
+#' @aliases approx_count_distinct,Column-method
+#' @note approx_count_distinct(Column, numeric) since 3.0.0
+setMethod("approx_count_distinct",
+          signature(x = "Column"),
+          function(x, rsd = 0.05) {
+            jc <- callJStatic("org.apache.spark.sql.functions", "approx_count_distinct", x@jc, rsd)
+            column(jc)
+          })
+
 #' @rdname column_aggregate_functions
 #' @aliases approxCountDistinct,Column-method
 #' @note approxCountDistinct(Column, numeric) since 1.4.0
 setMethod("approxCountDistinct",
           signature(x = "Column"),
           function(x, rsd = 0.05) {
-            jc <- callJStatic("org.apache.spark.sql.functions", "approxCountDistinct", x@jc, rsd)
+            .Deprecated("approx_count_distinct")
+            jc <- callJStatic("org.apache.spark.sql.functions", "approx_count_distinct", x@jc, rsd)
             column(jc)
           })
 
@@ -2164,8 +2260,6 @@ setMethod("date_format", signature(y = "Column", x = "character"),
 #' to \code{TRUE}. If the string is unparseable, the Column will contain the value NA.
 #'
 #' @rdname column_collection_functions
-#' @param schema a structType object to use as the schema to use when parsing the JSON string.
-#'               Since Spark 2.3, the DDL-formatted string is also supported for the schema.
 #' @param as.json.array indicating if input string is JSON array of objects or a single object.
 #' @aliases from_json from_json,Column,characterOrstructType-method
 #' @examples
@@ -2203,9 +2297,98 @@ setMethod("from_json", signature(x = "Column", schema = "characterOrstructType")
           })
 
 #' @details
-#' \code{from_utc_timestamp}: Given a timestamp like '2017-07-14 02:40:00.0', interprets it as a
-#' time in UTC, and renders that time as a timestamp in the given time zone. For example, 'GMT+1'
-#' would yield '2017-07-14 03:40:00.0'.
+#' \code{schema_of_json}: Parses a JSON string and infers its schema in DDL format.
+#'
+#' @rdname column_collection_functions
+#' @aliases schema_of_json schema_of_json,characterOrColumn-method
+#' @examples
+#'
+#' \dontrun{
+#' json <- "{\"name\":\"Bob\"}"
+#' df <- sql("SELECT * FROM range(1)")
+#' head(select(df, schema_of_json(json)))}
+#' @note schema_of_json since 3.0.0
+setMethod("schema_of_json", signature(x = "characterOrColumn"),
+          function(x, ...) {
+            if (class(x) == "character") {
+              col <- callJStatic("org.apache.spark.sql.functions", "lit", x)
+            } else {
+              col <- x@jc
+            }
+            options <- varargsToStrEnv(...)
+            jc <- callJStatic("org.apache.spark.sql.functions",
+                              "schema_of_json",
+                              col, options)
+            column(jc)
+          })
+
+#' @details
+#' \code{from_csv}: Parses a column containing a CSV string into a Column of \code{structType}
+#' with the specified \code{schema}.
+#' If the string is unparseable, the Column will contain the value NA.
+#'
+#' @rdname column_collection_functions
+#' @aliases from_csv from_csv,Column,character-method
+#' @examples
+#'
+#' \dontrun{
+#' df <- sql("SELECT 'Amsterdam,2018' as csv")
+#' schema <- "city STRING, year INT"
+#' head(select(df, from_csv(df$csv, schema)))}
+#' @note from_csv since 3.0.0
+setMethod("from_csv", signature(x = "Column", schema = "characterOrColumn"),
+          function(x, schema, ...) {
+            if (class(schema) == "Column") {
+              jschema <- schema@jc
+            } else if (is.character(schema)) {
+              jschema <- callJStatic("org.apache.spark.sql.functions", "lit", schema)
+            } else {
+              stop("schema argument should be a column or character")
+            }
+            options <- varargsToStrEnv(...)
+            jc <- callJStatic("org.apache.spark.sql.functions",
+                              "from_csv",
+                              x@jc, jschema, options)
+            column(jc)
+          })
+
+#' @details
+#' \code{schema_of_csv}: Parses a CSV string and infers its schema in DDL format.
+#'
+#' @rdname column_collection_functions
+#' @aliases schema_of_csv schema_of_csv,characterOrColumn-method
+#' @examples
+#'
+#' \dontrun{
+#' csv <- "Amsterdam,2018"
+#' df <- sql("SELECT * FROM range(1)")
+#' head(select(df, schema_of_csv(csv)))}
+#' @note schema_of_csv since 3.0.0
+setMethod("schema_of_csv", signature(x = "characterOrColumn"),
+          function(x, ...) {
+            if (class(x) == "character") {
+              col <- callJStatic("org.apache.spark.sql.functions", "lit", x)
+            } else {
+              col <- x@jc
+            }
+            options <- varargsToStrEnv(...)
+            jc <- callJStatic("org.apache.spark.sql.functions",
+                              "schema_of_csv",
+                              col, options)
+            column(jc)
+          })
+
+#' @details
+#' \code{from_utc_timestamp}: This is a common function for databases supporting TIMESTAMP WITHOUT
+#' TIMEZONE. This function takes a timestamp which is timezone-agnostic, and interprets it as a
+#' timestamp in UTC, and renders that timestamp as a timestamp in the given time zone.
+#' However, timestamp in Spark represents number of microseconds from the Unix epoch, which is not
+#' timezone-agnostic. So in Spark this function just shift the timestamp value from UTC timezone to
+#' the given timezone.
+#' This function may return confusing result if the input is a string with timezone, e.g.
+#' (\code{2018-03-13T06:18:23+00:00}). The reason is that, Spark firstly cast the string to
+#' timestamp according to the timezone in the string, and finally display the result by converting
+#' the timestamp to string according to the session local timezone.
 #'
 #' @rdname column_datetime_diff_functions
 #'
@@ -2261,9 +2444,16 @@ setMethod("next_day", signature(y = "Column", x = "character"),
           })
 
 #' @details
-#' \code{to_utc_timestamp}: Given a timestamp like '2017-07-14 02:40:00.0', interprets it as a
-#' time in the given time zone, and renders that time as a timestamp in UTC. For example, 'GMT+1'
-#' would yield '2017-07-14 01:40:00.0'.
+#' \code{to_utc_timestamp}: This is a common function for databases supporting TIMESTAMP WITHOUT
+#' TIMEZONE. This function takes a timestamp which is timezone-agnostic, and interprets it as a
+#' timestamp in the given timezone, and renders that timestamp as a timestamp in UTC.
+#' However, timestamp in Spark represents number of microseconds from the Unix epoch, which is not
+#' timezone-agnostic. So in Spark this function just shift the timestamp value from the given
+#' timezone to UTC timezone.
+#' This function may return confusing result if the input is a string with timezone, e.g.
+#' (\code{2018-03-13T06:18:23+00:00}). The reason is that, Spark firstly cast the string to
+#' timestamp according to the timezone in the string, and finally display the result by converting
+#' the timestamp to string according to the session local timezone.
 #'
 #' @rdname column_datetime_diff_functions
 #' @aliases to_utc_timestamp to_utc_timestamp,Column,character-method
@@ -3243,7 +3433,7 @@ setMethod("flatten",
 #'
 #' @rdname column_collection_functions
 #' @aliases map_entries map_entries,Column-method
-#' @note map_entries since 2.4.0
+#' @note map_entries since 3.0.0
 setMethod("map_entries",
           signature(x = "Column"),
           function(x) {
@@ -3458,13 +3648,21 @@ setMethod("collect_set",
 
 #' @details
 #' \code{split_string}: Splits string on regular expression.
-#' Equivalent to \code{split} SQL function.
+#' Equivalent to \code{split} SQL function. Optionally a
+#' \code{limit} can be specified
 #'
 #' @rdname column_string_functions
+#' @param limit determines the length of the returned array.
+#'              \itemize{
+#'              \item \code{limit > 0}: length of the array will be at most \code{limit}
+#'              \item \code{limit <= 0}: the returned array can have any length
+#'              }
+#'
 #' @aliases split_string split_string,Column-method
 #' @examples
 #'
 #' \dontrun{
+#' head(select(df, split_string(df$Class, "\\d", 2)))
 #' head(select(df, split_string(df$Sex, "a")))
 #' head(select(df, split_string(df$Class, "\\d")))
 #' # This is equivalent to the following SQL expression
@@ -3472,8 +3670,9 @@ setMethod("collect_set",
 #' @note split_string 2.3.0
 setMethod("split_string",
           signature(x = "Column", pattern = "character"),
-          function(x, pattern) {
-            jc <- callJStatic("org.apache.spark.sql.functions", "split", x@jc, pattern)
+          function(x, pattern, limit = -1) {
+            jc <- callJStatic("org.apache.spark.sql.functions",
+                              "split", x@jc, pattern, as.integer(limit))
             column(jc)
           })
 
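The functions.R hunks above introduce CSV counterparts to the JSON helpers (from_csv, to_csv, schema_of_csv) alongside schema_of_json. A short sketch combining them, assuming a running SparkR session (the literal data is illustrative):

    library(SparkR)
    sparkR.session()
    df <- sql("SELECT 'Amsterdam,2018' as csv")
    head(select(df, schema_of_csv("Amsterdam,2018")))            # infer a struct schema from a sample row
    head(select(df, from_csv(df$csv, "city STRING, year INT")))  # parse the string into a struct column
    df2 <- sql("SELECT named_struct('city', 'Amsterdam', 'year', 2018) as d")
    head(select(df2, to_csv(df2$d)))                             # render the struct back to a CSV string
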
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 27c1b312d645c..eed76465221c6 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -528,9 +528,6 @@ setGeneric("persist", function(x, newLevel) { standardGeneric("persist") })
 #' @rdname printSchema
 setGeneric("printSchema", function(x) { standardGeneric("printSchema") })
 
-#' @rdname registerTempTable-deprecated
-setGeneric("registerTempTable", function(x, tableName) { standardGeneric("registerTempTable") })
-
 #' @rdname rename
 setGeneric("rename", function(x, ...) { standardGeneric("rename") })
 
@@ -595,9 +592,6 @@ setGeneric("write.parquet", function(x, path, ...) {
   standardGeneric("write.parquet")
 })
 
-#' @rdname write.parquet
-setGeneric("saveAsParquetFile", function(x, path) { standardGeneric("saveAsParquetFile") })
-
 #' @rdname write.stream
 setGeneric("write.stream", function(df, source = NULL, outputMode = NULL, ...) {
   standardGeneric("write.stream")
@@ -637,7 +631,7 @@ setGeneric("toRDD", function(x) { standardGeneric("toRDD") })
 #' @rdname union
 setGeneric("union", function(x, y) { standardGeneric("union") })
 
-#' @rdname union
+#' @rdname unionAll
 setGeneric("unionAll", function(x, y) { standardGeneric("unionAll") })
 
 #' @rdname unionByName
@@ -755,6 +749,10 @@ setGeneric("windowOrderBy", function(col, ...) { standardGeneric("windowOrderBy"
 #' @name NULL
 setGeneric("add_months", function(y, x) { standardGeneric("add_months") })
 
+#' @rdname column_aggregate_functions
+#' @name NULL
+setGeneric("approx_count_distinct", function(x, ...) { standardGeneric("approx_count_distinct") })
+
 #' @rdname column_aggregate_functions
 #' @name NULL
 setGeneric("approxCountDistinct", function(x, ...) { standardGeneric("approxCountDistinct") })
@@ -984,6 +982,10 @@ setGeneric("format_string", function(format, x, ...) { standardGeneric("format_s
 #' @name NULL
 setGeneric("from_json", function(x, schema, ...) { standardGeneric("from_json") })
 
+#' @rdname column_collection_functions
+#' @name NULL
+setGeneric("from_csv", function(x, schema, ...) { standardGeneric("from_csv") })
+
 #' @rdname column_datetime_functions
 #' @name NULL
 setGeneric("from_unixtime", function(x, ...) { standardGeneric("from_unixtime") })
@@ -1204,6 +1206,14 @@ setGeneric("rpad", function(x, len, pad) { standardGeneric("rpad") })
 #' @name NULL
 setGeneric("rtrim", function(x, trimString) { standardGeneric("rtrim") })
 
+#' @rdname column_collection_functions
+#' @name NULL
+setGeneric("schema_of_csv", function(x, ...) { standardGeneric("schema_of_csv") })
+
+#' @rdname column_collection_functions
+#' @name NULL
+setGeneric("schema_of_json", function(x, ...) { standardGeneric("schema_of_json") })
+
 #' @rdname column_aggregate_functions
 #' @name NULL
 setGeneric("sd", function(x, na.rm = FALSE) { standardGeneric("sd") })
@@ -1258,7 +1268,7 @@ setGeneric("sort_array", function(x, asc = TRUE) { standardGeneric("sort_array")
 
 #' @rdname column_string_functions
 #' @name NULL
-setGeneric("split_string", function(x, pattern) { standardGeneric("split_string") })
+setGeneric("split_string", function(x, pattern, ...) { standardGeneric("split_string") })
 
 #' @rdname column_string_functions
 #' @name NULL
@@ -1292,10 +1302,18 @@ setGeneric("substring_index", function(x, delim, count) { standardGeneric("subst
 #' @name NULL
 setGeneric("sumDistinct", function(x) { standardGeneric("sumDistinct") })
 
+#' @rdname column_math_functions
+#' @name NULL
+setGeneric("degrees", function(x) { standardGeneric("degrees") })
+
 #' @rdname column_math_functions
 #' @name NULL
 setGeneric("toDegrees", function(x) { standardGeneric("toDegrees") })
 
+#' @rdname column_math_functions
+#' @name NULL
+setGeneric("radians", function(x) { standardGeneric("radians") })
+
 #' @rdname column_math_functions
 #' @name NULL
 setGeneric("toRadians", function(x) { standardGeneric("toRadians") })
@@ -1308,6 +1326,10 @@ setGeneric("to_date", function(x, format) { standardGeneric("to_date") })
 #' @name NULL
 setGeneric("to_json", function(x, ...) { standardGeneric("to_json") })
 
+#' @rdname column_collection_functions
+#' @name NULL
+setGeneric("to_csv", function(x, ...) { standardGeneric("to_csv") })
+
 #' @rdname column_datetime_functions
 #' @name NULL
 setGeneric("to_timestamp", function(x, format) { standardGeneric("to_timestamp") })
@@ -1453,6 +1475,10 @@ setGeneric("spark.freqItemsets", function(object) { standardGeneric("spark.freqI
 #' @rdname spark.fpGrowth
 setGeneric("spark.associationRules", function(object) { standardGeneric("spark.associationRules") })
 
+#' @rdname spark.prefixSpan
+setGeneric("spark.findFrequentSequentialPatterns",
+            function(data, ...) { standardGeneric("spark.findFrequentSequentialPatterns") })
+
 #' @param object a fitted ML model object.
 #' @param path the directory where the model is saved.
 #' @param ... additional argument(s) passed to the method.
diff --git a/R/pkg/R/mllib_fpm.R b/R/pkg/R/mllib_fpm.R
index 4ad34fe82328f..ac37580c6b373 100644
--- a/R/pkg/R/mllib_fpm.R
+++ b/R/pkg/R/mllib_fpm.R
@@ -23,6 +23,12 @@
 #' @note FPGrowthModel since 2.2.0
 setClass("FPGrowthModel", slots = list(jobj = "jobj"))
 
+#' S4 class that represents a PrefixSpan
+#'
+#' @param jobj a Java object reference to the backing Scala PrefixSpan
+#' @note PrefixSpan since 3.0.0
+setClass("PrefixSpan", slots = list(jobj = "jobj"))
+
 #' FP-growth
 #'
 #' A parallel FP-growth algorithm to mine frequent itemsets.
@@ -155,3 +161,61 @@ setMethod("write.ml", signature(object = "FPGrowthModel", path = "character"),
           function(object, path, overwrite = FALSE) {
             write_internal(object, path, overwrite)
           })
+
+#' PrefixSpan
+#'
+#' A parallel PrefixSpan algorithm to mine frequent sequential patterns.
+#' \code{spark.findFrequentSequentialPatterns} returns a complete set of frequent sequential
+#' patterns.
+#' For more details, see
+#' \href{https://spark.apache.org/docs/latest/mllib-frequent-pattern-mining.html#prefixspan}{
+#' PrefixSpan}.
+#'
+#  Find frequent sequential patterns.
+#' @param data A SparkDataFrame.
+#' @param minSupport Minimal support level.
+#' @param maxPatternLength Maximal pattern length.
+#' @param maxLocalProjDBSize Maximum number of items (including delimiters used in the internal
+#'                           storage format) allowed in a projected database before local
+#'                           processing.
+#' @param sequenceCol name of the sequence column in the dataset.
+#' @param ... additional argument(s) passed to the method.
+#' @return A complete set of frequent sequential patterns in the input sequences of itemsets.
+#'         The returned \code{SparkDataFrame} contains columns of sequence and corresponding
+#'         frequency. Its schema will be:
+#'         \code{sequence: ArrayType(ArrayType(T))} (T is the item type)
+#'         \code{freq: Long}
+#' @rdname spark.prefixSpan
+#' @aliases findFrequentSequentialPatterns,PrefixSpan,SparkDataFrame-method
+#' @examples
+#' \dontrun{
+#' df <- createDataFrame(list(list(list(list(1L, 2L), list(3L))),
+#'                       list(list(list(1L), list(3L, 2L), list(1L, 2L))),
+#'                       list(list(list(1L, 2L), list(5L))),
+#'                       list(list(list(6L)))), schema = c("sequence"))
+#' frequency <- spark.findFrequentSequentialPatterns(df, minSupport = 0.5, maxPatternLength = 5L,
+#'                                                   maxLocalProjDBSize = 32000000L)
+#' showDF(frequency)
+#' }
+#' @note spark.findFrequentSequentialPatterns(SparkDataFrame) since 3.0.0
+setMethod("spark.findFrequentSequentialPatterns",
+          signature(data = "SparkDataFrame"),
+          function(data, minSupport = 0.1, maxPatternLength = 10L,
+            maxLocalProjDBSize = 32000000L, sequenceCol = "sequence") {
+              if (!is.numeric(minSupport) || minSupport < 0) {
+                stop("minSupport should be a number with value >= 0.")
+              }
+              if (!is.integer(maxPatternLength) || maxPatternLength <= 0) {
+                stop("maxPatternLength should be a number with value > 0.")
+              }
+              if (!is.numeric(maxLocalProjDBSize) || maxLocalProjDBSize <= 0) {
+                stop("maxLocalProjDBSize should be a number with value > 0.")
+              }
+
+              jobj <- callJStatic("org.apache.spark.ml.r.PrefixSpanWrapper", "getPrefixSpan",
+                                  as.numeric(minSupport), as.integer(maxPatternLength),
+                                  as.numeric(maxLocalProjDBSize), as.character(sequenceCol))
+              object <- new("PrefixSpan", jobj = jobj)
+              dataFrame(callJMethod(object@jobj, "findFrequentSequentialPatterns", data@sdf))
+            }
+          )
diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R
index d3a9cbae7d808..ac289d38d01bd 100644
--- a/R/pkg/R/sparkR.R
+++ b/R/pkg/R/sparkR.R
@@ -88,49 +88,6 @@ sparkR.stop <- function() {
   sparkR.session.stop()
 }
 
-#' (Deprecated) Initialize a new Spark Context
-#'
-#' This function initializes a new SparkContext.
-#'
-#' @param master The Spark master URL
-#' @param appName Application name to register with cluster manager
-#' @param sparkHome Spark Home directory
-#' @param sparkEnvir Named list of environment variables to set on worker nodes
-#' @param sparkExecutorEnv Named list of environment variables to be used when launching executors
-#' @param sparkJars Character vector of jar files to pass to the worker nodes
-#' @param sparkPackages Character vector of package coordinates
-#' @seealso \link{sparkR.session}
-#' @rdname sparkR.init-deprecated
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init("local[2]", "SparkR", "/home/spark")
-#' sc <- sparkR.init("local[2]", "SparkR", "/home/spark",
-#'                  list(spark.executor.memory="1g"))
-#' sc <- sparkR.init("yarn-client", "SparkR", "/home/spark",
-#'                  list(spark.executor.memory="4g"),
-#'                  list(LD_LIBRARY_PATH="/directory of JVM libraries (libjvm.so) on workers/"),
-#'                  c("one.jar", "two.jar", "three.jar"),
-#'                  c("com.databricks:spark-avro_2.11:2.0.1"))
-#'}
-#' @note sparkR.init since 1.4.0
-sparkR.init <- function(
-  master = "",
-  appName = "SparkR",
-  sparkHome = Sys.getenv("SPARK_HOME"),
-  sparkEnvir = list(),
-  sparkExecutorEnv = list(),
-  sparkJars = "",
-  sparkPackages = "") {
-  .Deprecated("sparkR.session")
-  sparkR.sparkContext(master,
-     appName,
-     sparkHome,
-     convertNamedListToEnv(sparkEnvir),
-     convertNamedListToEnv(sparkExecutorEnv),
-     sparkJars,
-     sparkPackages)
-}
-
 # Internal function to handle creating the SparkContext.
 sparkR.sparkContext <- function(
   master = "",
@@ -272,61 +229,6 @@ sparkR.sparkContext <- function(
   sc
 }
 
-#' (Deprecated) Initialize a new SQLContext
-#'
-#' This function creates a SparkContext from an existing JavaSparkContext and
-#' then uses it to initialize a new SQLContext
-#'
-#' Starting SparkR 2.0, a SparkSession is initialized and returned instead.
-#' This API is deprecated and kept for backward compatibility only.
-#'
-#' @param jsc The existing JavaSparkContext created with SparkR.init()
-#' @seealso \link{sparkR.session}
-#' @rdname sparkRSQL.init-deprecated
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' sqlContext <- sparkRSQL.init(sc)
-#'}
-#' @note sparkRSQL.init since 1.4.0
-sparkRSQL.init <- function(jsc = NULL) {
-  .Deprecated("sparkR.session")
-
-  if (exists(".sparkRsession", envir = .sparkREnv)) {
-    return(get(".sparkRsession", envir = .sparkREnv))
-  }
-
-  # Default to without Hive support for backward compatibility.
-  sparkR.session(enableHiveSupport = FALSE)
-}
-
-#' (Deprecated) Initialize a new HiveContext
-#'
-#' This function creates a HiveContext from an existing JavaSparkContext
-#'
-#' Starting SparkR 2.0, a SparkSession is initialized and returned instead.
-#' This API is deprecated and kept for backward compatibility only.
-#'
-#' @param jsc The existing JavaSparkContext created with SparkR.init()
-#' @seealso \link{sparkR.session}
-#' @rdname sparkRHive.init-deprecated
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' sqlContext <- sparkRHive.init(sc)
-#'}
-#' @note sparkRHive.init since 1.4.0
-sparkRHive.init <- function(jsc = NULL) {
-  .Deprecated("sparkR.session")
-
-  if (exists(".sparkRsession", envir = .sparkREnv)) {
-    return(get(".sparkRsession", envir = .sparkREnv))
-  }
-
-  # Default to without Hive support for backward compatibility.
-  sparkR.session(enableHiveSupport = TRUE)
-}
-
 #' Get the existing SparkSession or initialize a new SparkSession.
 #'
 #' SparkSession is the entry point into SparkR. \code{sparkR.session} gets the existing
@@ -482,26 +384,11 @@ sparkR.uiWebUrl <- function() {
 #' setJobGroup("myJobGroup", "My job group description", TRUE)
 #'}
 #' @note setJobGroup since 1.5.0
-#' @method setJobGroup default
-setJobGroup.default <- function(groupId, description, interruptOnCancel) {
+setJobGroup <- function(groupId, description, interruptOnCancel) {
   sc <- getSparkContext()
   invisible(callJMethod(sc, "setJobGroup", groupId, description, interruptOnCancel))
 }
 
-setJobGroup <- function(sc, groupId, description, interruptOnCancel) {
-  if (class(sc) == "jobj" && any(grepl("JavaSparkContext", getClassName.jobj(sc)))) {
-    .Deprecated("setJobGroup(groupId, description, interruptOnCancel)",
-                old = "setJobGroup(sc, groupId, description, interruptOnCancel)")
-    setJobGroup.default(groupId, description, interruptOnCancel)
-  } else {
-    # Parameter order is shifted
-    groupIdToUse <- sc
-    descriptionToUse <- groupId
-    interruptOnCancelToUse <- description
-    setJobGroup.default(groupIdToUse, descriptionToUse, interruptOnCancelToUse)
-  }
-}
-
 #' Clear current job group ID and its description
 #'
 #' @rdname clearJobGroup
@@ -512,22 +399,11 @@ setJobGroup <- function(sc, groupId, description, interruptOnCancel) {
 #' clearJobGroup()
 #'}
 #' @note clearJobGroup since 1.5.0
-#' @method clearJobGroup default
-clearJobGroup.default <- function() {
+clearJobGroup <- function() {
   sc <- getSparkContext()
   invisible(callJMethod(sc, "clearJobGroup"))
 }
 
-clearJobGroup <- function(sc) {
-  if (!missing(sc) &&
-      class(sc) == "jobj" &&
-      any(grepl("JavaSparkContext", getClassName.jobj(sc)))) {
-    .Deprecated("clearJobGroup()", old = "clearJobGroup(sc)")
-  }
-  clearJobGroup.default()
-}
-
-
 #' Cancel active jobs for the specified group
 #'
 #' @param groupId the ID of job group to be cancelled
@@ -539,23 +415,11 @@ clearJobGroup <- function(sc) {
 #' cancelJobGroup("myJobGroup")
 #'}
 #' @note cancelJobGroup since 1.5.0
-#' @method cancelJobGroup default
-cancelJobGroup.default <- function(groupId) {
+cancelJobGroup <- function(groupId) {
   sc <- getSparkContext()
   invisible(callJMethod(sc, "cancelJobGroup", groupId))
 }
 
-cancelJobGroup <- function(sc, groupId) {
-  if (class(sc) == "jobj" && any(grepl("JavaSparkContext", getClassName.jobj(sc)))) {
-    .Deprecated("cancelJobGroup(groupId)", old = "cancelJobGroup(sc, groupId)")
-    cancelJobGroup.default(groupId)
-  } else {
-    # Parameter order is shifted
-    groupIdToUse <- sc
-    cancelJobGroup.default(groupIdToUse)
-  }
-}
-
 #' Set a human readable description of the current job.
 #'
 #' Set a description that is shown as a job description in UI.
@@ -626,6 +490,8 @@ sparkConfToSubmitOps[["spark.driver.extraLibraryPath"]] <- "--driver-library-pat
 sparkConfToSubmitOps[["spark.master"]] <- "--master"
 sparkConfToSubmitOps[["spark.yarn.keytab"]] <- "--keytab"
 sparkConfToSubmitOps[["spark.yarn.principal"]] <- "--principal"
+sparkConfToSubmitOps[["spark.kerberos.keytab"]] <- "--keytab"
+sparkConfToSubmitOps[["spark.kerberos.principal"]] <- "--principal"
 
 
 # Utility function that returns Spark Submit arguments as a string
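
As a rough migration sketch (illustrative only), code that used the removed
sparkR.init()/sparkRSQL.init()/sparkRHive.init() entry points and the SparkContext-first
job group calls would now go through sparkR.session() and the argument-only variants kept
above:

  library(SparkR)

  # replaces sparkR.init() followed by sparkRSQL.init()/sparkRHive.init()
  sparkR.session(master = "local[2]", appName = "SparkR",
                 sparkConfig = list(spark.executor.memory = "1g"),
                 enableHiveSupport = TRUE)

  # job group helpers no longer take a SparkContext as the first argument
  setJobGroup("myJobGroup", "My job group description", TRUE)
  cancelJobGroup("myJobGroup")
  clearJobGroup()

  sparkR.session.stop()
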
diff --git a/R/pkg/R/stats.R b/R/pkg/R/stats.R
index 497f18c763048..7252351ebebb2 100644
--- a/R/pkg/R/stats.R
+++ b/R/pkg/R/stats.R
@@ -109,7 +109,7 @@ setMethod("corr",
 #'
 #' Finding frequent items for columns, possibly with false positives.
 #' Using the frequent element count algorithm described in
-#' \url{http://dx.doi.org/10.1145/762471.762473}, proposed by Karp, Schenker, and Papadimitriou.
+#' \url{https://doi.org/10.1145/762471.762473}, proposed by Karp, Schenker, and Papadimitriou.
 #'
 #' @param x A SparkDataFrame.
 #' @param cols A vector column names to search frequent items in.
@@ -143,7 +143,7 @@ setMethod("freqItems", signature(x = "SparkDataFrame", cols = "character"),
 #' *exact* rank of x is close to (p * N). More precisely,
 #'   floor((p - err) * N) <= rank(x) <= ceil((p + err) * N).
 #' This method implements a variation of the Greenwald-Khanna algorithm (with some speed
-#' optimizations). The algorithm was first present in [[http://dx.doi.org/10.1145/375663.375670
+#' optimizations). The algorithm was first presented in [[https://doi.org/10.1145/375663.375670
 #' Space-efficient Online Computation of Quantile Summaries]] by Greenwald and Khanna.
 #' Note that NA values will be ignored in numerical columns before calculation. For
 #'   columns only containing NA values, an empty list is returned.
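
For context, the two documented helpers touched above can be exercised as follows (a
minimal sketch assuming an active session; the toy data is made up):

  df <- createDataFrame(data.frame(a = c(1, 1, 2, 3, 3, 3),
                                   b = c(6, 5, 5, 5, 5, 4)))

  # frequent element counting (Karp, Schenker and Papadimitriou)
  freqItems(df, c("a", "b"), support = 0.4)

  # Greenwald-Khanna approximate quantiles within relativeError of the exact rank
  approxQuantile(df, "a", probabilities = c(0.5, 0.9), relativeError = 0.01)
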
diff --git a/R/pkg/inst/profile/general.R b/R/pkg/inst/profile/general.R
index 8c75c19ca7ac3..3efb460846fc2 100644
--- a/R/pkg/inst/profile/general.R
+++ b/R/pkg/inst/profile/general.R
@@ -16,6 +16,10 @@
 #
 
 .First <- function() {
+  if (utils::compareVersion(paste0(R.version$major, ".", R.version$minor), "3.4.0") == -1) {
+    warning("Support for R prior to version 3.4 is deprecated since Spark 3.0.0")
+  }
+
   packageDir <- Sys.getenv("SPARKR_PACKAGE_DIR")
   dirs <- strsplit(packageDir, ",")[[1]]
   .libPaths(c(dirs, .libPaths()))
diff --git a/R/pkg/inst/profile/shell.R b/R/pkg/inst/profile/shell.R
index 8a8111a8c5419..32eb3671b5941 100644
--- a/R/pkg/inst/profile/shell.R
+++ b/R/pkg/inst/profile/shell.R
@@ -16,6 +16,10 @@
 #
 
 .First <- function() {
+  if (utils::compareVersion(paste0(R.version$major, ".", R.version$minor), "3.4.0") == -1) {
+    warning("Support for R prior to version 3.4 is deprecated since Spark 3.0.0")
+  }
+
   home <- Sys.getenv("SPARK_HOME")
   .libPaths(c(file.path(home, "R", "lib"), .libPaths()))
   Sys.setenv(NOAWT = 1)
diff --git a/R/pkg/tests/fulltests/test_Serde.R b/R/pkg/tests/fulltests/test_Serde.R
index 3577929323b8b..1525bdb2f5c8b 100644
--- a/R/pkg/tests/fulltests/test_Serde.R
+++ b/R/pkg/tests/fulltests/test_Serde.R
@@ -124,3 +124,35 @@ test_that("SerDe of list of lists", {
 })
 
 sparkR.session.stop()
+
+# Note that this test should be at the end of the tests since the configurations used here are
+# not specific to sessions, and the Spark context is restarted.
+test_that("createDataFrame large objects", {
+  for (encryptionEnabled in list("true", "false")) {
+    # To simulate a large object scenario, we set spark.r.maxAllocationLimit to a smaller value
+    conf <- list(spark.r.maxAllocationLimit = "100",
+                 spark.io.encryption.enabled = encryptionEnabled)
+
+    suppressWarnings(sparkR.session(master = sparkRTestMaster,
+                                    sparkConfig = conf,
+                                    enableHiveSupport = FALSE))
+
+    sc <- getSparkContext()
+    actual <- callJStatic("org.apache.spark.api.r.RUtils", "getEncryptionEnabled", sc)
+    expected <- as.logical(encryptionEnabled)
+    expect_equal(actual, expected)
+
+    tryCatch({
+      # suppress warnings from dot in the field names. See also SPARK-21536.
+      df <- suppressWarnings(createDataFrame(iris, numPartitions = 3))
+      expect_equal(getNumPartitions(df), 3)
+      expect_equal(dim(df), dim(iris))
+
+      df <- createDataFrame(cars, numPartitions = 3)
+      expect_equal(collect(df), cars)
+    },
+    finally = {
+      sparkR.stop()
+    })
+  }
+})
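
The session-level knobs exercised by the new test can also be set directly in user code;
a sketch (configuration values chosen only to force the file-based transfer path):

  sparkR.session(sparkConfig = list(spark.r.maxAllocationLimit = "100",
                                    spark.io.encryption.enabled = "true"))

  # local data larger than the allocation limit is shipped to the JVM through a
  # temporary file, encrypted when spark.io.encryption.enabled is true
  df <- suppressWarnings(createDataFrame(iris, numPartitions = 3))
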
diff --git a/R/pkg/tests/fulltests/test_context.R b/R/pkg/tests/fulltests/test_context.R
index 288a2714a554e..eb8d2a700e1ea 100644
--- a/R/pkg/tests/fulltests/test_context.R
+++ b/R/pkg/tests/fulltests/test_context.R
@@ -54,15 +54,6 @@ test_that("Check masked functions", {
                sort(namesOfMaskedCompletely, na.last = TRUE))
 })
 
-test_that("repeatedly starting and stopping SparkR", {
-  for (i in 1:4) {
-    sc <- suppressWarnings(sparkR.init(master = sparkRTestMaster))
-    rdd <- parallelize(sc, 1:20, 2L)
-    expect_equal(countRDD(rdd), 20)
-    suppressWarnings(sparkR.stop())
-  }
-})
-
 test_that("repeatedly starting and stopping SparkSession", {
   for (i in 1:4) {
     sparkR.session(master = sparkRTestMaster, enableHiveSupport = FALSE)
@@ -101,9 +92,6 @@ test_that("job group functions can be called", {
   cancelJobGroup("groupId")
   clearJobGroup()
 
-  suppressWarnings(setJobGroup(sc, "groupId", "job description", TRUE))
-  suppressWarnings(cancelJobGroup(sc, "groupId"))
-  suppressWarnings(clearJobGroup(sc))
   sparkR.session.stop()
 })
 
diff --git a/R/pkg/tests/fulltests/test_mllib_fpm.R b/R/pkg/tests/fulltests/test_mllib_fpm.R
index d80f66a25de1c..daf9ff97a8216 100644
--- a/R/pkg/tests/fulltests/test_mllib_fpm.R
+++ b/R/pkg/tests/fulltests/test_mllib_fpm.R
@@ -83,4 +83,20 @@ test_that("spark.fpGrowth", {
 
 })
 
+test_that("spark.prefixSpan", {
+    df <- createDataFrame(list(list(list(list(1L, 2L), list(3L))),
+                          list(list(list(1L), list(3L, 2L), list(1L, 2L))),
+                          list(list(list(1L, 2L), list(5L))),
+                          list(list(list(6L)))), schema = c("sequence"))
+    result1 <- spark.findFrequentSequentialPatterns(df, minSupport = 0.5, maxPatternLength = 5L,
+                                                    maxLocalProjDBSize = 32000000L)
+
+    expected_result <- createDataFrame(list(list(list(list(1L)), 3L),
+                                            list(list(list(3L)), 2L),
+                                            list(list(list(2L)), 3L),
+                                            list(list(list(1L, 2L)), 3L),
+                                            list(list(list(1L), list(3L)), 2L)),
+                                            schema = c("sequence", "freq"))
+  })
+
 sparkR.session.stop()
diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R
index 0c4bdb31b027b..0d5118c127f2b 100644
--- a/R/pkg/tests/fulltests/test_sparkSQL.R
+++ b/R/pkg/tests/fulltests/test_sparkSQL.R
@@ -106,15 +106,6 @@ if (is_windows()) {
   Sys.setenv(TZ = "GMT")
 }
 
-test_that("calling sparkRSQL.init returns existing SQL context", {
-  sqlContext <- suppressWarnings(sparkRSQL.init(sc))
-  expect_equal(suppressWarnings(sparkRSQL.init(sc)), sqlContext)
-})
-
-test_that("calling sparkRSQL.init returns existing SparkSession", {
-  expect_equal(suppressWarnings(sparkRSQL.init(sc)), sparkSession)
-})
-
 test_that("calling sparkR.session returns existing SparkSession", {
   expect_equal(sparkR.session(), sparkSession)
 })
@@ -221,7 +212,7 @@ test_that("structField type strings", {
 
 test_that("create DataFrame from RDD", {
   rdd <- lapply(parallelize(sc, 1:10), function(x) { list(x, as.character(x)) })
-  df <- createDataFrame(rdd, list("a", "b"))
+  df <- SparkR::createDataFrame(rdd, list("a", "b"))
   dfAsDF <- as.DataFrame(rdd, list("a", "b"))
   expect_is(df, "SparkDataFrame")
   expect_is(dfAsDF, "SparkDataFrame")
@@ -287,7 +278,7 @@ test_that("create DataFrame from RDD", {
 
   df <- as.DataFrame(cars, numPartitions = 2)
   expect_equal(getNumPartitions(df), 2)
-  df <- createDataFrame(cars, numPartitions = 3)
+  df <- SparkR::createDataFrame(cars, numPartitions = 3)
   expect_equal(getNumPartitions(df), 3)
   # validate limit by num of rows
   df <- createDataFrame(cars, numPartitions = 60)
@@ -308,7 +299,7 @@ test_that("create DataFrame from RDD", {
   sql("CREATE TABLE people (name string, age double, height float)")
   df <- read.df(jsonPathNa, "json", schema)
   insertInto(df, "people")
-  expect_equal(collect(sql("SELECT age from people WHERE name = 'Bob'"))$age,
+  expect_equal(collect(SparkR::sql("SELECT age from people WHERE name = 'Bob'"))$age,
                c(16))
   expect_equal(collect(sql("SELECT height from people WHERE name ='Bob'"))$height,
                c(176.5))
@@ -316,18 +307,6 @@ test_that("create DataFrame from RDD", {
   unsetHiveContext()
 })
 
-test_that("createDataFrame uses files for large objects", {
-  # To simulate a large file scenario, we set spark.r.maxAllocationLimit to a smaller value
-  conf <- callJMethod(sparkSession, "conf")
-  callJMethod(conf, "set", "spark.r.maxAllocationLimit", "100")
-  df <- suppressWarnings(createDataFrame(iris, numPartitions = 3))
-  expect_equal(getNumPartitions(df), 3)
-
-  # Resetting the conf back to default value
-  callJMethod(conf, "set", "spark.r.maxAllocationLimit", toString(.Machine$integer.max / 10))
-  expect_equal(dim(df), dim(iris))
-})
-
 test_that("read/write csv as DataFrame", {
   if (windows_with_hadoop()) {
     csvPath <- tempfile(pattern = "sparkr-test", fileext = ".csv")
@@ -640,14 +619,10 @@ test_that("read/write json files", {
     jsonPath3 <- tempfile(pattern = "jsonPath3", fileext = ".json")
     write.json(df, jsonPath3)
 
-    # Test read.json()/jsonFile() works with multiple input paths
+    # Test read.json() works with multiple input paths
     jsonDF1 <- read.json(c(jsonPath2, jsonPath3))
     expect_is(jsonDF1, "SparkDataFrame")
     expect_equal(count(jsonDF1), 6)
-    # Suppress warnings because jsonFile is deprecated
-    jsonDF2 <- suppressWarnings(jsonFile(c(jsonPath2, jsonPath3)))
-    expect_is(jsonDF2, "SparkDataFrame")
-    expect_equal(count(jsonDF2), 6)
 
     unlink(jsonPath2)
     unlink(jsonPath3)
@@ -667,20 +642,6 @@ test_that("read/write json files - compression option", {
   unlink(jsonPath)
 })
 
-test_that("jsonRDD() on a RDD with json string", {
-  sqlContext <- suppressWarnings(sparkRSQL.init(sc))
-  rdd <- parallelize(sc, mockLines)
-  expect_equal(countRDD(rdd), 3)
-  df <- suppressWarnings(jsonRDD(sqlContext, rdd))
-  expect_is(df, "SparkDataFrame")
-  expect_equal(count(df), 3)
-
-  rdd2 <- flatMap(rdd, function(x) c(x, x))
-  df <- suppressWarnings(jsonRDD(sqlContext, rdd2))
-  expect_is(df, "SparkDataFrame")
-  expect_equal(count(df), 6)
-})
-
 test_that("test tableNames and tables", {
   count <- count(listTables())
 
@@ -695,10 +656,10 @@ test_that("test tableNames and tables", {
   expect_true("tableName" %in% colnames(tables()))
   expect_true(all(c("tableName", "database", "isTemporary") %in% colnames(tables())))
 
-  suppressWarnings(registerTempTable(df, "table2"))
+  createOrReplaceTempView(df, "table2")
   tables <- listTables()
   expect_equal(count(tables), count + 2)
-  suppressWarnings(dropTempTable("table1"))
+  dropTempView("table1")
   expect_true(dropTempView("table2"))
 
   tables <- listTables()
@@ -1418,7 +1379,7 @@ test_that("column operators", {
 
 test_that("column functions", {
   c <- column("a")
-  c1 <- abs(c) + acos(c) + approxCountDistinct(c) + ascii(c) + asin(c) + atan(c)
+  c1 <- abs(c) + acos(c) + approx_count_distinct(c) + ascii(c) + asin(c) + atan(c)
   c2 <- avg(c) + base64(c) + bin(c) + bitwiseNOT(c) + cbrt(c) + ceil(c) + cos(c)
   c3 <- cosh(c) + count(c) + crc32(c) + hash(c) + exp(c)
   c4 <- explode(c) + expm1(c) + factorial(c) + first(c) + floor(c) + hex(c)
@@ -1427,7 +1388,7 @@ test_that("column functions", {
   c7 <- mean(c) + min(c) + month(c) + negate(c) + posexplode(c) + quarter(c)
   c8 <- reverse(c) + rint(c) + round(c) + rtrim(c) + sha1(c) + monotonically_increasing_id()
   c9 <- signum(c) + sin(c) + sinh(c) + size(c) + stddev(c) + soundex(c) + sqrt(c) + sum(c)
-  c10 <- sumDistinct(c) + tan(c) + tanh(c) + toDegrees(c) + toRadians(c)
+  c10 <- sumDistinct(c) + tan(c) + tanh(c) + degrees(c) + radians(c)
   c11 <- to_date(c) + trim(c) + unbase64(c) + unhex(c) + upper(c)
   c12 <- variance(c) + ltrim(c, "a") + rtrim(c, "b") + trim(c, "c")
   c13 <- lead("col", 1) + lead(c, 1) + lag("col", 1) + lag(c, 1)
@@ -1659,7 +1620,20 @@ test_that("column functions", {
   expect_equal(collect(select(df, bround(df$x, 0)))[[1]][1], 2)
   expect_equal(collect(select(df, bround(df$x, 0)))[[1]][2], 4)
 
-  # Test to_json(), from_json()
+  # Test from_csv(), schema_of_csv()
+  df <- as.DataFrame(list(list("col" = "1")))
+  c <- collect(select(df, alias(from_csv(df$col, "a INT"), "csv")))
+  expect_equal(c[[1]][[1]]$a, 1)
+  c <- collect(select(df, alias(from_csv(df$col, lit("a INT")), "csv")))
+  expect_equal(c[[1]][[1]]$a, 1)
+
+  df <- as.DataFrame(list(list("col" = "1")))
+  c <- collect(select(df, schema_of_csv("Amsterdam,2018")))
+  expect_equal(c[[1]], "struct<_c0:string,_c1:int>")
+  c <- collect(select(df, schema_of_csv(lit("Amsterdam,2018"))))
+  expect_equal(c[[1]], "struct<_c0:string,_c1:int>")
+
+  # Test to_json(), from_json(), schema_of_json()
   df <- sql("SELECT array(named_struct('name', 'Bob'), named_struct('name', 'Alice')) as people")
   j <- collect(select(df, alias(to_json(df$people), "json")))
   expect_equal(j[order(j$json), ][1], "[{\"name\":\"Bob\"},{\"name\":\"Alice\"}]")
@@ -1686,6 +1660,12 @@ test_that("column functions", {
     expect_true(any(apply(s, 1, function(x) { x[[1]]$age == 16 })))
   }
 
+  df <- as.DataFrame(list(list("col" = "1")))
+  c <- collect(select(df, schema_of_json('{"name":"Bob"}')))
+  expect_equal(c[[1]], "struct<name:string>")
+  c <- collect(select(df, schema_of_json(lit('{"name":"Bob"}'))))
+  expect_equal(c[[1]], "struct<name:string>")
+
   # Test to_json() supports arrays of primitive types and arrays
   df <- sql("SELECT array(19, 42, 70) as age")
   j <- collect(select(df, alias(to_json(df$age), "json")))
@@ -1699,14 +1679,14 @@ test_that("column functions", {
   df <- as.DataFrame(list(list("col" = "{\"date\":\"21/10/2014\"}")))
   schema2 <- structType(structField("date", "date"))
   s <- collect(select(df, from_json(df$col, schema2)))
-  expect_equal(s[[1]][[1]], NA)
+  expect_equal(s[[1]][[1]]$date, NA)
   s <- collect(select(df, from_json(df$col, schema2, dateFormat = "dd/MM/yyyy")))
   expect_is(s[[1]][[1]]$date, "Date")
   expect_equal(as.character(s[[1]][[1]]$date), "2014-10-21")
 
   # check for unparseable
   df <- as.DataFrame(list(list("a" = "")))
-  expect_equal(collect(select(df, from_json(df$a, schema)))[[1]][[1]], NA)
+  expect_equal(collect(select(df, from_json(df$a, schema)))[[1]][[1]]$a, NA)
 
   # check if array type in string is correctly supported.
   jsonArr <- "[{\"name\":\"Bob\"}, {\"name\":\"Alice\"}]"
@@ -1721,6 +1701,11 @@ test_that("column functions", {
     expect_equal(arr$arrcol[[1]][[2]]$name, "Alice")
   }
 
+  # Test to_csv()
+  df <- sql("SELECT named_struct('name', 'Bob') as people")
+  j <- collect(select(df, alias(to_csv(df$people), "csv")))
+  expect_equal(j[order(j$csv), ][1], "Bob")
+
   # Test create_array() and create_map()
   df <- as.DataFrame(data.frame(
     x = c(1.0, 2.0), y = c(-1.0, 3.0), z = c(-2.0, 5.0)
@@ -1831,6 +1816,14 @@ test_that("string operators", {
     collect(select(df4, split_string(df4$a, "\\\\")))[1, 1],
     list(list("a.b@c.d   1", "b"))
   )
+  expect_equal(
+    collect(select(df4, split_string(df4$a, "\\.", 2)))[1, 1],
+    list(list("a", "b@c.d   1\\b"))
+  )
+  expect_equal(
+    collect(select(df4, split_string(df4$a, "b", 0)))[1, 1],
+    list(list("a.", "@c.d   1\\", ""))
+  )
 
   l5 <- list(list(a = "abc"))
   df5 <- createDataFrame(l5)
@@ -2419,6 +2412,15 @@ test_that("join(), crossJoin() and merge() on a DataFrame", {
   expect_true(any(grepl("BroadcastHashJoin", execution_plan_broadcast)))
 })
 
+test_that("test hint", {
+  df <- sql("SELECT * FROM range(10e10)")
+  hintList <- list("hint2", "hint3", "hint4")
+  execution_plan_hint <- capture.output(
+    explain(hint(df, "hint1", 1.23456, "aaaaaaaaaa", hintList), TRUE)
+  )
+  expect_true(any(grepl("1.23456, aaaaaaaaaa", execution_plan_hint)))
+})
+
 test_that("toJSON() on DataFrame", {
   df <- as.DataFrame(cars)
   df_json <- toJSON(df)
@@ -2467,6 +2469,7 @@ test_that("union(), unionByName(), rbind(), except(), and intersect() on a DataF
   expect_is(unioned, "SparkDataFrame")
   expect_equal(count(unioned), 6)
   expect_equal(first(unioned)$name, "Michael")
+  expect_equal(count(arrange(suppressWarnings(union(df, df2)), df$age)), 6)
   expect_equal(count(arrange(suppressWarnings(unionAll(df, df2)), df$age)), 6)
 
   df1 <- select(df2, "age", "name")
@@ -2646,17 +2649,14 @@ test_that("read/write Parquet files", {
     expect_is(df2, "SparkDataFrame")
     expect_equal(count(df2), 3)
 
-    # Test write.parquet/saveAsParquetFile and read.parquet/parquetFile
+    # Test write.parquet and read.parquet
     parquetPath2 <- tempfile(pattern = "parquetPath2", fileext = ".parquet")
     write.parquet(df, parquetPath2)
     parquetPath3 <- tempfile(pattern = "parquetPath3", fileext = ".parquet")
-    suppressWarnings(saveAsParquetFile(df, parquetPath3))
+    write.parquet(df, parquetPath3)
     parquetDF <- read.parquet(c(parquetPath2, parquetPath3))
     expect_is(parquetDF, "SparkDataFrame")
     expect_equal(count(parquetDF), count(df) * 2)
-    parquetDF2 <- suppressWarnings(parquetFile(parquetPath2, parquetPath3))
-    expect_is(parquetDF2, "SparkDataFrame")
-    expect_equal(count(parquetDF2), count(df) * 2)
 
     # Test if varargs works with variables
     saveMode <- "overwrite"
@@ -2704,8 +2704,16 @@ test_that("read/write text files", {
   expect_equal(colnames(df2), c("value"))
   expect_equal(count(df2), count(df) * 2)
 
+  df3 <- createDataFrame(list(list(1L, "1"), list(2L, "2"), list(1L, "1"), list(2L, "2")),
+                         schema = c("key", "value"))
+  textPath3 <- tempfile(pattern = "textPath3", fileext = ".txt")
+  write.df(df3, textPath3, "text", mode = "overwrite", partitionBy = "key")
+  df4 <- read.df(textPath3, "text")
+  expect_equal(count(df3), count(df4))
+
   unlink(textPath)
   unlink(textPath2)
+  unlink(textPath3)
 })
 
 test_that("read/write text files - compression option", {
@@ -3457,39 +3465,6 @@ test_that("Window functions on a DataFrame", {
   expect_equal(result, expected)
 })
 
-test_that("createDataFrame sqlContext parameter backward compatibility", {
-  sqlContext <- suppressWarnings(sparkRSQL.init(sc))
-  a <- 1:3
-  b <- c("a", "b", "c")
-  ldf <- data.frame(a, b)
-  # Call function with namespace :: operator - SPARK-16538
-  df <- suppressWarnings(SparkR::createDataFrame(sqlContext, ldf))
-  expect_equal(columns(df), c("a", "b"))
-  expect_equal(dtypes(df), list(c("a", "int"), c("b", "string")))
-  expect_equal(count(df), 3)
-  ldf2 <- collect(df)
-  expect_equal(ldf$a, ldf2$a)
-
-  df2 <- suppressWarnings(createDataFrame(sqlContext, iris))
-  expect_equal(count(df2), 150)
-  expect_equal(ncol(df2), 5)
-
-  df3 <- suppressWarnings(read.df(sqlContext, jsonPath, "json"))
-  expect_is(df3, "SparkDataFrame")
-  expect_equal(count(df3), 3)
-
-  before <- suppressWarnings(createDataFrame(sqlContext, iris))
-  after <- suppressWarnings(createDataFrame(iris))
-  expect_equal(collect(before), collect(after))
-
-  # more tests for SPARK-16538
-  createOrReplaceTempView(df, "table")
-  SparkR::listTables()
-  SparkR::sql("SELECT 1")
-  suppressWarnings(SparkR::sql(sqlContext, "SELECT * FROM table"))
-  suppressWarnings(SparkR::dropTempTable(sqlContext, "table"))
-})
-
 test_that("randomSplit", {
   num <- 4000
   df <- createDataFrame(data.frame(id = 1:num))
@@ -3676,7 +3651,7 @@ test_that("catalog APIs, listTables, listColumns, listFunctions", {
 
   createOrReplaceTempView(as.DataFrame(cars), "cars")
 
-  tb <- listTables()
+  tb <- SparkR::listTables()
   expect_equal(nrow(tb), count + 1)
   tbs <- collect(tb)
   expect_true(nrow(tbs[tbs$name == "cars", ]) > 0)
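
The string and hint behaviours asserted above amount to the following in user code
(illustrative sketch, reusing the test's made-up hint names):

  df <- createDataFrame(data.frame(a = "a.b@c.d   1\\b", stringsAsFactors = FALSE))

  # split_string() now forwards an optional limit, mirroring Spark SQL's split(str, regex, limit)
  head(select(df, split_string(df$a, "\\.", 2)))

  # hint() parameters may be numbers, strings and lists of strings as well as columns
  df2 <- sql("SELECT * FROM range(10)")
  explain(hint(df2, "hint1", 1.23456, "aaaaaaaaaa", list("hint2", "hint3")), TRUE)
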
diff --git a/R/pkg/tests/fulltests/test_sparkSQL_eager.R b/R/pkg/tests/fulltests/test_sparkSQL_eager.R
new file mode 100644
index 0000000000000..9b4489a47b655
--- /dev/null
+++ b/R/pkg/tests/fulltests/test_sparkSQL_eager.R
@@ -0,0 +1,72 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+library(testthat)
+
+context("test show SparkDataFrame when eager execution is enabled.")
+
+test_that("eager execution is not enabled", {
+  # Start Spark session without eager execution enabled
+  sparkR.session(master = sparkRTestMaster, enableHiveSupport = FALSE)
+
+  df <- createDataFrame(faithful)
+  expect_is(df, "SparkDataFrame")
+  expected <- "eruptions:double, waiting:double"
+  expect_output(show(df), expected)
+
+  # Stop Spark session
+  sparkR.session.stop()
+})
+
+test_that("eager execution is enabled", {
+  # Start Spark session with eager execution enabled
+  sparkConfig <- list(spark.sql.repl.eagerEval.enabled = "true")
+
+  sparkR.session(master = sparkRTestMaster, enableHiveSupport = FALSE, sparkConfig = sparkConfig)
+
+  df <- createDataFrame(faithful)
+  expect_is(df, "SparkDataFrame")
+  expected <- paste0("(+---------+-------+\n",
+                     "|eruptions|waiting|\n",
+                     "+---------+-------+\n)*",
+                     "(only showing top 20 rows)")
+  expect_output(show(df), expected)
+
+  # Stop Spark session
+  sparkR.session.stop()
+})
+
+test_that("eager execution is enabled with maxNumRows and truncate set", {
+  # Start Spark session with eager execution enabled
+  sparkConfig <- list(spark.sql.repl.eagerEval.enabled = "true",
+                      spark.sql.repl.eagerEval.maxNumRows = as.integer(5),
+                      spark.sql.repl.eagerEval.truncate = as.integer(2))
+
+  sparkR.session(master = sparkRTestMaster, enableHiveSupport = FALSE, sparkConfig = sparkConfig)
+
+  df <- arrange(createDataFrame(faithful), "waiting")
+  expect_is(df, "SparkDataFrame")
+  expected <- paste0("(+---------+-------+\n",
+                     "|eruptions|waiting|\n",
+                     "+---------+-------+\n",
+                     "|       1.|     43|\n)*",
+                     "(only showing top 5 rows)")
+  expect_output(show(df), expected)
+
+  # Stop Spark session
+  sparkR.session.stop()
+})
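
In user code the behaviour under test reduces to the following (sketch; configuration
values passed as strings here):

  sparkR.session(sparkConfig = list(spark.sql.repl.eagerEval.enabled = "true",
                                    spark.sql.repl.eagerEval.maxNumRows = "5",
                                    spark.sql.repl.eagerEval.truncate = "2"))

  df <- createDataFrame(faithful)
  df   # show() now prints the first rows as a table rather than only the schema
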
diff --git a/R/pkg/tests/fulltests/test_streaming.R b/R/pkg/tests/fulltests/test_streaming.R
index bfb1a046490ec..6f0d2aefee886 100644
--- a/R/pkg/tests/fulltests/test_streaming.R
+++ b/R/pkg/tests/fulltests/test_streaming.R
@@ -127,6 +127,7 @@ test_that("Specify a schema by using a DDL-formatted string when reading", {
   expect_false(awaitTermination(q, 5 * 1000))
   callJMethod(q@ssq, "processAllAvailable")
   expect_equal(head(sql("SELECT count(*) FROM people3"))[[1]], 3)
+  stopQuery(q)
 
   expect_error(read.stream(path = parquetPath, schema = "name stri"),
                "DataType stri is not supported.")
diff --git a/R/pkg/tests/run-all.R b/R/pkg/tests/run-all.R
index 94d75188fb948..1e96418558883 100644
--- a/R/pkg/tests/run-all.R
+++ b/R/pkg/tests/run-all.R
@@ -18,50 +18,55 @@
 library(testthat)
 library(SparkR)
 
-# Turn all warnings into errors
-options("warn" = 2)
+# SPARK-25572
+if (identical(Sys.getenv("NOT_CRAN"), "true")) {
 
-if (.Platform$OS.type == "windows") {
-  Sys.setenv(TZ = "GMT")
-}
+  # Turn all warnings into errors
+  options("warn" = 2)
 
-# Setup global test environment
-# Install Spark first to set SPARK_HOME
+  if (.Platform$OS.type == "windows") {
+    Sys.setenv(TZ = "GMT")
+  }
 
-# NOTE(shivaram): We set overwrite to handle any old tar.gz files or directories left behind on
-# CRAN machines. For Jenkins we should already have SPARK_HOME set.
-install.spark(overwrite = TRUE)
+  # Setup global test environment
+  # Install Spark first to set SPARK_HOME
 
-sparkRDir <- file.path(Sys.getenv("SPARK_HOME"), "R")
-sparkRWhitelistSQLDirs <- c("spark-warehouse", "metastore_db")
-invisible(lapply(sparkRWhitelistSQLDirs,
-                 function(x) { unlink(file.path(sparkRDir, x), recursive = TRUE, force = TRUE)}))
-sparkRFilesBefore <- list.files(path = sparkRDir, all.files = TRUE)
+  # NOTE(shivaram): We set overwrite to handle any old tar.gz files or directories left behind on
+  # CRAN machines. For Jenkins we should already have SPARK_HOME set.
+  install.spark(overwrite = TRUE)
 
-sparkRTestMaster <- "local[1]"
-sparkRTestConfig <- list()
-if (identical(Sys.getenv("NOT_CRAN"), "true")) {
-  sparkRTestMaster <- ""
-} else {
-  # Disable hsperfdata on CRAN
-  old_java_opt <- Sys.getenv("_JAVA_OPTIONS")
-  Sys.setenv("_JAVA_OPTIONS" = paste("-XX:-UsePerfData", old_java_opt))
-  tmpDir <- tempdir()
-  tmpArg <- paste0("-Djava.io.tmpdir=", tmpDir)
-  sparkRTestConfig <- list(spark.driver.extraJavaOptions = tmpArg,
-                           spark.executor.extraJavaOptions = tmpArg)
-}
+  sparkRDir <- file.path(Sys.getenv("SPARK_HOME"), "R")
+  sparkRWhitelistSQLDirs <- c("spark-warehouse", "metastore_db")
+  invisible(lapply(sparkRWhitelistSQLDirs,
+                   function(x) { unlink(file.path(sparkRDir, x), recursive = TRUE, force = TRUE)}))
+  sparkRFilesBefore <- list.files(path = sparkRDir, all.files = TRUE)
 
-test_package("SparkR")
+  sparkRTestMaster <- "local[1]"
+  sparkRTestConfig <- list()
+  if (identical(Sys.getenv("NOT_CRAN"), "true")) {
+    sparkRTestMaster <- ""
+  } else {
+    # Disable hsperfdata on CRAN
+    old_java_opt <- Sys.getenv("_JAVA_OPTIONS")
+    Sys.setenv("_JAVA_OPTIONS" = paste("-XX:-UsePerfData", old_java_opt))
+    tmpDir <- tempdir()
+    tmpArg <- paste0("-Djava.io.tmpdir=", tmpDir)
+    sparkRTestConfig <- list(spark.driver.extraJavaOptions = tmpArg,
+                             spark.executor.extraJavaOptions = tmpArg)
+  }
 
-if (identical(Sys.getenv("NOT_CRAN"), "true")) {
-  # set random seed for predictable results. mostly for base's sample() in tree and classification
-  set.seed(42)
-  # for testthat 1.0.2 later, change reporter from "summary" to default_reporter()
-  testthat:::run_tests("SparkR",
-                       file.path(sparkRDir, "pkg", "tests", "fulltests"),
-                       NULL,
-                       "summary")
-}
+  test_package("SparkR")
+
+  if (identical(Sys.getenv("NOT_CRAN"), "true")) {
+    # set random seed for predictable results. mostly for base's sample() in tree and classification
+    set.seed(42)
+  # for testthat 1.0.2 or later, change reporter from "summary" to default_reporter()
+    testthat:::run_tests("SparkR",
+                         file.path(sparkRDir, "pkg", "tests", "fulltests"),
+                         NULL,
+                         "summary")
+  }
 
-SparkR:::uninstallDownloadedSpark()
+  SparkR:::uninstallDownloadedSpark()
+
+}
diff --git a/R/pkg/vignettes/sparkr-vignettes.Rmd b/R/pkg/vignettes/sparkr-vignettes.Rmd
index 090363c5f8a3e..f80b45b4f36a8 100644
--- a/R/pkg/vignettes/sparkr-vignettes.Rmd
+++ b/R/pkg/vignettes/sparkr-vignettes.Rmd
@@ -57,6 +57,20 @@ First, let's load and attach the package.
 library(SparkR)
 ```
 
+```{r, include=FALSE}
+# disable eval if java version not supported
+override_eval <- tryCatch(!is.numeric(SparkR:::checkJavaVersion()),
+          error = function(e) { TRUE },
+          warning = function(e) { TRUE })
+
+if (override_eval) {
+  opts_hooks$set(eval = function(options) {
+    options$eval = FALSE
+    options
+  })
+}
+```
+
 `SparkSession` is the entry point into SparkR which connects your R program to a Spark cluster. You can create a `SparkSession` using `sparkR.session` and pass in options such as the application name, any Spark packages depended on, etc.
 
 We use default settings in which it runs in local mode. It auto downloads Spark package in the background if no previous installation is found. For more details about setup, see [Spark Session](#SetupSparkSession).
@@ -157,8 +171,8 @@ Property Name | Property group | spark-submit equivalent
 `spark.driver.extraClassPath` | Runtime Environment | `--driver-class-path`
 `spark.driver.extraJavaOptions` | Runtime Environment | `--driver-java-options`
 `spark.driver.extraLibraryPath` | Runtime Environment | `--driver-library-path`
-`spark.yarn.keytab` | Application Properties | `--keytab`
-`spark.yarn.principal` | Application Properties | `--principal`
+`spark.kerberos.keytab` | Application Properties | `--keytab`
+`spark.kerberos.principal` | Application Properties | `--principal`
 
 **For Windows users**: Due to different file prefixes across operating systems, to avoid the issue of potential wrong prefix, a current workaround is to specify `spark.sql.warehouse.dir` when starting the `SparkSession`.
 
@@ -542,6 +556,7 @@ SparkR supports the following machine learning models and algorithms.
 #### Frequent Pattern Mining
 
 * FP-growth
+* PrefixSpan
 
 #### Statistics
 
@@ -998,6 +1013,18 @@ We can make predictions based on the `antecedent`.
 head(predict(fpm, df))
 ```
 
+#### PrefixSpan
+
+`spark.findFrequentSequentialPatterns` method can be used to find the complete set of frequent sequential patterns in the input sequences of itemsets.
+
+```{r}
+df <- createDataFrame(list(list(list(list(1L, 2L), list(3L))),
+                      list(list(list(1L), list(3L, 2L), list(1L, 2L))),
+                      list(list(list(1L, 2L), list(5L))),
+                      list(list(list(6L)))), schema = c("sequence"))
+head(spark.findFrequentSequentialPatterns(df, minSupport = 0.5, maxPatternLength = 5L))
+```
+
 #### Kolmogorov-Smirnov Test
 
 `spark.kstest` runs a two-sided, one-sample [Kolmogorov-Smirnov (KS) test](https://en.wikipedia.org/wiki/Kolmogorov%E2%80%93Smirnov_test).
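
For illustration, the renamed keytab/principal properties from the table above could be
passed from SparkR like this (the path and principal are placeholders):

  sparkR.session(sparkConfig = list(spark.kerberos.keytab = "/path/to/user.keytab",
                                    spark.kerberos.principal = "user@EXAMPLE.COM"))
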
diff --git a/assembly/README b/assembly/README
index affd281a1385c..1fd6d8858348c 100644
--- a/assembly/README
+++ b/assembly/README
@@ -9,4 +9,4 @@ This module is off by default. To activate it specify the profile in the command
 
 If you need to build an assembly for a different version of Hadoop the
 hadoop-version system property needs to be set as in this example:
-  -Dhadoop.version=2.7.7
+  -Dhadoop.version=2.7.4
diff --git a/assembly/pom.xml b/assembly/pom.xml
index 9608c96fd5369..68ebfadb668ab 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -20,12 +20,12 @@
   <modelVersion>4.0.0</modelVersion>
   <parent>
     <groupId>org.apache.spark</groupId>
-    <artifactId>spark-parent_2.11</artifactId>
-    <version>2.4.0-SNAPSHOT</version>
+    <artifactId>spark-parent_2.12</artifactId>
+    <version>3.0.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 
-  <artifactId>spark-assembly_2.11</artifactId>
+  <artifactId>spark-assembly_2.12</artifactId>
   <name>Spark Project Assembly</name>
   <url>http://spark.apache.org/</url>
   <packaging>pom</packaging>
diff --git a/bin/docker-image-tool.sh b/bin/docker-image-tool.sh
index d6371051ef7fb..fbf9c9e448fd1 100755
--- a/bin/docker-image-tool.sh
+++ b/bin/docker-image-tool.sh
@@ -29,6 +29,20 @@ if [ -z "${SPARK_HOME}" ]; then
 fi
 . "${SPARK_HOME}/bin/load-spark-env.sh"
 
+CTX_DIR="$SPARK_HOME/target/tmp/docker"
+
+function is_dev_build {
+  [ ! -f "$SPARK_HOME/RELEASE" ]
+}
+
+function cleanup_ctx_dir {
+  if is_dev_build; then
+    rm -rf "$CTX_DIR"
+  fi
+}
+
+trap cleanup_ctx_dir EXIT
+
 function image_ref {
   local image="$1"
   local add_repo="${2:-1}"
@@ -41,55 +55,142 @@ function image_ref {
   echo "$image"
 }
 
+function docker_push {
+  local image_name="$1"
+  if [ ! -z $(docker images -q "$(image_ref ${image_name})") ]; then
+    docker push "$(image_ref ${image_name})"
+    if [ $? -ne 0 ]; then
+      error "Failed to push $image_name Docker image."
+    fi
+  else
+    echo "$(image_ref ${image_name}) image not found. Skipping push for this image."
+  fi
+}
+
+# Create a smaller build context for docker in dev builds to make the build faster. Docker
+# uploads all of the current directory to the daemon, and it can get pretty big with dev
+# builds that contain test log files and other artifacts.
+#
+# Three build contexts are created, one for each image: base, pyspark, and sparkr. For them
+# to have the desired effect, the docker command needs to be executed inside the appropriate
+# context directory.
+#
+# Note: docker does not support symlinks in the build context.
+function create_dev_build_context {(
+  set -e
+  local BASE_CTX="$CTX_DIR/base"
+  mkdir -p "$BASE_CTX/kubernetes"
+  cp -r "resource-managers/kubernetes/docker/src/main/dockerfiles" \
+    "$BASE_CTX/kubernetes/dockerfiles"
+
+  cp -r "assembly/target/scala-$SPARK_SCALA_VERSION/jars" "$BASE_CTX/jars"
+  cp -r "resource-managers/kubernetes/integration-tests/tests" \
+    "$BASE_CTX/kubernetes/tests"
+
+  mkdir "$BASE_CTX/examples"
+  cp -r "examples/src" "$BASE_CTX/examples/src"
+  # Copy just needed examples jars instead of everything.
+  mkdir "$BASE_CTX/examples/jars"
+  for i in examples/target/scala-$SPARK_SCALA_VERSION/jars/*; do
+    if [ ! -f "$BASE_CTX/jars/$(basename $i)" ]; then
+      cp $i "$BASE_CTX/examples/jars"
+    fi
+  done
+
+  for other in bin sbin data; do
+    cp -r "$other" "$BASE_CTX/$other"
+  done
+
+  local PYSPARK_CTX="$CTX_DIR/pyspark"
+  mkdir -p "$PYSPARK_CTX/kubernetes"
+  cp -r "resource-managers/kubernetes/docker/src/main/dockerfiles" \
+    "$PYSPARK_CTX/kubernetes/dockerfiles"
+  mkdir "$PYSPARK_CTX/python"
+  cp -r "python/lib" "$PYSPARK_CTX/python/lib"
+
+  local R_CTX="$CTX_DIR/sparkr"
+  mkdir -p "$R_CTX/kubernetes"
+  cp -r "resource-managers/kubernetes/docker/src/main/dockerfiles" \
+    "$R_CTX/kubernetes/dockerfiles"
+  cp -r "R" "$R_CTX/R"
+)}
+
+function img_ctx_dir {
+  if is_dev_build; then
+    echo "$CTX_DIR/$1"
+  else
+    echo "$SPARK_HOME"
+  fi
+}
+
 function build {
   local BUILD_ARGS
-  local IMG_PATH
-
-  if [ ! -f "$SPARK_HOME/RELEASE" ]; then
-    # Set image build arguments accordingly if this is a source repo and not a distribution archive.
-    IMG_PATH=resource-managers/kubernetes/docker/src/main/dockerfiles
-    BUILD_ARGS=(
-      ${BUILD_PARAMS}
-      --build-arg
-      img_path=$IMG_PATH
-      --build-arg
-      spark_jars=assembly/target/scala-$SPARK_SCALA_VERSION/jars
-    )
-  else
-    # Not passed as an argument to docker, but used to validate the Spark directory.
-    IMG_PATH="kubernetes/dockerfiles"
-    BUILD_ARGS=(${BUILD_PARAMS})
+  local SPARK_ROOT="$SPARK_HOME"
+
+  if is_dev_build; then
+    create_dev_build_context || error "Failed to create docker build context."
+    SPARK_ROOT="$CTX_DIR/base"
   fi
 
-  if [ ! -d "$IMG_PATH" ]; then
+  # Verify that the Docker image content directory is present
+  if [ ! -d "$SPARK_ROOT/kubernetes/dockerfiles" ]; then
     error "Cannot find docker image. This script must be run from a runnable distribution of Apache Spark."
   fi
+
+  # Verify that Spark has actually been built/is a runnable distribution
+  # i.e. the Spark JARs that the Docker files will place into the image are present
+  local TOTAL_JARS=$(ls $SPARK_ROOT/jars/spark-* | wc -l)
+  TOTAL_JARS=$(( $TOTAL_JARS ))
+  if [ "${TOTAL_JARS}" -eq 0 ]; then
+    error "Cannot find Spark JARs. This script assumes that Apache Spark has first been built locally or this is a runnable distribution."
+  fi
+
+  local BUILD_ARGS=(${BUILD_PARAMS})
+
+  # If a custom SPARK_UID was set add it to build arguments
+  if [ -n "$SPARK_UID" ]; then
+    BUILD_ARGS+=(--build-arg spark_uid=$SPARK_UID)
+  fi
+
   local BINDING_BUILD_ARGS=(
     ${BUILD_PARAMS}
     --build-arg
     base_img=$(image_ref spark)
   )
-  local BASEDOCKERFILE=${BASEDOCKERFILE:-"$IMG_PATH/spark/Dockerfile"}
-  local PYDOCKERFILE=${PYDOCKERFILE:-"$IMG_PATH/spark/bindings/python/Dockerfile"}
-  local RDOCKERFILE=${RDOCKERFILE:-"$IMG_PATH/spark/bindings/R/Dockerfile"}
+  local BASEDOCKERFILE=${BASEDOCKERFILE:-"kubernetes/dockerfiles/spark/Dockerfile"}
+  local PYDOCKERFILE=${PYDOCKERFILE:-false}
+  local RDOCKERFILE=${RDOCKERFILE:-false}
 
-  docker build $NOCACHEARG "${BUILD_ARGS[@]}" \
+  (cd $(img_ctx_dir base) && docker build $NOCACHEARG "${BUILD_ARGS[@]}" \
     -t $(image_ref spark) \
-    -f "$BASEDOCKERFILE" .
+    -f "$BASEDOCKERFILE" .)
+  if [ $? -ne 0 ]; then
+    error "Failed to build Spark JVM Docker image, please refer to Docker build output for details."
+  fi
 
-  docker build $NOCACHEARG "${BINDING_BUILD_ARGS[@]}" \
-    -t $(image_ref spark-py) \
-    -f "$PYDOCKERFILE" .
+  if [ "${PYDOCKERFILE}" != "false" ]; then
+    (cd $(img_ctx_dir pyspark) && docker build $NOCACHEARG "${BINDING_BUILD_ARGS[@]}" \
+      -t $(image_ref spark-py) \
+      -f "$PYDOCKERFILE" .)
+      if [ $? -ne 0 ]; then
+        error "Failed to build PySpark Docker image, please refer to Docker build output for details."
+      fi
+  fi
 
-  docker build $NOCACHEARG "${BINDING_BUILD_ARGS[@]}" \
-    -t $(image_ref spark-r) \
-    -f "$RDOCKERFILE" .
+  if [ "${RDOCKERFILE}" != "false" ]; then
+    (cd $(img_ctx_dir sparkr) && docker build $NOCACHEARG "${BINDING_BUILD_ARGS[@]}" \
+      -t $(image_ref spark-r) \
+      -f "$RDOCKERFILE" .)
+    if [ $? -ne 0 ]; then
+      error "Failed to build SparkR Docker image, please refer to Docker build output for details."
+    fi
+  fi
 }
 
 function push {
-  docker push "$(image_ref spark)"
-  docker push "$(image_ref spark-py)"
-  docker push "$(image_ref spark-r)"
+  docker_push "spark"
+  docker_push "spark-py"
+  docker_push "spark-r"
 }
 
 function usage {
@@ -104,14 +205,18 @@ Commands:
 
 Options:
   -f file               Dockerfile to build for JVM based Jobs. By default builds the Dockerfile shipped with Spark.
-  -p file               Dockerfile to build for PySpark Jobs. Builds Python dependencies and ships with Spark.
-  -R file               Dockerfile to build for SparkR Jobs. Builds R dependencies and ships with Spark.
+  -p file               (Optional) Dockerfile to build for PySpark Jobs. Builds Python dependencies and ships with Spark.
+                        Skips building PySpark docker image if not specified.
+  -R file               (Optional) Dockerfile to build for SparkR Jobs. Builds R dependencies and ships with Spark.
+                        Skips building SparkR docker image if not specified.
   -r repo               Repository address.
   -t tag                Tag to apply to the built image, or to identify the image to be pushed.
   -m                    Use minikube's Docker daemon.
   -n                    Build docker image with --no-cache
-  -b arg      Build arg to build or push the image. For multiple build args, this option needs to
-              be used separately for each build arg.
+  -u uid                UID to use in the USER directive to set the user the main Spark process runs as inside the
+                        resulting container
+  -b arg                Build arg to build or push the image. For multiple build args, this option needs to
+                        be used separately for each build arg.
 
 Using minikube when building images will do so directly into minikube's Docker daemon.
 There is no need to push the images into minikube in that case, they'll be automatically
@@ -125,6 +230,9 @@ Examples:
   - Build image in minikube with tag "testing"
     $0 -m -t testing build
 
+  - Build PySpark docker image
+    $0 -r docker.io/myrepo -t v2.3.0 -p kubernetes/dockerfiles/spark/bindings/python/Dockerfile build
+
   - Build and push image with tag "v2.3.0" to docker.io/myrepo
     $0 -r docker.io/myrepo -t v2.3.0 build
     $0 -r docker.io/myrepo -t v2.3.0 push
@@ -143,7 +251,8 @@ PYDOCKERFILE=
 RDOCKERFILE=
 NOCACHEARG=
 BUILD_PARAMS=
-while getopts f:p:R:mr:t:n:b: option
+SPARK_UID=
+while getopts f:p:R:mr:t:nb:u: option
 do
  case "${option}"
  in
@@ -158,8 +267,12 @@ do
    if ! which minikube 1>/dev/null; then
      error "Cannot find minikube."
    fi
+   if ! minikube status 1>/dev/null; then
+     error "Cannot contact minikube. Make sure it's running."
+   fi
    eval $(minikube docker-env)
    ;;
+  u) SPARK_UID=${OPTARG};;
  esac
 done
 
diff --git a/bin/load-spark-env.sh b/bin/load-spark-env.sh
index 0b5006dbd63ac..0ada5d8d0fc1d 100644
--- a/bin/load-spark-env.sh
+++ b/bin/load-spark-env.sh
@@ -26,15 +26,17 @@ if [ -z "${SPARK_HOME}" ]; then
   source "$(dirname "$0")"/find-spark-home
 fi
 
+SPARK_ENV_SH="spark-env.sh"
 if [ -z "$SPARK_ENV_LOADED" ]; then
   export SPARK_ENV_LOADED=1
 
   export SPARK_CONF_DIR="${SPARK_CONF_DIR:-"${SPARK_HOME}"/conf}"
 
-  if [ -f "${SPARK_CONF_DIR}/spark-env.sh" ]; then
+  SPARK_ENV_SH="${SPARK_CONF_DIR}/${SPARK_ENV_SH}"
+  if [[ -f "${SPARK_ENV_SH}" ]]; then
     # Promote all variable declarations to environment (exported) variables
     set -a
-    . "${SPARK_CONF_DIR}/spark-env.sh"
+    . ${SPARK_ENV_SH}
     set +a
   fi
 fi
@@ -42,19 +44,22 @@ fi
 # Setting SPARK_SCALA_VERSION if not already set.
 
 if [ -z "$SPARK_SCALA_VERSION" ]; then
+  SCALA_VERSION_1=2.12
+  SCALA_VERSION_2=2.11
 
-  ASSEMBLY_DIR2="${SPARK_HOME}/assembly/target/scala-2.11"
-  ASSEMBLY_DIR1="${SPARK_HOME}/assembly/target/scala-2.12"
-
-  if [[ -d "$ASSEMBLY_DIR2" && -d "$ASSEMBLY_DIR1" ]]; then
-    echo -e "Presence of build for multiple Scala versions detected." 1>&2
-    echo -e 'Either clean one of them or, export SPARK_SCALA_VERSION in spark-env.sh.' 1>&2
+  ASSEMBLY_DIR_1="${SPARK_HOME}/assembly/target/scala-${SCALA_VERSION_1}"
+  ASSEMBLY_DIR_2="${SPARK_HOME}/assembly/target/scala-${SCALA_VERSION_2}"
+  ENV_VARIABLE_DOC="https://spark.apache.org/docs/latest/configuration.html#environment-variables"
+  if [[ -d "$ASSEMBLY_DIR_1" && -d "$ASSEMBLY_DIR_2" ]]; then
+    echo "Presence of build for multiple Scala versions detected ($ASSEMBLY_DIR_1 and $ASSEMBLY_DIR_2)." 1>&2
+    echo "Remove one of them or, export SPARK_SCALA_VERSION=$SCALA_VERSION_1 in ${SPARK_ENV_SH}." 1>&2
+    echo "Visit ${ENV_VARIABLE_DOC} for more details about setting environment variables in spark-env.sh." 1>&2
     exit 1
   fi
 
-  if [ -d "$ASSEMBLY_DIR2" ]; then
-    export SPARK_SCALA_VERSION="2.11"
+  if [[ -d "$ASSEMBLY_DIR_1" ]]; then
+    export SPARK_SCALA_VERSION=${SCALA_VERSION_1}
   else
-    export SPARK_SCALA_VERSION="2.12"
+    export SPARK_SCALA_VERSION=${SCALA_VERSION_2}
   fi
 fi
diff --git a/bin/pyspark b/bin/pyspark
index 5d5affb1f97c3..1dcddcc6196b8 100755
--- a/bin/pyspark
+++ b/bin/pyspark
@@ -57,7 +57,7 @@ export PYSPARK_PYTHON
 
 # Add the PySpark classes to the Python path:
 export PYTHONPATH="${SPARK_HOME}/python/:$PYTHONPATH"
-export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.7-src.zip:$PYTHONPATH"
+export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.8.1-src.zip:$PYTHONPATH"
 
 # Load the PySpark shell.py script when ./pyspark is used interactively:
 export OLD_PYTHONSTARTUP="$PYTHONSTARTUP"
diff --git a/bin/pyspark2.cmd b/bin/pyspark2.cmd
index 15fa910c277b3..479fd464c7d3e 100644
--- a/bin/pyspark2.cmd
+++ b/bin/pyspark2.cmd
@@ -30,7 +30,7 @@ if "x%PYSPARK_DRIVER_PYTHON%"=="x" (
 )
 
 set PYTHONPATH=%SPARK_HOME%\python;%PYTHONPATH%
-set PYTHONPATH=%SPARK_HOME%\python\lib\py4j-0.10.7-src.zip;%PYTHONPATH%
+set PYTHONPATH=%SPARK_HOME%\python\lib\py4j-0.10.8.1-src.zip;%PYTHONPATH%
 
 set OLD_PYTHONSTARTUP=%PYTHONSTARTUP%
 set PYTHONSTARTUP=%SPARK_HOME%\python\pyspark\shell.py
diff --git a/bin/spark-shell b/bin/spark-shell
index 421f36cac3d47..e920137974980 100755
--- a/bin/spark-shell
+++ b/bin/spark-shell
@@ -32,7 +32,10 @@ if [ -z "${SPARK_HOME}" ]; then
   source "$(dirname "$0")"/find-spark-home
 fi
 
-export _SPARK_CMD_USAGE="Usage: ./bin/spark-shell [options]"
+export _SPARK_CMD_USAGE="Usage: ./bin/spark-shell [options]
+
+Scala REPL options:
+  -I <file>                   preload <file>, enforcing line-by-line interpretation"
 
 # SPARK-4161: scala does not assume use of the java classpath,
 # so we need to add the "-Dscala.usejavacp=true" flag manually. We
diff --git a/bin/spark-shell2.cmd b/bin/spark-shell2.cmd
index aaf71906c6526..549bf43bb6078 100644
--- a/bin/spark-shell2.cmd
+++ b/bin/spark-shell2.cmd
@@ -20,7 +20,13 @@ rem
 rem Figure out where the Spark framework is installed
 call "%~dp0find-spark-home.cmd"
 
-set _SPARK_CMD_USAGE=Usage: .\bin\spark-shell.cmd [options]
+set LF=^
+
+
+rem two empty lines are required
+set _SPARK_CMD_USAGE=Usage: .\bin\spark-shell.cmd [options]^%LF%%LF%^%LF%%LF%^
+Scala REPL options:^%LF%%LF%^
+  -I ^<file^>                   preload ^<file^>, enforcing line-by-line interpretation
 
 rem SPARK-4161: scala does not assume use of the java classpath,
 rem so we need to add the "-Dscala.usejavacp=true" flag manually. We
diff --git a/build/mvn b/build/mvn
index 2487b81abb4ea..4cb10e0d03fa4 100755
--- a/build/mvn
+++ b/build/mvn
@@ -116,7 +116,8 @@ install_zinc() {
 # the build/ folder
 install_scala() {
   # determine the Scala version used in Spark
-  local scala_version=`grep "scala.version" "${_DIR}/../pom.xml" | head -n1 | awk -F '[<>]' '{print $3}'`
+  local scala_binary_version=`grep "scala.binary.version" "${_DIR}/../pom.xml" | head -n1 | awk -F '[<>]' '{print $3}'`
+  local scala_version=`grep "scala.version" "${_DIR}/../pom.xml" | grep ${scala_binary_version} | head -n1 | awk -F '[<>]' '{print $3}'`
   local scala_bin="${_DIR}/scala-${scala_version}/bin/scala"
   local TYPESAFE_MIRROR=${TYPESAFE_MIRROR:-https://downloads.lightbend.com}
 
@@ -153,6 +154,7 @@ if [ -n "${ZINC_INSTALL_FLAG}" -o -z "`"${ZINC_BIN}" -status -port ${ZINC_PORT}`
   export ZINC_OPTS=${ZINC_OPTS:-"$_COMPILE_JVM_OPTS"}
   "${ZINC_BIN}" -shutdown -port ${ZINC_PORT}
   "${ZINC_BIN}" -start -port ${ZINC_PORT} \
+    -server 127.0.0.1 -idle-timeout 3h \
     -scala-compiler "${SCALA_COMPILER}" \
     -scala-library "${SCALA_LIBRARY}" &>/dev/null
 fi
@@ -162,5 +164,12 @@ export MAVEN_OPTS=${MAVEN_OPTS:-"$_COMPILE_JVM_OPTS"}
 
 echo "Using \`mvn\` from path: $MVN_BIN" 1>&2
 
-# Last, call the `mvn` command as usual
+# call the `mvn` command as usual
+# SPARK-25854
 "${MVN_BIN}" -DzincPort=${ZINC_PORT} "$@"
+MVN_RETCODE=$?
+
+# Try to shut down zinc explicitly if the server is still running.
+"${ZINC_BIN}" -shutdown -port ${ZINC_PORT}
+
+exit $MVN_RETCODE
diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml
index 8c148359c3029..f042a12fda3d2 100644
--- a/common/kvstore/pom.xml
+++ b/common/kvstore/pom.xml
@@ -21,12 +21,12 @@
   <modelVersion>4.0.0</modelVersion>
   <parent>
     <groupId>org.apache.spark</groupId>
-    <artifactId>spark-parent_2.11</artifactId>
-    <version>2.4.0-SNAPSHOT</version>
+    <artifactId>spark-parent_2.12</artifactId>
+    <version>3.0.0-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
 
-  <artifactId>spark-kvstore_2.11</artifactId>
+  <artifactId>spark-kvstore_2.12</artifactId>
   <packaging>jar</packaging>
   <name>Spark Project Local DB</name>
   <url>http://spark.apache.org/</url>
diff --git a/common/kvstore/src/main/java/org/apache/spark/util/kvstore/KVStoreSerializer.java b/common/kvstore/src/main/java/org/apache/spark/util/kvstore/KVStoreSerializer.java
index bd8d9486acde5..771a9541bb349 100644
--- a/common/kvstore/src/main/java/org/apache/spark/util/kvstore/KVStoreSerializer.java
+++ b/common/kvstore/src/main/java/org/apache/spark/util/kvstore/KVStoreSerializer.java
@@ -54,11 +54,8 @@ public KVStoreSerializer() {
       return ((String) o).getBytes(UTF_8);
     } else {
       ByteArrayOutputStream bytes = new ByteArrayOutputStream();
-      GZIPOutputStream out = new GZIPOutputStream(bytes);
-      try {
+      try (GZIPOutputStream out = new GZIPOutputStream(bytes)) {
         mapper.writeValue(out, o);
-      } finally {
-        out.close();
       }
       return bytes.toByteArray();
     }
@@ -69,11 +66,8 @@ public KVStoreSerializer() {
     if (klass.equals(String.class)) {
       return (T) new String(data, UTF_8);
     } else {
-      GZIPInputStream in = new GZIPInputStream(new ByteArrayInputStream(data));
-      try {
+      try (GZIPInputStream in = new GZIPInputStream(new ByteArrayInputStream(data))) {
         return mapper.readValue(in, klass);
-      } finally {
-        in.close();
       }
     }
   }
diff --git a/common/kvstore/src/main/java/org/apache/spark/util/kvstore/LevelDBIterator.java b/common/kvstore/src/main/java/org/apache/spark/util/kvstore/LevelDBIterator.java
index f62e85d435318..e3efc92c4a54a 100644
--- a/common/kvstore/src/main/java/org/apache/spark/util/kvstore/LevelDBIterator.java
+++ b/common/kvstore/src/main/java/org/apache/spark/util/kvstore/LevelDBIterator.java
@@ -196,6 +196,7 @@ public synchronized void close() throws IOException {
    * when Scala wrappers are used, this makes sure that, hopefully, the JNI resources held by
    * the iterator will eventually be released.
    */
+  @SuppressWarnings("deprecation")
   @Override
   protected void finalize() throws Throwable {
     db.closeIterator(this);
diff --git a/common/kvstore/src/test/java/org/apache/spark/util/kvstore/LevelDBSuite.java b/common/kvstore/src/test/java/org/apache/spark/util/kvstore/LevelDBSuite.java
index 205f7df87c5bc..39a952f2b0df9 100644
--- a/common/kvstore/src/test/java/org/apache/spark/util/kvstore/LevelDBSuite.java
+++ b/common/kvstore/src/test/java/org/apache/spark/util/kvstore/LevelDBSuite.java
@@ -217,7 +217,7 @@ public void testSkip() throws Exception {
   public void testNegativeIndexValues() throws Exception {
     List<Integer> expected = Arrays.asList(-100, -50, 0, 50, 100);
 
-    expected.stream().forEach(i -> {
+    expected.forEach(i -> {
       try {
         db.write(createCustomType1(i));
       } catch (Exception e) {
diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml
index 8ca7733507f1b..56d01fa0e8b3d 100644
--- a/common/network-common/pom.xml
+++ b/common/network-common/pom.xml
@@ -21,12 +21,12 @@
   <modelVersion>4.0.0</modelVersion>
   <parent>
     <groupId>org.apache.spark</groupId>
-    <artifactId>spark-parent_2.11</artifactId>
-    <version>2.4.0-SNAPSHOT</version>
+    <artifactId>spark-parent_2.12</artifactId>
+    <version>3.0.0-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
 
-  <artifactId>spark-network-common_2.11</artifactId>
+  <artifactId>spark-network-common_2.12</artifactId>
   <packaging>jar</packaging>
   <name>Spark Project Networking</name>
   <url>http://spark.apache.org/</url>
diff --git a/common/network-common/src/main/java/org/apache/spark/network/TransportContext.java b/common/network-common/src/main/java/org/apache/spark/network/TransportContext.java
index ae91bc9cfdd08..480b52652de53 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/TransportContext.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/TransportContext.java
@@ -21,6 +21,8 @@
 import java.util.List;
 
 import io.netty.channel.Channel;
+import io.netty.channel.ChannelPipeline;
+import io.netty.channel.EventLoopGroup;
 import io.netty.channel.socket.SocketChannel;
 import io.netty.handler.timeout.IdleStateHandler;
 import org.slf4j.Logger;
@@ -32,11 +34,13 @@
 import org.apache.spark.network.client.TransportResponseHandler;
 import org.apache.spark.network.protocol.MessageDecoder;
 import org.apache.spark.network.protocol.MessageEncoder;
+import org.apache.spark.network.server.ChunkFetchRequestHandler;
 import org.apache.spark.network.server.RpcHandler;
 import org.apache.spark.network.server.TransportChannelHandler;
 import org.apache.spark.network.server.TransportRequestHandler;
 import org.apache.spark.network.server.TransportServer;
 import org.apache.spark.network.server.TransportServerBootstrap;
+import org.apache.spark.network.util.IOMode;
 import org.apache.spark.network.util.NettyUtils;
 import org.apache.spark.network.util.TransportConf;
 import org.apache.spark.network.util.TransportFrameDecoder;
@@ -61,6 +65,7 @@
   private final TransportConf conf;
   private final RpcHandler rpcHandler;
   private final boolean closeIdleConnections;
+  private final boolean isClientOnly;
 
   /**
    * Force to create MessageEncoder and MessageDecoder so that we can make sure they will be created
@@ -77,17 +82,54 @@
   private static final MessageEncoder ENCODER = MessageEncoder.INSTANCE;
   private static final MessageDecoder DECODER = MessageDecoder.INSTANCE;
 
+  // Separate thread pool for handling ChunkFetchRequest messages. It helps to throttle the
+  // maximum number of TransportServer worker threads that are blocked on writing responses
+  // to ChunkFetchRequest messages back to the client via the underlying channel.
+  private static EventLoopGroup chunkFetchWorkers;
+
   public TransportContext(TransportConf conf, RpcHandler rpcHandler) {
-    this(conf, rpcHandler, false);
+    this(conf, rpcHandler, false, false);
   }
 
   public TransportContext(
       TransportConf conf,
       RpcHandler rpcHandler,
       boolean closeIdleConnections) {
+    this(conf, rpcHandler, closeIdleConnections, false);
+  }
+
+  /**
+   * Enables TransportContext initialization for underlying client and server.
+   *
+   * @param conf TransportConf
+   * @param rpcHandler RpcHandler responsible for handling requests and responses.
+   * @param closeIdleConnections Close idle connections if it is set to true.
+   * @param isClientOnly Indicates that this TransportContext is only used by a client.
+   *                     This mainly matters when the external shuffle service is enabled:
+   *                     it stops shuffle clients from creating the extra event loop and
+   *                     thread pool used for handling chunked fetch requests.
+   */
+  public TransportContext(
+      TransportConf conf,
+      RpcHandler rpcHandler,
+      boolean closeIdleConnections,
+      boolean isClientOnly) {
     this.conf = conf;
     this.rpcHandler = rpcHandler;
     this.closeIdleConnections = closeIdleConnections;
+    this.isClientOnly = isClientOnly;
+
+    synchronized(TransportContext.class) {
+      if (chunkFetchWorkers == null &&
+          conf.getModuleName() != null &&
+          conf.getModuleName().equalsIgnoreCase("shuffle") &&
+          !isClientOnly) {
+        chunkFetchWorkers = NettyUtils.createEventLoop(
+            IOMode.valueOf(conf.ioMode()),
+            conf.chunkFetchHandlerThreads(),
+            "shuffle-chunk-fetch-handler");
+      }
+    }
   }
 
   /**
@@ -144,14 +186,23 @@ public TransportChannelHandler initializePipeline(
       RpcHandler channelRpcHandler) {
     try {
       TransportChannelHandler channelHandler = createChannelHandler(channel, channelRpcHandler);
-      channel.pipeline()
+      ChunkFetchRequestHandler chunkFetchHandler =
+        createChunkFetchHandler(channelHandler, channelRpcHandler);
+      ChannelPipeline pipeline = channel.pipeline()
         .addLast("encoder", ENCODER)
         .addLast(TransportFrameDecoder.HANDLER_NAME, NettyUtils.createFrameDecoder())
         .addLast("decoder", DECODER)
-        .addLast("idleStateHandler", new IdleStateHandler(0, 0, conf.connectionTimeoutMs() / 1000))
+        .addLast("idleStateHandler",
+          new IdleStateHandler(0, 0, conf.connectionTimeoutMs() / 1000))
         // NOTE: Chunks are currently guaranteed to be returned in the order of request, but this
         // would require more logic to guarantee if this were not part of the same event loop.
         .addLast("handler", channelHandler);
+      // Use a separate EventLoopGroup to handle ChunkFetchRequest messages for shuffle rpcs.
+      if (conf.getModuleName() != null &&
+          conf.getModuleName().equalsIgnoreCase("shuffle")
+          && !isClientOnly) {
+        pipeline.addLast(chunkFetchWorkers, "chunkFetchHandler", chunkFetchHandler);
+      }
       return channelHandler;
     } catch (RuntimeException e) {
       logger.error("Error while initializing Netty pipeline", e);
@@ -173,5 +224,14 @@ private TransportChannelHandler createChannelHandler(Channel channel, RpcHandler
       conf.connectionTimeoutMs(), closeIdleConnections);
   }
 
+  /**
+   * Creates the dedicated ChannelHandler for ChunkFetchRequest messages.
+   */
+  private ChunkFetchRequestHandler createChunkFetchHandler(TransportChannelHandler channelHandler,
+      RpcHandler rpcHandler) {
+    return new ChunkFetchRequestHandler(channelHandler.getClient(),
+      rpcHandler.getStreamManager(), conf.maxChunksBeingTransferred());
+  }
+
   public TransportConf getConf() { return conf; }
 }
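
[Editorial note, not part of the patch] A minimal sketch of how a shuffle client could use the new four-argument TransportContext constructor so that the dedicated chunk-fetch EventLoopGroup is never created. The use of MapConfigProvider.EMPTY and NoOpRpcHandler here is only illustrative (they are referenced elsewhere in network-common and its tests); an actual client would supply its own config provider and RpcHandler.

    import org.apache.spark.network.TransportContext;
    import org.apache.spark.network.server.NoOpRpcHandler;
    import org.apache.spark.network.util.MapConfigProvider;
    import org.apache.spark.network.util.TransportConf;

    public class ClientOnlyTransportContextSketch {
      public static void main(String[] args) {
        // "shuffle" module config with no overrides.
        TransportConf conf = new TransportConf("shuffle", MapConfigProvider.EMPTY);
        // isClientOnly = true: per the new constructor, the shared chunk-fetch EventLoopGroup
        // is not created, so a pure shuffle client avoids the extra thread pool.
        TransportContext context = new TransportContext(conf, new NoOpRpcHandler(), false, true);
        System.out.println("module = " + context.getConf().getModuleName());
      }
    }
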
diff --git a/common/network-common/src/main/java/org/apache/spark/network/buffer/ManagedBuffer.java b/common/network-common/src/main/java/org/apache/spark/network/buffer/ManagedBuffer.java
index 1861f8d7fd8f3..2d573f512437e 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/buffer/ManagedBuffer.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/buffer/ManagedBuffer.java
@@ -36,7 +36,10 @@
  */
 public abstract class ManagedBuffer {
 
-  /** Number of bytes of the data. */
+  /**
+   * Number of bytes of the data. If this buffer decrypts the data for all of the views into it,
+   * this is the size of the decrypted data.
+   */
   public abstract long size();
 
   /**
diff --git a/common/network-common/src/main/java/org/apache/spark/network/crypto/AuthEngine.java b/common/network-common/src/main/java/org/apache/spark/network/crypto/AuthEngine.java
index 056505ef53356..64fdb32a67ada 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/crypto/AuthEngine.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/crypto/AuthEngine.java
@@ -159,15 +159,21 @@ public void close() throws IOException {
     // accurately report the errors when they happen.
     RuntimeException error = null;
     byte[] dummy = new byte[8];
-    try {
-      doCipherOp(encryptor, dummy, true);
-    } catch (Exception e) {
-      error = new RuntimeException(e);
+    if (encryptor != null) {
+      try {
+        doCipherOp(Cipher.ENCRYPT_MODE, dummy, true);
+      } catch (Exception e) {
+        error = new RuntimeException(e);
+      }
+      encryptor = null;
     }
-    try {
-      doCipherOp(decryptor, dummy, true);
-    } catch (Exception e) {
-      error = new RuntimeException(e);
+    if (decryptor != null) {
+      try {
+        doCipherOp(Cipher.DECRYPT_MODE, dummy, true);
+      } catch (Exception e) {
+        error = new RuntimeException(e);
+      }
+      decryptor = null;
     }
     random.close();
 
@@ -189,11 +195,11 @@ public void close() throws IOException {
   }
 
   private byte[] decrypt(byte[] in) throws GeneralSecurityException {
-    return doCipherOp(decryptor, in, false);
+    return doCipherOp(Cipher.DECRYPT_MODE, in, false);
   }
 
   private byte[] encrypt(byte[] in) throws GeneralSecurityException {
-    return doCipherOp(encryptor, in, false);
+    return doCipherOp(Cipher.ENCRYPT_MODE, in, false);
   }
 
   private void initializeForAuth(String cipher, byte[] nonce, SecretKeySpec key)
@@ -205,11 +211,13 @@ private void initializeForAuth(String cipher, byte[] nonce, SecretKeySpec key)
     byte[] iv = new byte[conf.ivLength()];
     System.arraycopy(nonce, 0, iv, 0, Math.min(nonce.length, iv.length));
 
-    encryptor = CryptoCipherFactory.getCryptoCipher(cipher, cryptoConf);
-    encryptor.init(Cipher.ENCRYPT_MODE, key, new IvParameterSpec(iv));
+    CryptoCipher _encryptor = CryptoCipherFactory.getCryptoCipher(cipher, cryptoConf);
+    _encryptor.init(Cipher.ENCRYPT_MODE, key, new IvParameterSpec(iv));
+    this.encryptor = _encryptor;
 
-    decryptor = CryptoCipherFactory.getCryptoCipher(cipher, cryptoConf);
-    decryptor.init(Cipher.DECRYPT_MODE, key, new IvParameterSpec(iv));
+    CryptoCipher _decryptor = CryptoCipherFactory.getCryptoCipher(cipher, cryptoConf);
+    _decryptor.init(Cipher.DECRYPT_MODE, key, new IvParameterSpec(iv));
+    this.decryptor = _decryptor;
   }
 
   /**
@@ -241,29 +249,52 @@ private SecretKeySpec generateKey(String kdf, int iterations, byte[] salt, int k
     return new SecretKeySpec(key.getEncoded(), conf.keyAlgorithm());
   }
 
-  private byte[] doCipherOp(CryptoCipher cipher, byte[] in, boolean isFinal)
+  private byte[] doCipherOp(int mode, byte[] in, boolean isFinal)
     throws GeneralSecurityException {
 
-    Preconditions.checkState(cipher != null);
+    CryptoCipher cipher;
+    switch (mode) {
+      case Cipher.ENCRYPT_MODE:
+        cipher = encryptor;
+        break;
+      case Cipher.DECRYPT_MODE:
+        cipher = decryptor;
+        break;
+      default:
+        throw new IllegalArgumentException(String.valueOf(mode));
+    }
 
-    int scale = 1;
-    while (true) {
-      int size = in.length * scale;
-      byte[] buffer = new byte[size];
-      try {
-        int outSize = isFinal ? cipher.doFinal(in, 0, in.length, buffer, 0)
-          : cipher.update(in, 0, in.length, buffer, 0);
-        if (outSize != buffer.length) {
-          byte[] output = new byte[outSize];
-          System.arraycopy(buffer, 0, output, 0, output.length);
-          return output;
-        } else {
-          return buffer;
+    Preconditions.checkState(cipher != null, "Cipher is invalid because of previous error.");
+
+    try {
+      int scale = 1;
+      while (true) {
+        int size = in.length * scale;
+        byte[] buffer = new byte[size];
+        try {
+          int outSize = isFinal ? cipher.doFinal(in, 0, in.length, buffer, 0)
+            : cipher.update(in, 0, in.length, buffer, 0);
+          if (outSize != buffer.length) {
+            byte[] output = new byte[outSize];
+            System.arraycopy(buffer, 0, output, 0, output.length);
+            return output;
+          } else {
+            return buffer;
+          }
+        } catch (ShortBufferException e) {
+          // Try again with a bigger buffer.
+          scale *= 2;
         }
-      } catch (ShortBufferException e) {
-        // Try again with a bigger buffer.
-        scale *= 2;
       }
+    } catch (InternalError ie) {
+      // SPARK-25535. The commons-crypto library will throw InternalError if something goes wrong,
+      // and leave bad state behind in the Java wrappers, so it's not safe to use them afterwards.
+      if (mode == Cipher.ENCRYPT_MODE) {
+        this.encryptor = null;
+      } else {
+        this.decryptor = null;
+      }
+      throw ie;
     }
   }
 
diff --git a/common/network-common/src/main/java/org/apache/spark/network/crypto/TransportCipher.java b/common/network-common/src/main/java/org/apache/spark/network/crypto/TransportCipher.java
index b64e4b7a970b5..2745052265f7f 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/crypto/TransportCipher.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/crypto/TransportCipher.java
@@ -107,45 +107,72 @@ public void addToChannel(Channel ch) throws IOException {
   private static class EncryptionHandler extends ChannelOutboundHandlerAdapter {
     private final ByteArrayWritableChannel byteChannel;
     private final CryptoOutputStream cos;
+    private boolean isCipherValid;
 
     EncryptionHandler(TransportCipher cipher) throws IOException {
       byteChannel = new ByteArrayWritableChannel(STREAM_BUFFER_SIZE);
       cos = cipher.createOutputStream(byteChannel);
+      isCipherValid = true;
     }
 
     @Override
     public void write(ChannelHandlerContext ctx, Object msg, ChannelPromise promise)
       throws Exception {
-      ctx.write(new EncryptedMessage(cos, msg, byteChannel), promise);
+      ctx.write(new EncryptedMessage(this, cos, msg, byteChannel), promise);
     }
 
     @Override
     public void close(ChannelHandlerContext ctx, ChannelPromise promise) throws Exception {
       try {
-        cos.close();
+        if (isCipherValid) {
+          cos.close();
+        }
       } finally {
         super.close(ctx, promise);
       }
     }
+
+    /**
+     * SPARK-25535. Workaround for CRYPTO-141. Avoid further interaction with the underlying cipher
+     * after an error occurs.
+     */
+    void reportError() {
+      this.isCipherValid = false;
+    }
+
+    boolean isCipherValid() {
+      return isCipherValid;
+    }
   }
 
   private static class DecryptionHandler extends ChannelInboundHandlerAdapter {
     private final CryptoInputStream cis;
     private final ByteArrayReadableChannel byteChannel;
+    private boolean isCipherValid;
 
     DecryptionHandler(TransportCipher cipher) throws IOException {
       byteChannel = new ByteArrayReadableChannel();
       cis = cipher.createInputStream(byteChannel);
+      isCipherValid = true;
     }
 
     @Override
     public void channelRead(ChannelHandlerContext ctx, Object data) throws Exception {
+      if (!isCipherValid) {
+        throw new IOException("Cipher is in invalid state.");
+      }
       byteChannel.feedData((ByteBuf) data);
 
       byte[] decryptedData = new byte[byteChannel.readableBytes()];
       int offset = 0;
       while (offset < decryptedData.length) {
-        offset += cis.read(decryptedData, offset, decryptedData.length - offset);
+        // SPARK-25535: workaround for CRYPTO-141.
+        try {
+          offset += cis.read(decryptedData, offset, decryptedData.length - offset);
+        } catch (InternalError ie) {
+          isCipherValid = false;
+          throw ie;
+        }
       }
 
       ctx.fireChannelRead(Unpooled.wrappedBuffer(decryptedData, 0, decryptedData.length));
@@ -154,7 +181,9 @@ public void channelRead(ChannelHandlerContext ctx, Object data) throws Exception
     @Override
     public void channelInactive(ChannelHandlerContext ctx) throws Exception {
       try {
-        cis.close();
+        if (isCipherValid) {
+          cis.close();
+        }
       } finally {
         super.channelInactive(ctx);
       }
@@ -165,8 +194,9 @@ public void channelInactive(ChannelHandlerContext ctx) throws Exception {
     private final boolean isByteBuf;
     private final ByteBuf buf;
     private final FileRegion region;
+    private final CryptoOutputStream cos;
+    private final EncryptionHandler handler;
     private long transferred;
-    private CryptoOutputStream cos;
 
     // Due to streaming issue CRYPTO-125: https://issues.apache.org/jira/browse/CRYPTO-125, it has
     // to utilize two helper ByteArrayWritableChannel for streaming. One is used to receive raw data
@@ -176,9 +206,14 @@ public void channelInactive(ChannelHandlerContext ctx) throws Exception {
 
     private ByteBuffer currentEncrypted;
 
-    EncryptedMessage(CryptoOutputStream cos, Object msg, ByteArrayWritableChannel ch) {
+    EncryptedMessage(
+        EncryptionHandler handler,
+        CryptoOutputStream cos,
+        Object msg,
+        ByteArrayWritableChannel ch) {
       Preconditions.checkArgument(msg instanceof ByteBuf || msg instanceof FileRegion,
         "Unrecognized message type: %s", msg.getClass().getName());
+      this.handler = handler;
       this.isByteBuf = msg instanceof ByteBuf;
       this.buf = isByteBuf ? (ByteBuf) msg : null;
       this.region = isByteBuf ? null : (FileRegion) msg;
@@ -261,6 +296,9 @@ public long transferTo(WritableByteChannel target, long position) throws IOExcep
     }
 
     private void encryptMore() throws IOException {
+      if (!handler.isCipherValid()) {
+        throw new IOException("Cipher is in invalid state.");
+      }
       byteRawChannel.reset();
 
       if (isByteBuf) {
@@ -269,8 +307,14 @@ private void encryptMore() throws IOException {
       } else {
         region.transferTo(byteRawChannel, region.transferred());
       }
-      cos.write(byteRawChannel.getData(), 0, byteRawChannel.length());
-      cos.flush();
+
+      try {
+        cos.write(byteRawChannel.getData(), 0, byteRawChannel.length());
+        cos.flush();
+      } catch (InternalError ie) {
+        handler.reportError();
+        throw ie;
+      }
 
       currentEncrypted = ByteBuffer.wrap(byteEncChannel.getData(),
         0, byteEncChannel.length());
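
[Editorial note, not part of the patch] A self-contained sketch of the SPARK-25535 invalidate-on-error pattern that both AuthEngine and TransportCipher now apply: once the underlying cipher throws InternalError, its reference (or validity flag) is dropped so later calls fail fast instead of reusing corrupted native state. The FailFastCipherSketch class and its UnaryOperator stand-in for a real CryptoCipher are hypothetical.

    import java.util.function.UnaryOperator;

    public class FailFastCipherSketch {
      private UnaryOperator<byte[]> encryptor = in -> in; // stand-in for a real cipher

      byte[] encrypt(byte[] in) {
        if (encryptor == null) {
          throw new IllegalStateException("Cipher is invalid because of previous error.");
        }
        try {
          return encryptor.apply(in);
        } catch (InternalError ie) {
          encryptor = null; // never reuse a cipher that hit a fatal internal error
          throw ie;
        }
      }

      public static void main(String[] args) {
        System.out.println(new FailFastCipherSketch().encrypt(new byte[]{1, 2}).length); // 2
      }
    }
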
diff --git a/common/network-common/src/main/java/org/apache/spark/network/protocol/ChunkFetchFailure.java b/common/network-common/src/main/java/org/apache/spark/network/protocol/ChunkFetchFailure.java
index 7b28a9a969486..a7afbfa8621c8 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/protocol/ChunkFetchFailure.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/protocol/ChunkFetchFailure.java
@@ -33,7 +33,7 @@ public ChunkFetchFailure(StreamChunkId streamChunkId, String errorString) {
   }
 
   @Override
-  public Type type() { return Type.ChunkFetchFailure; }
+  public Message.Type type() { return Type.ChunkFetchFailure; }
 
   @Override
   public int encodedLength() {
diff --git a/common/network-common/src/main/java/org/apache/spark/network/protocol/ChunkFetchRequest.java b/common/network-common/src/main/java/org/apache/spark/network/protocol/ChunkFetchRequest.java
index 26d063feb5fe3..fe54fcc50dc86 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/protocol/ChunkFetchRequest.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/protocol/ChunkFetchRequest.java
@@ -32,7 +32,7 @@ public ChunkFetchRequest(StreamChunkId streamChunkId) {
   }
 
   @Override
-  public Type type() { return Type.ChunkFetchRequest; }
+  public Message.Type type() { return Type.ChunkFetchRequest; }
 
   @Override
   public int encodedLength() {
diff --git a/common/network-common/src/main/java/org/apache/spark/network/protocol/ChunkFetchSuccess.java b/common/network-common/src/main/java/org/apache/spark/network/protocol/ChunkFetchSuccess.java
index 94c2ac9b20e43..d5c9a9b3202fb 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/protocol/ChunkFetchSuccess.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/protocol/ChunkFetchSuccess.java
@@ -39,7 +39,7 @@ public ChunkFetchSuccess(StreamChunkId streamChunkId, ManagedBuffer buffer) {
   }
 
   @Override
-  public Type type() { return Type.ChunkFetchSuccess; }
+  public Message.Type type() { return Type.ChunkFetchSuccess; }
 
   @Override
   public int encodedLength() {
diff --git a/common/network-common/src/main/java/org/apache/spark/network/protocol/OneWayMessage.java b/common/network-common/src/main/java/org/apache/spark/network/protocol/OneWayMessage.java
index f7ffb1bd49bb6..1632fb9e03687 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/protocol/OneWayMessage.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/protocol/OneWayMessage.java
@@ -34,7 +34,7 @@ public OneWayMessage(ManagedBuffer body) {
   }
 
   @Override
-  public Type type() { return Type.OneWayMessage; }
+  public Message.Type type() { return Type.OneWayMessage; }
 
   @Override
   public int encodedLength() {
diff --git a/common/network-common/src/main/java/org/apache/spark/network/protocol/RpcFailure.java b/common/network-common/src/main/java/org/apache/spark/network/protocol/RpcFailure.java
index a76624ef5dc96..61061903de23f 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/protocol/RpcFailure.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/protocol/RpcFailure.java
@@ -31,7 +31,7 @@ public RpcFailure(long requestId, String errorString) {
   }
 
   @Override
-  public Type type() { return Type.RpcFailure; }
+  public Message.Type type() { return Type.RpcFailure; }
 
   @Override
   public int encodedLength() {
diff --git a/common/network-common/src/main/java/org/apache/spark/network/protocol/RpcRequest.java b/common/network-common/src/main/java/org/apache/spark/network/protocol/RpcRequest.java
index 2b30920f0598d..cc1bb95d2d566 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/protocol/RpcRequest.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/protocol/RpcRequest.java
@@ -38,7 +38,7 @@ public RpcRequest(long requestId, ManagedBuffer message) {
   }
 
   @Override
-  public Type type() { return Type.RpcRequest; }
+  public Message.Type type() { return Type.RpcRequest; }
 
   @Override
   public int encodedLength() {
diff --git a/common/network-common/src/main/java/org/apache/spark/network/protocol/RpcResponse.java b/common/network-common/src/main/java/org/apache/spark/network/protocol/RpcResponse.java
index d73014ecd8506..c03291e9c0b23 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/protocol/RpcResponse.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/protocol/RpcResponse.java
@@ -33,7 +33,7 @@ public RpcResponse(long requestId, ManagedBuffer message) {
   }
 
   @Override
-  public Type type() { return Type.RpcResponse; }
+  public Message.Type type() { return Type.RpcResponse; }
 
   @Override
   public int encodedLength() {
diff --git a/common/network-common/src/main/java/org/apache/spark/network/protocol/StreamFailure.java b/common/network-common/src/main/java/org/apache/spark/network/protocol/StreamFailure.java
index 258ef81c6783d..68fcfa7748611 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/protocol/StreamFailure.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/protocol/StreamFailure.java
@@ -33,7 +33,7 @@ public StreamFailure(String streamId, String error) {
   }
 
   @Override
-  public Type type() { return Type.StreamFailure; }
+  public Message.Type type() { return Type.StreamFailure; }
 
   @Override
   public int encodedLength() {
diff --git a/common/network-common/src/main/java/org/apache/spark/network/protocol/StreamRequest.java b/common/network-common/src/main/java/org/apache/spark/network/protocol/StreamRequest.java
index dc183c043ed9a..1b135af752bd8 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/protocol/StreamRequest.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/protocol/StreamRequest.java
@@ -34,7 +34,7 @@ public StreamRequest(String streamId) {
    }
 
   @Override
-  public Type type() { return Type.StreamRequest; }
+  public Message.Type type() { return Type.StreamRequest; }
 
   @Override
   public int encodedLength() {
diff --git a/common/network-common/src/main/java/org/apache/spark/network/protocol/StreamResponse.java b/common/network-common/src/main/java/org/apache/spark/network/protocol/StreamResponse.java
index 50b811604b84b..568108c4fe5e8 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/protocol/StreamResponse.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/protocol/StreamResponse.java
@@ -40,7 +40,7 @@ public StreamResponse(String streamId, long byteCount, ManagedBuffer buffer) {
   }
 
   @Override
-  public Type type() { return Type.StreamResponse; }
+  public Message.Type type() { return Type.StreamResponse; }
 
   @Override
   public int encodedLength() {
diff --git a/common/network-common/src/main/java/org/apache/spark/network/protocol/UploadStream.java b/common/network-common/src/main/java/org/apache/spark/network/protocol/UploadStream.java
index fa1d26e76b852..7d21151e01074 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/protocol/UploadStream.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/protocol/UploadStream.java
@@ -52,7 +52,7 @@ private UploadStream(long requestId, ManagedBuffer meta, long bodyByteCount) {
   }
 
   @Override
-  public Type type() { return Type.UploadStream; }
+  public Message.Type type() { return Type.UploadStream; }
 
   @Override
   public int encodedLength() {
diff --git a/common/network-common/src/main/java/org/apache/spark/network/sasl/SaslMessage.java b/common/network-common/src/main/java/org/apache/spark/network/sasl/SaslMessage.java
index 7331c2b481fb1..1b03300d948e2 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/sasl/SaslMessage.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/sasl/SaslMessage.java
@@ -23,6 +23,7 @@
 import org.apache.spark.network.buffer.NettyManagedBuffer;
 import org.apache.spark.network.protocol.Encoders;
 import org.apache.spark.network.protocol.AbstractMessage;
+import org.apache.spark.network.protocol.Message;
 
 /**
  * Encodes a Sasl-related message which is attempting to authenticate using some credentials tagged
@@ -46,7 +47,7 @@
   }
 
   @Override
-  public Type type() { return Type.User; }
+  public Message.Type type() { return Type.User; }
 
   @Override
   public int encodedLength() {
diff --git a/common/network-common/src/main/java/org/apache/spark/network/server/ChunkFetchRequestHandler.java b/common/network-common/src/main/java/org/apache/spark/network/server/ChunkFetchRequestHandler.java
new file mode 100644
index 0000000000000..f08d8b0f984cf
--- /dev/null
+++ b/common/network-common/src/main/java/org/apache/spark/network/server/ChunkFetchRequestHandler.java
@@ -0,0 +1,135 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.network.server;
+
+import java.net.SocketAddress;
+
+import com.google.common.base.Throwables;
+import io.netty.channel.Channel;
+import io.netty.channel.ChannelFuture;
+import io.netty.channel.ChannelFutureListener;
+import io.netty.channel.ChannelHandlerContext;
+import io.netty.channel.SimpleChannelInboundHandler;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.spark.network.buffer.ManagedBuffer;
+import org.apache.spark.network.client.TransportClient;
+import org.apache.spark.network.protocol.ChunkFetchFailure;
+import org.apache.spark.network.protocol.ChunkFetchRequest;
+import org.apache.spark.network.protocol.ChunkFetchSuccess;
+import org.apache.spark.network.protocol.Encodable;
+
+import static org.apache.spark.network.util.NettyUtils.*;
+
+/**
+ * A dedicated ChannelHandler for processing ChunkFetchRequest messages. When sending the
+ * response for a ChunkFetchRequest message to the client, the thread performing the I/O on the
+ * underlying channel could potentially be blocked due to disk contention. If several hundred
+ * clients send ChunkFetchRequest messages to the server at the same time, they could occupy
+ * all threads of TransportServer's default EventLoopGroup, each waiting for disk reads before
+ * the block data can be sent back to the client as part of a ChunkFetchSuccess message. As a
+ * result, no threads would be left to process other RPC messages, which take much less time
+ * to process, and clients could time out while performing SASL authentication, registering
+ * executors, or waiting for the response to an OpenBlocks message.
+ */
+public class ChunkFetchRequestHandler extends SimpleChannelInboundHandler<ChunkFetchRequest> {
+  private static final Logger logger = LoggerFactory.getLogger(ChunkFetchRequestHandler.class);
+
+  private final TransportClient client;
+  private final StreamManager streamManager;
+  /** The max number of chunks being transferred and not finished yet. */
+  private final long maxChunksBeingTransferred;
+
+  public ChunkFetchRequestHandler(
+      TransportClient client,
+      StreamManager streamManager,
+      Long maxChunksBeingTransferred) {
+    this.client = client;
+    this.streamManager = streamManager;
+    this.maxChunksBeingTransferred = maxChunksBeingTransferred;
+  }
+
+  @Override
+  public void exceptionCaught(ChannelHandlerContext ctx, Throwable cause) throws Exception {
+    logger.warn("Exception in connection from " + getRemoteAddress(ctx.channel()), cause);
+    ctx.close();
+  }
+
+  @Override
+  protected void channelRead0(
+      ChannelHandlerContext ctx,
+      final ChunkFetchRequest msg) throws Exception {
+    Channel channel = ctx.channel();
+    if (logger.isTraceEnabled()) {
+      logger.trace("Received req from {} to fetch block {}", getRemoteAddress(channel),
+        msg.streamChunkId);
+    }
+    long chunksBeingTransferred = streamManager.chunksBeingTransferred();
+    if (chunksBeingTransferred >= maxChunksBeingTransferred) {
+      logger.warn("The number of chunks being transferred {} is above {}, close the connection.",
+        chunksBeingTransferred, maxChunksBeingTransferred);
+      channel.close();
+      return;
+    }
+    ManagedBuffer buf;
+    try {
+      streamManager.checkAuthorization(client, msg.streamChunkId.streamId);
+      streamManager.registerChannel(channel, msg.streamChunkId.streamId);
+      buf = streamManager.getChunk(msg.streamChunkId.streamId, msg.streamChunkId.chunkIndex);
+    } catch (Exception e) {
+      logger.error(String.format("Error opening block %s for request from %s",
+        msg.streamChunkId, getRemoteAddress(channel)), e);
+      respond(channel, new ChunkFetchFailure(msg.streamChunkId,
+        Throwables.getStackTraceAsString(e)));
+      return;
+    }
+
+    streamManager.chunkBeingSent(msg.streamChunkId.streamId);
+    respond(channel, new ChunkFetchSuccess(msg.streamChunkId, buf)).addListener(
+      (ChannelFutureListener) future -> streamManager.chunkSent(msg.streamChunkId.streamId));
+  }
+
+  /**
+   * The invocation to channel.writeAndFlush is async, and the actual I/O on the
+   * channel will be handled by the EventLoop the channel is registered to. So even
+   * though we are processing the ChunkFetchRequest in a separate thread pool, the actual I/O,
+   * which is the potentially blocking call that could deplete server handler threads, is still
+   * being processed by TransportServer's default EventLoopGroup. In order to throttle the max
+   * number of threads that perform channel I/O for sending ChunkFetchRequest responses, the
+   * thread calling channel.writeAndFlush will wait for the completion of sending the response
+   * back to the client by invoking await(). This will throttle the rate at which threads from
+   * the ChunkFetchRequest-dedicated EventLoopGroup submit channel I/O requests to TransportServer's
+   * default EventLoopGroup, thus making sure that we can reserve some threads in
+   * TransportServer's default EventLoopGroup for handling other RPC messages.
+   */
+  private ChannelFuture respond(
+      final Channel channel,
+      final Encodable result) throws InterruptedException {
+    final SocketAddress remoteAddress = channel.remoteAddress();
+    return channel.writeAndFlush(result).await().addListener((ChannelFutureListener) future -> {
+      if (future.isSuccess()) {
+        logger.trace("Sent result {} to client {}", result, remoteAddress);
+      } else {
+        logger.error(String.format("Error sending result %s to %s; closing connection",
+          result, remoteAddress), future.cause());
+        channel.close();
+      }
+    });
+  }
+}
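
[Editorial note, not part of the patch] To make the await()-based throttling described above concrete, here is an illustrative, self-contained sketch (not Spark code): two "chunk fetch" workers each block on the write they submitted, so at most two writes are ever queued on the shared single-threaded "I/O" executor at a time. The executor names are stand-ins for the dedicated chunkFetchWorkers group and TransportServer's default EventLoopGroup.

    import java.util.concurrent.ExecutorService;
    import java.util.concurrent.Executors;
    import java.util.concurrent.Future;
    import java.util.concurrent.TimeUnit;

    public class AwaitThrottlingSketch {
      public static void main(String[] args) throws InterruptedException {
        ExecutorService ioLoop = Executors.newSingleThreadExecutor();        // stand-in for the default EventLoopGroup
        ExecutorService chunkFetchWorkers = Executors.newFixedThreadPool(2); // stand-in for chunkFetchWorkers
        for (int i = 0; i < 6; i++) {
          final int chunk = i;
          chunkFetchWorkers.submit(() -> {
            // Submitting the "write" is async, like channel.writeAndFlush(...).
            Future<?> write = ioLoop.submit(() -> simulateSlowWrite(chunk));
            try {
              write.get(); // like ChannelFuture.await(): the worker blocks until the I/O completes
            } catch (Exception ignored) {
              // ignored for the sketch
            }
          });
        }
        chunkFetchWorkers.shutdown();
        chunkFetchWorkers.awaitTermination(1, TimeUnit.MINUTES);
        ioLoop.shutdown();
      }

      private static void simulateSlowWrite(int chunk) {
        try {
          Thread.sleep(100); // pretend the write is blocked on disk
        } catch (InterruptedException e) {
          Thread.currentThread().interrupt();
        }
        System.out.println("wrote chunk " + chunk);
      }
    }
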
diff --git a/common/network-common/src/main/java/org/apache/spark/network/server/TransportChannelHandler.java b/common/network-common/src/main/java/org/apache/spark/network/server/TransportChannelHandler.java
index 56782a8327876..c824a7b0d4740 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/server/TransportChannelHandler.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/server/TransportChannelHandler.java
@@ -18,7 +18,7 @@
 package org.apache.spark.network.server;
 
 import io.netty.channel.ChannelHandlerContext;
-import io.netty.channel.ChannelInboundHandlerAdapter;
+import io.netty.channel.SimpleChannelInboundHandler;
 import io.netty.handler.timeout.IdleState;
 import io.netty.handler.timeout.IdleStateEvent;
 import org.slf4j.Logger;
@@ -26,6 +26,8 @@
 
 import org.apache.spark.network.client.TransportClient;
 import org.apache.spark.network.client.TransportResponseHandler;
+import org.apache.spark.network.protocol.ChunkFetchRequest;
+import org.apache.spark.network.protocol.Message;
 import org.apache.spark.network.protocol.RequestMessage;
 import org.apache.spark.network.protocol.ResponseMessage;
 import static org.apache.spark.network.util.NettyUtils.getRemoteAddress;
@@ -47,7 +49,7 @@
  * on the channel for at least `requestTimeoutMs`. Note that this is duplex traffic; we will not
  * timeout if the client is continuously sending but getting no responses, for simplicity.
  */
-public class TransportChannelHandler extends ChannelInboundHandlerAdapter {
+public class TransportChannelHandler extends SimpleChannelInboundHandler<Message> {
   private static final Logger logger = LoggerFactory.getLogger(TransportChannelHandler.class);
 
   private final TransportClient client;
@@ -112,8 +114,21 @@ public void channelInactive(ChannelHandlerContext ctx) throws Exception {
     super.channelInactive(ctx);
   }
 
+  /**
+   * Override acceptInboundMessage to properly delegate ChunkFetchRequest messages
+   * to ChunkFetchRequestHandler.
+   */
   @Override
-  public void channelRead(ChannelHandlerContext ctx, Object request) throws Exception {
+  public boolean acceptInboundMessage(Object msg) throws Exception {
+    if (msg instanceof ChunkFetchRequest) {
+      return false;
+    } else {
+      return super.acceptInboundMessage(msg);
+    }
+  }
+
+  @Override
+  public void channelRead0(ChannelHandlerContext ctx, Message request) throws Exception {
     if (request instanceof RequestMessage) {
       requestHandler.handle((RequestMessage) request);
     } else if (request instanceof ResponseMessage) {
diff --git a/common/network-common/src/main/java/org/apache/spark/network/server/TransportRequestHandler.java b/common/network-common/src/main/java/org/apache/spark/network/server/TransportRequestHandler.java
index 9fac96dbe450d..3e089b4cae273 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/server/TransportRequestHandler.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/server/TransportRequestHandler.java
@@ -24,6 +24,7 @@
 import com.google.common.base.Throwables;
 import io.netty.channel.Channel;
 import io.netty.channel.ChannelFuture;
+
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -97,9 +98,7 @@ public void channelInactive() {
 
   @Override
   public void handle(RequestMessage request) {
-    if (request instanceof ChunkFetchRequest) {
-      processFetchRequest((ChunkFetchRequest) request);
-    } else if (request instanceof RpcRequest) {
+    if (request instanceof RpcRequest) {
       processRpcRequest((RpcRequest) request);
     } else if (request instanceof OneWayMessage) {
       processOneWayMessage((OneWayMessage) request);
@@ -112,36 +111,6 @@ public void handle(RequestMessage request) {
     }
   }
 
-  private void processFetchRequest(final ChunkFetchRequest req) {
-    if (logger.isTraceEnabled()) {
-      logger.trace("Received req from {} to fetch block {}", getRemoteAddress(channel),
-        req.streamChunkId);
-    }
-    long chunksBeingTransferred = streamManager.chunksBeingTransferred();
-    if (chunksBeingTransferred >= maxChunksBeingTransferred) {
-      logger.warn("The number of chunks being transferred {} is above {}, close the connection.",
-        chunksBeingTransferred, maxChunksBeingTransferred);
-      channel.close();
-      return;
-    }
-    ManagedBuffer buf;
-    try {
-      streamManager.checkAuthorization(reverseClient, req.streamChunkId.streamId);
-      streamManager.registerChannel(channel, req.streamChunkId.streamId);
-      buf = streamManager.getChunk(req.streamChunkId.streamId, req.streamChunkId.chunkIndex);
-    } catch (Exception e) {
-      logger.error(String.format("Error opening block %s for request from %s",
-        req.streamChunkId, getRemoteAddress(channel)), e);
-      respond(new ChunkFetchFailure(req.streamChunkId, Throwables.getStackTraceAsString(e)));
-      return;
-    }
-
-    streamManager.chunkBeingSent(req.streamChunkId.streamId);
-    respond(new ChunkFetchSuccess(req.streamChunkId, buf)).addListener(future -> {
-      streamManager.chunkSent(req.streamChunkId.streamId);
-    });
-  }
-
   private void processStreamRequest(final StreamRequest req) {
     if (logger.isTraceEnabled()) {
       logger.trace("Received req from {} to fetch stream {}", getRemoteAddress(channel),
diff --git a/common/network-common/src/main/java/org/apache/spark/network/util/TransportConf.java b/common/network-common/src/main/java/org/apache/spark/network/util/TransportConf.java
index 34e4bb5912dcb..43a6bc7dc3d06 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/util/TransportConf.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/util/TransportConf.java
@@ -21,6 +21,7 @@
 import java.util.Properties;
 
 import com.google.common.primitives.Ints;
+import io.netty.util.NettyRuntime;
 
 /**
  * A central location that tracks all the settings we expose to users.
@@ -281,4 +282,35 @@ public Properties cryptoConf() {
   public long maxChunksBeingTransferred() {
     return conf.getLong("spark.shuffle.maxChunksBeingTransferred", Long.MAX_VALUE);
   }
+
+  /**
+   * Percentage of io.serverThreads used by netty to process ChunkFetchRequest messages.
+   * The shuffle server uses a separate EventLoopGroup to process ChunkFetchRequest messages.
+   * Even though the async writeAndFlush call on the underlying channel means the I/O on the
+   * channel is still handled by
+   * {@link org.apache.spark.network.server.TransportServer}'s default EventLoopGroup
+   * that is registered with the Channel, waiting inside the ChunkFetchRequest handler
+   * threads for the responses to finish sending lets us put a limit on the max number of
+   * threads from TransportServer's default EventLoopGroup that are consumed by writing
+   * responses to ChunkFetchRequest messages, which are I/O intensive and could take a long
+   * time to process due to disk contention. By configuring a slightly higher number of
+   * shuffle server threads, we are able to reserve some threads for handling other RPC
+   * messages, thus making the client less likely to experience a timeout when sending RPC
+   * messages to the shuffle server. The number of threads used for handling chunked fetch
+   * requests is a percentage of io.serverThreads (if defined), otherwise a percentage of
+   * 2 * #cores. However, a percentage of 0 means the netty default number of threads, which
+   * is 2 * #cores, ignoring io.serverThreads. The percentage is configured via
+   * spark.shuffle.server.chunkFetchHandlerThreadsPercent. The returned value is rounded up
+   * to the nearest integer.
+   */
+  public int chunkFetchHandlerThreads() {
+    if (!this.getModuleName().equalsIgnoreCase("shuffle")) {
+      return 0;
+    }
+    int chunkFetchHandlerThreadsPercent =
+      conf.getInt("spark.shuffle.server.chunkFetchHandlerThreadsPercent", 100);
+    return (int)Math.ceil(
+     (this.serverThreads() > 0 ? this.serverThreads() : 2 * NettyRuntime.availableProcessors()) *
+     chunkFetchHandlerThreadsPercent/(double)100);
+  }
 }
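
[Editorial note, not part of the patch] As a worked example with hypothetical values, the arithmetic in chunkFetchHandlerThreads() can be reproduced in isolation: with io.serverThreads unset, 8 available cores, and the percentage set to 75, the handler pool gets ceil(16 * 0.75) = 12 threads.

    public class ChunkFetchThreadsSketch {
      public static void main(String[] args) {
        int serverThreads = 0;        // io.serverThreads not configured
        int availableProcessors = 8;  // hypothetical core count
        int percent = 75;             // spark.shuffle.server.chunkFetchHandlerThreadsPercent
        int base = serverThreads > 0 ? serverThreads : 2 * availableProcessors; // 16
        int handlerThreads = (int) Math.ceil(base * percent / (double) 100);    // ceil(12.0) = 12
        System.out.println(handlerThreads); // prints 12
      }
    }
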
diff --git a/common/network-common/src/test/java/org/apache/spark/network/ChunkFetchIntegrationSuite.java b/common/network-common/src/test/java/org/apache/spark/network/ChunkFetchIntegrationSuite.java
index 824482af08dd4..37a8664a52661 100644
--- a/common/network-common/src/test/java/org/apache/spark/network/ChunkFetchIntegrationSuite.java
+++ b/common/network-common/src/test/java/org/apache/spark/network/ChunkFetchIntegrationSuite.java
@@ -143,37 +143,39 @@ public void releaseBuffers() {
   }
 
   private FetchResult fetchChunks(List<Integer> chunkIndices) throws Exception {
-    TransportClient client = clientFactory.createClient(TestUtils.getLocalHost(), server.getPort());
-    final Semaphore sem = new Semaphore(0);
-
     final FetchResult res = new FetchResult();
-    res.successChunks = Collections.synchronizedSet(new HashSet<Integer>());
-    res.failedChunks = Collections.synchronizedSet(new HashSet<Integer>());
-    res.buffers = Collections.synchronizedList(new LinkedList<ManagedBuffer>());
 
-    ChunkReceivedCallback callback = new ChunkReceivedCallback() {
-      @Override
-      public void onSuccess(int chunkIndex, ManagedBuffer buffer) {
-        buffer.retain();
-        res.successChunks.add(chunkIndex);
-        res.buffers.add(buffer);
-        sem.release();
-      }
+    try (TransportClient client =
+      clientFactory.createClient(TestUtils.getLocalHost(), server.getPort())) {
+      final Semaphore sem = new Semaphore(0);
+
+      res.successChunks = Collections.synchronizedSet(new HashSet<Integer>());
+      res.failedChunks = Collections.synchronizedSet(new HashSet<Integer>());
+      res.buffers = Collections.synchronizedList(new LinkedList<ManagedBuffer>());
+
+      ChunkReceivedCallback callback = new ChunkReceivedCallback() {
+        @Override
+        public void onSuccess(int chunkIndex, ManagedBuffer buffer) {
+          buffer.retain();
+          res.successChunks.add(chunkIndex);
+          res.buffers.add(buffer);
+          sem.release();
+        }
 
-      @Override
-      public void onFailure(int chunkIndex, Throwable e) {
-        res.failedChunks.add(chunkIndex);
-        sem.release();
-      }
-    };
+        @Override
+        public void onFailure(int chunkIndex, Throwable e) {
+          res.failedChunks.add(chunkIndex);
+          sem.release();
+        }
+      };
 
-    for (int chunkIndex : chunkIndices) {
-      client.fetchChunk(STREAM_ID, chunkIndex, callback);
-    }
-    if (!sem.tryAcquire(chunkIndices.size(), 5, TimeUnit.SECONDS)) {
-      fail("Timeout getting response from the server");
+      for (int chunkIndex : chunkIndices) {
+        client.fetchChunk(STREAM_ID, chunkIndex, callback);
+      }
+      if (!sem.tryAcquire(chunkIndices.size(), 5, TimeUnit.SECONDS)) {
+        fail("Timeout getting response from the server");
+      }
     }
-    client.close();
     return res;
   }
 
diff --git a/common/network-common/src/test/java/org/apache/spark/network/ChunkFetchRequestHandlerSuite.java b/common/network-common/src/test/java/org/apache/spark/network/ChunkFetchRequestHandlerSuite.java
new file mode 100644
index 0000000000000..2c72c53a33ae8
--- /dev/null
+++ b/common/network-common/src/test/java/org/apache/spark/network/ChunkFetchRequestHandlerSuite.java
@@ -0,0 +1,102 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.network;
+
+import io.netty.channel.ChannelHandlerContext;
+import java.util.ArrayList;
+import java.util.List;
+
+import io.netty.channel.Channel;
+import org.apache.spark.network.server.ChunkFetchRequestHandler;
+import org.junit.Test;
+
+import static org.mockito.Mockito.*;
+
+import org.apache.commons.lang3.tuple.ImmutablePair;
+import org.apache.commons.lang3.tuple.Pair;
+import org.apache.spark.network.buffer.ManagedBuffer;
+import org.apache.spark.network.client.TransportClient;
+import org.apache.spark.network.protocol.*;
+import org.apache.spark.network.server.NoOpRpcHandler;
+import org.apache.spark.network.server.OneForOneStreamManager;
+import org.apache.spark.network.server.RpcHandler;
+
+public class ChunkFetchRequestHandlerSuite {
+
+  @Test
+  public void handleChunkFetchRequest() throws Exception {
+    RpcHandler rpcHandler = new NoOpRpcHandler();
+    OneForOneStreamManager streamManager = (OneForOneStreamManager) (rpcHandler.getStreamManager());
+    Channel channel = mock(Channel.class);
+    ChannelHandlerContext context = mock(ChannelHandlerContext.class);
+    when(context.channel())
+      .thenAnswer(invocationOnMock0 -> {
+        return channel;
+      });
+    List<Pair<Object, ExtendedChannelPromise>> responseAndPromisePairs =
+      new ArrayList<>();
+    when(channel.writeAndFlush(any()))
+      .thenAnswer(invocationOnMock0 -> {
+        Object response = invocationOnMock0.getArguments()[0];
+        ExtendedChannelPromise channelFuture = new ExtendedChannelPromise(channel);
+        responseAndPromisePairs.add(ImmutablePair.of(response, channelFuture));
+        return channelFuture;
+      });
+
+    // Prepare the stream.
+    List<ManagedBuffer> managedBuffers = new ArrayList<>();
+    managedBuffers.add(new TestManagedBuffer(10));
+    managedBuffers.add(new TestManagedBuffer(20));
+    managedBuffers.add(new TestManagedBuffer(30));
+    managedBuffers.add(new TestManagedBuffer(40));
+    long streamId = streamManager.registerStream("test-app", managedBuffers.iterator());
+    streamManager.registerChannel(channel, streamId);
+    TransportClient reverseClient = mock(TransportClient.class);
+    ChunkFetchRequestHandler requestHandler = new ChunkFetchRequestHandler(reverseClient,
+      rpcHandler.getStreamManager(), 2L);
+
+    RequestMessage request0 = new ChunkFetchRequest(new StreamChunkId(streamId, 0));
+    requestHandler.channelRead(context, request0);
+    assert responseAndPromisePairs.size() == 1;
+    assert responseAndPromisePairs.get(0).getLeft() instanceof ChunkFetchSuccess;
+    assert ((ChunkFetchSuccess) (responseAndPromisePairs.get(0).getLeft())).body() ==
+      managedBuffers.get(0);
+
+    RequestMessage request1 = new ChunkFetchRequest(new StreamChunkId(streamId, 1));
+    requestHandler.channelRead(context, request1);
+    assert responseAndPromisePairs.size() == 2;
+    assert responseAndPromisePairs.get(1).getLeft() instanceof ChunkFetchSuccess;
+    assert ((ChunkFetchSuccess) (responseAndPromisePairs.get(1).getLeft())).body() ==
+      managedBuffers.get(1);
+
+    // Finish flushing the response for request0.
+    responseAndPromisePairs.get(0).getRight().finish(true);
+
+    RequestMessage request2 = new ChunkFetchRequest(new StreamChunkId(streamId, 2));
+    requestHandler.channelRead(context, request2);
+    assert responseAndPromisePairs.size() == 3;
+    assert responseAndPromisePairs.get(2).getLeft() instanceof ChunkFetchSuccess;
+    assert ((ChunkFetchSuccess) (responseAndPromisePairs.get(2).getLeft())).body() ==
+      managedBuffers.get(2);
+
+    RequestMessage request3 = new ChunkFetchRequest(new StreamChunkId(streamId, 3));
+    requestHandler.channelRead(context, request3);
+    verify(channel, times(1)).close();
+    assert responseAndPromisePairs.size() == 3;
+  }
+}
diff --git a/common/network-common/src/test/java/org/apache/spark/network/ExtendedChannelPromise.java b/common/network-common/src/test/java/org/apache/spark/network/ExtendedChannelPromise.java
new file mode 100644
index 0000000000000..573ffd627a2e7
--- /dev/null
+++ b/common/network-common/src/test/java/org/apache/spark/network/ExtendedChannelPromise.java
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.network;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import io.netty.channel.Channel;
+import io.netty.channel.ChannelPromise;
+import io.netty.channel.DefaultChannelPromise;
+import io.netty.util.concurrent.Future;
+import io.netty.util.concurrent.GenericFutureListener;
+
+class ExtendedChannelPromise extends DefaultChannelPromise {
+
+  private List<GenericFutureListener<Future<Void>>> listeners = new ArrayList<>();
+  private boolean success;
+
+  ExtendedChannelPromise(Channel channel) {
+    super(channel);
+    success = false;
+  }
+
+  @Override
+  public ChannelPromise addListener(
+      GenericFutureListener<? extends Future<? super Void>> listener) {
+    @SuppressWarnings("unchecked")
+    GenericFutureListener<Future<Void>> gfListener =
+        (GenericFutureListener<Future<Void>>) listener;
+    listeners.add(gfListener);
+    return super.addListener(listener);
+  }
+
+  @Override
+  public boolean isSuccess() {
+    return success;
+  }
+
+  @Override
+  public ChannelPromise await() throws InterruptedException {
+    return this;
+  }
+
+  public void finish(boolean success) {
+    this.success = success;
+    listeners.forEach(listener -> {
+      try {
+        listener.operationComplete(this);
+      } catch (Exception e) {
+        // do nothing
+      }
+    });
+  }
+}
diff --git a/common/network-common/src/test/java/org/apache/spark/network/RpcIntegrationSuite.java b/common/network-common/src/test/java/org/apache/spark/network/RpcIntegrationSuite.java
index 1f4d75c7e2ec5..1c0aa4da27ff9 100644
--- a/common/network-common/src/test/java/org/apache/spark/network/RpcIntegrationSuite.java
+++ b/common/network-common/src/test/java/org/apache/spark/network/RpcIntegrationSuite.java
@@ -371,23 +371,33 @@ private void assertErrorsContain(Set<String> errors, Set<String> contains) {
 
   private void assertErrorAndClosed(RpcResult result, String expectedError) {
     assertTrue("unexpected success: " + result.successMessages, result.successMessages.isEmpty());
-    // we expect 1 additional error, which contains *either* "closed" or "Connection reset"
     Set<String> errors = result.errorMessages;
     assertEquals("Expected 2 errors, got " + errors.size() + "errors: " +
         errors, 2, errors.size());
 
+    // We expect 1 additional error due to the closed connection; these are the keywords that
+    // may appear in that error message.
+    Set<String> possibleClosedErrors = Sets.newHashSet(
+        "closed",
+        "Connection reset",
+        "java.nio.channels.ClosedChannelException",
+        "java.io.IOException: Broken pipe"
+    );
     Set<String> containsAndClosed = Sets.newHashSet(expectedError);
-    containsAndClosed.add("closed");
-    containsAndClosed.add("Connection reset");
+    containsAndClosed.addAll(possibleClosedErrors);
 
     Pair<Set<String>, Set<String>> r = checkErrorsContain(errors, containsAndClosed);
 
-    Set<String> errorsNotFound = r.getRight();
-    assertEquals(1, errorsNotFound.size());
-    String err = errorsNotFound.iterator().next();
-    assertTrue(err.equals("closed") || err.equals("Connection reset"));
+    assertTrue("Got a non-empty set " + r.getLeft(), r.getLeft().isEmpty());
 
-    assertTrue(r.getLeft().isEmpty());
+    Set<String> errorsNotFound = r.getRight();
+    assertEquals(
+        "The size of " + errorsNotFound + " was not " + (possibleClosedErrors.size() - 1),
+        possibleClosedErrors.size() - 1,
+        errorsNotFound.size());
+    for (String err: errorsNotFound) {
+      assertTrue("Found a wrong error " + err, containsAndClosed.contains(err));
+    }
   }
 
   private Pair<Set<String>, Set<String>> checkErrorsContain(
diff --git a/common/network-common/src/test/java/org/apache/spark/network/TransportRequestHandlerSuite.java b/common/network-common/src/test/java/org/apache/spark/network/TransportRequestHandlerSuite.java
index 2656cbee95a20..ad640415a8e6d 100644
--- a/common/network-common/src/test/java/org/apache/spark/network/TransportRequestHandlerSuite.java
+++ b/common/network-common/src/test/java/org/apache/spark/network/TransportRequestHandlerSuite.java
@@ -21,10 +21,6 @@
 import java.util.List;
 
 import io.netty.channel.Channel;
-import io.netty.channel.ChannelPromise;
-import io.netty.channel.DefaultChannelPromise;
-import io.netty.util.concurrent.Future;
-import io.netty.util.concurrent.GenericFutureListener;
 import org.junit.Test;
 
 import static org.mockito.Mockito.*;
@@ -42,7 +38,7 @@
 public class TransportRequestHandlerSuite {
 
   @Test
-  public void handleFetchRequestAndStreamRequest() throws Exception {
+  public void handleStreamRequest() throws Exception {
     RpcHandler rpcHandler = new NoOpRpcHandler();
     OneForOneStreamManager streamManager = (OneForOneStreamManager) (rpcHandler.getStreamManager());
     Channel channel = mock(Channel.class);
@@ -68,18 +64,18 @@ public void handleFetchRequestAndStreamRequest() throws Exception {
     TransportRequestHandler requestHandler = new TransportRequestHandler(channel, reverseClient,
       rpcHandler, 2L);
 
-    RequestMessage request0 = new ChunkFetchRequest(new StreamChunkId(streamId, 0));
+    RequestMessage request0 = new StreamRequest(String.format("%d_%d", streamId, 0));
     requestHandler.handle(request0);
     assert responseAndPromisePairs.size() == 1;
-    assert responseAndPromisePairs.get(0).getLeft() instanceof ChunkFetchSuccess;
-    assert ((ChunkFetchSuccess) (responseAndPromisePairs.get(0).getLeft())).body() ==
+    assert responseAndPromisePairs.get(0).getLeft() instanceof StreamResponse;
+    assert ((StreamResponse) (responseAndPromisePairs.get(0).getLeft())).body() ==
       managedBuffers.get(0);
 
-    RequestMessage request1 = new ChunkFetchRequest(new StreamChunkId(streamId, 1));
+    RequestMessage request1 = new StreamRequest(String.format("%d_%d", streamId, 1));
     requestHandler.handle(request1);
     assert responseAndPromisePairs.size() == 2;
-    assert responseAndPromisePairs.get(1).getLeft() instanceof ChunkFetchSuccess;
-    assert ((ChunkFetchSuccess) (responseAndPromisePairs.get(1).getLeft())).body() ==
+    assert responseAndPromisePairs.get(1).getLeft() instanceof StreamResponse;
+    assert ((StreamResponse) (responseAndPromisePairs.get(1).getLeft())).body() ==
       managedBuffers.get(1);
 
     // Finish flushing the response for request0.
@@ -99,41 +95,4 @@ public void handleFetchRequestAndStreamRequest() throws Exception {
     verify(channel, times(1)).close();
     assert responseAndPromisePairs.size() == 3;
   }
-
-  private class ExtendedChannelPromise extends DefaultChannelPromise {
-
-    private List<GenericFutureListener<Future<Void>>> listeners = new ArrayList<>();
-    private boolean success;
-
-    ExtendedChannelPromise(Channel channel) {
-      super(channel);
-      success = false;
-    }
-
-    @Override
-    public ChannelPromise addListener(
-        GenericFutureListener<? extends Future<? super Void>> listener) {
-      @SuppressWarnings("unchecked")
-      GenericFutureListener<Future<Void>> gfListener =
-          (GenericFutureListener<Future<Void>>) listener;
-      listeners.add(gfListener);
-      return super.addListener(listener);
-    }
-
-    @Override
-    public boolean isSuccess() {
-      return success;
-    }
-
-    public void finish(boolean success) {
-      this.success = success;
-      listeners.forEach(listener -> {
-        try {
-          listener.operationComplete(this);
-        } catch (Exception e) {
-          // do nothing
-        }
-      });
-    }
-  }
 }
diff --git a/common/network-common/src/test/java/org/apache/spark/network/crypto/AuthEngineSuite.java b/common/network-common/src/test/java/org/apache/spark/network/crypto/AuthEngineSuite.java
index a3519fe4a423e..c0aa298a4017c 100644
--- a/common/network-common/src/test/java/org/apache/spark/network/crypto/AuthEngineSuite.java
+++ b/common/network-common/src/test/java/org/apache/spark/network/crypto/AuthEngineSuite.java
@@ -18,8 +18,11 @@
 package org.apache.spark.network.crypto;
 
 import java.util.Arrays;
+import java.util.Map;
+import java.security.InvalidKeyException;
 import static java.nio.charset.StandardCharsets.UTF_8;
 
+import com.google.common.collect.ImmutableMap;
 import org.junit.BeforeClass;
 import org.junit.Test;
 import static org.junit.Assert.*;
@@ -104,4 +107,18 @@ public void testBadChallenge() throws Exception {
       challenge.cipher, challenge.keyLength, challenge.nonce, badChallenge));
   }
 
+  @Test(expected = InvalidKeyException.class)
+  public void testBadKeySize() throws Exception {
+    Map<String, String> mconf = ImmutableMap.of("spark.network.crypto.keyLength", "42");
+    TransportConf conf = new TransportConf("rpc", new MapConfigProvider(mconf));
+
+    try (AuthEngine engine = new AuthEngine("appId", "secret", conf)) {
+      engine.challenge();
+      fail("Should have failed to create challenge message.");
+
+      // Call close explicitly to make sure it's idempotent.
+      engine.close();
+    }
+  }
+
 }
diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml
index 05335df61a664..a6d99813a8501 100644
--- a/common/network-shuffle/pom.xml
+++ b/common/network-shuffle/pom.xml
@@ -21,12 +21,12 @@
   <modelVersion>4.0.0</modelVersion>
   <parent>
     <groupId>org.apache.spark</groupId>
-    <artifactId>spark-parent_2.11</artifactId>
-    <version>2.4.0-SNAPSHOT</version>
+    <artifactId>spark-parent_2.12</artifactId>
+    <version>3.0.0-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
 
-  <artifactId>spark-network-shuffle_2.11</artifactId>
+  <artifactId>spark-network-shuffle_2.12</artifactId>
   <packaging>jar</packaging>
   <name>Spark Project Shuffle Streaming Service</name>
   <url>http://spark.apache.org/</url>
diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/DownloadFile.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/DownloadFile.java
new file mode 100644
index 0000000000000..633622b35175b
--- /dev/null
+++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/DownloadFile.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.network.shuffle;
+
+import java.io.IOException;
+
+/**
+ * A handle on the file used when fetching remote data to disk.  Used to ensure the lifecycle of
+ * writing the data, reading it back, and then cleaning it up is followed.  Specific implementations
+ * may also handle encryption.  The data can be read only via DownloadFileWritableChannel,
+ * which ensures data is not read until after the writer is closed.
+ */
+public interface DownloadFile {
+  /**
+   * Delete the file.
+   *
+   * @return  <code>true</code> if and only if the file or directory is
+   *          successfully deleted; <code>false</code> otherwise
+   */
+  boolean delete();
+
+  /**
+   * A channel for writing data to the file.  This special channel allows access to the data for
+   * reading, after the channel is closed, via {@link DownloadFileWritableChannel#closeAndRead()}.
+   */
+  DownloadFileWritableChannel openForWriting() throws IOException;
+
+  /**
+   * The path of the file, intended only for debug purposes.
+   */
+  String path();
+}
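
To make the lifecycle described in the javadoc concrete, here is a caller-side sketch. It is illustrative only; the helper name and the error handling are assumptions, not part of the patch:

    import java.nio.ByteBuffer;

    import org.apache.spark.network.buffer.ManagedBuffer;

    // Write the fetched bytes, expose them for reading only after the writer is closed,
    // and delete the file once the buffer has been consumed and released.
    static void writeThenConsume(DownloadFile file, ByteBuffer fetched) throws Exception {
      DownloadFileWritableChannel channel = file.openForWriting();
      while (fetched.hasRemaining()) {
        channel.write(fetched);
      }
      ManagedBuffer buffer = channel.closeAndRead();  // the data becomes readable here
      try {
        // ... consume buffer.createInputStream() or buffer.nioByteBuffer() ...
      } finally {
        buffer.release();
        file.delete();
      }
    }
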
diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/TempFileManager.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/DownloadFileManager.java
similarity index 75%
rename from common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/TempFileManager.java
rename to common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/DownloadFileManager.java
index 552364d274f19..c335a17ae1fe0 100644
--- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/TempFileManager.java
+++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/DownloadFileManager.java
@@ -17,20 +17,20 @@
 
 package org.apache.spark.network.shuffle;
 
-import java.io.File;
+import org.apache.spark.network.util.TransportConf;
 
 /**
- * A manager to create temp block files to reduce the memory usage and also clean temp
- * files when they won't be used any more.
+ * A manager that creates temp block files used when fetching remote data, to reduce memory usage.
+ * It cleans up the files once they are no longer needed.
  */
-public interface TempFileManager {
+public interface DownloadFileManager {
 
   /** Create a temp block file. */
-  File createTempFile();
+  DownloadFile createTempFile(TransportConf transportConf);
 
   /**
    * Register a temp file to clean up when it won't be used any more. Return whether the
    * file is registered successfully. If `false`, the caller should clean up the file by itself.
    */
-  boolean registerTempFileToClean(File file);
+  boolean registerTempFileToClean(DownloadFile file);
 }
diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/DownloadFileWritableChannel.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/DownloadFileWritableChannel.java
new file mode 100644
index 0000000000000..dbbbac43eb741
--- /dev/null
+++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/DownloadFileWritableChannel.java
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.network.shuffle;
+
+import org.apache.spark.network.buffer.ManagedBuffer;
+
+import java.nio.channels.WritableByteChannel;
+
+/**
+ * A channel for writing data that is fetched to disk. It allows access to the written data only
+ * after the writer has been closed.  Used with DownloadFile and DownloadFileManager.
+ */
+public interface DownloadFileWritableChannel extends WritableByteChannel {
+  ManagedBuffer closeAndRead();
+}
diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleClient.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleClient.java
index 7ed0b6e93a7a8..e49e27ab5aa79 100644
--- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleClient.java
+++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleClient.java
@@ -76,7 +76,7 @@ protected void checkInit() {
   @Override
   public void init(String appId) {
     this.appId = appId;
-    TransportContext context = new TransportContext(conf, new NoOpRpcHandler(), true);
+    TransportContext context = new TransportContext(conf, new NoOpRpcHandler(), true, true);
     List<TransportClientBootstrap> bootstraps = Lists.newArrayList();
     if (authEnabled) {
       bootstraps.add(new AuthClientBootstrap(conf, appId, secretKeyHolder));
@@ -91,7 +91,7 @@ public void fetchBlocks(
       String execId,
       String[] blockIds,
       BlockFetchingListener listener,
-      TempFileManager tempFileManager) {
+      DownloadFileManager downloadFileManager) {
     checkInit();
     logger.debug("External shuffle fetch from {}:{} (executor id {})", host, port, execId);
     try {
@@ -99,7 +99,7 @@ public void fetchBlocks(
           (blockIds1, listener1) -> {
             TransportClient client = clientFactory.createClient(host, port);
             new OneForOneBlockFetcher(client, appId, execId,
-              blockIds1, listener1, conf, tempFileManager).start();
+              blockIds1, listener1, conf, downloadFileManager).start();
           };
 
       int maxRetries = conf.maxIORetries();
diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/OneForOneBlockFetcher.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/OneForOneBlockFetcher.java
index 0bc571874f07c..30587023877c1 100644
--- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/OneForOneBlockFetcher.java
+++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/OneForOneBlockFetcher.java
@@ -17,18 +17,13 @@
 
 package org.apache.spark.network.shuffle;
 
-import java.io.File;
-import java.io.FileOutputStream;
 import java.io.IOException;
 import java.nio.ByteBuffer;
-import java.nio.channels.Channels;
-import java.nio.channels.WritableByteChannel;
 import java.util.Arrays;
 
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import org.apache.spark.network.buffer.FileSegmentManagedBuffer;
 import org.apache.spark.network.buffer.ManagedBuffer;
 import org.apache.spark.network.client.ChunkReceivedCallback;
 import org.apache.spark.network.client.RpcResponseCallback;
@@ -58,7 +53,7 @@
   private final BlockFetchingListener listener;
   private final ChunkReceivedCallback chunkCallback;
   private final TransportConf transportConf;
-  private final TempFileManager tempFileManager;
+  private final DownloadFileManager downloadFileManager;
 
   private StreamHandle streamHandle = null;
 
@@ -79,14 +74,14 @@ public OneForOneBlockFetcher(
       String[] blockIds,
       BlockFetchingListener listener,
       TransportConf transportConf,
-      TempFileManager tempFileManager) {
+      DownloadFileManager downloadFileManager) {
     this.client = client;
     this.openMessage = new OpenBlocks(appId, execId, blockIds);
     this.blockIds = blockIds;
     this.listener = listener;
     this.chunkCallback = new ChunkCallback();
     this.transportConf = transportConf;
-    this.tempFileManager = tempFileManager;
+    this.downloadFileManager = downloadFileManager;
   }
 
   /** Callback invoked on receipt of each chunk. We equate a single chunk to a single block. */
@@ -125,7 +120,7 @@ public void onSuccess(ByteBuffer response) {
           // Immediately request all chunks -- we expect that the total size of the request is
           // reasonable due to higher level chunking in [[ShuffleBlockFetcherIterator]].
           for (int i = 0; i < streamHandle.numChunks; i++) {
-            if (tempFileManager != null) {
+            if (downloadFileManager != null) {
               client.stream(OneForOneStreamManager.genStreamChunkId(streamHandle.streamId, i),
                 new DownloadCallback(i));
             } else {
@@ -159,13 +154,13 @@ private void failRemainingBlocks(String[] failedBlockIds, Throwable e) {
 
   private class DownloadCallback implements StreamCallback {
 
-    private WritableByteChannel channel = null;
-    private File targetFile = null;
+    private DownloadFileWritableChannel channel = null;
+    private DownloadFile targetFile = null;
     private int chunkIndex;
 
     DownloadCallback(int chunkIndex) throws IOException {
-      this.targetFile = tempFileManager.createTempFile();
-      this.channel = Channels.newChannel(new FileOutputStream(targetFile));
+      this.targetFile = downloadFileManager.createTempFile(transportConf);
+      this.channel = targetFile.openForWriting();
       this.chunkIndex = chunkIndex;
     }
 
@@ -178,11 +173,8 @@ public void onData(String streamId, ByteBuffer buf) throws IOException {
 
     @Override
     public void onComplete(String streamId) throws IOException {
-      channel.close();
-      ManagedBuffer buffer = new FileSegmentManagedBuffer(transportConf, targetFile, 0,
-        targetFile.length());
-      listener.onBlockFetchSuccess(blockIds[chunkIndex], buffer);
-      if (!tempFileManager.registerTempFileToClean(targetFile)) {
+      listener.onBlockFetchSuccess(blockIds[chunkIndex], channel.closeAndRead());
+      if (!downloadFileManager.registerTempFileToClean(targetFile)) {
         targetFile.delete();
       }
     }
diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/RetryingBlockFetcher.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/RetryingBlockFetcher.java
index f309dda8afca6..6bf3da94030d4 100644
--- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/RetryingBlockFetcher.java
+++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/RetryingBlockFetcher.java
@@ -101,7 +101,7 @@ void createAndStart(String[] blockIds, BlockFetchingListener listener)
 
   public RetryingBlockFetcher(
       TransportConf conf,
-      BlockFetchStarter fetchStarter,
+      RetryingBlockFetcher.BlockFetchStarter fetchStarter,
       String[] blockIds,
       BlockFetchingListener listener) {
     this.fetchStarter = fetchStarter;
diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ShuffleClient.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ShuffleClient.java
index 18b04fedcac5b..62b99c40f61f9 100644
--- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ShuffleClient.java
+++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ShuffleClient.java
@@ -43,7 +43,7 @@ public void init(String appId) { }
    * @param execId the executor id.
    * @param blockIds block ids to fetch.
    * @param listener the listener to receive block fetching status.
-   * @param tempFileManager TempFileManager to create and clean temp files.
+   * @param downloadFileManager DownloadFileManager to create and clean temp files.
    *                        If it's not <code>null</code>, the remote blocks will be streamed
    *                        into temp shuffle files to reduce the memory usage, otherwise,
    *                        they will be kept in memory.
@@ -54,7 +54,7 @@ public abstract void fetchBlocks(
       String execId,
       String[] blockIds,
       BlockFetchingListener listener,
-      TempFileManager tempFileManager);
+      DownloadFileManager downloadFileManager);
 
   /**
    * Get the shuffle MetricsSet from ShuffleClient, this will be used in MetricsSystem to
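
The updated fetchBlocks javadoc describes the key behavioural switch: a non-null DownloadFileManager streams blocks into temp files, while null keeps them in memory. A call-site sketch, with shuffleClient, host, port, execId, blockIds and downloadFileManager assumed to be in scope:

    import org.apache.spark.network.buffer.ManagedBuffer;

    BlockFetchingListener listener = new BlockFetchingListener() {
      @Override
      public void onBlockFetchSuccess(String blockId, ManagedBuffer data) {
        // Consume the data; call data.retain() if it escapes this callback.
      }

      @Override
      public void onBlockFetchFailure(String blockId, Throwable exception) {
        // Record the failure for retry or reporting.
      }
    };

    // Stream fetched blocks into temp files on disk:
    shuffleClient.fetchBlocks(host, port, execId, blockIds, listener, downloadFileManager);
    // Keep fetched blocks in memory instead:
    shuffleClient.fetchBlocks(host, port, execId, blockIds, listener, null);
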
diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ShuffleIndexInformation.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ShuffleIndexInformation.java
index 386738ece51a6..371149bef3974 100644
--- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ShuffleIndexInformation.java
+++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ShuffleIndexInformation.java
@@ -37,14 +37,8 @@ public ShuffleIndexInformation(File indexFile) throws IOException {
     size = (int)indexFile.length();
     ByteBuffer buffer = ByteBuffer.allocate(size);
     offsets = buffer.asLongBuffer();
-    DataInputStream dis = null;
-    try {
-      dis = new DataInputStream(Files.newInputStream(indexFile.toPath()));
+    try (DataInputStream dis = new DataInputStream(Files.newInputStream(indexFile.toPath()))) {
       dis.readFully(buffer.array());
-    } finally {
-      if (dis != null) {
-        dis.close();
-      }
     }
   }
 
diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/SimpleDownloadFile.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/SimpleDownloadFile.java
new file mode 100644
index 0000000000000..670612fd6f66a
--- /dev/null
+++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/SimpleDownloadFile.java
@@ -0,0 +1,91 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.network.shuffle;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.channels.Channels;
+import java.nio.channels.WritableByteChannel;
+
+import org.apache.spark.network.buffer.FileSegmentManagedBuffer;
+import org.apache.spark.network.buffer.ManagedBuffer;
+import org.apache.spark.network.util.TransportConf;
+
+/**
+ * A DownloadFile that does not take any encryption settings into account for reading and
+ * writing data.
+ *
+ * This does *not* mean the data in the file is unencrypted -- it could be that the data is
+ * already encrypted when it is written, and a subsequent layer is responsible for decrypting it.
+ */
+public class SimpleDownloadFile implements DownloadFile {
+
+  private final File file;
+  private final TransportConf transportConf;
+
+  public SimpleDownloadFile(File file, TransportConf transportConf) {
+    this.file = file;
+    this.transportConf = transportConf;
+  }
+
+  @Override
+  public boolean delete() {
+    return file.delete();
+  }
+
+  @Override
+  public DownloadFileWritableChannel openForWriting() throws IOException {
+    return new SimpleDownloadWritableChannel();
+  }
+
+  @Override
+  public String path() {
+    return file.getAbsolutePath();
+  }
+
+  private class SimpleDownloadWritableChannel implements DownloadFileWritableChannel {
+
+    private final WritableByteChannel channel;
+
+    SimpleDownloadWritableChannel() throws FileNotFoundException {
+      channel = Channels.newChannel(new FileOutputStream(file));
+    }
+
+    @Override
+    public ManagedBuffer closeAndRead() {
+      return new FileSegmentManagedBuffer(transportConf, file, 0, file.length());
+    }
+
+    @Override
+    public int write(ByteBuffer src) throws IOException {
+      return channel.write(src);
+    }
+
+    @Override
+    public boolean isOpen() {
+      return channel.isOpen();
+    }
+
+    @Override
+    public void close() throws IOException {
+      channel.close();
+    }
+  }
+}
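
A standalone sketch of SimpleDownloadFile in action; the file name, configuration and payload are assumptions. The buffer returned by closeAndRead() is a FileSegmentManagedBuffer spanning whatever has been written to the file:

    import java.io.File;
    import java.nio.ByteBuffer;
    import java.nio.charset.StandardCharsets;
    import java.util.Collections;

    import org.apache.spark.network.buffer.ManagedBuffer;
    import org.apache.spark.network.util.MapConfigProvider;
    import org.apache.spark.network.util.TransportConf;

    public class SimpleDownloadFileExample {
      public static void main(String[] args) throws Exception {
        TransportConf conf =
          new TransportConf("shuffle", new MapConfigProvider(Collections.emptyMap()));
        File tmp = File.createTempFile("download", ".dat");
        DownloadFile download = new SimpleDownloadFile(tmp, conf);

        DownloadFileWritableChannel channel = download.openForWriting();
        channel.write(ByteBuffer.wrap("hello".getBytes(StandardCharsets.UTF_8)));

        ManagedBuffer buffer = channel.closeAndRead();
        System.out.println(buffer.size());  // 5 bytes, readable via buffer.createInputStream()
        download.delete();
      }
    }
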
diff --git a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolverSuite.java b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolverSuite.java
index d2072a54fa415..459629c5f05fe 100644
--- a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolverSuite.java
+++ b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolverSuite.java
@@ -98,19 +98,19 @@ public void testSortShuffleBlocks() throws IOException {
     resolver.registerExecutor("app0", "exec0",
       dataContext.createExecutorInfo(SORT_MANAGER));
 
-    InputStream block0Stream =
-      resolver.getBlockData("app0", "exec0", 0, 0, 0).createInputStream();
-    String block0 = CharStreams.toString(
-        new InputStreamReader(block0Stream, StandardCharsets.UTF_8));
-    block0Stream.close();
-    assertEquals(sortBlock0, block0);
-
-    InputStream block1Stream =
-      resolver.getBlockData("app0", "exec0", 0, 0, 1).createInputStream();
-    String block1 = CharStreams.toString(
-        new InputStreamReader(block1Stream, StandardCharsets.UTF_8));
-    block1Stream.close();
-    assertEquals(sortBlock1, block1);
+    try (InputStream block0Stream = resolver.getBlockData(
+        "app0", "exec0", 0, 0, 0).createInputStream()) {
+      String block0 =
+        CharStreams.toString(new InputStreamReader(block0Stream, StandardCharsets.UTF_8));
+      assertEquals(sortBlock0, block0);
+    }
+
+    try (InputStream block1Stream = resolver.getBlockData(
+        "app0", "exec0", 0, 0, 1).createInputStream()) {
+      String block1 =
+        CharStreams.toString(new InputStreamReader(block1Stream, StandardCharsets.UTF_8));
+      assertEquals(sortBlock1, block1);
+    }
   }
 
   @Test
@@ -149,7 +149,7 @@ public void testNormalizeAndInternPathname() {
 
   private void assertPathsMatch(String p1, String p2, String p3, String expectedPathname) {
     String normPathname =
-        ExternalShuffleBlockResolver.createNormalizedInternedPathname(p1, p2, p3);
+      ExternalShuffleBlockResolver.createNormalizedInternedPathname(p1, p2, p3);
     assertEquals(expectedPathname, normPathname);
     File file = new File(normPathname);
     String returnedPath = file.getPath();
diff --git a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleIntegrationSuite.java b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleIntegrationSuite.java
index a6a1b8d0ac3f1..526b96b364473 100644
--- a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleIntegrationSuite.java
+++ b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleIntegrationSuite.java
@@ -133,37 +133,37 @@ private FetchResult fetchBlocks(
 
     final Semaphore requestsRemaining = new Semaphore(0);
 
-    ExternalShuffleClient client = new ExternalShuffleClient(clientConf, null, false, 5000);
-    client.init(APP_ID);
-    client.fetchBlocks(TestUtils.getLocalHost(), port, execId, blockIds,
-      new BlockFetchingListener() {
-        @Override
-        public void onBlockFetchSuccess(String blockId, ManagedBuffer data) {
-          synchronized (this) {
-            if (!res.successBlocks.contains(blockId) && !res.failedBlocks.contains(blockId)) {
-              data.retain();
-              res.successBlocks.add(blockId);
-              res.buffers.add(data);
-              requestsRemaining.release();
+    try (ExternalShuffleClient client = new ExternalShuffleClient(clientConf, null, false, 5000)) {
+      client.init(APP_ID);
+      client.fetchBlocks(TestUtils.getLocalHost(), port, execId, blockIds,
+        new BlockFetchingListener() {
+          @Override
+          public void onBlockFetchSuccess(String blockId, ManagedBuffer data) {
+            synchronized (this) {
+              if (!res.successBlocks.contains(blockId) && !res.failedBlocks.contains(blockId)) {
+                data.retain();
+                res.successBlocks.add(blockId);
+                res.buffers.add(data);
+                requestsRemaining.release();
+              }
             }
           }
-        }
-
-        @Override
-        public void onBlockFetchFailure(String blockId, Throwable exception) {
-          synchronized (this) {
-            if (!res.successBlocks.contains(blockId) && !res.failedBlocks.contains(blockId)) {
-              res.failedBlocks.add(blockId);
-              requestsRemaining.release();
+
+          @Override
+          public void onBlockFetchFailure(String blockId, Throwable exception) {
+            synchronized (this) {
+              if (!res.successBlocks.contains(blockId) && !res.failedBlocks.contains(blockId)) {
+                res.failedBlocks.add(blockId);
+                requestsRemaining.release();
+              }
             }
           }
-        }
-      }, null);
+        }, null);
 
-    if (!requestsRemaining.tryAcquire(blockIds.length, 5, TimeUnit.SECONDS)) {
-      fail("Timeout getting response from the server");
+      if (!requestsRemaining.tryAcquire(blockIds.length, 5, TimeUnit.SECONDS)) {
+        fail("Timeout getting response from the server");
+      }
     }
-    client.close();
     return res;
   }
 
diff --git a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleSecuritySuite.java b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleSecuritySuite.java
index 16bad9f1b319d..82caf392b821b 100644
--- a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleSecuritySuite.java
+++ b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleSecuritySuite.java
@@ -96,14 +96,16 @@ private void validate(String appId, String secretKey, boolean encrypt)
         ImmutableMap.of("spark.authenticate.enableSaslEncryption", "true")));
     }
 
-    ExternalShuffleClient client =
-      new ExternalShuffleClient(testConf, new TestSecretKeyHolder(appId, secretKey), true, 5000);
-    client.init(appId);
-    // Registration either succeeds or throws an exception.
-    client.registerWithShuffleServer(TestUtils.getLocalHost(), server.getPort(), "exec0",
-      new ExecutorShuffleInfo(new String[0], 0,
-        "org.apache.spark.shuffle.sort.SortShuffleManager"));
-    client.close();
+    try (ExternalShuffleClient client =
+        new ExternalShuffleClient(
+          testConf, new TestSecretKeyHolder(appId, secretKey), true, 5000)) {
+      client.init(appId);
+      // Registration either succeeds or throws an exception.
+      client.registerWithShuffleServer(TestUtils.getLocalHost(), server.getPort(), "exec0",
+        new ExecutorShuffleInfo(
+          new String[0], 0, "org.apache.spark.shuffle.sort.SortShuffleManager")
+      );
+    }
   }
 
   /** Provides a secret key holder which always returns the given secret key, for a single appId. */
diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml
index 564e6583c909e..55cdc3140aa08 100644
--- a/common/network-yarn/pom.xml
+++ b/common/network-yarn/pom.xml
@@ -21,12 +21,12 @@
   <modelVersion>4.0.0</modelVersion>
   <parent>
     <groupId>org.apache.spark</groupId>
-    <artifactId>spark-parent_2.11</artifactId>
-    <version>2.4.0-SNAPSHOT</version>
+    <artifactId>spark-parent_2.12</artifactId>
+    <version>3.0.0-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
 
-  <artifactId>spark-network-yarn_2.11</artifactId>
+  <artifactId>spark-network-yarn_2.12</artifactId>
   <packaging>jar</packaging>
   <name>Spark Project YARN Shuffle Service</name>
   <url>http://spark.apache.org/</url>
diff --git a/common/network-yarn/src/main/java/org/apache/spark/network/yarn/YarnShuffleService.java b/common/network-yarn/src/main/java/org/apache/spark/network/yarn/YarnShuffleService.java
index d8b2ed6b5dc7b..72ae1a1295236 100644
--- a/common/network-yarn/src/main/java/org/apache/spark/network/yarn/YarnShuffleService.java
+++ b/common/network-yarn/src/main/java/org/apache/spark/network/yarn/YarnShuffleService.java
@@ -35,6 +35,8 @@
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.metrics2.impl.MetricsSystemImpl;
+import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
 import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.hadoop.yarn.server.api.*;
 import org.apache.spark.network.util.LevelDBProvider;
@@ -168,6 +170,15 @@ protected void serviceInit(Configuration conf) throws Exception {
       TransportConf transportConf = new TransportConf("shuffle", new HadoopConfigProvider(conf));
       blockHandler = new ExternalShuffleBlockHandler(transportConf, registeredExecutorFile);
 
+      // register metrics on the block handler into the Node Manager's metrics system.
+      YarnShuffleServiceMetrics serviceMetrics =
+        new YarnShuffleServiceMetrics(blockHandler.getAllMetrics());
+
+      MetricsSystemImpl metricsSystem = (MetricsSystemImpl) DefaultMetricsSystem.instance();
+      metricsSystem.register(
+        "sparkShuffleService", "Metrics on the Spark Shuffle Service", serviceMetrics);
+      logger.info("Registered metrics with Hadoop's DefaultMetricsSystem");
+
       // If authentication is enabled, set up the shuffle server to use a
       // special RPC handler that filters out unauthenticated fetch requests
       List<TransportServerBootstrap> bootstraps = Lists.newArrayList();
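
The registration above follows the standard Hadoop metrics2 pattern: any MetricsSource registered with the NodeManager's DefaultMetricsSystem becomes visible through the configured sinks, including JMX. A self-contained sketch of that pattern (the gauge name and value are made up):

    import org.apache.hadoop.metrics2.MetricsSource;
    import org.apache.hadoop.metrics2.impl.MetricsSystemImpl;
    import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
    import org.apache.hadoop.metrics2.lib.Interns;

    public class MetricsRegistrationSketch {
      public static void main(String[] args) {
        // A trivial source emitting one record with one gauge; YarnShuffleServiceMetrics plays
        // the same role for the shuffle service, translating its Dropwizard metrics on the fly.
        MetricsSource source = (collector, all) ->
          collector.addRecord("sparkShuffleService")
            .addGauge(Interns.info("demoGauge", "Value of gauge demoGauge"), 42);

        MetricsSystemImpl metricsSystem = (MetricsSystemImpl) DefaultMetricsSystem.instance();
        metricsSystem.register(
          "sparkShuffleService", "Metrics on the Spark Shuffle Service", source);
      }
    }
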
diff --git a/common/network-yarn/src/main/java/org/apache/spark/network/yarn/YarnShuffleServiceMetrics.java b/common/network-yarn/src/main/java/org/apache/spark/network/yarn/YarnShuffleServiceMetrics.java
new file mode 100644
index 0000000000000..3e4d479b862b3
--- /dev/null
+++ b/common/network-yarn/src/main/java/org/apache/spark/network/yarn/YarnShuffleServiceMetrics.java
@@ -0,0 +1,137 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.network.yarn;
+
+import java.util.Map;
+
+import com.codahale.metrics.*;
+import org.apache.hadoop.metrics2.MetricsCollector;
+import org.apache.hadoop.metrics2.MetricsInfo;
+import org.apache.hadoop.metrics2.MetricsRecordBuilder;
+import org.apache.hadoop.metrics2.MetricsSource;
+
+/**
+ * Forwards {@link org.apache.spark.network.shuffle.ExternalShuffleBlockHandler.ShuffleMetrics}
+ * to the Hadoop metrics system. The NodeManager exposes a JMX endpoint by default, where these
+ * metrics can be collected.
+ */
+class YarnShuffleServiceMetrics implements MetricsSource {
+
+  private final MetricSet metricSet;
+
+  YarnShuffleServiceMetrics(MetricSet metricSet) {
+    this.metricSet = metricSet;
+  }
+
+  /**
+   * Get metrics from the source
+   *
+   * @param collector to contain the resulting metrics snapshot
+   * @param all       if true, return all metrics even if unchanged.
+   */
+  @Override
+  public void getMetrics(MetricsCollector collector, boolean all) {
+    MetricsRecordBuilder metricsRecordBuilder = collector.addRecord("sparkShuffleService");
+
+    for (Map.Entry<String, Metric> entry : metricSet.getMetrics().entrySet()) {
+      collectMetric(metricsRecordBuilder, entry.getKey(), entry.getValue());
+    }
+  }
+
+  /**
+   * Collects a single metric into the Hadoop record builder, covering the metric types
+   * used in {@link org.apache.spark.network.shuffle.ExternalShuffleBlockHandler.ShuffleMetrics}.
+   * Visible for testing.
+   */
+  public static void collectMetric(
+    MetricsRecordBuilder metricsRecordBuilder, String name, Metric metric) {
+
+    if (metric instanceof Timer) {
+      Timer t = (Timer) metric;
+      metricsRecordBuilder
+        .addCounter(new ShuffleServiceMetricsInfo(name + "_count", "Count of timer " + name),
+          t.getCount())
+        .addGauge(
+          new ShuffleServiceMetricsInfo(name + "_rate15", "15 minute rate of timer " + name),
+          t.getFifteenMinuteRate())
+        .addGauge(
+          new ShuffleServiceMetricsInfo(name + "_rate5", "5 minute rate of timer " + name),
+          t.getFiveMinuteRate())
+        .addGauge(
+          new ShuffleServiceMetricsInfo(name + "_rate1", "1 minute rate of timer " + name),
+          t.getOneMinuteRate())
+        .addGauge(new ShuffleServiceMetricsInfo(name + "_rateMean", "Mean rate of timer " + name),
+          t.getMeanRate());
+    } else if (metric instanceof Meter) {
+      Meter m = (Meter) metric;
+      metricsRecordBuilder
+        .addCounter(new ShuffleServiceMetricsInfo(name + "_count", "Count of meter " + name),
+          m.getCount())
+        .addGauge(
+          new ShuffleServiceMetricsInfo(name + "_rate15", "15 minute rate of meter " + name),
+          m.getFifteenMinuteRate())
+        .addGauge(
+          new ShuffleServiceMetricsInfo(name + "_rate5", "5 minute rate of meter " + name),
+          m.getFiveMinuteRate())
+        .addGauge(
+          new ShuffleServiceMetricsInfo(name + "_rate1", "1 minute rate of meter " + name),
+          m.getOneMinuteRate())
+        .addGauge(new ShuffleServiceMetricsInfo(name + "_rateMean", "Mean rate of meter " + name),
+          m.getMeanRate());
+    } else if (metric instanceof Gauge) {
+      final Object gaugeValue = ((Gauge) metric).getValue();
+      if (gaugeValue instanceof Integer) {
+        metricsRecordBuilder.addGauge(getShuffleServiceMetricsInfo(name), (Integer) gaugeValue);
+      } else if (gaugeValue instanceof Long) {
+        metricsRecordBuilder.addGauge(getShuffleServiceMetricsInfo(name), (Long) gaugeValue);
+      } else if (gaugeValue instanceof Float) {
+        metricsRecordBuilder.addGauge(getShuffleServiceMetricsInfo(name), (Float) gaugeValue);
+      } else if (gaugeValue instanceof Double) {
+        metricsRecordBuilder.addGauge(getShuffleServiceMetricsInfo(name), (Double) gaugeValue);
+      } else {
+        throw new IllegalStateException(
+                "Not supported class type of metric[" + name + "] for value " + gaugeValue);
+      }
+    }
+  }
+
+  private static MetricsInfo getShuffleServiceMetricsInfo(String name) {
+    return new ShuffleServiceMetricsInfo(name, "Value of gauge " + name);
+  }
+
+  private static class ShuffleServiceMetricsInfo implements MetricsInfo {
+
+    private final String name;
+    private final String description;
+
+    ShuffleServiceMetricsInfo(String name, String description) {
+      this.name = name;
+      this.description = description;
+    }
+
+    @Override
+    public String name() {
+      return name;
+    }
+
+    @Override
+    public String description() {
+      return description;
+    }
+  }
+}
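
collectMetric flattens each Dropwizard metric into Hadoop counters and gauges; for a Timer that means one *_count counter plus four rate gauges. A sketch of how a unit test could pin that down, assuming Mockito 2 (RETURNS_SELF keeps the builder chainable) and an arbitrary metric name:

    import com.codahale.metrics.Timer;
    import org.apache.hadoop.metrics2.MetricsRecordBuilder;
    import static org.mockito.Mockito.*;

    // Inside a test method:
    MetricsRecordBuilder builder = mock(MetricsRecordBuilder.class, RETURNS_SELF);
    YarnShuffleServiceMetrics.collectMetric(builder, "openBlockRequestLatencyMillis", new Timer());

    // One counter (openBlockRequestLatencyMillis_count) and the 15m/5m/1m/mean rate gauges.
    verify(builder).addCounter(any(), anyLong());
    verify(builder, times(4)).addGauge(any(), anyDouble());
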
diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml
index 2f04abe8c7e88..3c3c0d2d96a1c 100644
--- a/common/sketch/pom.xml
+++ b/common/sketch/pom.xml
@@ -21,12 +21,12 @@
   <modelVersion>4.0.0</modelVersion>
   <parent>
     <groupId>org.apache.spark</groupId>
-    <artifactId>spark-parent_2.11</artifactId>
-    <version>2.4.0-SNAPSHOT</version>
+    <artifactId>spark-parent_2.12</artifactId>
+    <version>3.0.0-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
 
-  <artifactId>spark-sketch_2.11</artifactId>
+  <artifactId>spark-sketch_2.12</artifactId>
   <packaging>jar</packaging>
   <name>Spark Project Sketch</name>
   <url>http://spark.apache.org/</url>
diff --git a/common/sketch/src/main/java/org/apache/spark/util/sketch/CountMinSketch.java b/common/sketch/src/main/java/org/apache/spark/util/sketch/CountMinSketch.java
index f7c22dddb8cc0..06a248c9a27c2 100644
--- a/common/sketch/src/main/java/org/apache/spark/util/sketch/CountMinSketch.java
+++ b/common/sketch/src/main/java/org/apache/spark/util/sketch/CountMinSketch.java
@@ -191,10 +191,9 @@ public static CountMinSketch readFrom(InputStream in) throws IOException {
    * Reads in a {@link CountMinSketch} from a byte array.
    */
   public static CountMinSketch readFrom(byte[] bytes) throws IOException {
-    InputStream in = new ByteArrayInputStream(bytes);
-    CountMinSketch cms = readFrom(in);
-    in.close();
-    return cms;
+    try (InputStream in = new ByteArrayInputStream(bytes)) {
+      return readFrom(in);
+    }
   }
 
   /**
diff --git a/common/sketch/src/main/java/org/apache/spark/util/sketch/CountMinSketchImpl.java b/common/sketch/src/main/java/org/apache/spark/util/sketch/CountMinSketchImpl.java
index fd1906d2e5ae9..b78c1677a1213 100644
--- a/common/sketch/src/main/java/org/apache/spark/util/sketch/CountMinSketchImpl.java
+++ b/common/sketch/src/main/java/org/apache/spark/util/sketch/CountMinSketchImpl.java
@@ -322,10 +322,10 @@ public void writeTo(OutputStream out) throws IOException {
 
   @Override
   public byte[] toByteArray() throws IOException {
-    ByteArrayOutputStream out = new ByteArrayOutputStream();
-    writeTo(out);
-    out.close();
-    return out.toByteArray();
+    try (ByteArrayOutputStream out = new ByteArrayOutputStream()) {
+      writeTo(out);
+      return out.toByteArray();
+    }
   }
 
   public static CountMinSketchImpl readFrom(InputStream in) throws IOException {
diff --git a/common/tags/pom.xml b/common/tags/pom.xml
index ba127408e1c59..883b73a69c9de 100644
--- a/common/tags/pom.xml
+++ b/common/tags/pom.xml
@@ -21,12 +21,12 @@
   <modelVersion>4.0.0</modelVersion>
   <parent>
     <groupId>org.apache.spark</groupId>
-    <artifactId>spark-parent_2.11</artifactId>
-    <version>2.4.0-SNAPSHOT</version>
+    <artifactId>spark-parent_2.12</artifactId>
+    <version>3.0.0-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
 
-  <artifactId>spark-tags_2.11</artifactId>
+  <artifactId>spark-tags_2.12</artifactId>
   <packaging>jar</packaging>
   <name>Spark Project Tags</name>
   <url>http://spark.apache.org/</url>
diff --git a/common/tags/src/main/java/org/apache/spark/annotation/DeveloperApi.java b/common/tags/src/main/java/org/apache/spark/annotation/DeveloperApi.java
index 0ecef6db0e039..890f2faca28b0 100644
--- a/common/tags/src/main/java/org/apache/spark/annotation/DeveloperApi.java
+++ b/common/tags/src/main/java/org/apache/spark/annotation/DeveloperApi.java
@@ -29,6 +29,7 @@
  * of the known issue that Scaladoc displays only either the annotation or the comment, whichever
  * comes first.
  */
+@Documented
 @Retention(RetentionPolicy.RUNTIME)
 @Target({ElementType.TYPE, ElementType.FIELD, ElementType.METHOD, ElementType.PARAMETER,
         ElementType.CONSTRUCTOR, ElementType.LOCAL_VARIABLE, ElementType.PACKAGE})
diff --git a/common/tags/src/main/java/org/apache/spark/annotation/Evolving.java b/common/tags/src/main/java/org/apache/spark/annotation/Evolving.java
new file mode 100644
index 0000000000000..87e8948f204ff
--- /dev/null
+++ b/common/tags/src/main/java/org/apache/spark/annotation/Evolving.java
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.annotation;
+
+import java.lang.annotation.*;
+
+/**
+ * APIs that are meant to evolve towards becoming stable APIs, but are not stable APIs yet.
+ * Evolving interfaces can change from one feature release to another release (i.e. 2.1 to 2.2).
+ */
+@Documented
+@Retention(RetentionPolicy.RUNTIME)
+@Target({ElementType.TYPE, ElementType.FIELD, ElementType.METHOD, ElementType.PARAMETER,
+  ElementType.CONSTRUCTOR, ElementType.LOCAL_VARIABLE, ElementType.PACKAGE})
+public @interface Evolving {}
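
Usage is the same as for the old nested InterfaceStability.Evolving, just with a shorter import. A hypothetical API interface for illustration:

    import org.apache.spark.annotation.Evolving;

    /**
     * Hypothetical public API: marked evolving, so it may change between feature releases
     * (e.g. 2.1 to 2.2) while still being intended to stabilize eventually.
     */
    @Evolving
    public interface ExampleScanBuilder {
      ExampleScanBuilder withBatchSize(int rows);
    }
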
diff --git a/common/tags/src/main/java/org/apache/spark/annotation/Experimental.java b/common/tags/src/main/java/org/apache/spark/annotation/Experimental.java
index ff8120291455f..96875920cd9c3 100644
--- a/common/tags/src/main/java/org/apache/spark/annotation/Experimental.java
+++ b/common/tags/src/main/java/org/apache/spark/annotation/Experimental.java
@@ -30,6 +30,7 @@
  * of the known issue that Scaladoc displays only either the annotation or the comment, whichever
  * comes first.
  */
+@Documented
 @Retention(RetentionPolicy.RUNTIME)
 @Target({ElementType.TYPE, ElementType.FIELD, ElementType.METHOD, ElementType.PARAMETER,
         ElementType.CONSTRUCTOR, ElementType.LOCAL_VARIABLE, ElementType.PACKAGE})
diff --git a/common/tags/src/main/java/org/apache/spark/annotation/InterfaceStability.java b/common/tags/src/main/java/org/apache/spark/annotation/InterfaceStability.java
deleted file mode 100644
index 323098f69c6e1..0000000000000
--- a/common/tags/src/main/java/org/apache/spark/annotation/InterfaceStability.java
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.annotation;
-
-import java.lang.annotation.Documented;
-
-/**
- * Annotation to inform users of how much to rely on a particular package,
- * class or method not changing over time.
- */
-public class InterfaceStability {
-
-  /**
-   * Stable APIs that retain source and binary compatibility within a major release.
-   * These interfaces can change from one major release to another major release
-   * (e.g. from 1.0 to 2.0).
-   */
-  @Documented
-  public @interface Stable {};
-
-  /**
-   * APIs that are meant to evolve towards becoming stable APIs, but are not stable APIs yet.
-   * Evolving interfaces can change from one feature release to another release (i.e. 2.1 to 2.2).
-   */
-  @Documented
-  public @interface Evolving {};
-
-  /**
-   * Unstable APIs, with no guarantee on stability.
-   * Classes that are unannotated are considered Unstable.
-   */
-  @Documented
-  public @interface Unstable {};
-}
diff --git a/common/tags/src/main/java/org/apache/spark/annotation/Private.java b/common/tags/src/main/java/org/apache/spark/annotation/Private.java
index 9082fcf0c84bc..a460d608ae16b 100644
--- a/common/tags/src/main/java/org/apache/spark/annotation/Private.java
+++ b/common/tags/src/main/java/org/apache/spark/annotation/Private.java
@@ -17,10 +17,7 @@
 
 package org.apache.spark.annotation;
 
-import java.lang.annotation.ElementType;
-import java.lang.annotation.Retention;
-import java.lang.annotation.RetentionPolicy;
-import java.lang.annotation.Target;
+import java.lang.annotation.*;
 
 /**
  * A class that is considered private to the internals of Spark -- there is a high-likelihood
@@ -35,6 +32,7 @@
  * of the known issue that Scaladoc displays only either the annotation or the comment, whichever
  * comes first.
  */
+@Documented
 @Retention(RetentionPolicy.RUNTIME)
 @Target({ElementType.TYPE, ElementType.FIELD, ElementType.METHOD, ElementType.PARAMETER,
         ElementType.CONSTRUCTOR, ElementType.LOCAL_VARIABLE, ElementType.PACKAGE})
diff --git a/external/flume-sink/src/main/scala/org/apache/spark/streaming/flume/sink/SparkSinkThreadFactory.scala b/common/tags/src/main/java/org/apache/spark/annotation/Stable.java
similarity index 61%
rename from external/flume-sink/src/main/scala/org/apache/spark/streaming/flume/sink/SparkSinkThreadFactory.scala
rename to common/tags/src/main/java/org/apache/spark/annotation/Stable.java
index 845fc8debda75..b198bfbe91e10 100644
--- a/external/flume-sink/src/main/scala/org/apache/spark/streaming/flume/sink/SparkSinkThreadFactory.scala
+++ b/common/tags/src/main/java/org/apache/spark/annotation/Stable.java
@@ -14,22 +14,18 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.spark.streaming.flume.sink
 
-import java.util.concurrent.ThreadFactory
-import java.util.concurrent.atomic.AtomicLong
+package org.apache.spark.annotation;
+
+import java.lang.annotation.*;
 
 /**
- * Thread factory that generates daemon threads with a specified name format.
+ * Stable APIs that retain source and binary compatibility within a major release.
+ * These interfaces can change from one major release to another major release
+ * (e.g. from 1.0 to 2.0).
  */
-private[sink] class SparkSinkThreadFactory(nameFormat: String) extends ThreadFactory {
-
-  private val threadId = new AtomicLong()
-
-  override def newThread(r: Runnable): Thread = {
-    val t = new Thread(r, nameFormat.format(threadId.incrementAndGet()))
-    t.setDaemon(true)
-    t
-  }
-
-}
+@Documented
+@Retention(RetentionPolicy.RUNTIME)
+@Target({ElementType.TYPE, ElementType.FIELD, ElementType.METHOD, ElementType.PARAMETER,
+  ElementType.CONSTRUCTOR, ElementType.LOCAL_VARIABLE, ElementType.PACKAGE})
+public @interface Stable {}
diff --git a/common/tags/src/main/java/org/apache/spark/annotation/Unstable.java b/common/tags/src/main/java/org/apache/spark/annotation/Unstable.java
new file mode 100644
index 0000000000000..88ee72125b23f
--- /dev/null
+++ b/common/tags/src/main/java/org/apache/spark/annotation/Unstable.java
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.annotation;
+
+import java.lang.annotation.*;
+
+/**
+ * Unstable APIs, with no guarantee on stability.
+ * Classes that are unannotated are considered Unstable.
+ */
+@Documented
+@Retention(RetentionPolicy.RUNTIME)
+@Target({ElementType.TYPE, ElementType.FIELD, ElementType.METHOD, ElementType.PARAMETER,
+  ElementType.CONSTRUCTOR, ElementType.LOCAL_VARIABLE, ElementType.PACKAGE})
+public @interface Unstable {}
diff --git a/common/unsafe/pom.xml b/common/unsafe/pom.xml
index 1527854730394..93a4f67fd23f2 100644
--- a/common/unsafe/pom.xml
+++ b/common/unsafe/pom.xml
@@ -21,12 +21,12 @@
   <modelVersion>4.0.0</modelVersion>
   <parent>
     <groupId>org.apache.spark</groupId>
-    <artifactId>spark-parent_2.11</artifactId>
-    <version>2.4.0-SNAPSHOT</version>
+    <artifactId>spark-parent_2.12</artifactId>
+    <version>3.0.0-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
 
-  <artifactId>spark-unsafe_2.11</artifactId>
+  <artifactId>spark-unsafe_2.12</artifactId>
   <packaging>jar</packaging>
   <name>Spark Project Unsafe</name>
   <url>http://spark.apache.org/</url>
@@ -89,6 +89,11 @@
       <artifactId>commons-lang3</artifactId>
       <scope>test</scope>
     </dependency>
+    <dependency>
+      <groupId>org.apache.commons</groupId>
+      <artifactId>commons-text</artifactId>
+      <scope>test</scope>
+    </dependency>
   </dependencies>
   <build>
     <outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
diff --git a/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/expressions/HiveHasher.java b/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/expressions/HiveHasher.java
index 62b75ae8aa01d..73577437ac506 100644
--- a/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/expressions/HiveHasher.java
+++ b/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/expressions/HiveHasher.java
@@ -17,8 +17,7 @@
 
 package org.apache.spark.sql.catalyst.expressions;
 
-import org.apache.spark.unsafe.memory.MemoryBlock;
-import org.apache.spark.unsafe.types.UTF8String;
+import org.apache.spark.unsafe.Platform;
 
 /**
  * Simulates Hive's hashing function from Hive v1.2.1
@@ -39,21 +38,12 @@ public static int hashLong(long input) {
     return (int) ((input >>> 32) ^ input);
   }
 
-  public static int hashUnsafeBytesBlock(MemoryBlock mb) {
-    long lengthInBytes = mb.size();
+  public static int hashUnsafeBytes(Object base, long offset, int lengthInBytes) {
     assert (lengthInBytes >= 0): "lengthInBytes cannot be negative";
     int result = 0;
-    for (long i = 0; i < lengthInBytes; i++) {
-      result = (result * 31) + (int) mb.getByte(i);
+    for (int i = 0; i < lengthInBytes; i++) {
+      result = (result * 31) + (int) Platform.getByte(base, offset + i);
     }
     return result;
   }
-
-  public static int hashUnsafeBytes(Object base, long offset, int lengthInBytes) {
-    return hashUnsafeBytesBlock(MemoryBlock.allocateFromObject(base, offset, lengthInBytes));
-  }
-
-  public static int hashUTF8String(UTF8String str) {
-    return hashUnsafeBytesBlock(str.getMemoryBlock());
-  }
 }
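
Note: the restored hashUnsafeBytes above folds each byte with the classic result * 31 + b recurrence that simulates Hive 1.2.1, reading the bytes through Platform. A self-contained sketch of the same recurrence over an ordinary byte[] (class and helper names are illustrative, not from the patch):

    import java.nio.charset.StandardCharsets;

    public class HiveStyleHashDemo {
      // Same fold as HiveHasher.hashUnsafeBytes, minus the Platform indirection.
      static int hashBytes(byte[] bytes) {
        int result = 0;
        for (byte b : bytes) {
          result = (result * 31) + (int) b;   // bytes are sign-extended, as in the patch
        }
        return result;
      }

      public static void main(String[] args) {
        System.out.println(hashBytes("spark".getBytes(StandardCharsets.UTF_8)));
      }
    }
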
diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java
index 54dcadf3a7754..4563efcfcf474 100644
--- a/common/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java
+++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java
@@ -19,10 +19,10 @@
 
 import java.lang.reflect.Constructor;
 import java.lang.reflect.Field;
+import java.lang.reflect.InvocationTargetException;
 import java.lang.reflect.Method;
 import java.nio.ByteBuffer;
 
-import sun.misc.Cleaner;
 import sun.misc.Unsafe;
 
 public final class Platform {
@@ -67,6 +67,60 @@
     unaligned = _unaligned;
   }
 
+  // Access fields and constructors once and store them, for performance:
+
+  private static final Constructor<?> DBB_CONSTRUCTOR;
+  private static final Field DBB_CLEANER_FIELD;
+  static {
+    try {
+      Class<?> cls = Class.forName("java.nio.DirectByteBuffer");
+      Constructor<?> constructor = cls.getDeclaredConstructor(Long.TYPE, Integer.TYPE);
+      constructor.setAccessible(true);
+      Field cleanerField = cls.getDeclaredField("cleaner");
+      cleanerField.setAccessible(true);
+      DBB_CONSTRUCTOR = constructor;
+      DBB_CLEANER_FIELD = cleanerField;
+    } catch (ClassNotFoundException | NoSuchMethodException | NoSuchFieldException e) {
+      throw new IllegalStateException(e);
+    }
+  }
+
+  private static final Method CLEANER_CREATE_METHOD;
+  static {
+    // The implementation of Cleaner changed from JDK 8 to 9
+    // Split java.version on non-digit chars:
+    int majorVersion = Integer.parseInt(System.getProperty("java.version").split("\\D+")[0]);
+    String cleanerClassName;
+    if (majorVersion < 9) {
+      cleanerClassName = "sun.misc.Cleaner";
+    } else {
+      cleanerClassName = "jdk.internal.ref.Cleaner";
+    }
+    try {
+      Class<?> cleanerClass = Class.forName(cleanerClassName);
+      Method createMethod = cleanerClass.getMethod("create", Object.class, Runnable.class);
+      // Accessing jdk.internal.ref.Cleaner should actually fail by default in JDK 9+,
+      // unfortunately, unless the user has allowed access with something like
+      // --add-opens java.base/java.lang=ALL-UNNAMED  If not, we can't really use the Cleaner
+      // hack below. It doesn't break, just means the user might run into the default JVM limit
+      // on off-heap memory and increase it or set the flag above. This tests whether it's
+      // available:
+      try {
+        createMethod.invoke(null, null, null);
+      } catch (IllegalAccessException e) {
+        // Don't throw an exception, but can't log here?
+        createMethod = null;
+      } catch (InvocationTargetException ite) {
+        // shouldn't happen; report it
+        throw new IllegalStateException(ite);
+      }
+      CLEANER_CREATE_METHOD = createMethod;
+    } catch (ClassNotFoundException | NoSuchMethodException e) {
+      throw new IllegalStateException(e);
+    }
+
+  }
+
   /**
    * @return true when running JVM is having sun's Unsafe package available in it and underlying
    *         system having unaligned-access capability.
@@ -120,6 +174,11 @@ public static float getFloat(Object object, long offset) {
   }
 
   public static void putFloat(Object object, long offset, float value) {
+    if (Float.isNaN(value)) {
+      value = Float.NaN;
+    } else if (value == -0.0f) {
+      value = 0.0f;
+    }
     _UNSAFE.putFloat(object, offset, value);
   }
 
@@ -128,6 +187,11 @@ public static double getDouble(Object object, long offset) {
   }
 
   public static void putDouble(Object object, long offset, double value) {
+    if (Double.isNaN(value)) {
+      value = Double.NaN;
+    } else if (value == -0.0d) {
+      value = 0.0d;
+    }
     _UNSAFE.putDouble(object, offset, value);
   }
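
Note: the branches added to putFloat and putDouble canonicalize every NaN to Float.NaN/Double.NaN and rewrite -0.0 as 0.0 before the raw write, presumably so that byte-wise comparison and hashing of the stored values behave consistently. A small illustration of the bit-level mismatch being avoided (illustration only, not part of the patch):

    public class FloatBitsDemo {
      public static void main(String[] args) {
        // -0.0 and 0.0 compare equal numerically but have different raw bit patterns.
        System.out.println(-0.0d == 0.0d);                                  // true
        System.out.println(Double.doubleToRawLongBits(-0.0d)
            == Double.doubleToRawLongBits(0.0d));                           // false

        // NaN has many encodings; normalizing to Double.NaN picks one canonical pattern.
        double otherNaN = Double.longBitsToDouble(
            Double.doubleToRawLongBits(Double.NaN) | 1L);
        System.out.println(Double.isNaN(otherNaN));                         // true
        System.out.println(Double.doubleToRawLongBits(otherNaN)
            == Double.doubleToRawLongBits(Double.NaN));                     // false
      }
    }
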
 
@@ -159,18 +223,18 @@ public static long reallocateMemory(long address, long oldSize, long newSize) {
    * MaxDirectMemorySize limit (the default limit is too low and we do not want to require users
    * to increase it).
    */
-  @SuppressWarnings("unchecked")
   public static ByteBuffer allocateDirectBuffer(int size) {
     try {
-      Class<?> cls = Class.forName("java.nio.DirectByteBuffer");
-      Constructor<?> constructor = cls.getDeclaredConstructor(Long.TYPE, Integer.TYPE);
-      constructor.setAccessible(true);
-      Field cleanerField = cls.getDeclaredField("cleaner");
-      cleanerField.setAccessible(true);
       long memory = allocateMemory(size);
-      ByteBuffer buffer = (ByteBuffer) constructor.newInstance(memory, size);
-      Cleaner cleaner = Cleaner.create(buffer, () -> freeMemory(memory));
-      cleanerField.set(buffer, cleaner);
+      ByteBuffer buffer = (ByteBuffer) DBB_CONSTRUCTOR.newInstance(memory, size);
+      if (CLEANER_CREATE_METHOD != null) {
+        try {
+          DBB_CLEANER_FIELD.set(buffer,
+              CLEANER_CREATE_METHOD.invoke(null, buffer, (Runnable) () -> freeMemory(memory)));
+        } catch (IllegalAccessException | InvocationTargetException e) {
+          throw new IllegalStateException(e);
+        }
+      }
       return buffer;
     } catch (Exception e) {
       throwException(e);
@@ -187,7 +251,7 @@ public static void setMemory(long address, byte value, long size) {
   }
 
   public static void copyMemory(
-      Object src, long srcOffset, Object dst, long dstOffset, long length) {
+    Object src, long srcOffset, Object dst, long dstOffset, long length) {
     // Check if dstOffset is before or after srcOffset to determine if we should copy
     // forward or backwards. This is necessary in case src and dst overlap.
     if (dstOffset < srcOffset) {
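
Note: the reworked allocateDirectBuffer caches the reflective DirectByteBuffer constructor and cleaner field once in the static initializers above, resolves Cleaner.create from sun.misc.Cleaner on JDK 8 or jdk.internal.ref.Cleaner on JDK 9+, and simply skips attaching a cleaner when module access is denied. A condensed stand-alone sketch of the same reflection pattern, JDK 8 only and without the fallback handling, so treat it as an illustration rather than the patch's implementation:

    import java.lang.reflect.Constructor;
    import java.lang.reflect.Field;
    import java.lang.reflect.Method;
    import java.nio.ByteBuffer;

    public class DirectBufferSketch {
      // Wrap raw memory at `address` as a direct ByteBuffer and attach a Cleaner
      // that runs `release` when the buffer becomes unreachable.
      static ByteBuffer wrap(long address, int size, Runnable release) throws Exception {
        Class<?> cls = Class.forName("java.nio.DirectByteBuffer");
        Constructor<?> ctor = cls.getDeclaredConstructor(Long.TYPE, Integer.TYPE);
        ctor.setAccessible(true);
        Field cleanerField = cls.getDeclaredField("cleaner");
        cleanerField.setAccessible(true);

        ByteBuffer buffer = (ByteBuffer) ctor.newInstance(address, size);
        Class<?> cleanerClass = Class.forName("sun.misc.Cleaner");   // JDK 8 location
        Method create = cleanerClass.getMethod("create", Object.class, Runnable.class);
        cleanerField.set(buffer, create.invoke(null, buffer, release));
        return buffer;
      }
    }

On JDK 9+ the reflective access above is blocked by default unless the relevant packages are opened with --add-opens, which is exactly the situation the CLEANER_CREATE_METHOD probe in the patch detects and tolerates.
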
diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/UnsafeAlignedOffset.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/UnsafeAlignedOffset.java
index be62e40412f83..546e8780a6606 100644
--- a/common/unsafe/src/main/java/org/apache/spark/unsafe/UnsafeAlignedOffset.java
+++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/UnsafeAlignedOffset.java
@@ -39,7 +39,9 @@ public static int getSize(Object object, long offset) {
       case 8:
         return (int)Platform.getLong(object, offset);
       default:
+        // checkstyle.off: RegexpSinglelineJava
         throw new AssertionError("Illegal UAO_SIZE");
+        // checkstyle.on: RegexpSinglelineJava
     }
   }
 
@@ -52,7 +54,9 @@ public static void putSize(Object object, long offset, int value) {
         Platform.putLong(object, offset, value);
         break;
       default:
+        // checkstyle.off: RegexpSinglelineJava
         throw new AssertionError("Illegal UAO_SIZE");
+        // checkstyle.on: RegexpSinglelineJava
     }
   }
 }
diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/array/ByteArrayMethods.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/array/ByteArrayMethods.java
index ef0f78d95d1ee..cec8c30887e2f 100644
--- a/common/unsafe/src/main/java/org/apache/spark/unsafe/array/ByteArrayMethods.java
+++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/array/ByteArrayMethods.java
@@ -18,7 +18,6 @@
 package org.apache.spark.unsafe.array;
 
 import org.apache.spark.unsafe.Platform;
-import org.apache.spark.unsafe.memory.MemoryBlock;
 
 public class ByteArrayMethods {
 
@@ -53,25 +52,15 @@ public static long roundNumberOfBytesToNearestWord(long numBytes) {
   public static int MAX_ROUNDED_ARRAY_LENGTH = Integer.MAX_VALUE - 15;
 
   private static final boolean unaligned = Platform.unaligned();
-  /**
-   * MemoryBlock equality check for MemoryBlocks.
-   * @return true if the arrays are equal, false otherwise
-   */
-  public static boolean arrayEqualsBlock(
-      MemoryBlock leftBase, long leftOffset, MemoryBlock rightBase, long rightOffset, long length) {
-    return arrayEquals(leftBase.getBaseObject(), leftBase.getBaseOffset() + leftOffset,
-      rightBase.getBaseObject(), rightBase.getBaseOffset() + rightOffset, length);
-  }
-
   /**
    * Optimized byte array equality check for byte arrays.
    * @return true if the arrays are equal, false otherwise
    */
   public static boolean arrayEquals(
-      Object leftBase, long leftOffset, Object rightBase, long rightOffset, long length) {
+      Object leftBase, long leftOffset, Object rightBase, long rightOffset, final long length) {
     int i = 0;
 
-    // check if starts align and we can get both offsets to be aligned
+    // check if stars align and we can get both offsets to be aligned
     if ((leftOffset % 8) == (rightOffset % 8)) {
       while ((leftOffset + i) % 8 != 0 && i < length) {
         if (Platform.getByte(leftBase, leftOffset + i) !=
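
Note: arrayEquals above first checks whether the two offsets have the same remainder modulo 8; if so it steps byte-by-byte to an 8-byte boundary and can then compare a whole word per iteration. A self-contained sketch of the word-at-a-time idea over plain byte arrays, using ByteBuffer instead of Platform (names illustrative, not from the patch):

    import java.nio.ByteBuffer;

    public class WordCompareSketch {
      // Compare two byte arrays eight bytes at a time, then finish byte-by-byte.
      static boolean arraysEqual(byte[] a, byte[] b) {
        if (a.length != b.length) return false;
        ByteBuffer ba = ByteBuffer.wrap(a);
        ByteBuffer bb = ByteBuffer.wrap(b);
        int i = 0;
        for (; i + 8 <= a.length; i += 8) {        // word-at-a-time fast path
          if (ba.getLong(i) != bb.getLong(i)) return false;
        }
        for (; i < a.length; i++) {                // trailing bytes
          if (a[i] != b[i]) return false;
        }
        return true;
      }

      public static void main(String[] args) {
        System.out.println(arraysEqual("hello world!".getBytes(), "hello world!".getBytes()));  // true
      }
    }
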
diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/array/LongArray.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/array/LongArray.java
index b74d2de0691d5..2cd39bd60c2ac 100644
--- a/common/unsafe/src/main/java/org/apache/spark/unsafe/array/LongArray.java
+++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/array/LongArray.java
@@ -17,6 +17,7 @@
 
 package org.apache.spark.unsafe.array;
 
+import org.apache.spark.unsafe.Platform;
 import org.apache.spark.unsafe.memory.MemoryBlock;
 
 /**
@@ -32,12 +33,16 @@
   private static final long WIDTH = 8;
 
   private final MemoryBlock memory;
+  private final Object baseObj;
+  private final long baseOffset;
 
   private final long length;
 
   public LongArray(MemoryBlock memory) {
     assert memory.size() < (long) Integer.MAX_VALUE * 8: "Array size >= Integer.MAX_VALUE elements";
     this.memory = memory;
+    this.baseObj = memory.getBaseObject();
+    this.baseOffset = memory.getBaseOffset();
     this.length = memory.size() / WIDTH;
   }
 
@@ -46,11 +51,11 @@ public MemoryBlock memoryBlock() {
   }
 
   public Object getBaseObject() {
-    return memory.getBaseObject();
+    return baseObj;
   }
 
   public long getBaseOffset() {
-    return memory.getBaseOffset();
+    return baseOffset;
   }
 
   /**
@@ -64,8 +69,8 @@ public long size() {
    * Fill this all with 0L.
    */
   public void zeroOut() {
-    for (long off = 0; off < length * WIDTH; off += WIDTH) {
-      memory.putLong(off, 0);
+    for (long off = baseOffset; off < baseOffset + length * WIDTH; off += WIDTH) {
+      Platform.putLong(baseObj, off, 0);
     }
   }
 
@@ -75,7 +80,7 @@ public void zeroOut() {
   public void set(int index, long value) {
     assert index >= 0 : "index (" + index + ") should >= 0";
     assert index < length : "index (" + index + ") should < length (" + length + ")";
-    memory.putLong(index * WIDTH, value);
+    Platform.putLong(baseObj, baseOffset + index * WIDTH, value);
   }
 
   /**
@@ -84,6 +89,6 @@ public void set(int index, long value) {
   public long get(int index) {
     assert index >= 0 : "index (" + index + ") should >= 0";
     assert index < length : "index (" + index + ") should < length (" + length + ")";
-    return memory.getLong(index * WIDTH);
+    return Platform.getLong(baseObj, baseOffset + index * WIDTH);
   }
 }
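
Note: LongArray now snapshots baseObj/baseOffset from the backing MemoryBlock in its constructor and reads and writes through Platform.getLong/putLong directly, instead of going through the removed MemoryBlock accessors. A brief usage sketch with an on-heap block; the sizes are arbitrary:

    import org.apache.spark.unsafe.array.LongArray;
    import org.apache.spark.unsafe.memory.MemoryBlock;

    public class LongArrayDemo {
      public static void main(String[] args) {
        // Wrap 16 longs of on-heap storage as a MemoryBlock (factory restored by this patch).
        LongArray arr = new LongArray(MemoryBlock.fromLongArray(new long[16]));
        arr.zeroOut();                      // fill with 0L
        arr.set(3, 42L);                    // write element 3
        System.out.println(arr.get(3));     // 42
        System.out.println(arr.size());     // 16
      }
    }
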
diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/hash/Murmur3_x86_32.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/hash/Murmur3_x86_32.java
index aff6e93d647fe..d239de6083ad0 100644
--- a/common/unsafe/src/main/java/org/apache/spark/unsafe/hash/Murmur3_x86_32.java
+++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/hash/Murmur3_x86_32.java
@@ -17,10 +17,7 @@
 
 package org.apache.spark.unsafe.hash;
 
-import com.google.common.primitives.Ints;
-
-import org.apache.spark.unsafe.memory.MemoryBlock;
-import org.apache.spark.unsafe.types.UTF8String;
+import org.apache.spark.unsafe.Platform;
 
 /**
  * 32-bit Murmur3 hasher.  This is based on Guava's Murmur3_32HashFunction.
@@ -52,70 +49,49 @@ public static int hashInt(int input, int seed) {
   }
 
   public int hashUnsafeWords(Object base, long offset, int lengthInBytes) {
-    return hashUnsafeWordsBlock(MemoryBlock.allocateFromObject(base, offset, lengthInBytes), seed);
+    return hashUnsafeWords(base, offset, lengthInBytes, seed);
   }
 
-  public static int hashUnsafeWordsBlock(MemoryBlock base, int seed) {
+  public static int hashUnsafeWords(Object base, long offset, int lengthInBytes, int seed) {
     // This is based on Guava's `Murmur32_Hasher.processRemaining(ByteBuffer)` method.
-    int lengthInBytes = Ints.checkedCast(base.size());
     assert (lengthInBytes % 8 == 0): "lengthInBytes must be a multiple of 8 (word-aligned)";
-    int h1 = hashBytesByIntBlock(base, seed);
+    int h1 = hashBytesByInt(base, offset, lengthInBytes, seed);
     return fmix(h1, lengthInBytes);
   }
 
-  public static int hashUnsafeWords(Object base, long offset, int lengthInBytes, int seed) {
-    // This is based on Guava's `Murmur32_Hasher.processRemaining(ByteBuffer)` method.
-    return hashUnsafeWordsBlock(MemoryBlock.allocateFromObject(base, offset, lengthInBytes), seed);
-  }
-
-  public static int hashUnsafeBytesBlock(MemoryBlock base, int seed) {
+  public static int hashUnsafeBytes(Object base, long offset, int lengthInBytes, int seed) {
     // This is not compatible with original and another implementations.
     // But remain it for backward compatibility for the components existing before 2.3.
-    int lengthInBytes = Ints.checkedCast(base.size());
     assert (lengthInBytes >= 0): "lengthInBytes cannot be negative";
     int lengthAligned = lengthInBytes - lengthInBytes % 4;
-    int h1 = hashBytesByIntBlock(base.subBlock(0, lengthAligned), seed);
+    int h1 = hashBytesByInt(base, offset, lengthAligned, seed);
     for (int i = lengthAligned; i < lengthInBytes; i++) {
-      int halfWord = base.getByte(i);
+      int halfWord = Platform.getByte(base, offset + i);
       int k1 = mixK1(halfWord);
       h1 = mixH1(h1, k1);
     }
     return fmix(h1, lengthInBytes);
   }
 
-  public static int hashUTF8String(UTF8String str, int seed) {
-    return hashUnsafeBytesBlock(str.getMemoryBlock(), seed);
-  }
-
-  public static int hashUnsafeBytes(Object base, long offset, int lengthInBytes, int seed) {
-    return hashUnsafeBytesBlock(MemoryBlock.allocateFromObject(base, offset, lengthInBytes), seed);
-  }
-
   public static int hashUnsafeBytes2(Object base, long offset, int lengthInBytes, int seed) {
-    return hashUnsafeBytes2Block(MemoryBlock.allocateFromObject(base, offset, lengthInBytes), seed);
-  }
-
-  public static int hashUnsafeBytes2Block(MemoryBlock base, int seed) {
-    // This is compatible with original and other implementations.
+    // This is compatible with original and other implementations.
     // Use this method for new components after Spark 2.3.
-    int lengthInBytes = Ints.checkedCast(base.size());
-    assert (lengthInBytes >= 0) : "lengthInBytes cannot be negative";
+    assert (lengthInBytes >= 0): "lengthInBytes cannot be negative";
     int lengthAligned = lengthInBytes - lengthInBytes % 4;
-    int h1 = hashBytesByIntBlock(base.subBlock(0, lengthAligned), seed);
+    int h1 = hashBytesByInt(base, offset, lengthAligned, seed);
     int k1 = 0;
     for (int i = lengthAligned, shift = 0; i < lengthInBytes; i++, shift += 8) {
-      k1 ^= (base.getByte(i) & 0xFF) << shift;
+      k1 ^= (Platform.getByte(base, offset + i) & 0xFF) << shift;
     }
     h1 ^= mixK1(k1);
     return fmix(h1, lengthInBytes);
   }
 
-  private static int hashBytesByIntBlock(MemoryBlock base, int seed) {
-    long lengthInBytes = base.size();
+  private static int hashBytesByInt(Object base, long offset, int lengthInBytes, int seed) {
     assert (lengthInBytes % 4 == 0);
     int h1 = seed;
-    for (long i = 0; i < lengthInBytes; i += 4) {
-      int halfWord = base.getInt(i);
+    for (int i = 0; i < lengthInBytes; i += 4) {
+      int halfWord = Platform.getInt(base, offset + i);
       int k1 = mixK1(halfWord);
       h1 = mixH1(h1, k1);
     }
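
Note: after this change the Murmur3 helpers take a raw (base, offset, length) triple plus a seed instead of a MemoryBlock. A short illustrative call hashing an on-heap byte array via Platform.BYTE_ARRAY_OFFSET; the seed of 42 is arbitrary:

    import java.nio.charset.StandardCharsets;
    import org.apache.spark.unsafe.Platform;
    import org.apache.spark.unsafe.hash.Murmur3_x86_32;

    public class MurmurDemo {
      public static void main(String[] args) {
        byte[] bytes = "spark".getBytes(StandardCharsets.UTF_8);
        // Legacy variant kept for components that predate Spark 2.3.
        int h1 = Murmur3_x86_32.hashUnsafeBytes(bytes, Platform.BYTE_ARRAY_OFFSET, bytes.length, 42);
        // Variant recommended for newer components.
        int h2 = Murmur3_x86_32.hashUnsafeBytes2(bytes, Platform.BYTE_ARRAY_OFFSET, bytes.length, 42);
        System.out.println(h1 + " " + h2);
      }
    }
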
diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/ByteArrayMemoryBlock.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/ByteArrayMemoryBlock.java
deleted file mode 100644
index 9f238632bc87a..0000000000000
--- a/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/ByteArrayMemoryBlock.java
+++ /dev/null
@@ -1,128 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.unsafe.memory;
-
-import com.google.common.primitives.Ints;
-
-import org.apache.spark.unsafe.Platform;
-
-/**
- * A consecutive block of memory with a byte array on Java heap.
- */
-public final class ByteArrayMemoryBlock extends MemoryBlock {
-
-  private final byte[] array;
-
-  public ByteArrayMemoryBlock(byte[] obj, long offset, long size) {
-    super(obj, offset, size);
-    this.array = obj;
-    assert(offset + size <= Platform.BYTE_ARRAY_OFFSET + obj.length) :
-      "The sum of size " + size + " and offset " + offset + " should not be larger than " +
-        "the size of the given memory space " + (obj.length + Platform.BYTE_ARRAY_OFFSET);
-  }
-
-  public ByteArrayMemoryBlock(long length) {
-    this(new byte[Ints.checkedCast(length)], Platform.BYTE_ARRAY_OFFSET, length);
-  }
-
-  @Override
-  public MemoryBlock subBlock(long offset, long size) {
-    checkSubBlockRange(offset, size);
-    if (offset == 0 && size == this.size()) return this;
-    return new ByteArrayMemoryBlock(array, this.offset + offset, size);
-  }
-
-  public byte[] getByteArray() { return array; }
-
-  /**
-   * Creates a memory block pointing to the memory used by the byte array.
-   */
-  public static ByteArrayMemoryBlock fromArray(final byte[] array) {
-    return new ByteArrayMemoryBlock(array, Platform.BYTE_ARRAY_OFFSET, array.length);
-  }
-
-  @Override
-  public int getInt(long offset) {
-    return Platform.getInt(array, this.offset + offset);
-  }
-
-  @Override
-  public void putInt(long offset, int value) {
-    Platform.putInt(array, this.offset + offset, value);
-  }
-
-  @Override
-  public boolean getBoolean(long offset) {
-    return Platform.getBoolean(array, this.offset + offset);
-  }
-
-  @Override
-  public void putBoolean(long offset, boolean value) {
-    Platform.putBoolean(array, this.offset + offset, value);
-  }
-
-  @Override
-  public byte getByte(long offset) {
-    return array[(int)(this.offset + offset - Platform.BYTE_ARRAY_OFFSET)];
-  }
-
-  @Override
-  public void putByte(long offset, byte value) {
-    array[(int)(this.offset + offset - Platform.BYTE_ARRAY_OFFSET)] = value;
-  }
-
-  @Override
-  public short getShort(long offset) {
-    return Platform.getShort(array, this.offset + offset);
-  }
-
-  @Override
-  public void putShort(long offset, short value) {
-    Platform.putShort(array, this.offset + offset, value);
-  }
-
-  @Override
-  public long getLong(long offset) {
-    return Platform.getLong(array, this.offset + offset);
-  }
-
-  @Override
-  public void putLong(long offset, long value) {
-    Platform.putLong(array, this.offset + offset, value);
-  }
-
-  @Override
-  public float getFloat(long offset) {
-    return Platform.getFloat(array, this.offset + offset);
-  }
-
-  @Override
-  public void putFloat(long offset, float value) {
-    Platform.putFloat(array, this.offset + offset, value);
-  }
-
-  @Override
-  public double getDouble(long offset) {
-    return Platform.getDouble(array, this.offset + offset);
-  }
-
-  @Override
-  public void putDouble(long offset, double value) {
-    Platform.putDouble(array, this.offset + offset, value);
-  }
-}
diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/HeapMemoryAllocator.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/HeapMemoryAllocator.java
index 36caf80888cda..2733760dd19ef 100644
--- a/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/HeapMemoryAllocator.java
+++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/HeapMemoryAllocator.java
@@ -23,6 +23,8 @@
 import java.util.LinkedList;
 import java.util.Map;
 
+import org.apache.spark.unsafe.Platform;
+
 /**
  * A simple {@link MemoryAllocator} that can allocate up to 16GB using a JVM long primitive array.
  */
@@ -56,7 +58,7 @@ public MemoryBlock allocate(long size) throws OutOfMemoryError {
             final long[] array = arrayReference.get();
             if (array != null) {
               assert (array.length * 8L >= size);
-              MemoryBlock memory = OnHeapMemoryBlock.fromArray(array, size);
+              MemoryBlock memory = new MemoryBlock(array, Platform.LONG_ARRAY_OFFSET, size);
               if (MemoryAllocator.MEMORY_DEBUG_FILL_ENABLED) {
                 memory.fill(MemoryAllocator.MEMORY_DEBUG_FILL_CLEAN_VALUE);
               }
@@ -68,7 +70,7 @@ public MemoryBlock allocate(long size) throws OutOfMemoryError {
       }
     }
     long[] array = new long[numWords];
-    MemoryBlock memory = OnHeapMemoryBlock.fromArray(array, size);
+    MemoryBlock memory = new MemoryBlock(array, Platform.LONG_ARRAY_OFFSET, size);
     if (MemoryAllocator.MEMORY_DEBUG_FILL_ENABLED) {
       memory.fill(MemoryAllocator.MEMORY_DEBUG_FILL_CLEAN_VALUE);
     }
@@ -77,13 +79,12 @@ public MemoryBlock allocate(long size) throws OutOfMemoryError {
 
   @Override
   public void free(MemoryBlock memory) {
-    assert(memory instanceof OnHeapMemoryBlock);
-    assert (memory.getBaseObject() != null) :
+    assert (memory.obj != null) :
       "baseObject was null; are you trying to use the on-heap allocator to free off-heap memory?";
-    assert (memory.getPageNumber() != MemoryBlock.FREED_IN_ALLOCATOR_PAGE_NUMBER) :
+    assert (memory.pageNumber != MemoryBlock.FREED_IN_ALLOCATOR_PAGE_NUMBER) :
       "page has already been freed";
-    assert ((memory.getPageNumber() == MemoryBlock.NO_PAGE_NUMBER)
-            || (memory.getPageNumber() == MemoryBlock.FREED_IN_TMM_PAGE_NUMBER)) :
+    assert ((memory.pageNumber == MemoryBlock.NO_PAGE_NUMBER)
+            || (memory.pageNumber == MemoryBlock.FREED_IN_TMM_PAGE_NUMBER)) :
       "TMM-allocated pages must first be freed via TMM.freePage(), not directly in allocator " +
         "free()";
 
@@ -93,12 +94,12 @@ public void free(MemoryBlock memory) {
     }
 
     // Mark the page as freed (so we can detect double-frees).
-    memory.setPageNumber(MemoryBlock.FREED_IN_ALLOCATOR_PAGE_NUMBER);
+    memory.pageNumber = MemoryBlock.FREED_IN_ALLOCATOR_PAGE_NUMBER;
 
     // As an additional layer of defense against use-after-free bugs, we mutate the
     // MemoryBlock to null out its reference to the long[] array.
-    long[] array = ((OnHeapMemoryBlock)memory).getLongArray();
-    memory.resetObjAndOffset();
+    long[] array = (long[]) memory.obj;
+    memory.setObjAndOffset(null, 0);
 
     long alignedSize = ((size + 7) / 8) * 8;
     if (shouldPool(alignedSize)) {
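
Note: both allocation paths above size the backing long[] in whole words, and free() rounds the block size up with ((size + 7) / 8) * 8 before deciding whether the array goes back into the pool. A quick check of that rounding expression (illustration only):

    public class WordRoundingDemo {
      public static void main(String[] args) {
        for (long size : new long[] {1, 7, 8, 9, 16, 17}) {
          long alignedSize = ((size + 7) / 8) * 8;   // round up to the next multiple of 8 bytes
          System.out.println(size + " -> " + alignedSize);   // 1->8, 7->8, 8->8, 9->16, 16->16, 17->24
        }
      }
    }
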
diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/MemoryAllocator.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/MemoryAllocator.java
index 38315fb97b46a..7b588681d9790 100644
--- a/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/MemoryAllocator.java
+++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/MemoryAllocator.java
@@ -38,7 +38,7 @@
 
   void free(MemoryBlock memory);
 
-  UnsafeMemoryAllocator UNSAFE = new UnsafeMemoryAllocator();
+  MemoryAllocator UNSAFE = new UnsafeMemoryAllocator();
 
-  HeapMemoryAllocator HEAP = new HeapMemoryAllocator();
+  MemoryAllocator HEAP = new HeapMemoryAllocator();
 }
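
Note: typing the UNSAFE and HEAP constants as MemoryAllocator rather than as the concrete classes lets callers choose an allocator polymorphically. A minimal usage sketch; the 1024-byte size is arbitrary:

    import org.apache.spark.unsafe.memory.MemoryAllocator;
    import org.apache.spark.unsafe.memory.MemoryBlock;

    public class AllocatorDemo {
      public static void main(String[] args) {
        // On-heap allocator backed by long[]; MemoryAllocator.UNSAFE would allocate off-heap.
        MemoryAllocator allocator = MemoryAllocator.HEAP;
        MemoryBlock block = allocator.allocate(1024);
        System.out.println("allocated " + block.size() + " bytes");
        allocator.free(block);   // always return the block to the allocator that produced it
      }
    }
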
diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/MemoryBlock.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/MemoryBlock.java
index ca7213bbf92da..c333857358d30 100644
--- a/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/MemoryBlock.java
+++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/MemoryBlock.java
@@ -22,10 +22,10 @@
 import org.apache.spark.unsafe.Platform;
 
 /**
- * A representation of a consecutive memory block in Spark. It defines the common interfaces
- * for memory accessing and mutating.
+ * A consecutive block of memory, starting at a {@link MemoryLocation} with a fixed size.
  */
-public abstract class MemoryBlock {
+public class MemoryBlock extends MemoryLocation {
+
   /** Special `pageNumber` value for pages which were not allocated by TaskMemoryManagers */
   public static final int NO_PAGE_NUMBER = -1;
 
@@ -45,163 +45,38 @@
    */
   public static final int FREED_IN_ALLOCATOR_PAGE_NUMBER = -3;
 
-  @Nullable
-  protected Object obj;
-
-  protected long offset;
-
-  protected long length;
+  private final long length;
 
   /**
    * Optional page number; used when this MemoryBlock represents a page allocated by a
-   * TaskMemoryManager. This field can be updated using setPageNumber method so that
-   * this can be modified by the TaskMemoryManager, which lives in a different package.
+   * TaskMemoryManager. This field is public so that it can be modified by the TaskMemoryManager,
+   * which lives in a different package.
    */
-  private int pageNumber = NO_PAGE_NUMBER;
+  public int pageNumber = NO_PAGE_NUMBER;
 
-  protected MemoryBlock(@Nullable Object obj, long offset, long length) {
-    if (offset < 0 || length < 0) {
-      throw new IllegalArgumentException(
-        "Length " + length + " and offset " + offset + "must be non-negative");
-    }
-    this.obj = obj;
-    this.offset = offset;
+  public MemoryBlock(@Nullable Object obj, long offset, long length) {
+    super(obj, offset);
     this.length = length;
   }
 
-  protected MemoryBlock() {
-    this(null, 0, 0);
-  }
-
-  public final Object getBaseObject() {
-    return obj;
-  }
-
-  public final long getBaseOffset() {
-    return offset;
-  }
-
-  public void resetObjAndOffset() {
-    this.obj = null;
-    this.offset = 0;
-  }
-
   /**
    * Returns the size of the memory block.
    */
-  public final long size() {
+  public long size() {
     return length;
   }
 
-  public final void setPageNumber(int pageNum) {
-    pageNumber = pageNum;
-  }
-
-  public final int getPageNumber() {
-    return pageNumber;
-  }
-
-  /**
-   * Fills the memory block with the specified byte value.
-   */
-  public final void fill(byte value) {
-    Platform.setMemory(obj, offset, length, value);
-  }
-
-  /**
-   * Instantiate MemoryBlock for given object type with new offset
-   */
-  public static final MemoryBlock allocateFromObject(Object obj, long offset, long length) {
-    MemoryBlock mb = null;
-    if (obj instanceof byte[]) {
-      byte[] array = (byte[])obj;
-      mb = new ByteArrayMemoryBlock(array, offset, length);
-    } else if (obj instanceof long[]) {
-      long[] array = (long[])obj;
-      mb = new OnHeapMemoryBlock(array, offset, length);
-    } else if (obj == null) {
-      // we assume that to pass null pointer means off-heap
-      mb = new OffHeapMemoryBlock(offset, length);
-    } else {
-      throw new UnsupportedOperationException(
-        "Instantiate MemoryBlock for type " + obj.getClass() + " is not supported now");
-    }
-    return mb;
-  }
-
   /**
-   * Just instantiate the sub-block with the same type of MemoryBlock with the new size and relative
-   * offset from the original offset. The data is not copied.
-   * If parameters are invalid, an exception is thrown.
+   * Creates a memory block pointing to the memory used by the long array.
    */
-  public abstract MemoryBlock subBlock(long offset, long size);
-
-  protected void checkSubBlockRange(long offset, long size) {
-    if (offset < 0 || size < 0) {
-      throw new ArrayIndexOutOfBoundsException(
-        "Size " + size + " and offset " + offset + " must be non-negative");
-    }
-    if (offset + size > length) {
-      throw new ArrayIndexOutOfBoundsException("The sum of size " + size + " and offset " +
-        offset + " should not be larger than the length " + length + " in the MemoryBlock");
-    }
+  public static MemoryBlock fromLongArray(final long[] array) {
+    return new MemoryBlock(array, Platform.LONG_ARRAY_OFFSET, array.length * 8L);
   }
 
   /**
-   * getXXX/putXXX does not ensure guarantee behavior if the offset is invalid. e.g  cause illegal
-   * memory access, throw an exception, or etc.
-   * getXXX/putXXX uses an index based on this.offset that includes the size of metadata such as
-   * JVM object header. The offset is 0-based and is expected as an logical offset in the memory
-   * block.
+   * Fills the memory block with the specified byte value.
    */
-  public abstract int getInt(long offset);
-
-  public abstract void putInt(long offset, int value);
-
-  public abstract boolean getBoolean(long offset);
-
-  public abstract void putBoolean(long offset, boolean value);
-
-  public abstract byte getByte(long offset);
-
-  public abstract void putByte(long offset, byte value);
-
-  public abstract short getShort(long offset);
-
-  public abstract void putShort(long offset, short value);
-
-  public abstract long getLong(long offset);
-
-  public abstract void putLong(long offset, long value);
-
-  public abstract float getFloat(long offset);
-
-  public abstract void putFloat(long offset, float value);
-
-  public abstract double getDouble(long offset);
-
-  public abstract void putDouble(long offset, double value);
-
-  public static final void copyMemory(
-      MemoryBlock src, long srcOffset, MemoryBlock dst, long dstOffset, long length) {
-    assert(srcOffset + length <= src.length && dstOffset + length <= dst.length);
-    Platform.copyMemory(src.getBaseObject(), src.getBaseOffset() + srcOffset,
-      dst.getBaseObject(), dst.getBaseOffset() + dstOffset, length);
-  }
-
-  public static final void copyMemory(MemoryBlock src, MemoryBlock dst, long length) {
-    assert(length <= src.length && length <= dst.length);
-    Platform.copyMemory(src.getBaseObject(), src.getBaseOffset(),
-      dst.getBaseObject(), dst.getBaseOffset(), length);
-  }
-
-  public final void copyFrom(Object src, long srcOffset, long dstOffset, long length) {
-    assert(length <= this.length - srcOffset);
-    Platform.copyMemory(src, srcOffset, obj, offset + dstOffset, length);
-  }
-
-  public final void writeTo(long srcOffset, Object dst, long dstOffset, long length) {
-    assert(length <= this.length - srcOffset);
-    Platform.copyMemory(obj, offset + srcOffset, dst, dstOffset, length);
+  public void fill(byte value) {
+    Platform.setMemory(obj, offset, length, value);
   }
 }
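
Note: with this revert MemoryBlock is again just a MemoryLocation plus a length; the typed getXXX/putXXX accessors and subBlock are gone, and callers read and write through Platform using getBaseObject()/getBaseOffset(). A small sketch of that access pattern (values arbitrary):

    import org.apache.spark.unsafe.Platform;
    import org.apache.spark.unsafe.memory.MemoryBlock;

    public class MemoryBlockDemo {
      public static void main(String[] args) {
        MemoryBlock block = MemoryBlock.fromLongArray(new long[4]);   // 32 bytes on the heap
        block.fill((byte) 0);
        // Access goes through Platform against (baseObject, baseOffset + logicalOffset).
        Platform.putLong(block.getBaseObject(), block.getBaseOffset() + 8, 123L);
        System.out.println(Platform.getLong(block.getBaseObject(), block.getBaseOffset() + 8));  // 123
      }
    }
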
diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/MemoryLocation.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/MemoryLocation.java
new file mode 100644
index 0000000000000..74ebc87dc978c
--- /dev/null
+++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/MemoryLocation.java
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.unsafe.memory;
+
+import javax.annotation.Nullable;
+
+/**
+ * A memory location. Tracked either by a memory address (with off-heap allocation),
+ * or by an offset from a JVM object (in-heap allocation).
+ */
+public class MemoryLocation {
+
+  @Nullable
+  Object obj;
+
+  long offset;
+
+  public MemoryLocation(@Nullable Object obj, long offset) {
+    this.obj = obj;
+    this.offset = offset;
+  }
+
+  public MemoryLocation() {
+    this(null, 0);
+  }
+
+  public void setObjAndOffset(Object newObj, long newOffset) {
+    this.obj = newObj;
+    this.offset = newOffset;
+  }
+
+  public final Object getBaseObject() {
+    return obj;
+  }
+
+  public final long getBaseOffset() {
+    return offset;
+  }
+}
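
Note: MemoryLocation captures the two addressing modes used throughout this module: on-heap locations pair a JVM object with an offset into it, while off-heap locations use a null base object and an absolute address. A compact illustration of that convention (the off-heap allocation must be freed explicitly):

    import org.apache.spark.unsafe.Platform;

    public class AddressingDemo {
      public static void main(String[] args) {
        // On-heap: base object plus an offset past the array header.
        long[] onHeap = new long[1];
        Platform.putLong(onHeap, Platform.LONG_ARRAY_OFFSET, 7L);

        // Off-heap: null base plus an absolute address from the allocator.
        long address = Platform.allocateMemory(8);
        Platform.putLong(null, address, 7L);

        System.out.println(Platform.getLong(onHeap, Platform.LONG_ARRAY_OFFSET)
            == Platform.getLong(null, address));   // true
        Platform.freeMemory(address);
      }
    }
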
diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/OffHeapMemoryBlock.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/OffHeapMemoryBlock.java
deleted file mode 100644
index 3431b08980eb8..0000000000000
--- a/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/OffHeapMemoryBlock.java
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.unsafe.memory;
-
-import org.apache.spark.unsafe.Platform;
-
-public class OffHeapMemoryBlock extends MemoryBlock {
-  public static final OffHeapMemoryBlock NULL = new OffHeapMemoryBlock(0, 0);
-
-  public OffHeapMemoryBlock(long address, long size) {
-    super(null, address, size);
-  }
-
-  @Override
-  public MemoryBlock subBlock(long offset, long size) {
-    checkSubBlockRange(offset, size);
-    if (offset == 0 && size == this.size()) return this;
-    return new OffHeapMemoryBlock(this.offset + offset, size);
-  }
-
-  @Override
-  public final int getInt(long offset) {
-    return Platform.getInt(null, this.offset + offset);
-  }
-
-  @Override
-  public final void putInt(long offset, int value) {
-    Platform.putInt(null, this.offset + offset, value);
-  }
-
-  @Override
-  public final boolean getBoolean(long offset) {
-    return Platform.getBoolean(null, this.offset + offset);
-  }
-
-  @Override
-  public final void putBoolean(long offset, boolean value) {
-    Platform.putBoolean(null, this.offset + offset, value);
-  }
-
-  @Override
-  public final byte getByte(long offset) {
-    return Platform.getByte(null, this.offset + offset);
-  }
-
-  @Override
-  public final void putByte(long offset, byte value) {
-    Platform.putByte(null, this.offset + offset, value);
-  }
-
-  @Override
-  public final short getShort(long offset) {
-    return Platform.getShort(null, this.offset + offset);
-  }
-
-  @Override
-  public final void putShort(long offset, short value) {
-    Platform.putShort(null, this.offset + offset, value);
-  }
-
-  @Override
-  public final long getLong(long offset) {
-    return Platform.getLong(null, this.offset + offset);
-  }
-
-  @Override
-  public final void putLong(long offset, long value) {
-    Platform.putLong(null, this.offset + offset, value);
-  }
-
-  @Override
-  public final float getFloat(long offset) {
-    return Platform.getFloat(null, this.offset + offset);
-  }
-
-  @Override
-  public final void putFloat(long offset, float value) {
-    Platform.putFloat(null, this.offset + offset, value);
-  }
-
-  @Override
-  public final double getDouble(long offset) {
-    return Platform.getDouble(null, this.offset + offset);
-  }
-
-  @Override
-  public final void putDouble(long offset, double value) {
-    Platform.putDouble(null, this.offset + offset, value);
-  }
-}
diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/OnHeapMemoryBlock.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/OnHeapMemoryBlock.java
deleted file mode 100644
index ee42bc27c9c5f..0000000000000
--- a/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/OnHeapMemoryBlock.java
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.unsafe.memory;
-
-import com.google.common.primitives.Ints;
-
-import org.apache.spark.unsafe.Platform;
-
-/**
- * A consecutive block of memory with a long array on Java heap.
- */
-public final class OnHeapMemoryBlock extends MemoryBlock {
-
-  private final long[] array;
-
-  public OnHeapMemoryBlock(long[] obj, long offset, long size) {
-    super(obj, offset, size);
-    this.array = obj;
-    assert(offset + size <= obj.length * 8L + Platform.LONG_ARRAY_OFFSET) :
-      "The sum of size " + size + " and offset " + offset + " should not be larger than " +
-        "the size of the given memory space " + (obj.length * 8L + Platform.LONG_ARRAY_OFFSET);
-  }
-
-  public OnHeapMemoryBlock(long size) {
-    this(new long[Ints.checkedCast((size + 7) / 8)], Platform.LONG_ARRAY_OFFSET, size);
-  }
-
-  @Override
-  public MemoryBlock subBlock(long offset, long size) {
-    checkSubBlockRange(offset, size);
-    if (offset == 0 && size == this.size()) return this;
-    return new OnHeapMemoryBlock(array, this.offset + offset, size);
-  }
-
-  public long[] getLongArray() { return array; }
-
-  /**
-   * Creates a memory block pointing to the memory used by the long array.
-   */
-  public static OnHeapMemoryBlock fromArray(final long[] array) {
-    return new OnHeapMemoryBlock(array, Platform.LONG_ARRAY_OFFSET, array.length * 8L);
-  }
-
-  public static OnHeapMemoryBlock fromArray(final long[] array, long size) {
-    return new OnHeapMemoryBlock(array, Platform.LONG_ARRAY_OFFSET, size);
-  }
-
-  @Override
-  public int getInt(long offset) {
-    return Platform.getInt(array, this.offset + offset);
-  }
-
-  @Override
-  public void putInt(long offset, int value) {
-    Platform.putInt(array, this.offset + offset, value);
-  }
-
-  @Override
-  public boolean getBoolean(long offset) {
-    return Platform.getBoolean(array, this.offset + offset);
-  }
-
-  @Override
-  public void putBoolean(long offset, boolean value) {
-    Platform.putBoolean(array, this.offset + offset, value);
-  }
-
-  @Override
-  public byte getByte(long offset) {
-    return Platform.getByte(array, this.offset + offset);
-  }
-
-  @Override
-  public void putByte(long offset, byte value) {
-    Platform.putByte(array, this.offset + offset, value);
-  }
-
-  @Override
-  public short getShort(long offset) {
-    return Platform.getShort(array, this.offset + offset);
-  }
-
-  @Override
-  public void putShort(long offset, short value) {
-    Platform.putShort(array, this.offset + offset, value);
-  }
-
-  @Override
-  public long getLong(long offset) {
-    return Platform.getLong(array, this.offset + offset);
-  }
-
-  @Override
-  public void putLong(long offset, long value) {
-    Platform.putLong(array, this.offset + offset, value);
-  }
-
-  @Override
-  public float getFloat(long offset) {
-    return Platform.getFloat(array, this.offset + offset);
-  }
-
-  @Override
-  public void putFloat(long offset, float value) {
-    Platform.putFloat(array, this.offset + offset, value);
-  }
-
-  @Override
-  public double getDouble(long offset) {
-    return Platform.getDouble(array, this.offset + offset);
-  }
-
-  @Override
-  public void putDouble(long offset, double value) {
-    Platform.putDouble(array, this.offset + offset, value);
-  }
-}
diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/UnsafeMemoryAllocator.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/UnsafeMemoryAllocator.java
index 5310bdf2779a9..4368fb615ba1e 100644
--- a/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/UnsafeMemoryAllocator.java
+++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/UnsafeMemoryAllocator.java
@@ -25,9 +25,9 @@
 public class UnsafeMemoryAllocator implements MemoryAllocator {
 
   @Override
-  public OffHeapMemoryBlock allocate(long size) throws OutOfMemoryError {
+  public MemoryBlock allocate(long size) throws OutOfMemoryError {
     long address = Platform.allocateMemory(size);
-    OffHeapMemoryBlock memory = new OffHeapMemoryBlock(address, size);
+    MemoryBlock memory = new MemoryBlock(null, address, size);
     if (MemoryAllocator.MEMORY_DEBUG_FILL_ENABLED) {
       memory.fill(MemoryAllocator.MEMORY_DEBUG_FILL_CLEAN_VALUE);
     }
@@ -36,25 +36,22 @@ public OffHeapMemoryBlock allocate(long size) throws OutOfMemoryError {
 
   @Override
   public void free(MemoryBlock memory) {
-    assert(memory instanceof OffHeapMemoryBlock) :
-      "UnsafeMemoryAllocator can only free OffHeapMemoryBlock.";
-    if (memory == OffHeapMemoryBlock.NULL) return;
-    assert (memory.getPageNumber() != MemoryBlock.FREED_IN_ALLOCATOR_PAGE_NUMBER) :
+    assert (memory.obj == null) :
+      "baseObject not null; are you trying to use the off-heap allocator to free on-heap memory?";
+    assert (memory.pageNumber != MemoryBlock.FREED_IN_ALLOCATOR_PAGE_NUMBER) :
       "page has already been freed";
-    assert ((memory.getPageNumber() == MemoryBlock.NO_PAGE_NUMBER)
-            || (memory.getPageNumber() == MemoryBlock.FREED_IN_TMM_PAGE_NUMBER)) :
+    assert ((memory.pageNumber == MemoryBlock.NO_PAGE_NUMBER)
+            || (memory.pageNumber == MemoryBlock.FREED_IN_TMM_PAGE_NUMBER)) :
       "TMM-allocated pages must be freed via TMM.freePage(), not directly in allocator free()";
 
     if (MemoryAllocator.MEMORY_DEBUG_FILL_ENABLED) {
       memory.fill(MemoryAllocator.MEMORY_DEBUG_FILL_FREED_VALUE);
     }
-
     Platform.freeMemory(memory.offset);
-
     // As an additional layer of defense against use-after-free bugs, we mutate the
     // MemoryBlock to reset its pointer.
-    memory.resetObjAndOffset();
+    memory.offset = 0;
     // Mark the page as freed (so we can detect double-frees).
-    memory.setPageNumber(MemoryBlock.FREED_IN_ALLOCATOR_PAGE_NUMBER);
+    memory.pageNumber = MemoryBlock.FREED_IN_ALLOCATOR_PAGE_NUMBER;
   }
 }
diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
index e91fc4391425c..3a3bfc4a94bb3 100644
--- a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
+++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
@@ -34,8 +34,6 @@
 import org.apache.spark.unsafe.Platform;
 import org.apache.spark.unsafe.array.ByteArrayMethods;
 import org.apache.spark.unsafe.hash.Murmur3_x86_32;
-import org.apache.spark.unsafe.memory.ByteArrayMemoryBlock;
-import org.apache.spark.unsafe.memory.MemoryBlock;
 
 import static org.apache.spark.unsafe.Platform.*;
 
@@ -53,13 +51,12 @@
 
   // These are only updated by readExternal() or read()
   @Nonnull
-  private MemoryBlock base;
-  // While numBytes has the same value as base.size(), to keep as int avoids cast from long to int
+  private Object base;
+  private long offset;
   private int numBytes;
 
-  public MemoryBlock getMemoryBlock() { return base; }
-  public Object getBaseObject() { return base.getBaseObject(); }
-  public long getBaseOffset() { return base.getBaseOffset(); }
+  public Object getBaseObject() { return base; }
+  public long getBaseOffset() { return offset; }
 
   /**
    * A char in UTF-8 encoding can take 1-4 bytes depending on the first byte which
@@ -112,8 +109,7 @@
    */
   public static UTF8String fromBytes(byte[] bytes) {
     if (bytes != null) {
-      return new UTF8String(
-        new ByteArrayMemoryBlock(bytes, BYTE_ARRAY_OFFSET, bytes.length));
+      return new UTF8String(bytes, BYTE_ARRAY_OFFSET, bytes.length);
     } else {
       return null;
     }
@@ -126,13 +122,19 @@ public static UTF8String fromBytes(byte[] bytes) {
    */
   public static UTF8String fromBytes(byte[] bytes, int offset, int numBytes) {
     if (bytes != null) {
-      return new UTF8String(
-        new ByteArrayMemoryBlock(bytes, BYTE_ARRAY_OFFSET + offset, numBytes));
+      return new UTF8String(bytes, BYTE_ARRAY_OFFSET + offset, numBytes);
     } else {
       return null;
     }
   }
 
+  /**
+   * Creates an UTF8String from given address (base and offset) and length.
+   */
+  public static UTF8String fromAddress(Object base, long offset, int numBytes) {
+    return new UTF8String(base, offset, numBytes);
+  }
+
   /**
    * Creates an UTF8String from String.
    */
@@ -149,13 +151,16 @@ public static UTF8String blankString(int length) {
     return fromBytes(spaces);
   }
 
-  public UTF8String(MemoryBlock base) {
+  protected UTF8String(Object base, long offset, int numBytes) {
     this.base = base;
-    this.numBytes = Ints.checkedCast(base.size());
+    this.offset = offset;
+    this.numBytes = numBytes;
   }
 
   // for serialization
-  public UTF8String() {}
+  public UTF8String() {
+    this(null, 0, 0);
+  }
 
   /**
    * Writes the content of this string into a memory address, identified by an object and an offset.
@@ -163,7 +168,7 @@ public UTF8String() {}
    * bytes in this string.
    */
   public void writeToMemory(Object target, long targetOffset) {
-    base.writeTo(0, target, targetOffset, numBytes);
+    Platform.copyMemory(base, offset, target, targetOffset, numBytes);
   }
 
   public void writeTo(ByteBuffer buffer) {
@@ -183,9 +188,8 @@ public void writeTo(ByteBuffer buffer) {
    */
   @Nonnull
   public ByteBuffer getByteBuffer() {
-    long offset = base.getBaseOffset();
-    if (base instanceof ByteArrayMemoryBlock && offset >= BYTE_ARRAY_OFFSET) {
-      final byte[] bytes = ((ByteArrayMemoryBlock) base).getByteArray();
+    if (base instanceof byte[] && offset >= BYTE_ARRAY_OFFSET) {
+      final byte[] bytes = (byte[]) base;
 
       // the offset includes an object header... this is only needed for unsafe copies
       final long arrayOffset = offset - BYTE_ARRAY_OFFSET;
@@ -252,12 +256,12 @@ public long getPrefix() {
     long mask = 0;
     if (IS_LITTLE_ENDIAN) {
       if (numBytes >= 8) {
-        p = base.getLong(0);
+        p = Platform.getLong(base, offset);
       } else if (numBytes > 4) {
-        p = base.getLong(0);
+        p = Platform.getLong(base, offset);
         mask = (1L << (8 - numBytes) * 8) - 1;
       } else if (numBytes > 0) {
-        p = (long) base.getInt(0);
+        p = (long) Platform.getInt(base, offset);
         mask = (1L << (8 - numBytes) * 8) - 1;
       } else {
         p = 0;
@@ -266,12 +270,12 @@ public long getPrefix() {
     } else {
       // byteOrder == ByteOrder.BIG_ENDIAN
       if (numBytes >= 8) {
-        p = base.getLong(0);
+        p = Platform.getLong(base, offset);
       } else if (numBytes > 4) {
-        p = base.getLong(0);
+        p = Platform.getLong(base, offset);
         mask = (1L << (8 - numBytes) * 8) - 1;
       } else if (numBytes > 0) {
-        p = ((long) base.getInt(0)) << 32;
+        p = ((long) Platform.getInt(base, offset)) << 32;
         mask = (1L << (8 - numBytes) * 8) - 1;
       } else {
         p = 0;
@@ -286,13 +290,12 @@ public long getPrefix() {
    */
   public byte[] getBytes() {
     // avoid copy if `base` is `byte[]`
-    long offset = base.getBaseOffset();
-    if (offset == BYTE_ARRAY_OFFSET && base instanceof ByteArrayMemoryBlock
-      && (((ByteArrayMemoryBlock) base).getByteArray()).length == numBytes) {
-      return ((ByteArrayMemoryBlock) base).getByteArray();
+    if (offset == BYTE_ARRAY_OFFSET && base instanceof byte[]
+      && ((byte[]) base).length == numBytes) {
+      return (byte[]) base;
     } else {
       byte[] bytes = new byte[numBytes];
-      base.writeTo(0, bytes, BYTE_ARRAY_OFFSET, numBytes);
+      copyMemory(base, offset, bytes, BYTE_ARRAY_OFFSET, numBytes);
       return bytes;
     }
   }
@@ -322,7 +325,7 @@ public UTF8String substring(final int start, final int until) {
 
     if (i > j) {
       byte[] bytes = new byte[i - j];
-      base.writeTo(j, bytes, BYTE_ARRAY_OFFSET, i - j);
+      copyMemory(base, offset + j, bytes, BYTE_ARRAY_OFFSET, i - j);
       return fromBytes(bytes);
     } else {
       return EMPTY_UTF8;
@@ -363,14 +366,14 @@ public boolean contains(final UTF8String substring) {
    * Returns the byte at position `i`.
    */
   private byte getByte(int i) {
-    return base.getByte(i);
+    return Platform.getByte(base, offset + i);
   }
 
   private boolean matchAt(final UTF8String s, int pos) {
     if (s.numBytes + pos > numBytes || pos < 0) {
       return false;
     }
-    return ByteArrayMethods.arrayEqualsBlock(base, pos, s.base, 0, s.numBytes);
+    return ByteArrayMethods.arrayEquals(base, offset + pos, s.base, s.offset, s.numBytes);
   }
 
   public boolean startsWith(final UTF8String prefix) {
@@ -497,7 +500,8 @@ public int findInSet(UTF8String match) {
     for (int i = 0; i < numBytes; i++) {
       if (getByte(i) == (byte) ',') {
         if (i - (lastComma + 1) == match.numBytes &&
-          ByteArrayMethods.arrayEqualsBlock(base, lastComma + 1, match.base, 0, match.numBytes)) {
+          ByteArrayMethods.arrayEquals(base, offset + (lastComma + 1), match.base, match.offset,
+            match.numBytes)) {
           return n;
         }
         lastComma = i;
@@ -505,7 +509,8 @@ public int findInSet(UTF8String match) {
       }
     }
     if (numBytes - (lastComma + 1) == match.numBytes &&
-      ByteArrayMethods.arrayEqualsBlock(base, lastComma + 1, match.base, 0, match.numBytes)) {
+      ByteArrayMethods.arrayEquals(base, offset + (lastComma + 1), match.base, match.offset,
+        match.numBytes)) {
       return n;
     }
     return 0;
@@ -520,7 +525,7 @@ public int findInSet(UTF8String match) {
   private UTF8String copyUTF8String(int start, int end) {
     int len = end - start + 1;
     byte[] newBytes = new byte[len];
-    base.writeTo(start, newBytes, BYTE_ARRAY_OFFSET, len);
+    copyMemory(base, offset + start, newBytes, BYTE_ARRAY_OFFSET, len);
     return UTF8String.fromBytes(newBytes);
   }
 
@@ -667,7 +672,8 @@ public UTF8String reverse() {
     int i = 0; // position in byte
     while (i < numBytes) {
       int len = numBytesForFirstByte(getByte(i));
-      base.writeTo(i, result, BYTE_ARRAY_OFFSET + result.length - i - len, len);
+      copyMemory(this.base, this.offset + i, result,
+        BYTE_ARRAY_OFFSET + result.length - i - len, len);
 
       i += len;
     }
@@ -681,7 +687,7 @@ public UTF8String repeat(int times) {
     }
 
     byte[] newBytes = new byte[numBytes * times];
-    base.writeTo(0, newBytes, BYTE_ARRAY_OFFSET, numBytes);
+    copyMemory(this.base, this.offset, newBytes, BYTE_ARRAY_OFFSET, numBytes);
 
     int copied = 1;
     while (copied < times) {
@@ -718,7 +724,7 @@ public int indexOf(UTF8String v, int start) {
       if (i + v.numBytes > numBytes) {
         return -1;
       }
-      if (ByteArrayMethods.arrayEqualsBlock(base, i, v.base, 0, v.numBytes)) {
+      if (ByteArrayMethods.arrayEquals(base, offset + i, v.base, v.offset, v.numBytes)) {
         return c;
       }
       i += numBytesForFirstByte(getByte(i));
@@ -734,7 +740,7 @@ public int indexOf(UTF8String v, int start) {
   private int find(UTF8String str, int start) {
     assert (str.numBytes > 0);
     while (start <= numBytes - str.numBytes) {
-      if (ByteArrayMethods.arrayEqualsBlock(base, start, str.base, 0, str.numBytes)) {
+      if (ByteArrayMethods.arrayEquals(base, offset + start, str.base, str.offset, str.numBytes)) {
         return start;
       }
       start += 1;
@@ -748,7 +754,7 @@ private int find(UTF8String str, int start) {
   private int rfind(UTF8String str, int start) {
     assert (str.numBytes > 0);
     while (start >= 0) {
-      if (ByteArrayMethods.arrayEqualsBlock(base, start, str.base, 0, str.numBytes)) {
+      if (ByteArrayMethods.arrayEquals(base, offset + start, str.base, str.offset, str.numBytes)) {
         return start;
       }
       start -= 1;
@@ -781,7 +787,7 @@ public UTF8String subStringIndex(UTF8String delim, int count) {
         return EMPTY_UTF8;
       }
       byte[] bytes = new byte[idx];
-      base.writeTo(0, bytes, BYTE_ARRAY_OFFSET, idx);
+      copyMemory(base, offset, bytes, BYTE_ARRAY_OFFSET, idx);
       return fromBytes(bytes);
 
     } else {
@@ -801,7 +807,7 @@ public UTF8String subStringIndex(UTF8String delim, int count) {
       }
       int size = numBytes - delim.numBytes - idx;
       byte[] bytes = new byte[size];
-      base.writeTo(idx + delim.numBytes, bytes, BYTE_ARRAY_OFFSET, size);
+      copyMemory(base, offset + idx + delim.numBytes, bytes, BYTE_ARRAY_OFFSET, size);
       return fromBytes(bytes);
     }
   }
@@ -824,15 +830,15 @@ public UTF8String rpad(int len, UTF8String pad) {
       UTF8String remain = pad.substring(0, spaces - padChars * count);
 
       byte[] data = new byte[this.numBytes + pad.numBytes * count + remain.numBytes];
-      base.writeTo(0, data, BYTE_ARRAY_OFFSET, this.numBytes);
+      copyMemory(this.base, this.offset, data, BYTE_ARRAY_OFFSET, this.numBytes);
       int offset = this.numBytes;
       int idx = 0;
       while (idx < count) {
-        pad.base.writeTo(0, data, BYTE_ARRAY_OFFSET + offset, pad.numBytes);
+        copyMemory(pad.base, pad.offset, data, BYTE_ARRAY_OFFSET + offset, pad.numBytes);
         ++ idx;
         offset += pad.numBytes;
       }
-      remain.base.writeTo(0, data, BYTE_ARRAY_OFFSET + offset, remain.numBytes);
+      copyMemory(remain.base, remain.offset, data, BYTE_ARRAY_OFFSET + offset, remain.numBytes);
 
       return UTF8String.fromBytes(data);
     }
@@ -860,13 +866,13 @@ public UTF8String lpad(int len, UTF8String pad) {
       int offset = 0;
       int idx = 0;
       while (idx < count) {
-        pad.base.writeTo(0, data, BYTE_ARRAY_OFFSET + offset, pad.numBytes);
+        copyMemory(pad.base, pad.offset, data, BYTE_ARRAY_OFFSET + offset, pad.numBytes);
         ++ idx;
         offset += pad.numBytes;
       }
-      remain.base.writeTo(0, data, BYTE_ARRAY_OFFSET + offset, remain.numBytes);
+      copyMemory(remain.base, remain.offset, data, BYTE_ARRAY_OFFSET + offset, remain.numBytes);
       offset += remain.numBytes;
-      base.writeTo(0, data, BYTE_ARRAY_OFFSET + offset, numBytes());
+      copyMemory(this.base, this.offset, data, BYTE_ARRAY_OFFSET + offset, numBytes());
 
       return UTF8String.fromBytes(data);
     }
@@ -891,8 +897,8 @@ public static UTF8String concat(UTF8String... inputs) {
     int offset = 0;
     for (int i = 0; i < inputs.length; i++) {
       int len = inputs[i].numBytes;
-      inputs[i].base.writeTo(
-        0,
+      copyMemory(
+        inputs[i].base, inputs[i].offset,
         result, BYTE_ARRAY_OFFSET + offset,
         len);
       offset += len;
@@ -931,8 +937,8 @@ public static UTF8String concatWs(UTF8String separator, UTF8String... inputs) {
     for (int i = 0, j = 0; i < inputs.length; i++) {
       if (inputs[i] != null) {
         int len = inputs[i].numBytes;
-        inputs[i].base.writeTo(
-          0,
+        copyMemory(
+          inputs[i].base, inputs[i].offset,
           result, BYTE_ARRAY_OFFSET + offset,
           len);
         offset += len;
@@ -940,8 +946,8 @@ public static UTF8String concatWs(UTF8String separator, UTF8String... inputs) {
         j++;
         // Add separator if this is not the last input.
         if (j < numInputs) {
-          separator.base.writeTo(
-            0,
+          copyMemory(
+            separator.base, separator.offset,
             result, BYTE_ARRAY_OFFSET + offset,
             separator.numBytes);
           offset += separator.numBytes;
@@ -952,6 +958,12 @@ public static UTF8String concatWs(UTF8String separator, UTF8String... inputs) {
   }
 
   public UTF8String[] split(UTF8String pattern, int limit) {
+    // Java String's split method supports "ignore empty string" behavior when the limit is 0
+    // whereas other languages do not. To avoid this java specific behavior, we fall back to
+    // -1 when the limit is 0.
+    if (limit == 0) {
+      limit = -1;
+    }
     String[] splits = toString().split(pattern.toString(), limit);
     UTF8String[] res = new UTF8String[splits.length];
     for (int i = 0; i < res.length; i++) {
@@ -1215,7 +1227,7 @@ public UTF8String clone() {
 
   public UTF8String copy() {
     byte[] bytes = new byte[numBytes];
-    base.writeTo(0, bytes, BYTE_ARRAY_OFFSET, numBytes);
+    copyMemory(base, offset, bytes, BYTE_ARRAY_OFFSET, numBytes);
     return fromBytes(bytes);
   }
 
@@ -1223,10 +1235,11 @@ public UTF8String copy() {
   public int compareTo(@Nonnull final UTF8String other) {
     int len = Math.min(numBytes, other.numBytes);
     int wordMax = (len / 8) * 8;
-    MemoryBlock rbase = other.base;
+    long roffset = other.offset;
+    Object rbase = other.base;
     for (int i = 0; i < wordMax; i += 8) {
-      long left = base.getLong(i);
-      long right = rbase.getLong(i);
+      long left = getLong(base, offset + i);
+      long right = getLong(rbase, roffset + i);
       if (left != right) {
         if (IS_LITTLE_ENDIAN) {
           return Long.compareUnsigned(Long.reverseBytes(left), Long.reverseBytes(right));
@@ -1237,7 +1250,7 @@ public int compareTo(@Nonnull final UTF8String other) {
     }
     for (int i = wordMax; i < len; i++) {
       // In UTF-8, the byte should be unsigned, so we should compare them as unsigned int.
-      int res = (getByte(i) & 0xFF) - (rbase.getByte(i) & 0xFF);
+      int res = (getByte(i) & 0xFF) - (Platform.getByte(rbase, roffset + i) & 0xFF);
       if (res != 0) {
         return res;
       }
@@ -1256,7 +1269,7 @@ public boolean equals(final Object other) {
       if (numBytes != o.numBytes) {
         return false;
       }
-      return ByteArrayMethods.arrayEqualsBlock(base, 0, o.base, 0, numBytes);
+      return ByteArrayMethods.arrayEquals(base, offset, o.base, o.offset, numBytes);
     } else {
       return false;
     }
@@ -1312,8 +1325,8 @@ public int levenshteinDistance(UTF8String other) {
               num_bytes_j != numBytesForFirstByte(s.getByte(i_bytes))) {
           cost = 1;
         } else {
-          cost = (ByteArrayMethods.arrayEqualsBlock(t.base, j_bytes, s.base,
-            i_bytes, num_bytes_j)) ? 0 : 1;
+          cost = (ByteArrayMethods.arrayEquals(t.base, t.offset + j_bytes, s.base,
+              s.offset + i_bytes, num_bytes_j)) ? 0 : 1;
         }
         d[i + 1] = Math.min(Math.min(d[i] + 1, p[i + 1] + 1), p[i] + cost);
       }
@@ -1328,7 +1341,7 @@ public int levenshteinDistance(UTF8String other) {
 
   @Override
   public int hashCode() {
-    return Murmur3_x86_32.hashUnsafeBytesBlock(base,42);
+    return Murmur3_x86_32.hashUnsafeBytes(base, offset, numBytes, 42);
   }
 
   /**
@@ -1391,10 +1404,10 @@ public void writeExternal(ObjectOutput out) throws IOException {
   }
 
   public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException {
+    offset = BYTE_ARRAY_OFFSET;
     numBytes = in.readInt();
-    byte[] bytes = new byte[numBytes];
-    in.readFully(bytes);
-    base = ByteArrayMemoryBlock.fromArray(bytes);
+    base = new byte[numBytes];
+    in.readFully((byte[]) base);
   }
 
   @Override
@@ -1406,10 +1419,10 @@ public void write(Kryo kryo, Output out) {
 
   @Override
   public void read(Kryo kryo, Input in) {
-    numBytes = in.readInt();
-    byte[] bytes = new byte[numBytes];
-    in.read(bytes);
-    base = ByteArrayMemoryBlock.fromArray(bytes);
+    this.offset = BYTE_ARRAY_OFFSET;
+    this.numBytes = in.readInt();
+    this.base = new byte[numBytes];
+    in.read((byte[]) base);
   }
 
 }
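
The limit-0 fallback added to split() above works around a JDK-specific behavior: Java's String.split removes trailing empty strings when the limit is 0 but keeps them when the limit is negative, which is what the updated UTF8StringSuite.split test exercises. A minimal plain-JDK sketch of that difference (the demo class name is made up for illustration):

    // Shows why split() falls back from limit 0 to -1: Java drops trailing empty
    // strings for limit 0, whereas other languages (and the intended semantics here) do not.
    import java.util.Arrays;

    public class SplitLimitDemo {
      public static void main(String[] args) {
        String s = "ab,def,ghi,";
        System.out.println(Arrays.toString(s.split(",", 0)));   // [ab, def, ghi]
        System.out.println(Arrays.toString(s.split(",", -1)));  // [ab, def, ghi, ]
      }
    }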
diff --git a/common/unsafe/src/test/java/org/apache/spark/unsafe/PlatformUtilSuite.java b/common/unsafe/src/test/java/org/apache/spark/unsafe/PlatformUtilSuite.java
index 583a148b3845d..2474081dad5c9 100644
--- a/common/unsafe/src/test/java/org/apache/spark/unsafe/PlatformUtilSuite.java
+++ b/common/unsafe/src/test/java/org/apache/spark/unsafe/PlatformUtilSuite.java
@@ -81,7 +81,7 @@ public void freeingOnHeapMemoryBlockResetsBaseObjectAndOffset() {
     MemoryAllocator.HEAP.free(block);
     Assert.assertNull(block.getBaseObject());
     Assert.assertEquals(0, block.getBaseOffset());
-    Assert.assertEquals(MemoryBlock.FREED_IN_ALLOCATOR_PAGE_NUMBER, block.getPageNumber());
+    Assert.assertEquals(MemoryBlock.FREED_IN_ALLOCATOR_PAGE_NUMBER, block.pageNumber);
   }
 
   @Test
@@ -92,7 +92,7 @@ public void freeingOffHeapMemoryBlockResetsOffset() {
     MemoryAllocator.UNSAFE.free(block);
     Assert.assertNull(block.getBaseObject());
     Assert.assertEquals(0, block.getBaseOffset());
-    Assert.assertEquals(MemoryBlock.FREED_IN_ALLOCATOR_PAGE_NUMBER, block.getPageNumber());
+    Assert.assertEquals(MemoryBlock.FREED_IN_ALLOCATOR_PAGE_NUMBER, block.pageNumber);
   }
 
   @Test(expected = AssertionError.class)
@@ -157,4 +157,22 @@ public void heapMemoryReuse() {
     Assert.assertEquals(onheap4.size(), 1024 * 1024 + 7);
     Assert.assertEquals(obj3, onheap4.getBaseObject());
   }
+
+  @Test
+  // SPARK-26021
+  public void writeMinusZeroIsReplacedWithZero() {
+    byte[] doubleBytes = new byte[Double.BYTES];
+    byte[] floatBytes = new byte[Float.BYTES];
+    Platform.putDouble(doubleBytes, Platform.BYTE_ARRAY_OFFSET, -0.0d);
+    Platform.putFloat(floatBytes, Platform.BYTE_ARRAY_OFFSET, -0.0f);
+
+    byte[] doubleBytes2 = new byte[Double.BYTES];
+    byte[] floatBytes2 = new byte[Float.BYTES];
+    Platform.putDouble(doubleBytes2, Platform.BYTE_ARRAY_OFFSET, 0.0d);
+    Platform.putFloat(floatBytes2, Platform.BYTE_ARRAY_OFFSET, 0.0f);
+
+    // Make sure the bytes we write for 0.0 and -0.0 are the same.
+    Assert.assertArrayEquals(doubleBytes, doubleBytes2);
+    Assert.assertArrayEquals(floatBytes, floatBytes2);
+  }
 }
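
The new writeMinusZeroIsReplacedWithZero test above relies on the fact that -0.0 and 0.0 compare equal in Java yet have different IEEE 754 bit patterns, so Platform.putDouble/putFloat must normalize the sign bit for the written bytes to match. A small JDK-only sketch of that bit-level difference (class name is illustrative only):

    // -0.0 == 0.0 in Java, but the raw bit patterns differ by the sign bit.
    public class MinusZeroBits {
      public static void main(String[] args) {
        System.out.println(-0.0d == 0.0d);                                        // true
        System.out.println(Long.toHexString(Double.doubleToRawLongBits(-0.0d)));  // 8000000000000000
        System.out.println(Long.toHexString(Double.doubleToRawLongBits(0.0d)));   // 0
        System.out.println(Integer.toHexString(Float.floatToRawIntBits(-0.0f)));  // 80000000
      }
    }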
diff --git a/common/unsafe/src/test/java/org/apache/spark/unsafe/array/LongArraySuite.java b/common/unsafe/src/test/java/org/apache/spark/unsafe/array/LongArraySuite.java
index 8c2e98c2bfc54..fb8e53b3348f3 100644
--- a/common/unsafe/src/test/java/org/apache/spark/unsafe/array/LongArraySuite.java
+++ b/common/unsafe/src/test/java/org/apache/spark/unsafe/array/LongArraySuite.java
@@ -20,13 +20,14 @@
 import org.junit.Assert;
 import org.junit.Test;
 
-import org.apache.spark.unsafe.memory.OnHeapMemoryBlock;
+import org.apache.spark.unsafe.memory.MemoryBlock;
 
 public class LongArraySuite {
 
   @Test
   public void basicTest() {
-    LongArray arr = new LongArray(new OnHeapMemoryBlock(16));
+    long[] bytes = new long[2];
+    LongArray arr = new LongArray(MemoryBlock.fromLongArray(bytes));
     arr.set(0, 1L);
     arr.set(1, 2L);
     arr.set(1, 3L);
diff --git a/common/unsafe/src/test/java/org/apache/spark/unsafe/hash/Murmur3_x86_32Suite.java b/common/unsafe/src/test/java/org/apache/spark/unsafe/hash/Murmur3_x86_32Suite.java
index d9898771720ae..6348a73bf3895 100644
--- a/common/unsafe/src/test/java/org/apache/spark/unsafe/hash/Murmur3_x86_32Suite.java
+++ b/common/unsafe/src/test/java/org/apache/spark/unsafe/hash/Murmur3_x86_32Suite.java
@@ -70,24 +70,6 @@ public void testKnownBytesInputs() {
       Murmur3_x86_32.hashUnsafeBytes2(tes, Platform.BYTE_ARRAY_OFFSET, tes.length, 0));
   }
 
-  @Test
-  public void testKnownWordsInputs() {
-    byte[] bytes = new byte[16];
-    long offset = Platform.BYTE_ARRAY_OFFSET;
-    for (int i = 0; i < 16; i++) {
-      bytes[i] = 0;
-    }
-    Assert.assertEquals(-300363099, Murmur3_x86_32.hashUnsafeWords(bytes, offset, 16, 42));
-    for (int i = 0; i < 16; i++) {
-      bytes[i] = -1;
-    }
-    Assert.assertEquals(-1210324667, Murmur3_x86_32.hashUnsafeWords(bytes, offset, 16, 42));
-    for (int i = 0; i < 16; i++) {
-      bytes[i] = (byte)i;
-    }
-    Assert.assertEquals(-634919701, Murmur3_x86_32.hashUnsafeWords(bytes, offset, 16, 42));
-  }
-
   @Test
   public void randomizedStressTest() {
     int size = 65536;
diff --git a/common/unsafe/src/test/java/org/apache/spark/unsafe/memory/MemoryBlockSuite.java b/common/unsafe/src/test/java/org/apache/spark/unsafe/memory/MemoryBlockSuite.java
deleted file mode 100644
index ef5ff8ee70ec0..0000000000000
--- a/common/unsafe/src/test/java/org/apache/spark/unsafe/memory/MemoryBlockSuite.java
+++ /dev/null
@@ -1,179 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.unsafe.memory;
-
-import org.apache.spark.unsafe.Platform;
-import org.junit.Assert;
-import org.junit.Test;
-
-import java.nio.ByteOrder;
-
-import static org.hamcrest.core.StringContains.containsString;
-
-public class MemoryBlockSuite {
-  private static final boolean bigEndianPlatform =
-    ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN);
-
-  private void check(MemoryBlock memory, Object obj, long offset, int length) {
-    memory.setPageNumber(1);
-    memory.fill((byte)-1);
-    memory.putBoolean(0, true);
-    memory.putByte(1, (byte)127);
-    memory.putShort(2, (short)257);
-    memory.putInt(4, 0x20000002);
-    memory.putLong(8, 0x1234567089ABCDEFL);
-    memory.putFloat(16, 1.0F);
-    memory.putLong(20, 0x1234567089ABCDEFL);
-    memory.putDouble(28, 2.0);
-    MemoryBlock.copyMemory(memory, 0L, memory, 36, 4);
-    int[] a = new int[2];
-    a[0] = 0x12345678;
-    a[1] = 0x13579BDF;
-    memory.copyFrom(a, Platform.INT_ARRAY_OFFSET, 40, 8);
-    byte[] b = new byte[8];
-    memory.writeTo(40, b, Platform.BYTE_ARRAY_OFFSET, 8);
-
-    Assert.assertEquals(obj, memory.getBaseObject());
-    Assert.assertEquals(offset, memory.getBaseOffset());
-    Assert.assertEquals(length, memory.size());
-    Assert.assertEquals(1, memory.getPageNumber());
-    Assert.assertEquals(true, memory.getBoolean(0));
-    Assert.assertEquals((byte)127, memory.getByte(1 ));
-    Assert.assertEquals((short)257, memory.getShort(2));
-    Assert.assertEquals(0x20000002, memory.getInt(4));
-    Assert.assertEquals(0x1234567089ABCDEFL, memory.getLong(8));
-    Assert.assertEquals(1.0F, memory.getFloat(16), 0);
-    Assert.assertEquals(0x1234567089ABCDEFL, memory.getLong(20));
-    Assert.assertEquals(2.0, memory.getDouble(28), 0);
-    Assert.assertEquals(true, memory.getBoolean(36));
-    Assert.assertEquals((byte)127, memory.getByte(37 ));
-    Assert.assertEquals((short)257, memory.getShort(38));
-    Assert.assertEquals(a[0], memory.getInt(40));
-    Assert.assertEquals(a[1], memory.getInt(44));
-    if (bigEndianPlatform) {
-      Assert.assertEquals(a[0],
-        ((int)b[0] & 0xff) << 24 | ((int)b[1] & 0xff) << 16 |
-        ((int)b[2] & 0xff) << 8 | ((int)b[3] & 0xff));
-      Assert.assertEquals(a[1],
-        ((int)b[4] & 0xff) << 24 | ((int)b[5] & 0xff) << 16 |
-        ((int)b[6] & 0xff) << 8 | ((int)b[7] & 0xff));
-    } else {
-      Assert.assertEquals(a[0],
-        ((int)b[3] & 0xff) << 24 | ((int)b[2] & 0xff) << 16 |
-        ((int)b[1] & 0xff) << 8 | ((int)b[0] & 0xff));
-      Assert.assertEquals(a[1],
-        ((int)b[7] & 0xff) << 24 | ((int)b[6] & 0xff) << 16 |
-        ((int)b[5] & 0xff) << 8 | ((int)b[4] & 0xff));
-    }
-    for (int i = 48; i < memory.size(); i++) {
-      Assert.assertEquals((byte) -1, memory.getByte(i));
-    }
-
-    assert(memory.subBlock(0, memory.size()) == memory);
-
-    try {
-      memory.subBlock(-8, 8);
-      Assert.fail();
-    } catch (Exception expected) {
-      Assert.assertThat(expected.getMessage(), containsString("non-negative"));
-    }
-
-    try {
-      memory.subBlock(0, -8);
-      Assert.fail();
-    } catch (Exception expected) {
-      Assert.assertThat(expected.getMessage(), containsString("non-negative"));
-    }
-
-    try {
-      memory.subBlock(0, length + 8);
-      Assert.fail();
-    } catch (Exception expected) {
-      Assert.assertThat(expected.getMessage(), containsString("should not be larger than"));
-    }
-
-    try {
-      memory.subBlock(8, length - 4);
-      Assert.fail();
-    } catch (Exception expected) {
-      Assert.assertThat(expected.getMessage(), containsString("should not be larger than"));
-    }
-
-    try {
-      memory.subBlock(length + 8, 4);
-      Assert.fail();
-    } catch (Exception expected) {
-      Assert.assertThat(expected.getMessage(), containsString("should not be larger than"));
-    }
-
-    memory.setPageNumber(MemoryBlock.NO_PAGE_NUMBER);
-  }
-
-  @Test
-  public void testByteArrayMemoryBlock() {
-    byte[] obj = new byte[56];
-    long offset = Platform.BYTE_ARRAY_OFFSET;
-    int length = obj.length;
-
-    MemoryBlock memory = new ByteArrayMemoryBlock(obj, offset, length);
-    check(memory, obj, offset, length);
-
-    memory = ByteArrayMemoryBlock.fromArray(obj);
-    check(memory, obj, offset, length);
-
-    obj = new byte[112];
-    memory = new ByteArrayMemoryBlock(obj, offset, length);
-    check(memory, obj, offset, length);
-  }
-
-  @Test
-  public void testOnHeapMemoryBlock() {
-    long[] obj = new long[7];
-    long offset = Platform.LONG_ARRAY_OFFSET;
-    int length = obj.length * 8;
-
-    MemoryBlock memory = new OnHeapMemoryBlock(obj, offset, length);
-    check(memory, obj, offset, length);
-
-    memory = OnHeapMemoryBlock.fromArray(obj);
-    check(memory, obj, offset, length);
-
-    obj = new long[14];
-    memory = new OnHeapMemoryBlock(obj, offset, length);
-    check(memory, obj, offset, length);
-  }
-
-  @Test
-  public void testOffHeapArrayMemoryBlock() {
-    MemoryAllocator memoryAllocator = new UnsafeMemoryAllocator();
-    MemoryBlock memory = memoryAllocator.allocate(56);
-    Object obj = memory.getBaseObject();
-    long offset = memory.getBaseOffset();
-    int length = 56;
-
-    check(memory, obj, offset, length);
-    memoryAllocator.free(memory);
-
-    long address = Platform.allocateMemory(112);
-    memory = new OffHeapMemoryBlock(address, length);
-    obj = memory.getBaseObject();
-    offset = memory.getBaseOffset();
-    check(memory, obj, offset, length);
-    Platform.freeMemory(address);
-  }
-}
diff --git a/common/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java
index 42dda30480702..cf9cc6b1800a9 100644
--- a/common/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java
+++ b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java
@@ -25,8 +25,7 @@
 import java.util.*;
 
 import com.google.common.collect.ImmutableMap;
-import org.apache.spark.unsafe.memory.ByteArrayMemoryBlock;
-import org.apache.spark.unsafe.memory.OnHeapMemoryBlock;
+import org.apache.spark.unsafe.Platform;
 import org.junit.Test;
 
 import static org.junit.Assert.*;
@@ -394,12 +393,14 @@ public void substringSQL() {
 
   @Test
   public void split() {
-    assertTrue(Arrays.equals(fromString("ab,def,ghi").split(fromString(","), -1),
-      new UTF8String[]{fromString("ab"), fromString("def"), fromString("ghi")}));
-    assertTrue(Arrays.equals(fromString("ab,def,ghi").split(fromString(","), 2),
-      new UTF8String[]{fromString("ab"), fromString("def,ghi")}));
-    assertTrue(Arrays.equals(fromString("ab,def,ghi").split(fromString(","), 2),
-      new UTF8String[]{fromString("ab"), fromString("def,ghi")}));
+    UTF8String[] negativeAndZeroLimitCase =
+      new UTF8String[]{fromString("ab"), fromString("def"), fromString("ghi"), fromString("")};
+    assertTrue(Arrays.equals(fromString("ab,def,ghi,").split(fromString(","), 0),
+      negativeAndZeroLimitCase));
+    assertTrue(Arrays.equals(fromString("ab,def,ghi,").split(fromString(","), -1),
+      negativeAndZeroLimitCase));
+    assertTrue(Arrays.equals(fromString("ab,def,ghi,").split(fromString(","), 2),
+      new UTF8String[]{fromString("ab"), fromString("def,ghi,")}));
   }
 
   @Test
@@ -513,6 +514,21 @@ public void soundex() {
     assertEquals(fromString("世界千世").soundex(), fromString("世界千世"));
   }
 
+  @Test
+  public void writeToOutputStreamUnderflow() throws IOException {
+    // offset underflow is apparently supported?
+    final ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
+    final byte[] test = "01234567".getBytes(StandardCharsets.UTF_8);
+
+    for (int i = 1; i <= Platform.BYTE_ARRAY_OFFSET; ++i) {
+      UTF8String.fromAddress(test, Platform.BYTE_ARRAY_OFFSET - i, test.length + i)
+          .writeTo(outputStream);
+      final ByteBuffer buffer = ByteBuffer.wrap(outputStream.toByteArray(), i, test.length);
+      assertEquals("01234567", StandardCharsets.UTF_8.decode(buffer).toString());
+      outputStream.reset();
+    }
+  }
+
   @Test
   public void writeToOutputStreamSlice() throws IOException {
     final ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
@@ -520,7 +536,7 @@ public void writeToOutputStreamSlice() throws IOException {
 
     for (int i = 0; i < test.length; ++i) {
       for (int j = 0; j < test.length - i; ++j) {
-        new UTF8String(ByteArrayMemoryBlock.fromArray(test).subBlock(i, j))
+        UTF8String.fromAddress(test, Platform.BYTE_ARRAY_OFFSET + i, j)
             .writeTo(outputStream);
 
         assertArrayEquals(Arrays.copyOfRange(test, i, i + j), outputStream.toByteArray());
@@ -551,7 +567,7 @@ public void writeToOutputStreamOverflow() throws IOException {
 
     for (final long offset : offsets) {
       try {
-        new UTF8String(ByteArrayMemoryBlock.fromArray(test).subBlock(offset, test.length))
+        fromAddress(test, BYTE_ARRAY_OFFSET + offset, test.length)
             .writeTo(outputStream);
 
         throw new IllegalStateException(Long.toString(offset));
@@ -578,25 +594,26 @@ public void writeToOutputStream() throws IOException {
   }
 
   @Test
-  public void writeToOutputStreamLongArray() throws IOException {
+  public void writeToOutputStreamIntArray() throws IOException {
     // verify that writes work on objects that are not byte arrays
-    final ByteBuffer buffer = StandardCharsets.UTF_8.encode("3千大千世界");
+    final ByteBuffer buffer = StandardCharsets.UTF_8.encode("大千世界");
     buffer.position(0);
     buffer.order(ByteOrder.nativeOrder());
 
     final int length = buffer.limit();
-    assertEquals(16, length);
+    assertEquals(12, length);
 
-    final int longs = length / 8;
-    final long[] array = new long[longs];
+    final int ints = length / 4;
+    final int[] array = new int[ints];
 
-    for (int i = 0; i < longs; ++i) {
-      array[i] = buffer.getLong();
+    for (int i = 0; i < ints; ++i) {
+      array[i] = buffer.getInt();
     }
 
     final ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
-    new UTF8String(OnHeapMemoryBlock.fromArray(array)).writeTo(outputStream);
-    assertEquals("3千大千世界", outputStream.toString("UTF-8"));
+    fromAddress(array, Platform.INT_ARRAY_OFFSET, length)
+        .writeTo(outputStream);
+    assertEquals("大千世界", outputStream.toString("UTF-8"));
   }
 
   @Test
diff --git a/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringPropertyCheckSuite.scala b/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringPropertyCheckSuite.scala
index 7d3331f44f015..fdb81a06d41c9 100644
--- a/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringPropertyCheckSuite.scala
+++ b/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringPropertyCheckSuite.scala
@@ -17,7 +17,7 @@
 
 package org.apache.spark.unsafe.types
 
-import org.apache.commons.lang3.StringUtils
+import org.apache.commons.text.similarity.LevenshteinDistance
 import org.scalacheck.{Arbitrary, Gen}
 import org.scalatest.prop.GeneratorDrivenPropertyChecks
 // scalastyle:off
@@ -63,6 +63,7 @@ class UTF8StringPropertyCheckSuite extends FunSuite with GeneratorDrivenProperty
     }
   }
 
+  // scalastyle:off caselocale
   test("toUpperCase") {
     forAll { (s: String) =>
       assert(toUTF8(s).toUpperCase === toUTF8(s.toUpperCase))
@@ -74,6 +75,7 @@ class UTF8StringPropertyCheckSuite extends FunSuite with GeneratorDrivenProperty
       assert(toUTF8(s).toLowerCase === toUTF8(s.toLowerCase))
     }
   }
+  // scalastyle:on caselocale
 
   test("compare") {
     forAll { (s1: String, s2: String) =>
@@ -230,7 +232,7 @@ class UTF8StringPropertyCheckSuite extends FunSuite with GeneratorDrivenProperty
   test("levenshteinDistance") {
     forAll { (one: String, another: String) =>
       assert(toUTF8(one).levenshteinDistance(toUTF8(another)) ===
-        StringUtils.getLevenshteinDistance(one, another))
+        LevenshteinDistance.getDefaultInstance.apply(one, another))
     }
   }
 
diff --git a/core/benchmarks/KryoBenchmark-results.txt b/core/benchmarks/KryoBenchmark-results.txt
new file mode 100644
index 0000000000000..91e22f3afc14f
--- /dev/null
+++ b/core/benchmarks/KryoBenchmark-results.txt
@@ -0,0 +1,29 @@
+================================================================================================
+Benchmark Kryo Unsafe vs safe Serialization
+================================================================================================
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.13.6
+Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz
+
+Benchmark Kryo Unsafe vs safe Serialization: Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+basicTypes: Int with unsafe:true               138 /  149          7.2         138.0       1.0X
+basicTypes: Long with unsafe:true              168 /  173          6.0         167.7       0.8X
+basicTypes: Float with unsafe:true             153 /  174          6.5         153.1       0.9X
+basicTypes: Double with unsafe:true            161 /  185          6.2         161.1       0.9X
+Array: Int with unsafe:true                      2 /    3        409.7           2.4      56.5X
+Array: Long with unsafe:true                     4 /    5        232.5           4.3      32.1X
+Array: Float with unsafe:true                    3 /    4        367.3           2.7      50.7X
+Array: Double with unsafe:true                   4 /    5        228.5           4.4      31.5X
+Map of string->Double  with unsafe:true         38 /   45         26.5          37.8       3.7X
+basicTypes: Int with unsafe:false              176 /  187          5.7         175.9       0.8X
+basicTypes: Long with unsafe:false             191 /  203          5.2         191.2       0.7X
+basicTypes: Float with unsafe:false            166 /  176          6.0         166.2       0.8X
+basicTypes: Double with unsafe:false           174 /  190          5.7         174.3       0.8X
+Array: Int with unsafe:false                    19 /   26         52.9          18.9       7.3X
+Array: Long with unsafe:false                   27 /   31         37.7          26.5       5.2X
+Array: Float with unsafe:false                   8 /   10        124.3           8.0      17.2X
+Array: Double with unsafe:false                 12 /   13         83.6          12.0      11.5X
+Map of string->Double  with unsafe:false        38 /   42         26.1          38.3       3.6X
+
+
diff --git a/core/benchmarks/KryoSerializerBenchmark-results.txt b/core/benchmarks/KryoSerializerBenchmark-results.txt
new file mode 100644
index 0000000000000..c3ce336d93241
--- /dev/null
+++ b/core/benchmarks/KryoSerializerBenchmark-results.txt
@@ -0,0 +1,12 @@
+================================================================================================
+Benchmark KryoPool vs "pool of 1"
+================================================================================================
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.14
+Intel(R) Core(TM) i7-4770HQ CPU @ 2.20GHz
+Benchmark KryoPool vs "pool of 1":       Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+KryoPool:true                                 2682 / 3425          0.0     5364627.9       1.0X
+KryoPool:false                                8176 / 9292          0.0    16351252.2       0.3X
+
+
diff --git a/core/pom.xml b/core/pom.xml
index 5fa3a86de6b01..49b1a54e32598 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -20,12 +20,12 @@
   <modelVersion>4.0.0</modelVersion>
   <parent>
     <groupId>org.apache.spark</groupId>
-    <artifactId>spark-parent_2.11</artifactId>
-    <version>2.4.0-SNAPSHOT</version>
+    <artifactId>spark-parent_2.12</artifactId>
+    <version>3.0.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 
-  <artifactId>spark-core_2.11</artifactId>
+  <artifactId>spark-core_2.12</artifactId>
   <properties>
     <sbt.project.name>core</sbt.project.name>
   </properties>
@@ -56,7 +56,7 @@
     </dependency>
     <dependency>
       <groupId>org.apache.xbean</groupId>
-      <artifactId>xbean-asm6-shaded</artifactId>
+      <artifactId>xbean-asm7-shaded</artifactId>
     </dependency>
     <dependency>
       <groupId>org.apache.hadoop</groupId>
@@ -350,7 +350,7 @@
     <dependency>
       <groupId>net.sf.py4j</groupId>
       <artifactId>py4j</artifactId>
-      <version>0.10.7</version>
+      <version>0.10.8.1</version>
     </dependency>
     <dependency>
       <groupId>org.apache.spark</groupId>
@@ -408,6 +408,19 @@
       <scope>provided</scope>
     </dependency>
 
+    <!--
+     The following Kafka dependency is used to obtain delegation tokens.
+     In order to prevent spark-core from depending on Kafka, this dependency has been placed in the
+     "provided" scope, rather than the "compile" scope, and NoClassDefFoundError exceptions are
+     handled when the user explicitly uses neither the spark-streaming-kafka nor spark-sql-kafka module.
+    -->
+    <dependency>
+      <groupId>org.apache.kafka</groupId>
+      <artifactId>kafka-clients</artifactId>
+      <version>${kafka.version}</version>
+      <scope>provided</scope>
+    </dependency>
+
   </dependencies>
   <build>
     <outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
diff --git a/core/src/main/java/org/apache/spark/ExecutorPlugin.java b/core/src/main/java/org/apache/spark/ExecutorPlugin.java
new file mode 100644
index 0000000000000..f86520c81df33
--- /dev/null
+++ b/core/src/main/java/org/apache/spark/ExecutorPlugin.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark;
+
+import org.apache.spark.annotation.DeveloperApi;
+
+/**
+ * A plugin which can be automatically instantiated within each Spark executor.  Users can specify
+ * plugins which should be created with the "spark.executor.plugins" configuration.  An instance
+ * of each plugin will be created for every executor, including those created by dynamic allocation,
+ * before the executor starts running any tasks.
+ *
+ * The specific API exposed to end users is still considered to be very unstable.  We will
+ * hopefully be able to keep compatibility by providing default implementations for any methods
+ * added, but make no guarantees this will always be possible across all Spark releases.
+ *
+ * Spark does nothing to verify the plugin is doing legitimate things, or to manage the resources
+ * it uses.  A plugin acquires the same privileges as the user running the task.  A bad plugin
+ * could also interfere with task execution and make the executor fail in unexpected ways.
+ */
+@DeveloperApi
+public interface ExecutorPlugin {
+
+  /**
+   * Initialize the executor plugin.
+   *
+   * <p>Each executor will, during its initialization, invoke this method on each
+   * plugin provided in the spark.executor.plugins configuration.</p>
+   *
+   * <p>Plugins should create threads in their implementation of this method for
+   * any polling, blocking, or intensive computation.</p>
+   */
+  default void init() {}
+
+  /**
+   * Clean up and terminate this plugin.
+   *
+   * <p>This function is called during the executor shutdown phase. The executor
+   * will wait for the plugin to terminate before continuing its own shutdown.</p>
+   */
+  default void shutdown() {}
+}
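
To illustrate how the new ExecutorPlugin interface above is intended to be used, here is a rough sketch of a plugin that starts a polling thread in init() and stops it in shutdown(). Only the ExecutorPlugin interface and the spark.executor.plugins configuration key come from the diff; the class name and the heartbeat behavior are hypothetical:

    import org.apache.spark.ExecutorPlugin;

    // Hypothetical plugin; it would be enabled on executors with
    //   --conf spark.executor.plugins=HeartbeatPlugin
    public class HeartbeatPlugin implements ExecutorPlugin {
      private volatile boolean running = true;
      private Thread poller;

      @Override
      public void init() {
        // Per the interface docs, any polling or blocking work belongs in its own thread.
        poller = new Thread(() -> {
          while (running) {
            // e.g. emit a heartbeat or poll an external metrics endpoint here
            try {
              Thread.sleep(10_000);
            } catch (InterruptedException e) {
              return;
            }
          }
        }, "heartbeat-plugin");
        poller.setDaemon(true);
        poller.start();
      }

      @Override
      public void shutdown() {
        // The executor waits for this to return before finishing its own shutdown.
        running = false;
        if (poller != null) {
          poller.interrupt();
        }
      }
    }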
diff --git a/core/src/main/java/org/apache/spark/SparkFirehoseListener.java b/core/src/main/java/org/apache/spark/SparkFirehoseListener.java
index 94c5c11b61a50..731f6fc767dfd 100644
--- a/core/src/main/java/org/apache/spark/SparkFirehoseListener.java
+++ b/core/src/main/java/org/apache/spark/SparkFirehoseListener.java
@@ -103,6 +103,12 @@ public final void onExecutorMetricsUpdate(
     onEvent(executorMetricsUpdate);
   }
 
+  @Override
+  public final void onStageExecutorMetrics(
+      SparkListenerStageExecutorMetrics executorMetrics) {
+    onEvent(executorMetrics);
+  }
+
   @Override
   public final void onExecutorAdded(SparkListenerExecutorAdded executorAdded) {
     onEvent(executorAdded);
diff --git a/core/src/main/java/org/apache/spark/api/java/JavaSparkContextVarargsWorkaround.java b/core/src/main/java/org/apache/spark/api/java/JavaSparkContextVarargsWorkaround.java
deleted file mode 100644
index 0dd8fafbf2c82..0000000000000
--- a/core/src/main/java/org/apache/spark/api/java/JavaSparkContextVarargsWorkaround.java
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.api.java;
-
-import java.util.ArrayList;
-import java.util.List;
-
-// See
-// http://scala-programming-language.1934581.n4.nabble.com/Workaround-for-implementing-java-varargs-in-2-7-2-final-tp1944767p1944772.html
-abstract class JavaSparkContextVarargsWorkaround {
-
-  @SafeVarargs
-  public final <T> JavaRDD<T> union(JavaRDD<T>... rdds) {
-    if (rdds.length == 0) {
-      throw new IllegalArgumentException("Union called on empty list");
-    }
-    List<JavaRDD<T>> rest = new ArrayList<>(rdds.length - 1);
-    for (int i = 1; i < rdds.length; i++) {
-      rest.add(rdds[i]);
-    }
-    return union(rdds[0], rest);
-  }
-
-  public JavaDoubleRDD union(JavaDoubleRDD... rdds) {
-    if (rdds.length == 0) {
-      throw new IllegalArgumentException("Union called on empty list");
-    }
-    List<JavaDoubleRDD> rest = new ArrayList<>(rdds.length - 1);
-    for (int i = 1; i < rdds.length; i++) {
-      rest.add(rdds[i]);
-    }
-    return union(rdds[0], rest);
-  }
-
-  @SafeVarargs
-  public final <K, V> JavaPairRDD<K, V> union(JavaPairRDD<K, V>... rdds) {
-    if (rdds.length == 0) {
-      throw new IllegalArgumentException("Union called on empty list");
-    }
-    List<JavaPairRDD<K, V>> rest = new ArrayList<>(rdds.length - 1);
-    for (int i = 1; i < rdds.length; i++) {
-      rest.add(rdds[i]);
-    }
-    return union(rdds[0], rest);
-  }
-
-  // These methods take separate "first" and "rest" elements to avoid having the same type erasure
-  public abstract <T> JavaRDD<T> union(JavaRDD<T> first, List<JavaRDD<T>> rest);
-  public abstract JavaDoubleRDD union(JavaDoubleRDD first, List<JavaDoubleRDD> rest);
-  public abstract <K, V> JavaPairRDD<K, V> union(JavaPairRDD<K, V> first, List<JavaPairRDD<K, V>>
-    rest);
-}
diff --git a/core/src/main/java/org/apache/spark/io/NioBufferedFileInputStream.java b/core/src/main/java/org/apache/spark/io/NioBufferedFileInputStream.java
index f6d1288cb263d..92bf0ecc1b5cb 100644
--- a/core/src/main/java/org/apache/spark/io/NioBufferedFileInputStream.java
+++ b/core/src/main/java/org/apache/spark/io/NioBufferedFileInputStream.java
@@ -27,7 +27,7 @@
  * to read a file to avoid extra copy of data between Java and
  * native memory which happens when using {@link java.io.BufferedInputStream}.
  * Unfortunately, this is not something already available in JDK,
- * {@link sun.nio.ch.ChannelInputStream} supports reading a file using nio,
+ * {@code sun.nio.ch.ChannelInputStream} supports reading a file using nio,
  * but does not support buffering.
  */
 public final class NioBufferedFileInputStream extends InputStream {
@@ -130,6 +130,7 @@ public synchronized void close() throws IOException {
     StorageUtils.dispose(byteBuffer);
   }
 
+  @SuppressWarnings("deprecation")
   @Override
   protected void finalize() throws IOException {
     close();
diff --git a/core/src/main/java/org/apache/spark/io/ReadAheadInputStream.java b/core/src/main/java/org/apache/spark/io/ReadAheadInputStream.java
index 0cced9e222952..2e18715b600e0 100644
--- a/core/src/main/java/org/apache/spark/io/ReadAheadInputStream.java
+++ b/core/src/main/java/org/apache/spark/io/ReadAheadInputStream.java
@@ -135,62 +135,58 @@ private void readAsync() throws IOException {
     } finally {
       stateChangeLock.unlock();
     }
-    executorService.execute(new Runnable() {
-
-      @Override
-      public void run() {
-        stateChangeLock.lock();
-        try {
-          if (isClosed) {
-            readInProgress = false;
-            return;
-          }
-          // Flip this so that the close method will not close the underlying input stream when we
-          // are reading.
-          isReading = true;
-        } finally {
-          stateChangeLock.unlock();
+    executorService.execute(() -> {
+      stateChangeLock.lock();
+      try {
+        if (isClosed) {
+          readInProgress = false;
+          return;
         }
+        // Flip this so that the close method will not close the underlying input stream when we
+        // are reading.
+        isReading = true;
+      } finally {
+        stateChangeLock.unlock();
+      }
 
-        // Please note that it is safe to release the lock and read into the read ahead buffer
-        // because either of following two conditions will hold - 1. The active buffer has
-        // data available to read so the reader will not read from the read ahead buffer.
-        // 2. This is the first time read is called or the active buffer is exhausted,
-        // in that case the reader waits for this async read to complete.
-        // So there is no race condition in both the situations.
-        int read = 0;
-        int off = 0, len = arr.length;
-        Throwable exception = null;
-        try {
-          // try to fill the read ahead buffer.
-          // if a reader is waiting, possibly return early.
-          do {
-            read = underlyingInputStream.read(arr, off, len);
-            if (read <= 0) break;
-            off += read;
-            len -= read;
-          } while (len > 0 && !isWaiting.get());
-        } catch (Throwable ex) {
-          exception = ex;
-          if (ex instanceof Error) {
-            // `readException` may not be reported to the user. Rethrow Error to make sure at least
-            // The user can see Error in UncaughtExceptionHandler.
-            throw (Error) ex;
-          }
-        } finally {
-          stateChangeLock.lock();
-          readAheadBuffer.limit(off);
-          if (read < 0 || (exception instanceof EOFException)) {
-            endOfStream = true;
-          } else if (exception != null) {
-            readAborted = true;
-            readException = exception;
-          }
-          readInProgress = false;
-          signalAsyncReadComplete();
-          stateChangeLock.unlock();
-          closeUnderlyingInputStreamIfNecessary();
+      // Please note that it is safe to release the lock and read into the read ahead buffer
+      // because either of the following two conditions will hold - 1. The active buffer has
+      // data available to read so the reader will not read from the read ahead buffer.
+      // 2. This is the first time read is called or the active buffer is exhausted,
+      // in that case the reader waits for this async read to complete.
+      // So there is no race condition in either situation.
+      int read = 0;
+      int off = 0, len = arr.length;
+      Throwable exception = null;
+      try {
+        // try to fill the read ahead buffer.
+        // if a reader is waiting, possibly return early.
+        do {
+          read = underlyingInputStream.read(arr, off, len);
+          if (read <= 0) break;
+          off += read;
+          len -= read;
+        } while (len > 0 && !isWaiting.get());
+      } catch (Throwable ex) {
+        exception = ex;
+        if (ex instanceof Error) {
+          // `readException` may not be reported to the user. Rethrow Error to make sure at least
+          // the user can see the Error in UncaughtExceptionHandler.
+          throw (Error) ex;
         }
+      } finally {
+        stateChangeLock.lock();
+        readAheadBuffer.limit(off);
+        if (read < 0 || (exception instanceof EOFException)) {
+          endOfStream = true;
+        } else if (exception != null) {
+          readAborted = true;
+          readException = exception;
+        }
+        readInProgress = false;
+        signalAsyncReadComplete();
+        stateChangeLock.unlock();
+        closeUnderlyingInputStreamIfNecessary();
       }
     });
   }
diff --git a/core/src/main/java/org/apache/spark/memory/MemoryConsumer.java b/core/src/main/java/org/apache/spark/memory/MemoryConsumer.java
index 115e1fbb79a2e..4bfd2d358f36f 100644
--- a/core/src/main/java/org/apache/spark/memory/MemoryConsumer.java
+++ b/core/src/main/java/org/apache/spark/memory/MemoryConsumer.java
@@ -83,10 +83,10 @@ public void spill() throws IOException {
   public abstract long spill(long size, MemoryConsumer trigger) throws IOException;
 
   /**
-   * Allocates a LongArray of `size`. Note that this method may throw `OutOfMemoryError` if Spark
-   * doesn't have enough memory for this allocation, or throw `TooLargePageException` if this
-   * `LongArray` is too large to fit in a single page. The caller side should take care of these
-   * two exceptions, or make sure the `size` is small enough that won't trigger exceptions.
+   * Allocates a LongArray of `size`. Note that this method may throw `SparkOutOfMemoryError`
+   * if Spark doesn't have enough memory for this allocation, or throw `TooLargePageException`
+   * if this `LongArray` is too large to fit in a single page. The caller side should take care of
+   * these two exceptions, or make sure the `size` is small enough that it won't trigger exceptions.
    *
    * @throws SparkOutOfMemoryError
    * @throws TooLargePageException
@@ -111,7 +111,7 @@ public void freeArray(LongArray array) {
   /**
    * Allocate a memory block with at least `required` bytes.
    *
-   * @throws OutOfMemoryError
+   * @throws SparkOutOfMemoryError
    */
   protected MemoryBlock allocatePage(long required) {
     MemoryBlock page = taskMemoryManager.allocatePage(Math.max(pageSize, required), this);
@@ -154,7 +154,9 @@ private void throwOom(final MemoryBlock page, final long required) {
       taskMemoryManager.freePage(page, this);
     }
     taskMemoryManager.showMemoryUsage();
+    // checkstyle.off: RegexpSinglelineJava
     throw new SparkOutOfMemoryError("Unable to acquire " + required + " bytes of memory, got " +
       got);
+    // checkstyle.on: RegexpSinglelineJava
   }
 }
diff --git a/core/src/main/java/org/apache/spark/memory/TaskMemoryManager.java b/core/src/main/java/org/apache/spark/memory/TaskMemoryManager.java
index 8651a639c07f7..28b646ba3c951 100644
--- a/core/src/main/java/org/apache/spark/memory/TaskMemoryManager.java
+++ b/core/src/main/java/org/apache/spark/memory/TaskMemoryManager.java
@@ -194,8 +194,10 @@ public long acquireExecutionMemory(long required, MemoryConsumer consumer) {
             throw new RuntimeException(e.getMessage());
           } catch (IOException e) {
             logger.error("error while calling spill() on " + c, e);
+            // checkstyle.off: RegexpSinglelineJava
             throw new SparkOutOfMemoryError("error while calling spill() on " + c + " : "
               + e.getMessage());
+            // checkstyle.on: RegexpSinglelineJava
           }
         }
       }
@@ -215,8 +217,10 @@ public long acquireExecutionMemory(long required, MemoryConsumer consumer) {
           throw new RuntimeException(e.getMessage());
         } catch (IOException e) {
           logger.error("error while calling spill() on " + consumer, e);
+          // checkstyle.off: RegexpSinglelineJava
           throw new SparkOutOfMemoryError("error while calling spill() on " + consumer + " : "
             + e.getMessage());
+          // checkstyle.on: RegexpSinglelineJava
         }
       }
 
@@ -311,7 +315,7 @@ public MemoryBlock allocatePage(long size, MemoryConsumer consumer) {
       // this could trigger spilling to free some pages.
       return allocatePage(size, consumer);
     }
-    page.setPageNumber(pageNumber);
+    page.pageNumber = pageNumber;
     pageTable[pageNumber] = page;
     if (logger.isTraceEnabled()) {
       logger.trace("Allocate page number {} ({} bytes)", pageNumber, acquired);
@@ -323,25 +327,25 @@ public MemoryBlock allocatePage(long size, MemoryConsumer consumer) {
    * Free a block of memory allocated via {@link TaskMemoryManager#allocatePage}.
    */
   public void freePage(MemoryBlock page, MemoryConsumer consumer) {
-    assert (page.getPageNumber() != MemoryBlock.NO_PAGE_NUMBER) :
+    assert (page.pageNumber != MemoryBlock.NO_PAGE_NUMBER) :
       "Called freePage() on memory that wasn't allocated with allocatePage()";
-    assert (page.getPageNumber() != MemoryBlock.FREED_IN_ALLOCATOR_PAGE_NUMBER) :
+    assert (page.pageNumber != MemoryBlock.FREED_IN_ALLOCATOR_PAGE_NUMBER) :
       "Called freePage() on a memory block that has already been freed";
-    assert (page.getPageNumber() != MemoryBlock.FREED_IN_TMM_PAGE_NUMBER) :
+    assert (page.pageNumber != MemoryBlock.FREED_IN_TMM_PAGE_NUMBER) :
             "Called freePage() on a memory block that has already been freed";
-    assert(allocatedPages.get(page.getPageNumber()));
-    pageTable[page.getPageNumber()] = null;
+    assert(allocatedPages.get(page.pageNumber));
+    pageTable[page.pageNumber] = null;
     synchronized (this) {
-      allocatedPages.clear(page.getPageNumber());
+      allocatedPages.clear(page.pageNumber);
     }
     if (logger.isTraceEnabled()) {
-      logger.trace("Freed page number {} ({} bytes)", page.getPageNumber(), page.size());
+      logger.trace("Freed page number {} ({} bytes)", page.pageNumber, page.size());
     }
     long pageSize = page.size();
     // Clear the page number before passing the block to the MemoryAllocator's free().
     // Doing this allows the MemoryAllocator to detect when a TaskMemoryManager-managed
     // page has been inappropriately directly freed without calling TMM.freePage().
-    page.setPageNumber(MemoryBlock.FREED_IN_TMM_PAGE_NUMBER);
+    page.pageNumber = MemoryBlock.FREED_IN_TMM_PAGE_NUMBER;
     memoryManager.tungstenMemoryAllocator().free(page);
     releaseExecutionMemory(pageSize, consumer);
   }
@@ -363,7 +367,7 @@ public long encodePageNumberAndOffset(MemoryBlock page, long offsetInPage) {
       // relative to the page's base offset; this relative offset will fit in 51 bits.
       offsetInPage -= page.getBaseOffset();
     }
-    return encodePageNumberAndOffset(page.getPageNumber(), offsetInPage);
+    return encodePageNumberAndOffset(page.pageNumber, offsetInPage);
   }
 
   @VisibleForTesting
@@ -434,7 +438,7 @@ public long cleanUpAllAllocatedMemory() {
       for (MemoryBlock page : pageTable) {
         if (page != null) {
           logger.debug("unreleased page: " + page + " in task " + taskAttemptId);
-          page.setPageNumber(MemoryBlock.FREED_IN_TMM_PAGE_NUMBER);
+          page.pageNumber = MemoryBlock.FREED_IN_TMM_PAGE_NUMBER;
           memoryManager.tungstenMemoryAllocator().free(page);
         }
       }
diff --git a/core/src/main/java/org/apache/spark/package-info.java b/core/src/main/java/org/apache/spark/package-info.java
index 4426c7afcebdd..a029931f9e4c0 100644
--- a/core/src/main/java/org/apache/spark/package-info.java
+++ b/core/src/main/java/org/apache/spark/package-info.java
@@ -16,8 +16,8 @@
  */
 
 /**
- * Core Spark classes in Scala. A few classes here, such as {@link org.apache.spark.Accumulator}
- * and {@link org.apache.spark.storage.StorageLevel}, are also used in Java, but the
+ * Core Spark classes in Scala. A few classes here, such as
+ * {@link org.apache.spark.storage.StorageLevel}, are also used in Java, but the
  * {@link org.apache.spark.api.java} package contains the main Java API.
  */
 package org.apache.spark;
diff --git a/core/src/main/java/org/apache/spark/shuffle/sort/BypassMergeSortShuffleWriter.java b/core/src/main/java/org/apache/spark/shuffle/sort/BypassMergeSortShuffleWriter.java
index e3bd5496cf5ba..fda33cd8293d5 100644
--- a/core/src/main/java/org/apache/spark/shuffle/sort/BypassMergeSortShuffleWriter.java
+++ b/core/src/main/java/org/apache/spark/shuffle/sort/BypassMergeSortShuffleWriter.java
@@ -37,12 +37,11 @@
 import org.apache.spark.Partitioner;
 import org.apache.spark.ShuffleDependency;
 import org.apache.spark.SparkConf;
-import org.apache.spark.TaskContext;
-import org.apache.spark.executor.ShuffleWriteMetrics;
 import org.apache.spark.scheduler.MapStatus;
 import org.apache.spark.scheduler.MapStatus$;
 import org.apache.spark.serializer.Serializer;
 import org.apache.spark.serializer.SerializerInstance;
+import org.apache.spark.shuffle.ShuffleWriteMetricsReporter;
 import org.apache.spark.shuffle.IndexShuffleBlockResolver;
 import org.apache.spark.shuffle.ShuffleWriter;
 import org.apache.spark.storage.*;
@@ -79,7 +78,7 @@
   private final int numPartitions;
   private final BlockManager blockManager;
   private final Partitioner partitioner;
-  private final ShuffleWriteMetrics writeMetrics;
+  private final ShuffleWriteMetricsReporter writeMetrics;
   private final int shuffleId;
   private final int mapId;
   private final Serializer serializer;
@@ -103,8 +102,8 @@
       IndexShuffleBlockResolver shuffleBlockResolver,
       BypassMergeSortShuffleHandle<K, V> handle,
       int mapId,
-      TaskContext taskContext,
-      SparkConf conf) {
+      SparkConf conf,
+      ShuffleWriteMetricsReporter writeMetrics) {
     // Use getSizeAsKb (not bytes) to maintain backwards compatibility if no units are provided
     this.fileBufferSize = (int) conf.getSizeAsKb("spark.shuffle.file.buffer", "32k") * 1024;
     this.transferToEnabled = conf.getBoolean("spark.file.transferTo", true);
@@ -114,7 +113,7 @@
     this.shuffleId = dep.shuffleId();
     this.partitioner = dep.partitioner();
     this.numPartitions = partitioner.numPartitions();
-    this.writeMetrics = taskContext.taskMetrics().shuffleWriteMetrics();
+    this.writeMetrics = writeMetrics;
     this.serializer = dep.serializer();
     this.shuffleBlockResolver = shuffleBlockResolver;
   }
@@ -125,7 +124,7 @@ public void write(Iterator<Product2<K, V>> records) throws IOException {
     if (!records.hasNext()) {
       partitionLengths = new long[numPartitions];
       shuffleBlockResolver.writeIndexFileAndCommit(shuffleId, mapId, partitionLengths, null);
-      mapStatus = MapStatus$.MODULE$.apply(blockManager.shuffleServerId(), partitionLengths, 0);
+      mapStatus = MapStatus$.MODULE$.apply(blockManager.shuffleServerId(), partitionLengths);
       return;
     }
     final SerializerInstance serInstance = serializer.newInstance();
@@ -152,9 +151,9 @@ public void write(Iterator<Product2<K, V>> records) throws IOException {
     }
 
     for (int i = 0; i < numPartitions; i++) {
-      final DiskBlockObjectWriter writer = partitionWriters[i];
-      partitionWriterSegments[i] = writer.commitAndGet();
-      writer.close();
+      try (DiskBlockObjectWriter writer = partitionWriters[i]) {
+        partitionWriterSegments[i] = writer.commitAndGet();
+      }
     }
 
     File output = shuffleBlockResolver.getDataFile(shuffleId, mapId);
@@ -167,8 +166,7 @@ public void write(Iterator<Product2<K, V>> records) throws IOException {
         logger.error("Error while deleting temp file {}", tmp.getAbsolutePath());
       }
     }
-    mapStatus = MapStatus$.MODULE$.apply(
-      blockManager.shuffleServerId(), partitionLengths, writeMetrics.recordsWritten());
+    mapStatus = MapStatus$.MODULE$.apply(blockManager.shuffleServerId(), partitionLengths);
   }
 
   @VisibleForTesting
diff --git a/core/src/main/java/org/apache/spark/shuffle/sort/ShuffleExternalSorter.java b/core/src/main/java/org/apache/spark/shuffle/sort/ShuffleExternalSorter.java
index c7d2db4217d96..6ee9d5f0eec3b 100644
--- a/core/src/main/java/org/apache/spark/shuffle/sort/ShuffleExternalSorter.java
+++ b/core/src/main/java/org/apache/spark/shuffle/sort/ShuffleExternalSorter.java
@@ -38,6 +38,7 @@
 import org.apache.spark.memory.TooLargePageException;
 import org.apache.spark.serializer.DummySerializerInstance;
 import org.apache.spark.serializer.SerializerInstance;
+import org.apache.spark.shuffle.ShuffleWriteMetricsReporter;
 import org.apache.spark.storage.BlockManager;
 import org.apache.spark.storage.DiskBlockObjectWriter;
 import org.apache.spark.storage.FileSegment;
@@ -75,7 +76,7 @@
   private final TaskMemoryManager taskMemoryManager;
   private final BlockManager blockManager;
   private final TaskContext taskContext;
-  private final ShuffleWriteMetrics writeMetrics;
+  private final ShuffleWriteMetricsReporter writeMetrics;
 
   /**
    * Force this sorter to spill when there are this many elements in memory.
@@ -113,7 +114,7 @@
       int initialSize,
       int numPartitions,
       SparkConf conf,
-      ShuffleWriteMetrics writeMetrics) {
+      ShuffleWriteMetricsReporter writeMetrics) {
     super(memoryManager,
       (int) Math.min(PackedRecordPointer.MAXIMUM_PAGE_SIZE_BYTES, memoryManager.pageSizeBytes()),
       memoryManager.getTungstenMemoryMode());
@@ -144,7 +145,7 @@
    */
   private void writeSortedFile(boolean isLastFile) {
 
-    final ShuffleWriteMetrics writeMetricsToUse;
+    final ShuffleWriteMetricsReporter writeMetricsToUse;
 
     if (isLastFile) {
       // We're writing the final non-spill file, so we _do_ want to count this as shuffle bytes.
@@ -181,42 +182,43 @@ private void writeSortedFile(boolean isLastFile) {
     // around this, we pass a dummy no-op serializer.
     final SerializerInstance ser = DummySerializerInstance.INSTANCE;
 
-    final DiskBlockObjectWriter writer =
-      blockManager.getDiskWriter(blockId, file, ser, fileBufferSizeBytes, writeMetricsToUse);
-
     int currentPartition = -1;
-    final int uaoSize = UnsafeAlignedOffset.getUaoSize();
-    while (sortedRecords.hasNext()) {
-      sortedRecords.loadNext();
-      final int partition = sortedRecords.packedRecordPointer.getPartitionId();
-      assert (partition >= currentPartition);
-      if (partition != currentPartition) {
-        // Switch to the new partition
-        if (currentPartition != -1) {
-          final FileSegment fileSegment = writer.commitAndGet();
-          spillInfo.partitionLengths[currentPartition] = fileSegment.length();
+    final FileSegment committedSegment;
+    try (DiskBlockObjectWriter writer =
+        blockManager.getDiskWriter(blockId, file, ser, fileBufferSizeBytes, writeMetricsToUse)) {
+
+      final int uaoSize = UnsafeAlignedOffset.getUaoSize();
+      while (sortedRecords.hasNext()) {
+        sortedRecords.loadNext();
+        final int partition = sortedRecords.packedRecordPointer.getPartitionId();
+        assert (partition >= currentPartition);
+        if (partition != currentPartition) {
+          // Switch to the new partition
+          if (currentPartition != -1) {
+            final FileSegment fileSegment = writer.commitAndGet();
+            spillInfo.partitionLengths[currentPartition] = fileSegment.length();
+          }
+          currentPartition = partition;
         }
-        currentPartition = partition;
-      }
 
-      final long recordPointer = sortedRecords.packedRecordPointer.getRecordPointer();
-      final Object recordPage = taskMemoryManager.getPage(recordPointer);
-      final long recordOffsetInPage = taskMemoryManager.getOffsetInPage(recordPointer);
-      int dataRemaining = UnsafeAlignedOffset.getSize(recordPage, recordOffsetInPage);
-      long recordReadPosition = recordOffsetInPage + uaoSize; // skip over record length
-      while (dataRemaining > 0) {
-        final int toTransfer = Math.min(diskWriteBufferSize, dataRemaining);
-        Platform.copyMemory(
-          recordPage, recordReadPosition, writeBuffer, Platform.BYTE_ARRAY_OFFSET, toTransfer);
-        writer.write(writeBuffer, 0, toTransfer);
-        recordReadPosition += toTransfer;
-        dataRemaining -= toTransfer;
+        final long recordPointer = sortedRecords.packedRecordPointer.getRecordPointer();
+        final Object recordPage = taskMemoryManager.getPage(recordPointer);
+        final long recordOffsetInPage = taskMemoryManager.getOffsetInPage(recordPointer);
+        int dataRemaining = UnsafeAlignedOffset.getSize(recordPage, recordOffsetInPage);
+        long recordReadPosition = recordOffsetInPage + uaoSize; // skip over record length
+        while (dataRemaining > 0) {
+          final int toTransfer = Math.min(diskWriteBufferSize, dataRemaining);
+          Platform.copyMemory(
+            recordPage, recordReadPosition, writeBuffer, Platform.BYTE_ARRAY_OFFSET, toTransfer);
+          writer.write(writeBuffer, 0, toTransfer);
+          recordReadPosition += toTransfer;
+          dataRemaining -= toTransfer;
+        }
+        writer.recordWritten();
       }
-      writer.recordWritten();
-    }
 
-    final FileSegment committedSegment = writer.commitAndGet();
-    writer.close();
+      committedSegment = writer.commitAndGet();
+    }
     // If `writeSortedFile()` was called from `closeAndGetSpills()` and no records were inserted,
     // then the file might be empty. Note that it might be better to avoid calling
     // writeSortedFile() in that case.
@@ -240,9 +242,14 @@ private void writeSortedFile(boolean isLastFile) {
       //
       // Note that we intentionally ignore the value of `writeMetricsToUse.shuffleWriteTime()`.
       // Consistent with ExternalSorter, we do not count this IO towards shuffle write time.
-      // This means that this IO time is not accounted for anywhere; SPARK-3577 will fix this.
-      writeMetrics.incRecordsWritten(writeMetricsToUse.recordsWritten());
-      taskContext.taskMetrics().incDiskBytesSpilled(writeMetricsToUse.bytesWritten());
+      // SPARK-3577 tracks the spill time separately.
+
+      // This is guaranteed to be a ShuffleWriteMetrics because of the check at the beginning
+      // of this method.
+      writeMetrics.incRecordsWritten(
+        ((ShuffleWriteMetrics)writeMetricsToUse).recordsWritten());
+      taskContext.taskMetrics().incDiskBytesSpilled(
+        ((ShuffleWriteMetrics)writeMetricsToUse).bytesWritten());
     }
   }
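
The rewritten block above moves the disk writer into a try-with-resources statement so it is closed even if a copy fails mid-partition. A minimal sketch of that pattern, using a plain java.io.BufferedOutputStream and a hypothetical file name instead of Spark's DiskBlockObjectWriter:

import java.io.BufferedOutputStream;
import java.io.FileOutputStream;
import java.io.IOException;

public class TryWithResourcesSketch {
  public static void main(String[] args) throws IOException {
    byte[] writeBuffer = new byte[4096]; // stands in for the shuffle writer's disk-write buffer
    // Declaring the writer in the try header guarantees close() runs whether the copy loop
    // completes or throws, which is what wrapping the disk writer achieves in the diff above.
    try (BufferedOutputStream writer =
        new BufferedOutputStream(new FileOutputStream("spill.tmp"))) { // hypothetical path
      writer.write(writeBuffer, 0, writeBuffer.length);
    }
  }
}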
 
diff --git a/core/src/main/java/org/apache/spark/shuffle/sort/ShuffleInMemorySorter.java b/core/src/main/java/org/apache/spark/shuffle/sort/ShuffleInMemorySorter.java
index 4b48599ad311e..0d069125dc60e 100644
--- a/core/src/main/java/org/apache/spark/shuffle/sort/ShuffleInMemorySorter.java
+++ b/core/src/main/java/org/apache/spark/shuffle/sort/ShuffleInMemorySorter.java
@@ -20,6 +20,7 @@
 import java.util.Comparator;
 
 import org.apache.spark.memory.MemoryConsumer;
+import org.apache.spark.unsafe.Platform;
 import org.apache.spark.unsafe.array.LongArray;
 import org.apache.spark.unsafe.memory.MemoryBlock;
 import org.apache.spark.util.collection.Sorter;
@@ -112,7 +113,13 @@ public void reset() {
 
   public void expandPointerArray(LongArray newArray) {
     assert(newArray.size() > array.size());
-    MemoryBlock.copyMemory(array.memoryBlock(), newArray.memoryBlock(), pos * 8L);
+    Platform.copyMemory(
+      array.getBaseObject(),
+      array.getBaseOffset(),
+      newArray.getBaseObject(),
+      newArray.getBaseOffset(),
+      pos * 8L
+    );
     consumer.freeArray(array);
     array = newArray;
     usableCapacity = getUsableCapacity();
@@ -181,7 +188,10 @@ public ShuffleSorterIterator getSortedIterator() {
         PackedRecordPointer.PARTITION_ID_START_BYTE_INDEX,
         PackedRecordPointer.PARTITION_ID_END_BYTE_INDEX, false, false);
     } else {
-      MemoryBlock unused = array.memoryBlock().subBlock(pos * 8L, (array.size() - pos) * 8L);
+      MemoryBlock unused = new MemoryBlock(
+        array.getBaseObject(),
+        array.getBaseOffset() + pos * 8L,
+        (array.size() - pos) * 8L);
       LongArray buffer = new LongArray(unused);
       Sorter<PackedRecordPointer, LongArray> sorter =
         new Sorter<>(new ShuffleSortDataFormat(buffer));
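
On branch-2.4 the pointer-array copy goes through Platform.copyMemory with explicit (base object, offset) pairs instead of MemoryBlock.copyMemory. A minimal sketch of that call shape, assuming only that org.apache.spark.unsafe.Platform (with the signature and BYTE_ARRAY_OFFSET constant shown in the diff) is on the classpath:

import org.apache.spark.unsafe.Platform;

public class BaseOffsetCopySketch {
  public static void main(String[] args) {
    byte[] src = {1, 2, 3, 4, 5, 6, 7, 8};
    byte[] dst = new byte[8];
    // Same call shape as in the diff: (srcBase, srcOffset, dstBase, dstOffset, lengthInBytes).
    // For on-heap arrays the base object is the array and the offset is the array-data offset;
    // for off-heap memory the base object would be null and the offset an absolute address.
    Platform.copyMemory(
      src, Platform.BYTE_ARRAY_OFFSET,
      dst, Platform.BYTE_ARRAY_OFFSET,
      src.length);
    System.out.println(dst[7]); // prints 8
  }
}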
diff --git a/core/src/main/java/org/apache/spark/shuffle/sort/ShuffleSortDataFormat.java b/core/src/main/java/org/apache/spark/shuffle/sort/ShuffleSortDataFormat.java
index 254449e95443e..717bdd79d47ef 100644
--- a/core/src/main/java/org/apache/spark/shuffle/sort/ShuffleSortDataFormat.java
+++ b/core/src/main/java/org/apache/spark/shuffle/sort/ShuffleSortDataFormat.java
@@ -17,8 +17,8 @@
 
 package org.apache.spark.shuffle.sort;
 
+import org.apache.spark.unsafe.Platform;
 import org.apache.spark.unsafe.array.LongArray;
-import org.apache.spark.unsafe.memory.MemoryBlock;
 import org.apache.spark.util.collection.SortDataFormat;
 
 final class ShuffleSortDataFormat extends SortDataFormat<PackedRecordPointer, LongArray> {
@@ -60,8 +60,13 @@ public void copyElement(LongArray src, int srcPos, LongArray dst, int dstPos) {
 
   @Override
   public void copyRange(LongArray src, int srcPos, LongArray dst, int dstPos, int length) {
-    MemoryBlock.copyMemory(src.memoryBlock(), srcPos * 8L,
-      dst.memoryBlock(),dstPos * 8L,length * 8L);
+    Platform.copyMemory(
+      src.getBaseObject(),
+      src.getBaseOffset() + srcPos * 8L,
+      dst.getBaseObject(),
+      dst.getBaseOffset() + dstPos * 8L,
+      length * 8L
+    );
   }
 
   @Override
diff --git a/core/src/main/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriter.java b/core/src/main/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriter.java
index 069e6d5f224d7..4b0c74341551e 100644
--- a/core/src/main/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriter.java
+++ b/core/src/main/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriter.java
@@ -37,7 +37,6 @@
 
 import org.apache.spark.*;
 import org.apache.spark.annotation.Private;
-import org.apache.spark.executor.ShuffleWriteMetrics;
 import org.apache.spark.io.CompressionCodec;
 import org.apache.spark.io.CompressionCodec$;
 import org.apache.spark.io.NioBufferedFileInputStream;
@@ -47,6 +46,7 @@
 import org.apache.spark.network.util.LimitedInputStream;
 import org.apache.spark.scheduler.MapStatus;
 import org.apache.spark.scheduler.MapStatus$;
+import org.apache.spark.shuffle.ShuffleWriteMetricsReporter;
 import org.apache.spark.serializer.SerializationStream;
 import org.apache.spark.serializer.SerializerInstance;
 import org.apache.spark.shuffle.IndexShuffleBlockResolver;
@@ -73,7 +73,7 @@
   private final TaskMemoryManager memoryManager;
   private final SerializerInstance serializer;
   private final Partitioner partitioner;
-  private final ShuffleWriteMetrics writeMetrics;
+  private final ShuffleWriteMetricsReporter writeMetrics;
   private final int shuffleId;
   private final int mapId;
   private final TaskContext taskContext;
@@ -122,7 +122,8 @@ public UnsafeShuffleWriter(
       SerializedShuffleHandle<K, V> handle,
       int mapId,
       TaskContext taskContext,
-      SparkConf sparkConf) throws IOException {
+      SparkConf sparkConf,
+      ShuffleWriteMetricsReporter writeMetrics) throws IOException {
     final int numPartitions = handle.dependency().partitioner().numPartitions();
     if (numPartitions > SortShuffleManager.MAX_SHUFFLE_OUTPUT_PARTITIONS_FOR_SERIALIZED_MODE()) {
       throw new IllegalArgumentException(
@@ -138,7 +139,7 @@ public UnsafeShuffleWriter(
     this.shuffleId = dep.shuffleId();
     this.serializer = dep.serializer().newInstance();
     this.partitioner = dep.partitioner();
-    this.writeMetrics = taskContext.taskMetrics().shuffleWriteMetrics();
+    this.writeMetrics = writeMetrics;
     this.taskContext = taskContext;
     this.sparkConf = sparkConf;
     this.transferToEnabled = sparkConf.getBoolean("spark.file.transferTo", true);
@@ -248,8 +249,7 @@ void closeAndWriteOutput() throws IOException {
         logger.error("Error while deleting temp file {}", tmp.getAbsolutePath());
       }
     }
-    mapStatus = MapStatus$.MODULE$.apply(
-      blockManager.shuffleServerId(), partitionLengths, writeMetrics.recordsWritten());
+    mapStatus = MapStatus$.MODULE$.apply(blockManager.shuffleServerId(), partitionLengths);
   }
 
   @VisibleForTesting
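
The UnsafeShuffleWriter change above injects a ShuffleWriteMetricsReporter through the constructor instead of reading the concrete ShuffleWriteMetrics off the task context. A minimal sketch of that dependency-injection shape, using hypothetical stand-in types rather than Spark's interfaces:

// Hypothetical stand-ins for the reporter interface and its counting implementation.
interface WriteMetricsReporter {
  void incRecordsWritten(long n);
}

final class CountingReporter implements WriteMetricsReporter {
  long records = 0;
  @Override public void incRecordsWritten(long n) { records += n; }
}

final class RecordWriter {
  private final WriteMetricsReporter metrics;
  // The reporter is supplied by the caller rather than fetched from task-level state,
  // which is the shape of the constructor change above.
  RecordWriter(WriteMetricsReporter metrics) { this.metrics = metrics; }
  void writeRecord() { metrics.incRecordsWritten(1L); }
}

public class ReporterInjectionSketch {
  public static void main(String[] args) {
    CountingReporter reporter = new CountingReporter();
    RecordWriter writer = new RecordWriter(reporter);
    writer.writeRecord();
    System.out.println(reporter.records); // prints 1
  }
}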
diff --git a/core/src/main/java/org/apache/spark/storage/TimeTrackingOutputStream.java b/core/src/main/java/org/apache/spark/storage/TimeTrackingOutputStream.java
index 5d0555a8c28e1..fcba3b73445c9 100644
--- a/core/src/main/java/org/apache/spark/storage/TimeTrackingOutputStream.java
+++ b/core/src/main/java/org/apache/spark/storage/TimeTrackingOutputStream.java
@@ -21,7 +21,7 @@
 import java.io.OutputStream;
 
 import org.apache.spark.annotation.Private;
-import org.apache.spark.executor.ShuffleWriteMetrics;
+import org.apache.spark.shuffle.ShuffleWriteMetricsReporter;
 
 /**
  * Intercepts write calls and tracks total time spent writing in order to update shuffle write
@@ -30,10 +30,11 @@
 @Private
 public final class TimeTrackingOutputStream extends OutputStream {
 
-  private final ShuffleWriteMetrics writeMetrics;
+  private final ShuffleWriteMetricsReporter writeMetrics;
   private final OutputStream outputStream;
 
-  public TimeTrackingOutputStream(ShuffleWriteMetrics writeMetrics, OutputStream outputStream) {
+  public TimeTrackingOutputStream(
+      ShuffleWriteMetricsReporter writeMetrics, OutputStream outputStream) {
     this.writeMetrics = writeMetrics;
     this.outputStream = outputStream;
   }
diff --git a/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java b/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java
index 9b6cbab38cbcc..a4e88598f7607 100644
--- a/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java
+++ b/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java
@@ -31,6 +31,7 @@
 import org.apache.spark.SparkEnv;
 import org.apache.spark.executor.ShuffleWriteMetrics;
 import org.apache.spark.memory.MemoryConsumer;
+import org.apache.spark.memory.SparkOutOfMemoryError;
 import org.apache.spark.memory.TaskMemoryManager;
 import org.apache.spark.serializer.SerializerManager;
 import org.apache.spark.storage.BlockManager;
@@ -741,7 +742,7 @@ public boolean append(Object kbase, long koff, int klen, Object vbase, long voff
         if (numKeys >= growthThreshold && longArray.size() < MAX_CAPACITY) {
           try {
             growAndRehash();
-          } catch (OutOfMemoryError oom) {
+          } catch (SparkOutOfMemoryError oom) {
             canGrowArray = false;
           }
         }
@@ -757,7 +758,7 @@ public boolean append(Object kbase, long koff, int klen, Object vbase, long voff
   private boolean acquireNewPage(long required) {
     try {
       currentPage = allocatePage(required);
-    } catch (OutOfMemoryError e) {
+    } catch (SparkOutOfMemoryError e) {
       return false;
     }
     dataPages.add(currentPage);
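
The BytesToBytesMap change narrows the catch from OutOfMemoryError to SparkOutOfMemoryError, so only Spark's own allocation failure is treated as recoverable while a genuine JVM OOM keeps propagating. A minimal sketch of that idea, with a hypothetical error type standing in for SparkOutOfMemoryError:

// Hypothetical stand-in for an allocator-specific failure that extends OutOfMemoryError.
class AllocatorOutOfMemoryError extends OutOfMemoryError {
  AllocatorOutOfMemoryError(String msg) { super(msg); }
}

public class NarrowCatchSketch {
  static long allocate(long required) {
    // Pretend the task-level memory manager could not grant the request.
    throw new AllocatorOutOfMemoryError("Unable to acquire " + required + " bytes");
  }

  static boolean acquireNewPage(long required) {
    try {
      allocate(required);
      return true;
    } catch (AllocatorOutOfMemoryError e) {
      // Recoverable: the caller can spill instead. A plain java.lang.OutOfMemoryError
      // (e.g. heap exhaustion) is deliberately not caught here and keeps propagating.
      return false;
    }
  }

  public static void main(String[] args) {
    System.out.println(acquireNewPage(1 << 20)); // prints false
  }
}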
diff --git a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java
index 399251b80e649..5056652a2420b 100644
--- a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java
+++ b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java
@@ -544,7 +544,7 @@ public long spill() throws IOException {
           // is accessing the current record. We free this page in that caller's next loadNext()
           // call.
           for (MemoryBlock page : allocatedPages) {
-            if (!loaded || page.getPageNumber() !=
+            if (!loaded || page.pageNumber !=
                     ((UnsafeInMemorySorter.SortedIterator)upstream).getCurrentPageNumber()) {
               released += page.size();
               freePage(page);
diff --git a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorter.java b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorter.java
index 717823ebbd320..1a9453a8b3e80 100644
--- a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorter.java
+++ b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorter.java
@@ -26,6 +26,7 @@
 import org.apache.spark.memory.MemoryConsumer;
 import org.apache.spark.memory.SparkOutOfMemoryError;
 import org.apache.spark.memory.TaskMemoryManager;
+import org.apache.spark.unsafe.Platform;
 import org.apache.spark.unsafe.UnsafeAlignedOffset;
 import org.apache.spark.unsafe.array.LongArray;
 import org.apache.spark.unsafe.memory.MemoryBlock;
@@ -213,9 +214,16 @@ public boolean hasSpaceForAnotherRecord() {
 
   public void expandPointerArray(LongArray newArray) {
     if (newArray.size() < array.size()) {
+      // checkstyle.off: RegexpSinglelineJava
       throw new SparkOutOfMemoryError("Not enough memory to grow pointer array");
+      // checkstyle.on: RegexpSinglelineJava
     }
-    MemoryBlock.copyMemory(array.memoryBlock(), newArray.memoryBlock(), pos * 8L);
+    Platform.copyMemory(
+      array.getBaseObject(),
+      array.getBaseOffset(),
+      newArray.getBaseObject(),
+      newArray.getBaseOffset(),
+      pos * 8L);
     consumer.freeArray(array);
     array = newArray;
     usableCapacity = getUsableCapacity();
@@ -342,7 +350,10 @@ public UnsafeSorterIterator getSortedIterator() {
           array, nullBoundaryPos, (pos - nullBoundaryPos) / 2L, 0, 7,
           radixSortSupport.sortDescending(), radixSortSupport.sortSigned());
       } else {
-        MemoryBlock unused = array.memoryBlock().subBlock(pos * 8L, (array.size() - pos) * 8L);
+        MemoryBlock unused = new MemoryBlock(
+          array.getBaseObject(),
+          array.getBaseOffset() + pos * 8L,
+          (array.size() - pos) * 8L);
         LongArray buffer = new LongArray(unused);
         Sorter<RecordPointerAndKeyPrefix, LongArray> sorter =
           new Sorter<>(new UnsafeSortDataFormat(buffer));
diff --git a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSorterSpillWriter.java b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSorterSpillWriter.java
index 9399024f01783..c1d71a23b1dbe 100644
--- a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSorterSpillWriter.java
+++ b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSorterSpillWriter.java
@@ -42,7 +42,10 @@
 
   private final SparkConf conf = new SparkConf();
 
-  /** The buffer size to use when writing the sorted records to an on-disk file */
+  /**
+   * The buffer size to use when writing the sorted records to an on-disk file. It must be
+   * greater than 4 + 8 bytes, since it also has to hold the record length and the key prefix.
+   */
   private final int diskWriteBufferSize =
     (int) (long) conf.get(package$.MODULE$.SHUFFLE_DISK_WRITE_BUFFER_SIZE());
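
The new Javadoc above records that the disk-write buffer must be able to hold the 4-byte record length plus the 8-byte key prefix. A minimal validation sketch of that constraint; the constant names are illustrative, not Spark's:

public class DiskWriteBufferSizeCheck {
  // 4 bytes for the record length (an int) plus 8 bytes for the key prefix (a long).
  private static final int RECORD_LENGTH_BYTES = 4;
  private static final int KEY_PREFIX_BYTES = 8;

  static void validate(int diskWriteBufferSize) {
    if (diskWriteBufferSize <= RECORD_LENGTH_BYTES + KEY_PREFIX_BYTES) {
      throw new IllegalArgumentException(
        "The disk write buffer size must be greater than 12 bytes, got " + diskWriteBufferSize);
    }
  }

  public static void main(String[] args) {
    validate(1024 * 1024); // a buffer this size easily satisfies the constraint
  }
}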
 
diff --git a/core/src/main/resources/org/apache/spark/ui/static/executorspage-template.html b/core/src/main/resources/org/apache/spark/ui/static/executorspage-template.html
index 5c91304e49fd7..f2c17aef097a4 100644
--- a/core/src/main/resources/org/apache/spark/ui/static/executorspage-template.html
+++ b/core/src/main/resources/org/apache/spark/ui/static/executorspage-template.html
@@ -16,10 +16,10 @@
 -->
 
 <script id="executors-summary-template" type="text/html">
-  <h4 style="clear: left; display: inline-block;">Summary</h4>
+  <h4 class="title-table">Summary</h4>
   <div class="container-fluid">
     <div class="container-fluid">
-      <table id="summary-execs-table" class="table table-striped compact">
+      <table id="summary-execs-table" class="table table-striped compact cell-border">
         <thead>
         <th></th>
         <th>RDD Blocks</th>
@@ -64,10 +64,10 @@ <h4 style="clear: left; display: inline-block;">Summary</h4>
       </table>
     </div>
   </div>
-  <h4 style="clear: left; display: inline-block;">Executors</h4>
+  <h4 class="title-table">Executors</h4>
   <div class="container-fluid">
     <div class="container-fluid">
-      <table id="active-executors-table" class="table table-striped compact">
+      <table id="active-executors-table" class="table table-striped compact cell-border">
         <thead>
         <tr>
           <th>
diff --git a/core/src/main/resources/org/apache/spark/ui/static/executorspage.js b/core/src/main/resources/org/apache/spark/ui/static/executorspage.js
index 6717af3ac4daf..a48c02ae279ba 100644
--- a/core/src/main/resources/org/apache/spark/ui/static/executorspage.js
+++ b/core/src/main/resources/org/apache/spark/ui/static/executorspage.js
@@ -59,78 +59,6 @@ $(document).ajaxStart(function () {
     $.blockUI({message: '<h3>Loading Executors Page...</h3>'});
 });
 
-function createTemplateURI(appId) {
-    var words = document.baseURI.split('/');
-    var ind = words.indexOf("proxy");
-    if (ind > 0) {
-        var baseURI = words.slice(0, ind + 1).join('/') + '/' + appId + '/static/executorspage-template.html';
-        return baseURI;
-    }
-    ind = words.indexOf("history");
-    if(ind > 0) {
-        var baseURI = words.slice(0, ind).join('/') + '/static/executorspage-template.html';
-        return baseURI;
-    }
-    return location.origin + "/static/executorspage-template.html";
-}
-
-function getStandAloneppId(cb) {
-    var words = document.baseURI.split('/');
-    var ind = words.indexOf("proxy");
-    if (ind > 0) {
-        var appId = words[ind + 1];
-        cb(appId);
-        return;
-    }
-    ind = words.indexOf("history");
-    if (ind > 0) {
-        var appId = words[ind + 1];
-        cb(appId);
-        return;
-    }
-    //Looks like Web UI is running in standalone mode
-    //Let's get application-id using REST End Point
-    $.getJSON(location.origin + "/api/v1/applications", function(response, status, jqXHR) {
-        if (response && response.length > 0) {
-            var appId = response[0].id
-            cb(appId);
-            return;
-        }
-    });
-}
-
-function createRESTEndPoint(appId) {
-    var words = document.baseURI.split('/');
-    var ind = words.indexOf("proxy");
-    if (ind > 0) {
-        var appId = words[ind + 1];
-        var newBaseURI = words.slice(0, ind + 2).join('/');
-        return newBaseURI + "/api/v1/applications/" + appId + "/allexecutors"
-    }
-    ind = words.indexOf("history");
-    if (ind > 0) {
-        var appId = words[ind + 1];
-        var attemptId = words[ind + 2];
-        var newBaseURI = words.slice(0, ind).join('/');
-        if (isNaN(attemptId)) {
-            return newBaseURI + "/api/v1/applications/" + appId + "/allexecutors";
-        } else {
-            return newBaseURI + "/api/v1/applications/" + appId + "/" + attemptId + "/allexecutors";
-        }
-    }
-    return location.origin + "/api/v1/applications/" + appId + "/allexecutors";
-}
-
-function formatLogsCells(execLogs, type) {
-    if (type !== 'display') return Object.keys(execLogs);
-    if (!execLogs) return;
-    var result = '';
-    $.each(execLogs, function (logName, logUrl) {
-        result += '<div><a href=' + logUrl + '>' + logName + '</a></div>'
-    });
-    return result;
-}
-
 function logsExist(execs) {
     return execs.some(function(exec) {
         return !($.isEmptyObject(exec["executorLogs"]));
@@ -178,17 +106,13 @@ function totalDurationColor(totalGCTime, totalDuration) {
 }
 
 $(document).ready(function () {
-    $.extend($.fn.dataTable.defaults, {
-        stateSave: true,
-        lengthMenu: [[20, 40, 60, 100, -1], [20, 40, 60, 100, "All"]],
-        pageLength: 20
-    });
+    setDataTableDefaults();
 
     executorsSummary = $("#active-executors");
 
-    getStandAloneppId(function (appId) {
+    getStandAloneAppId(function (appId) {
 
-        var endPoint = createRESTEndPoint(appId);
+        var endPoint = createRESTEndPointForExecutorsPage(appId);
         $.getJSON(endPoint, function (response, status, jqXHR) {
             var summary = [];
             var allExecCnt = 0;
@@ -408,7 +332,7 @@ $(document).ready(function () {
             };
 
             var data = {executors: response, "execSummary": [activeSummary, deadSummary, totalSummary]};
-            $.get(createTemplateURI(appId), function (template) {
+            $.get(createTemplateURI(appId, "executorspage"), function (template) {
 
                 executorsSummary.append(Mustache.render($(template).filter("#executors-summary-template").html(), data));
                 var selector = "#active-executors-table";
diff --git a/core/src/main/resources/org/apache/spark/ui/static/images/sort_asc.png b/core/src/main/resources/org/apache/spark/ui/static/images/sort_asc.png
new file mode 100644
index 0000000000000..e1ba61a8055fc
Binary files /dev/null and b/core/src/main/resources/org/apache/spark/ui/static/images/sort_asc.png differ
diff --git a/core/src/main/resources/org/apache/spark/ui/static/images/sort_asc_disabled.png b/core/src/main/resources/org/apache/spark/ui/static/images/sort_asc_disabled.png
new file mode 100644
index 0000000000000..fb11dfe24a6c5
Binary files /dev/null and b/core/src/main/resources/org/apache/spark/ui/static/images/sort_asc_disabled.png differ
diff --git a/core/src/main/resources/org/apache/spark/ui/static/images/sort_both.png b/core/src/main/resources/org/apache/spark/ui/static/images/sort_both.png
new file mode 100644
index 0000000000000..af5bc7c5a10b9
Binary files /dev/null and b/core/src/main/resources/org/apache/spark/ui/static/images/sort_both.png differ
diff --git a/core/src/main/resources/org/apache/spark/ui/static/images/sort_desc.png b/core/src/main/resources/org/apache/spark/ui/static/images/sort_desc.png
new file mode 100644
index 0000000000000..0e156deb5f61d
Binary files /dev/null and b/core/src/main/resources/org/apache/spark/ui/static/images/sort_desc.png differ
diff --git a/core/src/main/resources/org/apache/spark/ui/static/images/sort_desc_disabled.png b/core/src/main/resources/org/apache/spark/ui/static/images/sort_desc_disabled.png
new file mode 100644
index 0000000000000..c9fdd8a1502fd
Binary files /dev/null and b/core/src/main/resources/org/apache/spark/ui/static/images/sort_desc_disabled.png differ
diff --git a/core/src/main/resources/org/apache/spark/ui/static/stagepage.js b/core/src/main/resources/org/apache/spark/ui/static/stagepage.js
new file mode 100644
index 0000000000000..4c83ec7e95ab1
--- /dev/null
+++ b/core/src/main/resources/org/apache/spark/ui/static/stagepage.js
@@ -0,0 +1,958 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+var shouldBlockUI = true;
+
+$(document).ajaxStop(function () {
+    if (shouldBlockUI) {
+        $.unblockUI();
+        shouldBlockUI = false;
+    }
+});
+
+$(document).ajaxStart(function () {
+    if (shouldBlockUI) {
+        $.blockUI({message: '<h3>Loading Stage Page...</h3>'});
+    }
+});
+
+$.extend( $.fn.dataTable.ext.type.order, {
+    "duration-pre": ConvertDurationString,
+
+    "duration-asc": function ( a, b ) {
+        a = ConvertDurationString( a );
+        b = ConvertDurationString( b );
+        return ((a < b) ? -1 : ((a > b) ? 1 : 0));
+    },
+
+    "duration-desc": function ( a, b ) {
+        a = ConvertDurationString( a );
+        b = ConvertDurationString( b );
+        return ((a < b) ? 1 : ((a > b) ? -1 : 0));
+    }
+} );
+
+// This function only parses URLs in certain formats
+// e.g. (history) https://domain:50509/history/application_1536254569791_3806251/1/stages/stage/?id=4&attempt=1
+// e.g. (proxy) https://domain:50505/proxy/application_1502220952225_59143/stages/stage?id=4&attempt=1
+function stageEndPoint(appId) {
+    var queryString = document.baseURI.split('?');
+    var words = document.baseURI.split('/');
+    var indexOfProxy = words.indexOf("proxy");
+    var stageId = queryString[1].split("&").filter(word => word.includes("id="))[0].split("=")[1];
+    if (indexOfProxy > 0) {
+        var appId = words[indexOfProxy + 1];
+        var newBaseURI = words.slice(0, words.indexOf("proxy") + 2).join('/');
+        return newBaseURI + "/api/v1/applications/" + appId + "/stages/" + stageId;
+    }
+    var indexOfHistory = words.indexOf("history");
+    if (indexOfHistory > 0) {
+        var appId = words[indexOfHistory + 1];
+        var appAttemptId = words[indexOfHistory + 2];
+        var newBaseURI = words.slice(0, words.indexOf("history")).join('/');
+        if (isNaN(appAttemptId) || appAttemptId == "0") {
+            return newBaseURI + "/api/v1/applications/" + appId + "/stages/" + stageId;
+        } else {
+            return newBaseURI + "/api/v1/applications/" + appId + "/" + appAttemptId + "/stages/" + stageId;
+        }
+    }
+    return location.origin + "/api/v1/applications/" + appId + "/stages/" + stageId;
+}
+
+function getColumnNameForTaskMetricSummary(columnKey) {
+    switch(columnKey) {
+        case "executorRunTime":
+            return "Duration";
+
+        case "jvmGcTime":
+            return "GC Time";
+
+        case "gettingResultTime":
+            return "Getting Result Time";
+
+        case "inputMetrics":
+            return "Input Size / Records";
+
+        case "outputMetrics":
+            return "Output Size / Records";
+
+        case "peakExecutionMemory":
+            return "Peak Execution Memory";
+
+        case "resultSerializationTime":
+            return "Result Serialization Time";
+
+        case "schedulerDelay":
+            return "Scheduler Delay";
+
+        case "diskBytesSpilled":
+            return "Shuffle spill (disk)";
+
+        case "memoryBytesSpilled":
+            return "Shuffle spill (memory)";
+
+        case "shuffleReadMetrics":
+            return "Shuffle Read Size / Records";
+
+        case "shuffleWriteMetrics":
+            return "Shuffle Write Size / Records";
+
+        case "executorDeserializeTime":
+            return "Task Deserialization Time";
+
+        case "shuffleReadBlockedTime":
+            return "Shuffle Read Blocked Time";
+
+        case "shuffleRemoteReads":
+            return "Shuffle Remote Reads";
+
+        default:
+            return "NA";
+    }
+}
+
+function displayRowsForSummaryMetricsTable(row, type, columnIndex) {
+    switch(row.columnKey) {
+        case 'inputMetrics':
+            var str = formatBytes(row.data.bytesRead[columnIndex], type) + " / " +
+              row.data.recordsRead[columnIndex];
+            return str;
+            break;
+
+        case 'outputMetrics':
+            var str = formatBytes(row.data.bytesWritten[columnIndex], type) + " / " +
+              row.data.recordsWritten[columnIndex];
+            return str;
+            break;
+
+        case 'shuffleReadMetrics':
+            var str = formatBytes(row.data.readBytes[columnIndex], type) + " / " +
+              row.data.readRecords[columnIndex];
+            return str;
+            break;
+
+        case 'shuffleReadBlockedTime':
+            var str = formatDuration(row.data.fetchWaitTime[columnIndex]);
+            return str;
+            break;
+
+        case 'shuffleRemoteReads':
+            var str = formatBytes(row.data.remoteBytesRead[columnIndex], type);
+            return str;
+            break;
+
+        case 'shuffleWriteMetrics':
+            var str = formatBytes(row.data.writeBytes[columnIndex], type) + " / " +
+              row.data.writeRecords[columnIndex];
+            return str;
+            break;
+
+        default:
+            return (row.columnKey == 'peakExecutionMemory' || row.columnKey == 'memoryBytesSpilled'
+                    || row.columnKey == 'diskBytesSpilled') ? formatBytes(
+                    row.data[columnIndex], type) : (formatDuration(row.data[columnIndex]));
+
+    }
+}
+
+function createDataTableForTaskSummaryMetricsTable(taskSummaryMetricsTable) {
+    var taskMetricsTable = "#summary-metrics-table";
+    if ($.fn.dataTable.isDataTable(taskMetricsTable)) {
+        taskSummaryMetricsDataTable.clear().draw();
+        taskSummaryMetricsDataTable.rows.add(taskSummaryMetricsTable).draw();
+    } else {
+        var taskConf = {
+            "data": taskSummaryMetricsTable,
+            "columns": [
+                {data : 'metric'},
+                // Min
+                {
+                    data: function (row, type) {
+                        return displayRowsForSummaryMetricsTable(row, type, 0);
+                    }
+                },
+                // 25th percentile
+                {
+                    data: function (row, type) {
+                        return displayRowsForSummaryMetricsTable(row, type, 1);
+                    }
+                },
+                // Median
+                {
+                    data: function (row, type) {
+                        return displayRowsForSummaryMetricsTable(row, type, 2);
+                    }
+                },
+                // 75th percentile
+                {
+                    data: function (row, type) {
+                        return displayRowsForSummaryMetricsTable(row, type, 3);
+                    }
+                },
+                // Max
+                {
+                    data: function (row, type) {
+                        return displayRowsForSummaryMetricsTable(row, type, 4);
+                    }
+                }
+            ],
+            "columnDefs": [
+                { "type": "duration", "targets": 1 },
+                { "type": "duration", "targets": 2 },
+                { "type": "duration", "targets": 3 },
+                { "type": "duration", "targets": 4 },
+                { "type": "duration", "targets": 5 }
+            ],
+            "paging": false,
+            "searching": false,
+            "order": [[0, "asc"]],
+            "bSort": false,
+            "bAutoWidth": false
+        };
+        taskSummaryMetricsDataTable = $(taskMetricsTable).DataTable(taskConf);
+    }
+    taskSummaryMetricsTableCurrentStateArray = taskSummaryMetricsTable.slice();
+}
+
+function createRowMetadataForColumn(colKey, data, checkboxId) {
+  var row = {
+      "metric": getColumnNameForTaskMetricSummary(colKey),
+      "data": data,
+      "checkboxId": checkboxId,
+      "columnKey": colKey
+  };
+  return row;
+}
+
+function reselectCheckboxesBasedOnTaskTableState() {
+    var allChecked = true;
+    var taskSummaryMetricsTableCurrentFilteredArray = taskSummaryMetricsTableCurrentStateArray.slice();
+    if (typeof taskTableSelector !== 'undefined' && taskSummaryMetricsTableCurrentStateArray.length > 0) {
+        for (var k = 0; k < optionalColumns.length; k++) {
+            if (taskTableSelector.column(optionalColumns[k]).visible()) {
+                $("#box-"+optionalColumns[k]).prop('checked', true);
+                taskSummaryMetricsTableCurrentStateArray.push(taskSummaryMetricsTableArray.filter(row => (row.checkboxId).toString() == optionalColumns[k])[0]);
+                taskSummaryMetricsTableCurrentFilteredArray = taskSummaryMetricsTableCurrentStateArray.slice();
+            } else {
+                allChecked = false;
+            }
+        }
+        if (allChecked) {
+            $("#box-0").prop('checked', true);
+        }
+        createDataTableForTaskSummaryMetricsTable(taskSummaryMetricsTableCurrentFilteredArray);
+    }
+}
+
+function getStageAttemptId() {
+  var words = document.baseURI.split('?');
+  var attemptIdStr = words[1].split('&')[1];
+  var digitsRegex = /[0-9]+/;
+  // We use a regex here to extract the stage attempt id, as some URLs have a format
+  // like /proxy/application_1539986433979_27115/stages/stage/?id=0&attempt=0#tasksTitle
+  var stgAttemptId = words[1].split("&").filter(
+      word => word.includes("attempt="))[0].split("=")[1].match(digitsRegex);
+  return stgAttemptId;
+}
+
+var taskSummaryMetricsTableArray = [];
+var taskSummaryMetricsTableCurrentStateArray = [];
+var taskSummaryMetricsDataTable;
+var optionalColumns = [11, 12, 13, 14, 15, 16, 17];
+var taskTableSelector;
+
+$(document).ready(function () {
+    setDataTableDefaults();
+
+    $("#showAdditionalMetrics").append(
+        "<div><a id='additionalMetrics'>" +
+        "<span class='expand-input-rate-arrow arrow-closed' id='arrowtoggle1'></span>" +
+        " Show Additional Metrics" +
+        "</a></div>" +
+        "<div class='container-fluid container-fluid-div' id='toggle-metrics' hidden>" +
+        "<div><input type='checkbox' class='toggle-vis' id='box-0' data-column='0'> Select All</div>" +
+        "<div id='scheduler_delay' class='scheduler-delay-checkbox-div'><input type='checkbox' class='toggle-vis' id='box-11' data-column='11'> Scheduler Delay</div>" +
+        "<div id='task_deserialization_time' class='task-deserialization-time-checkbox-div'><input type='checkbox' class='toggle-vis' id='box-12' data-column='12'> Task Deserialization Time</div>" +
+        "<div id='shuffle_read_blocked_time' class='shuffle-read-blocked-time-checkbox-div'><input type='checkbox' class='toggle-vis' id='box-13' data-column='13'> Shuffle Read Blocked Time</div>" +
+        "<div id='shuffle_remote_reads' class='shuffle-remote-reads-checkbox-div'><input type='checkbox' class='toggle-vis' id='box-14' data-column='14'> Shuffle Remote Reads</div>" +
+        "<div id='result_serialization_time' class='result-serialization-time-checkbox-div'><input type='checkbox' class='toggle-vis' id='box-15' data-column='15'> Result Serialization Time</div>" +
+        "<div id='getting_result_time' class='getting-result-time-checkbox-div'><input type='checkbox' class='toggle-vis' id='box-16' data-column='16'> Getting Result Time</div>" +
+        "<div id='peak_execution_memory' class='peak-execution-memory-checkbox-div'><input type='checkbox' class='toggle-vis' id='box-17' data-column='17'> Peak Execution Memory</div>" +
+        "</div>");
+
+    $('#scheduler_delay').attr("data-toggle", "tooltip")
+        .attr("data-placement", "right")
+        .attr("title", "Scheduler delay includes time to ship the task from the scheduler to the executor, and time to send " +
+            "the task result from the executor to the scheduler. If scheduler delay is large, consider decreasing the size of tasks or decreasing the size of task results.");
+    $('#task_deserialization_time').attr("data-toggle", "tooltip")
+        .attr("data-placement", "right")
+        .attr("title", "Time spent deserializing the task closure on the executor, including the time to read the broadcasted task.");
+    $('#shuffle_read_blocked_time').attr("data-toggle", "tooltip")
+        .attr("data-placement", "right")
+        .attr("title", "Time that the task spent blocked waiting for shuffle data to be read from remote machines.");
+    $('#shuffle_remote_reads').attr("data-toggle", "tooltip")
+        .attr("data-placement", "right")
+        .attr("title", "Total shuffle bytes read from remote executors. This is a subset of the shuffle read bytes; the remaining shuffle data is read locally. ");
+    $('#result_serialization_time').attr("data-toggle", "tooltip")
+            .attr("data-placement", "right")
+            .attr("title", "Time spent serializing the task result on the executor before sending it back to the driver.");
+    $('#getting_result_time').attr("data-toggle", "tooltip")
+            .attr("data-placement", "right")
+            .attr("title", "Time that the driver spends fetching task results from workers. If this is large, consider decreasing the amount of data returned from each task.");
+    $('#peak_execution_memory').attr("data-toggle", "tooltip")
+            .attr("data-placement", "right")
+            .attr("title", "Execution memory refers to the memory used by internal data structures created during " +
+                "shuffles, aggregations and joins when Tungsten is enabled. The value of this accumulator " +
+                "should be approximately the sum of the peak sizes across all such data structures created " +
+                "in this task. For SQL jobs, this only tracks all unsafe operators, broadcast joins, and " +
+                "external sort.");
+    $('[data-toggle="tooltip"]').tooltip();
+    tasksSummary = $("#parent-container");
+    getStandAloneAppId(function (appId) {
+
+        var endPoint = stageEndPoint(appId);
+        var stageAttemptId = getStageAttemptId();
+        $.getJSON(endPoint + "/" + stageAttemptId, function(response, status, jqXHR) {
+
+            var responseBody = response;
+            var dataToShow = {};
+            dataToShow.showInputData = responseBody.inputBytes > 0;
+            dataToShow.showOutputData = responseBody.outputBytes > 0;
+            dataToShow.showShuffleReadData = responseBody.shuffleReadBytes > 0;
+            dataToShow.showShuffleWriteData = responseBody.shuffleWriteBytes > 0;
+            dataToShow.showBytesSpilledData =
+                (responseBody.diskBytesSpilled > 0 || responseBody.memoryBytesSpilled > 0);
+
+            if (!dataToShow.showShuffleReadData) {
+                $('#shuffle_read_blocked_time').remove();
+                $('#shuffle_remote_reads').remove();
+                optionalColumns.splice(2, 2);
+            }
+
+            // prepare data for executor summary table
+            stageExecutorSummaryInfoKeys = Object.keys(responseBody.executorSummary);
+            $.getJSON(createRESTEndPointForExecutorsPage(appId),
+              function(executorSummaryResponse, status, jqXHR) {
+                var executorDetailsMap = {};
+                executorSummaryResponse.forEach(function (executorDetail) {
+                    executorDetailsMap[executorDetail.id] = executorDetail;
+                });
+
+                var executorSummaryTable = [];
+                stageExecutorSummaryInfoKeys.forEach(function (columnKeyIndex) {
+                    var executorSummary = responseBody.executorSummary[columnKeyIndex];
+                    var executorDetail = executorDetailsMap[columnKeyIndex.toString()];
+                    executorSummary.id = columnKeyIndex;
+                    executorSummary.executorLogs = {};
+                    executorSummary.hostPort = "CANNOT FIND ADDRESS";
+
+                    if (executorDetail) {
+                        if (executorDetail["executorLogs"]) {
+                            responseBody.executorSummary[columnKeyIndex].executorLogs =
+                                executorDetail["executorLogs"];
+                            }
+                        if (executorDetail["hostPort"]) {
+                            responseBody.executorSummary[columnKeyIndex].hostPort =
+                                executorDetail["hostPort"];
+                        }
+                    }
+                    executorSummaryTable.push(responseBody.executorSummary[columnKeyIndex]);
+                });
+                // building task aggregated metrics by executor table
+                var executorSummaryConf = {
+                    "data": executorSummaryTable,
+                    "columns": [
+                        {data : "id"},
+                        {data : "executorLogs", render: formatLogsCells},
+                        {data : "hostPort"},
+                        {
+                            data : function (row, type) {
+                                return type === 'display' ? formatDuration(row.taskTime) : row.taskTime;
+                            }
+                        },
+                        {
+                            data : function (row, type) {
+                                var totaltasks = row.succeededTasks + row.failedTasks + row.killedTasks;
+                                return type === 'display' ? totaltasks : totaltasks.toString();
+                            }
+                        },
+                        {data : "failedTasks"},
+                        {data : "killedTasks"},
+                        {data : "succeededTasks"},
+                        {data : "isBlacklistedForStage"},
+                        {
+                            data : function (row, type) {
+                                return row.inputRecords != 0 ? formatBytes(row.inputBytes, type) + " / " + row.inputRecords : "";
+                            }
+                        },
+                        {
+                            data : function (row, type) {
+                                return row.outputRecords != 0 ? formatBytes(row.outputBytes, type) + " / " + row.outputRecords : "";
+                            }
+                        },
+                        {
+                            data : function (row, type) {
+                                return row.shuffleReadRecords != 0 ? formatBytes(row.shuffleRead, type) + " / " + row.shuffleReadRecords : "";
+                            }
+                        },
+                        {
+                            data : function (row, type) {
+                                return row.shuffleWriteRecords != 0 ? formatBytes(row.shuffleWrite, type) + " / " + row.shuffleWriteRecords : "";
+                            }
+                        },
+                        {
+                            data : function (row, type) {
+                                return typeof row.memoryBytesSpilled != 'undefined' ? formatBytes(row.memoryBytesSpilled, type) : "";
+                            }
+                        },
+                        {
+                            data : function (row, type) {
+                                return typeof row.diskBytesSpilled != 'undefined' ? formatBytes(row.diskBytesSpilled, type) : "";
+                            }
+                        }
+                    ],
+                    "order": [[0, "asc"]],
+                    "bAutoWidth": false
+                }
+                var executorSummaryTableSelector =
+                    $("#summary-executor-table").DataTable(executorSummaryConf);
+                $('#parent-container [data-toggle="tooltip"]').tooltip();
+
+                executorSummaryTableSelector.column(9).visible(dataToShow.showInputData);
+                if (dataToShow.showInputData) {
+                    $('#executor-summary-input').attr("data-toggle", "tooltip")
+                        .attr("data-placement", "top")
+                        .attr("title", "Bytes and records read from Hadoop or from Spark storage.");
+                    $('#executor-summary-input').tooltip(true);
+                }
+                executorSummaryTableSelector.column(10).visible(dataToShow.showOutputData);
+                if (dataToShow.showOutputData) {
+                    $('#executor-summary-output').attr("data-toggle", "tooltip")
+                        .attr("data-placement", "top")
+                        .attr("title", "Bytes and records written to Hadoop.");
+                    $('#executor-summary-output').tooltip(true);
+                }
+                executorSummaryTableSelector.column(11).visible(dataToShow.showShuffleReadData);
+                if (dataToShow.showShuffleReadData) {
+                    $('#executor-summary-shuffle-read').attr("data-toggle", "tooltip")
+                        .attr("data-placement", "top")
+                        .attr("title", "Total shuffle bytes and records read (includes both data read locally and data read from remote executors).");
+                    $('#executor-summary-shuffle-read').tooltip(true);
+                }
+                executorSummaryTableSelector.column(12).visible(dataToShow.showShuffleWriteData);
+                if (dataToShow.showShuffleWriteData) {
+                    $('#executor-summary-shuffle-write').attr("data-toggle", "tooltip")
+                        .attr("data-placement", "top")
+                        .attr("title", "Bytes and records written to disk in order to be read by a shuffle in a future stage.");
+                    $('#executor-summary-shuffle-write').tooltip(true);
+                }
+                executorSummaryTableSelector.column(13).visible(dataToShow.showBytesSpilledData);
+                executorSummaryTableSelector.column(14).visible(dataToShow.showBytesSpilledData);
+            });
+
+            // prepare data for accumulatorUpdates
+            var accumulatorTable = responseBody.accumulatorUpdates.filter(accumUpdate =>
+                !(accumUpdate.name).toString().includes("internal."));
+
+            // rendering the UI page
+            var data = {"executors": response};
+            $.get(createTemplateURI(appId, "stagespage"), function(template) {
+                tasksSummary.append(Mustache.render($(template).filter("#stages-summary-template").html(), data));
+
+                $("#additionalMetrics").click(function(){
+                    $("#arrowtoggle1").toggleClass("arrow-open arrow-closed");
+                    $("#toggle-metrics").toggle();
+                    if (window.localStorage) {
+                        window.localStorage.setItem("arrowtoggle1class", $("#arrowtoggle1").attr('class'));
+                    }
+                });
+
+                $("#aggregatedMetrics").click(function(){
+                    $("#arrowtoggle2").toggleClass("arrow-open arrow-closed");
+                    $("#toggle-aggregatedMetrics").toggle();
+                    if (window.localStorage) {
+                        window.localStorage.setItem("arrowtoggle2class", $("#arrowtoggle2").attr('class'));
+                    }
+                });
+
+                var quantiles = "0,0.25,0.5,0.75,1.0";
+                $.getJSON(endPoint + "/" + stageAttemptId + "/taskSummary?quantiles=" + quantiles,
+                  function(taskMetricsResponse, status, jqXHR) {
+                    var taskMetricKeys = Object.keys(taskMetricsResponse);
+                    taskMetricKeys.forEach(function (columnKey) {
+                        switch(columnKey) {
+                            case "shuffleReadMetrics":
+                                var row1 = createRowMetadataForColumn(
+                                    columnKey, taskMetricsResponse[columnKey], 3);
+                                var row2 = createRowMetadataForColumn(
+                                    "shuffleReadBlockedTime", taskMetricsResponse[columnKey], 13);
+                                var row3 = createRowMetadataForColumn(
+                                    "shuffleRemoteReads", taskMetricsResponse[columnKey], 14);
+                                if (dataToShow.showShuffleReadData) {
+                                    taskSummaryMetricsTableArray.push(row1);
+                                    taskSummaryMetricsTableArray.push(row2);
+                                    taskSummaryMetricsTableArray.push(row3);
+                                }
+                                break;
+
+                            case "schedulerDelay":
+                                var row = createRowMetadataForColumn(
+                                    columnKey, taskMetricsResponse[columnKey], 11);
+                                taskSummaryMetricsTableArray.push(row);
+                                break;
+
+                            case "executorDeserializeTime":
+                                var row = createRowMetadataForColumn(
+                                    columnKey, taskMetricsResponse[columnKey], 12);
+                                taskSummaryMetricsTableArray.push(row);
+                                break;
+
+                            case "resultSerializationTime":
+                                var row = createRowMetadataForColumn(
+                                    columnKey, taskMetricsResponse[columnKey], 15);
+                                taskSummaryMetricsTableArray.push(row);
+                                break;
+
+                            case "gettingResultTime":
+                                var row = createRowMetadataForColumn(
+                                    columnKey, taskMetricsResponse[columnKey], 16);
+                                taskSummaryMetricsTableArray.push(row);
+                                break;
+
+                            case "peakExecutionMemory":
+                                var row = createRowMetadataForColumn(
+                                    columnKey, taskMetricsResponse[columnKey], 17);
+                                taskSummaryMetricsTableArray.push(row);
+                                break;
+
+                            case "inputMetrics":
+                                var row = createRowMetadataForColumn(
+                                    columnKey, taskMetricsResponse[columnKey], 1);
+                                if (dataToShow.showInputData) {
+                                    taskSummaryMetricsTableArray.push(row);
+                                }
+                                break;
+
+                            case "outputMetrics":
+                                var row = createRowMetadataForColumn(
+                                    columnKey, taskMetricsResponse[columnKey], 2);
+                                if (dataToShow.showOutputData) {
+                                    taskSummaryMetricsTableArray.push(row);
+                                }
+                                break;
+
+                            case "shuffleWriteMetrics":
+                                var row = createRowMetadataForColumn(
+                                    columnKey, taskMetricsResponse[columnKey], 4);
+                                if (dataToShow.showShuffleWriteData) {
+                                    taskSummaryMetricsTableArray.push(row);
+                                }
+                                break;
+
+                            case "diskBytesSpilled":
+                                var row = createRowMetadataForColumn(
+                                    columnKey, taskMetricsResponse[columnKey], 5);
+                                if (dataToShow.showBytesSpilledData) {
+                                    taskSummaryMetricsTableArray.push(row);
+                                }
+                                break;
+
+                            case "memoryBytesSpilled":
+                                var row = createRowMetadataForColumn(
+                                    columnKey, taskMetricsResponse[columnKey], 6);
+                                if (dataToShow.showBytesSpilledData) {
+                                    taskSummaryMetricsTableArray.push(row);
+                                }
+                                break;
+
+                            default:
+                                if (getColumnNameForTaskMetricSummary(columnKey) != "NA") {
+                                    var row = createRowMetadataForColumn(
+                                        columnKey, taskMetricsResponse[columnKey], 0);
+                                    taskSummaryMetricsTableArray.push(row);
+                                }
+                                break;
+                        }
+                    });
+                    var taskSummaryMetricsTableFilteredArray =
+                        taskSummaryMetricsTableArray.filter(row => row.checkboxId < 11);
+                    taskSummaryMetricsTableCurrentStateArray = taskSummaryMetricsTableFilteredArray.slice();
+                    reselectCheckboxesBasedOnTaskTableState();
+                });
+
+                // building accumulator update table
+                var accumulatorConf = {
+                    "data": accumulatorTable,
+                    "columns": [
+                        {data : "id"},
+                        {data : "name"},
+                        {data : "value"}
+                    ],
+                    "paging": false,
+                    "searching": false,
+                    "order": [[0, "asc"]],
+                    "bAutoWidth": false
+                }
+                $("#accumulator-table").DataTable(accumulatorConf);
+
+                // building tasks table that uses server side functionality
+                var totalTasksToShow = responseBody.numCompleteTasks + responseBody.numActiveTasks;
+                var taskTable = "#active-tasks-table";
+                var taskConf = {
+                    "serverSide": true,
+                    "paging": true,
+                    "info": true,
+                    "processing": true,
+                    "lengthMenu": [[20, 40, 60, 100, totalTasksToShow], [20, 40, 60, 100, "All"]],
+                    "orderMulti": false,
+                    "bAutoWidth": false,
+                    "ajax": {
+                        "url": endPoint + "/" + stageAttemptId + "/taskTable",
+                        "data": function (data) {
+                            var columnIndexToSort = 0;
+                            var columnNameToSort = "Index";
+                            if (data.order[0].column && data.order[0].column != "") {
+                                columnIndexToSort = parseInt(data.order[0].column);
+                                columnNameToSort = data.columns[columnIndexToSort].name;
+                            }
+                            delete data.columns;
+                            data.numTasks = totalTasksToShow;
+                            data.columnIndexToSort = columnIndexToSort;
+                            data.columnNameToSort = columnNameToSort;
+                        },
+                        "dataSrc": function (jsons) {
+                            var jsonStr = JSON.stringify(jsons);
+                            var tasksToShow = JSON.parse(jsonStr);
+                            return tasksToShow.aaData;
+                        },
+                        "error": function (jqXHR, textStatus, errorThrown) {
+                            alert("Unable to connect to the server. Looks like the Spark " +
+                              "application must have ended. Please Switch to the history UI.");
+                            $("#active-tasks-table_processing").css("display","none");
+                        }
+                    },
+                    "columns": [
+                        {data: function (row, type) {
+                            return type !== 'display' ? (isNaN(row.index) ? 0 : row.index ) : row.index;
+                            },
+                            name: "Index"
+                        },
+                        {data : "taskId", name: "ID"},
+                        {data : "attempt", name: "Attempt"},
+                        {data : "status", name: "Status"},
+                        {data : "taskLocality", name: "Locality Level"},
+                        {data : "executorId", name: "Executor ID"},
+                        {data : "host", name: "Host"},
+                        {data : "executorLogs", name: "Logs", render: formatLogsCells},
+                        {data : "launchTime", name: "Launch Time", render: formatDate},
+                        {
+                            data : function (row, type) {
+                                if (row.duration) {
+                                    return type === 'display' ? formatDuration(row.duration) : row.duration;
+                                } else {
+                                    return "";
+                                }
+                            },
+                            name: "Duration"
+                        },
+                        {
+                            data : function (row, type) {
+                                if (row.taskMetrics && row.taskMetrics.jvmGcTime) {
+                                    return type === 'display' ? formatDuration(row.taskMetrics.jvmGcTime) : row.taskMetrics.jvmGcTime;
+                                } else {
+                                    return "";
+                                }
+                            },
+                            name: "GC Time"
+                        },
+                        {
+                            data : function (row, type) {
+                                if (row.schedulerDelay) {
+                                    return type === 'display' ? formatDuration(row.schedulerDelay) : row.schedulerDelay;
+                                } else {
+                                    return "";
+                                }
+                            },
+                            name: "Scheduler Delay"
+                        },
+                        {
+                            data : function (row, type) {
+                                if (row.taskMetrics && row.taskMetrics.executorDeserializeTime) {
+                                    return type === 'display' ? formatDuration(row.taskMetrics.executorDeserializeTime) : row.taskMetrics.executorDeserializeTime;
+                                } else {
+                                    return "";
+                                }
+                            },
+                            name: "Task Deserialization Time"
+                        },
+                        {
+                            data : function (row, type) {
+                                if (row.taskMetrics && row.taskMetrics.shuffleReadMetrics) {
+                                    return type === 'display' ? formatDuration(row.taskMetrics.shuffleReadMetrics.fetchWaitTime) : row.taskMetrics.shuffleReadMetrics.fetchWaitTime;
+                                } else {
+                                    return "";
+                                }
+                            },
+                            name: "Shuffle Read Blocked Time"
+                        },
+                        {
+                            data : function (row, type) {
+                                if (row.taskMetrics && row.taskMetrics.shuffleReadMetrics) {
+                                    return type === 'display' ? formatBytes(row.taskMetrics.shuffleReadMetrics.remoteBytesRead, type) : row.taskMetrics.shuffleReadMetrics.remoteBytesRead;
+                                } else {
+                                    return "";
+                                }
+                            },
+                            name: "Shuffle Remote Reads"
+                        },
+                        {
+                            data : function (row, type) {
+                                if (row.taskMetrics && row.taskMetrics.resultSerializationTime) {
+                                    return type === 'display' ? formatDuration(row.taskMetrics.resultSerializationTime) : row.taskMetrics.resultSerializationTime;
+                                } else {
+                                    return "";
+                                }
+                            },
+                            name: "Result Serialization Time"
+                        },
+                        {
+                            data : function (row, type) {
+                                if (row.gettingResultTime) {
+                                    return type === 'display' ? formatDuration(row.gettingResultTime) : row.gettingResultTime;
+                                } else {
+                                    return "";
+                                }
+                            },
+                            name: "Getting Result Time"
+                        },
+                        {
+                            data : function (row, type) {
+                                if (row.taskMetrics && row.taskMetrics.peakExecutionMemory) {
+                                    return type === 'display' ? formatBytes(row.taskMetrics.peakExecutionMemory, type) : row.taskMetrics.peakExecutionMemory;
+                                } else {
+                                    return "";
+                                }
+                            },
+                            name: "Peak Execution Memory"
+                        },
+                        {
+                            data : function (row, type) {
+                                if (accumulatorTable.length > 0 && row.accumulatorUpdates.length > 0) {
+                                    var accIndex = row.accumulatorUpdates.length - 1;
+                                    return row.accumulatorUpdates[accIndex].name + ' : ' + row.accumulatorUpdates[accIndex].update;
+                                } else {
+                                    return "";
+                                }
+                            },
+                            name: "Accumulators"
+                        },
+                        {
+                            data : function (row, type) {
+                                if (row.taskMetrics && row.taskMetrics.inputMetrics && row.taskMetrics.inputMetrics.bytesRead > 0) {
+                                    if (type === 'display') {
+                                        return formatBytes(row.taskMetrics.inputMetrics.bytesRead, type) + " / " + row.taskMetrics.inputMetrics.recordsRead;
+                                    } else {
+                                        return row.taskMetrics.inputMetrics.bytesRead + " / " + row.taskMetrics.inputMetrics.recordsRead;
+                                    }
+                                } else {
+                                    return "";
+                                }
+                            },
+                            name: "Input Size / Records"
+                        },
+                        {
+                            data : function (row, type) {
+                                if (row.taskMetrics && row.taskMetrics.outputMetrics && row.taskMetrics.outputMetrics.bytesWritten > 0) {
+                                    if (type === 'display') {
+                                        return formatBytes(row.taskMetrics.outputMetrics.bytesWritten, type) + " / " + row.taskMetrics.outputMetrics.recordsWritten;
+                                    } else {
+                                        return row.taskMetrics.outputMetrics.bytesWritten + " / " + row.taskMetrics.outputMetrics.recordsWritten;
+                                    }
+                                } else {
+                                    return "";
+                                }
+                            },
+                            name: "Output Size / Records"
+                        },
+                        {
+                            data : function (row, type) {
+                                if (row.taskMetrics && row.taskMetrics.shuffleWriteMetrics && row.taskMetrics.shuffleWriteMetrics.writeTime > 0) {
+                                    return type === 'display' ? formatDuration(parseInt(row.taskMetrics.shuffleWriteMetrics.writeTime) / 1000000) : row.taskMetrics.shuffleWriteMetrics.writeTime;
+                                } else {
+                                    return "";
+                                }
+                            },
+                            name: "Write Time"
+                        },
+                        {
+                            data : function (row, type) {
+                                if (row.taskMetrics && row.taskMetrics.shuffleWriteMetrics && row.taskMetrics.shuffleWriteMetrics.bytesWritten > 0) {
+                                    if (type === 'display') {
+                                        return formatBytes(row.taskMetrics.shuffleWriteMetrics.bytesWritten, type) + " / " + row.taskMetrics.shuffleWriteMetrics.recordsWritten;
+                                    } else {
+                                        return row.taskMetrics.shuffleWriteMetrics.bytesWritten + " / " + row.taskMetrics.shuffleWriteMetrics.recordsWritten;
+                                    }
+                                } else {
+                                    return "";
+                                }
+                            },
+                            name: "Shuffle Write Size / Records"
+                        },
+                        {
+                            data : function (row, type) {
+                                if (row.taskMetrics && row.taskMetrics.shuffleReadMetrics && row.taskMetrics.shuffleReadMetrics.localBytesRead > 0) {
+                                    var totalBytesRead = parseInt(row.taskMetrics.shuffleReadMetrics.localBytesRead) + parseInt(row.taskMetrics.shuffleReadMetrics.remoteBytesRead);
+                                    if (type === 'display') {
+                                        return formatBytes(totalBytesRead, type) + " / " + row.taskMetrics.shuffleReadMetrics.recordsRead;
+                                    } else {
+                                        return totalBytesRead + " / " + row.taskMetrics.shuffleReadMetrics.recordsRead;
+                                    }
+                                } else {
+                                    return "";
+                                }
+                            },
+                            name: "Shuffle Read Size / Records"
+                        },
+                        {
+                            data : function (row, type) {
+                                if (row.taskMetrics && row.taskMetrics.memoryBytesSpilled && row.taskMetrics.memoryBytesSpilled > 0) {
+                                    return type === 'display' ? formatBytes(row.taskMetrics.memoryBytesSpilled, type) : row.taskMetrics.memoryBytesSpilled;
+                                } else {
+                                    return "";
+                                }
+                            },
+                            name: "Shuffle Spill (Memory)"
+                        },
+                        {
+                            data : function (row, type) {
+                                if (row.taskMetrics && row.taskMetrics.diskBytesSpilled && row.taskMetrics.diskBytesSpilled > 0) {
+                                    return type === 'display' ? formatBytes(row.taskMetrics.diskBytesSpilled, type) : row.taskMetrics.diskBytesSpilled;
+                                } else {
+                                    return "";
+                                }
+                            },
+                            name: "Shuffle Spill (Disk)"
+                        },
+                        {
+                            data : function (row, type) {
+                                var msg = row.errorMessage;
+                                if (typeof msg === 'undefined') {
+                                    return "";
+                                } else {
+                                    var formHead = msg.substring(0, msg.indexOf("at"));
+                                    var form = "<span onclick=\"this.parentNode.querySelector('.stacktrace-details').classList.toggle('collapsed')\" class=\"expand-details\">+details</span>";
+                                    var formMsg = "<div class=\"stacktrace-details collapsed\"><pre>" + row.errorMessage + "</pre></div>";
+                                    return formHead + form + formMsg;
+                                }
+                            },
+                            name: "Errors"
+                        }
+                    ],
+                    "columnDefs": [
+                        { "visible": false, "targets": 11 },
+                        { "visible": false, "targets": 12 },
+                        { "visible": false, "targets": 13 },
+                        { "visible": false, "targets": 14 },
+                        { "visible": false, "targets": 15 },
+                        { "visible": false, "targets": 16 },
+                        { "visible": false, "targets": 17 },
+                        { "visible": false, "targets": 18 }
+                    ],
+                };
+                taskTableSelector = $(taskTable).DataTable(taskConf);
+                $('#active-tasks-table_filter input').unbind();
+                var searchEvent;
+                $('#active-tasks-table_filter input').bind('keyup', function(e) {
+                  if (typeof searchEvent !== 'undefined') {
+                    window.clearTimeout(searchEvent);
+                  }
+                  var value = this.value;
+                  searchEvent = window.setTimeout(function(){
+                    taskTableSelector.search( value ).draw();}, 500);
+                });
+                reselectCheckboxesBasedOnTaskTableState();
+
+                // Hide or show task-table columns when a toggle checkbox is clicked
+                $('input.toggle-vis').on('click', function(e){
+                    // Get the column
+                    var para = $(this).attr('data-column');
+                    if (para == "0") {
+                        var column = taskTableSelector.column(optionalColumns);
+                        if ($(this).is(":checked")) {
+                            $(".toggle-vis").prop('checked', true);
+                            column.visible(true);
+                            createDataTableForTaskSummaryMetricsTable(taskSummaryMetricsTableArray);
+                        } else {
+                            $(".toggle-vis").prop('checked', false);
+                            column.visible(false);
+                            var taskSummaryMetricsTableFilteredArray =
+                                taskSummaryMetricsTableArray.filter(row => row.checkboxId < 11);
+                            createDataTableForTaskSummaryMetricsTable(taskSummaryMetricsTableFilteredArray);
+                        }
+                    } else {
+                        var column = taskTableSelector.column(para);
+                        // Toggle the visibility
+                        column.visible(!column.visible());
+                        var taskSummaryMetricsTableFilteredArray = [];
+                        if ($(this).is(":checked")) {
+                            taskSummaryMetricsTableCurrentStateArray.push(taskSummaryMetricsTableArray.filter(row => (row.checkboxId).toString() == para)[0]);
+                            taskSummaryMetricsTableFilteredArray = taskSummaryMetricsTableCurrentStateArray.slice();
+                        } else {
+                            taskSummaryMetricsTableFilteredArray =
+                                taskSummaryMetricsTableCurrentStateArray.filter(row => (row.checkboxId).toString() != para);
+                        }
+                        createDataTableForTaskSummaryMetricsTable(taskSummaryMetricsTableFilteredArray);
+                    }
+                });
+
+                // Update the summary-metrics and task section titles with their task counts
+                $("#summaryMetricsTitle").html("Summary Metrics for " + "<a href='#tasksTitle'>" + responseBody.numCompleteTasks + " Completed Tasks" + "</a>");
+                $("#tasksTitle").html("Tasks (" + totalTasksToShow + ")");
+
+                // Hide or show the accumulator updates table
+                if (accumulatorTable.length == 0) {
+                    $("#accumulator-update-table").hide();
+                } else {
+                    taskTableSelector.column(18).visible(true);
+                    $("#accumulator-update-table").show();
+                }
+                // Show only the columns relevant to this stage's data in the task table and the
+                // executor summary table
+                taskTableSelector.column(19).visible(dataToShow.showInputData);
+                taskTableSelector.column(20).visible(dataToShow.showOutputData);
+                taskTableSelector.column(21).visible(dataToShow.showShuffleWriteData);
+                taskTableSelector.column(22).visible(dataToShow.showShuffleWriteData);
+                taskTableSelector.column(23).visible(dataToShow.showShuffleReadData);
+                taskTableSelector.column(24).visible(dataToShow.showBytesSpilledData);
+                taskTableSelector.column(25).visible(dataToShow.showBytesSpilledData);
+
+                if (window.localStorage) {
+                    if (window.localStorage.getItem("arrowtoggle1class") !== null &&
+                        window.localStorage.getItem("arrowtoggle1class").includes("arrow-open")) {
+                        $("#arrowtoggle1").toggleClass("arrow-open arrow-closed");
+                        $("#toggle-metrics").toggle();
+                    }
+                    if (window.localStorage.getItem("arrowtoggle2class") !== null &&
+                        window.localStorage.getItem("arrowtoggle2class").includes("arrow-open")) {
+                        $("#arrowtoggle2").toggleClass("arrow-open arrow-closed");
+                        $("#toggle-aggregatedMetrics").toggle();
+                    }
+                }
+            });
+        });
+    });
+});
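
Note on the column definitions above: they all follow DataTables' orthogonal-data pattern, where a column's `data` callback receives `(row, type)` and returns the raw numeric value when DataTables is sorting or filtering, but a formatted string when it is rendering for display. A minimal standalone sketch of that pattern is below; the `formatDuration` helper mirrors the one in utils.js in this diff, while the table id is purely illustrative.

    // Sketch: a type-aware column definition for a DataTable.
    // For 'display' return human-readable text; for 'sort'/'filter'/'type'
    // return the raw number so ordering stays numeric.
    var durationColumn = {
        name: "Duration",
        data: function (row, type) {
            if (!row.duration) return "";   // missing metric -> blank cell
            return type === 'display' ? formatDuration(row.duration) : row.duration;
        }
    };

    // Hypothetical usage: plug the column into a table initialisation.
    $("#example-task-table").DataTable({ columns: [durationColumn] });
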
diff --git a/core/src/main/resources/org/apache/spark/ui/static/stagespage-template.html b/core/src/main/resources/org/apache/spark/ui/static/stagespage-template.html
new file mode 100644
index 0000000000000..6f950c61b2d63
--- /dev/null
+++ b/core/src/main/resources/org/apache/spark/ui/static/stagespage-template.html
@@ -0,0 +1,124 @@
+<!--
+Licensed to the Apache Software Foundation (ASF) under one or more
+contributor license agreements.  See the NOTICE file distributed with
+this work for additional information regarding copyright ownership.
+The ASF licenses this file to You under the Apache License, Version 2.0
+(the "License"); you may not use this file except in compliance with
+the License.  You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+-->
+
+<script id="stages-summary-template" type="text/html">
+    <h4 id="summaryMetricsTitle" class="title-table"></h4>
+    <div class="container-fluid">
+        <div class="container-fluid">
+            <table id="summary-metrics-table" class="table table-striped compact table-dataTable cell-border">
+                <thead>
+                <th>Metric</th>
+                <th>Min</th>
+                <th>25th percentile</th>
+                <th>Median</th>
+                <th>75th percentile</th>
+                <th>Max</th>
+                </thead>
+                <tbody>
+                </tbody>
+            </table>
+        </div>
+    </div>
+    <a id="aggregatedMetrics">
+        <span class="expand-input-rate-arrow arrow-closed" id="arrowtoggle2"></span>
+        <h4 class="title-table">Aggregated Metrics by Executor</h4>
+    </a>
+    <br>
+    <div class="container-fluid" id="toggle-aggregatedMetrics" hidden>
+        <div class="container-fluid">
+            <table id="summary-executor-table" class="table table-striped compact table-dataTable cell-border">
+                <thead>
+                <th>Executor ID</th>
+                <th>Logs</th>
+                <th>Address</th>
+                <th>Task Time</th>
+                <th>Total Tasks</th>
+                <th>Failed Tasks</th>
+                <th>Killed Tasks</th>
+                <th>Succeeded Tasks</th>
+                <th>
+          <span data-toggle="tooltip" data-placement="top"
+                title="Shows if this executor has been blacklisted by the scheduler due to task failures.">
+            Blacklisted</span>
+                </th>
+                <th><span id="executor-summary-input">Input Size / Records</span></th>
+                <th><span id="executor-summary-output">Output Size / Records</span></th>
+                <th><span id="executor-summary-shuffle-read">Shuffle Read Size / Records</span></th>
+                <th><span id="executor-summary-shuffle-write">Shuffle Write Size / Records</span></th>
+                <th>Shuffle Spill (Memory) </th>
+                <th>Shuffle Spill (Disk) </th>
+                </thead>
+                <tbody>
+                </tbody>
+            </table>
+        </div>
+    </div>
+    <div class="container-fluid" id="accumulator-update-table">
+        <h4 class="title-table">Accumulators</h4>
+        <div class="container-fluid">
+            <table id="accumulator-table" class="table table-striped compact table-dataTable cell-border">
+                <thead>
+                <th>ID</th>
+                <th>Name</th>
+                <th>Value</th>
+                </thead>
+                <tbody>
+                </tbody>
+            </table>
+        </div>
+    </div>
+    <h4 id="tasksTitle" class="title-table"></h4>
+    <div class="container-fluid">
+        <div class="container-fluid">
+            <table id="active-tasks-table" class="table table-striped compact table-dataTable cell-border">
+                <thead>
+                <tr>
+                    <th>Index</th>
+                    <th>Task ID</th>
+                    <th>Attempt</th>
+                    <th>Status</th>
+                    <th>Locality Level</th>
+                    <th>Executor ID</th>
+                    <th>Host</th>
+                    <th>Logs</th>
+                    <th>Launch Time</th>
+                    <th>Duration</th>
+                    <th>GC Time</th>
+                    <th>Scheduler Delay</th>
+                    <th>Task Deserialization Time</th>
+                    <th>Shuffle Read Blocked Time</th>
+                    <th>Shuffle Remote Reads</th>
+                    <th>Result Serialization Time</th>
+                    <th>Getting Result Time</th>
+                    <th>Peak Execution Memory</th>
+                    <th>Accumulators</th>
+                    <th>Input Size / Records</th>
+                    <th>Output Size / Records</th>
+                    <th>Write Time</th>
+                    <th>Shuffle Write Size / Records</th>
+                    <th>Shuffle Read Size / Records</th>
+                    <th>Shuffle Spill (Memory)</th>
+                    <th>Shuffle Spill (Disk)</th>
+                    <th>Errors</th>
+                </tr>
+                </thead>
+                <tbody>
+                </tbody>
+            </table>
+        </div>
+    </div>
+</script>
diff --git a/core/src/main/resources/org/apache/spark/ui/static/utils.js b/core/src/main/resources/org/apache/spark/ui/static/utils.js
index 4f63f6413d6de..deeafad4eb5f5 100644
--- a/core/src/main/resources/org/apache/spark/ui/static/utils.js
+++ b/core/src/main/resources/org/apache/spark/ui/static/utils.js
@@ -18,7 +18,7 @@
 // this function works exactly the same as UIUtils.formatDuration
 function formatDuration(milliseconds) {
     if (milliseconds < 100) {
-        return milliseconds + " ms";
+        return parseInt(milliseconds).toFixed(1) + " ms";
     }
     var seconds = milliseconds * 1.0 / 1000;
     if (seconds < 1) {
@@ -74,3 +74,114 @@ function getTimeZone() {
     return new Date().toString().match(/\((.*)\)/)[1];
   }
 }
+
+function formatLogsCells(execLogs, type) {
+  if (!execLogs) return;  // guard: a row may have no logs at all
+  if (type !== 'display') return Object.keys(execLogs);
+  var result = '';
+  $.each(execLogs, function (logName, logUrl) {
+    result += '<div><a href=' + logUrl + '>' + logName + '</a></div>'
+  });
+  return result;
+}
+
+function getStandAloneAppId(cb) {
+  var words = document.baseURI.split('/');
+  var ind = words.indexOf("proxy");
+  if (ind > 0) {
+    var appId = words[ind + 1];
+    cb(appId);
+    return;
+  }
+  ind = words.indexOf("history");
+  if (ind > 0) {
+    var appId = words[ind + 1];
+    cb(appId);
+    return;
+  }
+  // The Web UI appears to be running in standalone mode, so fetch the
+  // application id from the REST endpoint instead.
+  $.getJSON(location.origin + "/api/v1/applications", function(response, status, jqXHR) {
+    if (response && response.length > 0) {
+      var appId = response[0].id;
+      cb(appId);
+      return;
+    }
+  });
+}
+
+// Helper for sorting duration strings in a DataTable.
+// Given a formatted duration (e.g. "12 ms", "2 s", "2 min", "2 h"),
+// it returns the equivalent number of milliseconds so rows order numerically.
+function ConvertDurationString(data) {
+  data = data.toString();
+  var units = data.replace(/[\d\.]/g, '' )
+                  .replace(' ', '')
+                  .toLowerCase();
+  var multiplier = 1;
+
+  switch(units) {
+    case 's':
+      multiplier = 1000;
+      break;
+    case 'min':
+      multiplier = 60000; // 1 minute = 60,000 ms
+      break;
+    case 'h':
+      multiplier = 3600000;
+      break;
+    default:
+      break;
+  }
+  return parseFloat(data) * multiplier;
+}
+
+function createTemplateURI(appId, templateName) {
+  var words = document.baseURI.split('/');
+  var ind = words.indexOf("proxy");
+  if (ind > 0) {
+    var baseURI = words.slice(0, ind + 1).join('/') + '/' + appId + '/static/' + templateName + '-template.html';
+    return baseURI;
+  }
+  ind = words.indexOf("history");
+  if(ind > 0) {
+    var baseURI = words.slice(0, ind).join('/') + '/static/' + templateName + '-template.html';
+    return baseURI;
+  }
+  return location.origin + "/static/" + templateName + "-template.html";
+}
+
+function setDataTableDefaults() {
+  $.extend($.fn.dataTable.defaults, {
+    stateSave: true,
+    lengthMenu: [[20, 40, 60, 100, -1], [20, 40, 60, 100, "All"]],
+    pageLength: 20
+  });
+}
+
+function formatDate(date) {
+  if (date <= 0) return "-";
+  else return date.split(".")[0].replace("T", " ");
+}
+
+function createRESTEndPointForExecutorsPage(appId) {
+    var words = document.baseURI.split('/');
+    var ind = words.indexOf("proxy");
+    if (ind > 0) {
+        var appId = words[ind + 1];
+        var newBaseURI = words.slice(0, ind + 2).join('/');
+        return newBaseURI + "/api/v1/applications/" + appId + "/allexecutors"
+    }
+    ind = words.indexOf("history");
+    if (ind > 0) {
+        var appId = words[ind + 1];
+        var attemptId = words[ind + 2];
+        var newBaseURI = words.slice(0, ind).join('/');
+        if (isNaN(attemptId)) {
+            return newBaseURI + "/api/v1/applications/" + appId + "/allexecutors";
+        } else {
+            return newBaseURI + "/api/v1/applications/" + appId + "/" + attemptId + "/allexecutors";
+        }
+    }
+    return location.origin + "/api/v1/applications/" + appId + "/allexecutors";
+}
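
The ConvertDurationString helper above strips the digits to find the unit, then multiplies the numeric prefix back into milliseconds so DataTables can order formatted durations numerically. A small sketch of how such a converter can be wired up as a custom DataTables ordering type follows; the "duration-pre" registration and the column `type` value are illustrative, not necessarily how this page registers it.

    // Register a sort key for cells whose rendered value is a duration string.
    // DataTables calls the '<type>-pre' function to turn each cell into a sortable value.
    $.extend($.fn.dataTable.ext.type.order, {
        "duration-pre": ConvertDurationString
    });

    // Example conversions, following the multipliers above:
    //   ConvertDurationString("12 ms")  -> 12
    //   ConvertDurationString("2 s")    -> 2000
    //   ConvertDurationString("2 min")  -> 120000
    //   ConvertDurationString("2 h")    -> 7200000
    // A column can then opt in with { type: "duration" } in its definition.
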
diff --git a/external/kafka-0-8/src/main/scala/org/apache/spark/streaming/kafka/package.scala b/core/src/main/resources/org/apache/spark/ui/static/webui-dataTables.css
similarity index 79%
rename from external/kafka-0-8/src/main/scala/org/apache/spark/streaming/kafka/package.scala
rename to core/src/main/resources/org/apache/spark/ui/static/webui-dataTables.css
index 47c5187f8751f..f6b4abed21e0d 100644
--- a/external/kafka-0-8/src/main/scala/org/apache/spark/streaming/kafka/package.scala
+++ b/core/src/main/resources/org/apache/spark/ui/static/webui-dataTables.css
@@ -15,9 +15,6 @@
  * limitations under the License.
  */
 
-package org.apache.spark.streaming
+table.dataTable thead .sorting_asc { background: url('images/sort_asc.png') no-repeat bottom right; }
 
-/**
- * Kafka receiver for spark streaming,
- */
-package object kafka
+table.dataTable thead .sorting_desc { background: url('images/sort_desc.png') no-repeat bottom right; }
\ No newline at end of file
diff --git a/core/src/main/resources/org/apache/spark/ui/static/webui.css b/core/src/main/resources/org/apache/spark/ui/static/webui.css
index 935d9b1aec615..fe5bb25687af1 100644
--- a/core/src/main/resources/org/apache/spark/ui/static/webui.css
+++ b/core/src/main/resources/org/apache/spark/ui/static/webui.css
@@ -88,6 +88,10 @@ a.kill-link {
   float: right;
 }
 
+a.name-link {
+  word-wrap: break-word;
+}
+
 span.expand-details {
   font-size: 10pt;
   cursor: pointer;
@@ -251,4 +255,110 @@ a.expandbutton {
 
 .table-cell-width-limited td {
   max-width: 600px;
+}
+
+.paginate_button.active > a {
+    color: #999999;
+    text-decoration: underline;
+}
+
+.title-table {
+  clear: left;
+  display: inline-block;
+}
+
+.table-dataTable {
+  width: 100%;
+}
+
+.container-fluid-div {
+  width: 200px;
+}
+
+.scheduler-delay-checkbox-div {
+  width: 120px;
+}
+
+.task-deserialization-time-checkbox-div {
+  width: 175px;
+}
+
+.shuffle-read-blocked-time-checkbox-div {
+  width: 187px;
+}
+
+.shuffle-remote-reads-checkbox-div {
+  width: 157px;
+}
+
+.result-serialization-time-checkbox-div {
+  width: 171px;
+}
+
+.getting-result-time-checkbox-div {
+  width: 141px;
+}
+
+.peak-execution-memory-checkbox-div {
+  width: 170px;
+}
+
+#active-tasks-table th {
+  border-top: 1px solid #dddddd;
+  border-bottom: 1px solid #dddddd;
+  border-right: 1px solid #dddddd;
+}
+
+#active-tasks-table th:first-child {
+  border-left: 1px solid #dddddd;
+}
+
+#accumulator-table th {
+  border-top: 1px solid #dddddd;
+  border-bottom: 1px solid #dddddd;
+  border-right: 1px solid #dddddd;
+}
+
+#accumulator-table th:first-child {
+  border-left: 1px solid #dddddd;
+}
+
+#summary-executor-table th {
+  border-top: 1px solid #dddddd;
+  border-bottom: 1px solid #dddddd;
+  border-right: 1px solid #dddddd;
+}
+
+#summary-executor-table th:first-child {
+  border-left: 1px solid #dddddd;
+}
+
+#summary-metrics-table th {
+  border-top: 1px solid #dddddd;
+  border-bottom: 1px solid #dddddd;
+  border-right: 1px solid #dddddd;
+}
+
+#summary-metrics-table th:first-child {
+  border-left: 1px solid #dddddd;
+}
+
+#summary-execs-table th {
+  border-top: 1px solid #dddddd;
+  border-bottom: 1px solid #dddddd;
+  border-right: 1px solid #dddddd;
+}
+
+#summary-execs-table th:first-child {
+  border-left: 1px solid #dddddd;
+}
+
+#active-executors-table th {
+  border-top: 1px solid #dddddd;
+  border-bottom: 1px solid #dddddd;
+  border-right: 1px solid #dddddd;
+}
+
+#active-executors-table th:first-child {
+  border-left: 1px solid #dddddd;
 }
\ No newline at end of file
diff --git a/core/src/main/resources/org/apache/spark/ui/static/webui.js b/core/src/main/resources/org/apache/spark/ui/static/webui.js
index f01c567ba58ad..b1254e08fa504 100644
--- a/core/src/main/resources/org/apache/spark/ui/static/webui.js
+++ b/core/src/main/resources/org/apache/spark/ui/static/webui.js
@@ -83,4 +83,7 @@ $(function() {
   collapseTablePageLoad('collapse-aggregated-rdds','aggregated-rdds');
   collapseTablePageLoad('collapse-aggregated-activeBatches','aggregated-activeBatches');
   collapseTablePageLoad('collapse-aggregated-completedBatches','aggregated-completedBatches');
+  collapseTablePageLoad('collapse-aggregated-runningExecutions','aggregated-runningExecutions');
+  collapseTablePageLoad('collapse-aggregated-completedExecutions','aggregated-completedExecutions');
+  collapseTablePageLoad('collapse-aggregated-failedExecutions','aggregated-failedExecutions');
 });
\ No newline at end of file
diff --git a/core/src/main/scala/org/apache/spark/Accumulable.scala b/core/src/main/scala/org/apache/spark/Accumulable.scala
deleted file mode 100644
index 3092074232d18..0000000000000
--- a/core/src/main/scala/org/apache/spark/Accumulable.scala
+++ /dev/null
@@ -1,226 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark
-
-import java.io.Serializable
-
-import scala.collection.generic.Growable
-import scala.reflect.ClassTag
-
-import org.apache.spark.scheduler.AccumulableInfo
-import org.apache.spark.serializer.JavaSerializer
-import org.apache.spark.util.{AccumulatorContext, AccumulatorMetadata, LegacyAccumulatorWrapper}
-
-
-/**
- * A data type that can be accumulated, i.e. has a commutative and associative "add" operation,
- * but where the result type, `R`, may be different from the element type being added, `T`.
- *
- * You must define how to add data, and how to merge two of these together.  For some data types,
- * such as a counter, these might be the same operation. In that case, you can use the simpler
- * [[org.apache.spark.Accumulator]]. They won't always be the same, though -- e.g., imagine you are
- * accumulating a set. You will add items to the set, and you will union two sets together.
- *
- * Operations are not thread-safe.
- *
- * @param id ID of this accumulator; for internal use only.
- * @param initialValue initial value of accumulator
- * @param param helper object defining how to add elements of type `R` and `T`
- * @param name human-readable name for use in Spark's web UI
- * @param countFailedValues whether to accumulate values from failed tasks. This is set to true
- *                          for system and time metrics like serialization time or bytes spilled,
- *                          and false for things with absolute values like number of input rows.
- *                          This should be used for internal metrics only.
- * @tparam R the full accumulated data (result type)
- * @tparam T partial data that can be added in
- */
-@deprecated("use AccumulatorV2", "2.0.0")
-class Accumulable[R, T] private (
-    val id: Long,
-    // SI-8813: This must explicitly be a private val, or else scala 2.11 doesn't compile
-    @transient private val initialValue: R,
-    param: AccumulableParam[R, T],
-    val name: Option[String],
-    private[spark] val countFailedValues: Boolean)
-  extends Serializable {
-
-  private[spark] def this(
-      initialValue: R,
-      param: AccumulableParam[R, T],
-      name: Option[String],
-      countFailedValues: Boolean) = {
-    this(AccumulatorContext.newId(), initialValue, param, name, countFailedValues)
-  }
-
-  private[spark] def this(initialValue: R, param: AccumulableParam[R, T], name: Option[String]) = {
-    this(initialValue, param, name, false /* countFailedValues */)
-  }
-
-  def this(initialValue: R, param: AccumulableParam[R, T]) = this(initialValue, param, None)
-
-  val zero = param.zero(initialValue)
-  private[spark] val newAcc = new LegacyAccumulatorWrapper(initialValue, param)
-  newAcc.metadata = AccumulatorMetadata(id, name, countFailedValues)
-  // Register the new accumulator in ctor, to follow the previous behaviour.
-  AccumulatorContext.register(newAcc)
-
-  /**
-   * Add more data to this accumulator / accumulable
-   * @param term the data to add
-   */
-  def += (term: T) { newAcc.add(term) }
-
-  /**
-   * Add more data to this accumulator / accumulable
-   * @param term the data to add
-   */
-  def add(term: T) { newAcc.add(term) }
-
-  /**
-   * Merge two accumulable objects together
-   *
-   * Normally, a user will not want to use this version, but will instead call `+=`.
-   * @param term the other `R` that will get merged with this
-   */
-  def ++= (term: R) { newAcc._value = param.addInPlace(newAcc._value, term) }
-
-  /**
-   * Merge two accumulable objects together
-   *
-   * Normally, a user will not want to use this version, but will instead call `add`.
-   * @param term the other `R` that will get merged with this
-   */
-  def merge(term: R) { newAcc._value = param.addInPlace(newAcc._value, term) }
-
-  /**
-   * Access the accumulator's current value; only allowed on driver.
-   */
-  def value: R = {
-    if (newAcc.isAtDriverSide) {
-      newAcc.value
-    } else {
-      throw new UnsupportedOperationException("Can't read accumulator value in task")
-    }
-  }
-
-  /**
-   * Get the current value of this accumulator from within a task.
-   *
-   * This is NOT the global value of the accumulator.  To get the global value after a
-   * completed operation on the dataset, call `value`.
-   *
-   * The typical use of this method is to directly mutate the local value, eg., to add
-   * an element to a Set.
-   */
-  def localValue: R = newAcc.value
-
-  /**
-   * Set the accumulator's value; only allowed on driver.
-   */
-  def value_= (newValue: R) {
-    if (newAcc.isAtDriverSide) {
-      newAcc._value = newValue
-    } else {
-      throw new UnsupportedOperationException("Can't assign accumulator value in task")
-    }
-  }
-
-  /**
-   * Set the accumulator's value. For internal use only.
-   */
-  def setValue(newValue: R): Unit = { newAcc._value = newValue }
-
-  /**
-   * Set the accumulator's value. For internal use only.
-   */
-  private[spark] def setValueAny(newValue: Any): Unit = { setValue(newValue.asInstanceOf[R]) }
-
-  /**
-   * Create an [[AccumulableInfo]] representation of this [[Accumulable]] with the provided values.
-   */
-  private[spark] def toInfo(update: Option[Any], value: Option[Any]): AccumulableInfo = {
-    val isInternal = name.exists(_.startsWith(InternalAccumulator.METRICS_PREFIX))
-    new AccumulableInfo(id, name, update, value, isInternal, countFailedValues)
-  }
-
-  override def toString: String = if (newAcc._value == null) "null" else newAcc._value.toString
-}
-
-
-/**
- * Helper object defining how to accumulate values of a particular type. An implicit
- * AccumulableParam needs to be available when you create [[Accumulable]]s of a specific type.
- *
- * @tparam R the full accumulated data (result type)
- * @tparam T partial data that can be added in
- */
-@deprecated("use AccumulatorV2", "2.0.0")
-trait AccumulableParam[R, T] extends Serializable {
-  /**
-   * Add additional data to the accumulator value. Is allowed to modify and return `r`
-   * for efficiency (to avoid allocating objects).
-   *
-   * @param r the current value of the accumulator
-   * @param t the data to be added to the accumulator
-   * @return the new value of the accumulator
-   */
-  def addAccumulator(r: R, t: T): R
-
-  /**
-   * Merge two accumulated values together. Is allowed to modify and return the first value
-   * for efficiency (to avoid allocating objects).
-   *
-   * @param r1 one set of accumulated data
-   * @param r2 another set of accumulated data
-   * @return both data sets merged together
-   */
-  def addInPlace(r1: R, r2: R): R
-
-  /**
-   * Return the "zero" (identity) value for an accumulator type, given its initial value. For
-   * example, if R was a vector of N dimensions, this would return a vector of N zeroes.
-   */
-  def zero(initialValue: R): R
-}
-
-
-@deprecated("use AccumulatorV2", "2.0.0")
-private[spark] class
-GrowableAccumulableParam[R : ClassTag, T]
-  (implicit rg: R => Growable[T] with TraversableOnce[T] with Serializable)
-  extends AccumulableParam[R, T] {
-
-  def addAccumulator(growable: R, elem: T): R = {
-    growable += elem
-    growable
-  }
-
-  def addInPlace(t1: R, t2: R): R = {
-    t1 ++= t2
-    t1
-  }
-
-  def zero(initialValue: R): R = {
-    // We need to clone initialValue, but it's hard to specify that R should also be Cloneable.
-    // Instead we'll serialize it to a buffer and load it back.
-    val ser = new JavaSerializer(new SparkConf(false)).newInstance()
-    val copy = ser.deserialize[R](ser.serialize(initialValue))
-    copy.clear()   // In case it contained stuff
-    copy
-  }
-}
diff --git a/core/src/main/scala/org/apache/spark/Accumulator.scala b/core/src/main/scala/org/apache/spark/Accumulator.scala
deleted file mode 100644
index 9d5fbefc824ad..0000000000000
--- a/core/src/main/scala/org/apache/spark/Accumulator.scala
+++ /dev/null
@@ -1,117 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark
-
-/**
- * A simpler value of [[Accumulable]] where the result type being accumulated is the same
- * as the types of elements being merged, i.e. variables that are only "added" to through an
- * associative and commutative operation and can therefore be efficiently supported in parallel.
- * They can be used to implement counters (as in MapReduce) or sums. Spark natively supports
- * accumulators of numeric value types, and programmers can add support for new types.
- *
- * An accumulator is created from an initial value `v` by calling `SparkContext.accumulator`.
- * Tasks running on the cluster can then add to it using the `+=` operator.
- * However, they cannot read its value. Only the driver program can read the accumulator's value,
- * using its [[#value]] method.
- *
- * The interpreter session below shows an accumulator being used to add up the elements of an array:
- *
- * {{{
- * scala> val accum = sc.accumulator(0)
- * accum: org.apache.spark.Accumulator[Int] = 0
- *
- * scala> sc.parallelize(Array(1, 2, 3, 4)).foreach(x => accum += x)
- * ...
- * 10/09/29 18:41:08 INFO SparkContext: Tasks finished in 0.317106 s
- *
- * scala> accum.value
- * res2: Int = 10
- * }}}
- *
- * @param initialValue initial value of accumulator
- * @param param helper object defining how to add elements of type `T`
- * @param name human-readable name associated with this accumulator
- * @param countFailedValues whether to accumulate values from failed tasks
- * @tparam T result type
-*/
-@deprecated("use AccumulatorV2", "2.0.0")
-class Accumulator[T] private[spark] (
-    // SI-8813: This must explicitly be a private val, or else scala 2.11 doesn't compile
-    @transient private val initialValue: T,
-    param: AccumulatorParam[T],
-    name: Option[String] = None,
-    countFailedValues: Boolean = false)
-  extends Accumulable[T, T](initialValue, param, name, countFailedValues)
-
-
-/**
- * A simpler version of [[org.apache.spark.AccumulableParam]] where the only data type you can add
- * in is the same type as the accumulated value. An implicit AccumulatorParam object needs to be
- * available when you create Accumulators of a specific type.
- *
- * @tparam T type of value to accumulate
- */
-@deprecated("use AccumulatorV2", "2.0.0")
-trait AccumulatorParam[T] extends AccumulableParam[T, T] {
-  def addAccumulator(t1: T, t2: T): T = {
-    addInPlace(t1, t2)
-  }
-}
-
-
-@deprecated("use AccumulatorV2", "2.0.0")
-object AccumulatorParam {
-
-  // The following implicit objects were in SparkContext before 1.2 and users had to
-  // `import SparkContext._` to enable them. Now we move them here to make the compiler find
-  // them automatically. However, as there are duplicate codes in SparkContext for backward
-  // compatibility, please update them accordingly if you modify the following implicit objects.
-
-  @deprecated("use AccumulatorV2", "2.0.0")
-  implicit object DoubleAccumulatorParam extends AccumulatorParam[Double] {
-    def addInPlace(t1: Double, t2: Double): Double = t1 + t2
-    def zero(initialValue: Double): Double = 0.0
-  }
-
-  @deprecated("use AccumulatorV2", "2.0.0")
-  implicit object IntAccumulatorParam extends AccumulatorParam[Int] {
-    def addInPlace(t1: Int, t2: Int): Int = t1 + t2
-    def zero(initialValue: Int): Int = 0
-  }
-
-  @deprecated("use AccumulatorV2", "2.0.0")
-  implicit object LongAccumulatorParam extends AccumulatorParam[Long] {
-    def addInPlace(t1: Long, t2: Long): Long = t1 + t2
-    def zero(initialValue: Long): Long = 0L
-  }
-
-  @deprecated("use AccumulatorV2", "2.0.0")
-  implicit object FloatAccumulatorParam extends AccumulatorParam[Float] {
-    def addInPlace(t1: Float, t2: Float): Float = t1 + t2
-    def zero(initialValue: Float): Float = 0f
-  }
-
-  // Note: when merging values, this param just adopts the newer value. This is used only
-  // internally for things that shouldn't really be accumulated across tasks, like input
-  // read method, which should be the same across all tasks in the same stage.
-  @deprecated("use AccumulatorV2", "2.0.0")
-  private[spark] object StringAccumulatorParam extends AccumulatorParam[String] {
-    def addInPlace(t1: String, t2: String): String = t2
-    def zero(initialValue: String): String = ""
-  }
-}
diff --git a/core/src/main/scala/org/apache/spark/BarrierTaskContext.scala b/core/src/main/scala/org/apache/spark/BarrierTaskContext.scala
index 90a5c4130f799..6a497afac444d 100644
--- a/core/src/main/scala/org/apache/spark/BarrierTaskContext.scala
+++ b/core/src/main/scala/org/apache/spark/BarrierTaskContext.scala
@@ -41,14 +41,14 @@ import org.apache.spark.util._
 class BarrierTaskContext private[spark] (
     taskContext: TaskContext) extends TaskContext with Logging {
 
+  import BarrierTaskContext._
+
   // Find the driver side RPCEndpointRef of the coordinator that handles all the barrier() calls.
   private val barrierCoordinator: RpcEndpointRef = {
     val env = SparkEnv.get
     RpcUtils.makeDriverRef("barrierSync", env.conf, env.rpcEnv)
   }
 
-  private val timer = new Timer("Barrier task timer for barrier() calls.")
-
   // Local barrierEpoch that identify a barrier() call from current task, it shall be identical
   // with the driver side epoch.
   private var barrierEpoch = 0
@@ -158,8 +158,6 @@ class BarrierTaskContext private[spark] (
 
   override def isInterrupted(): Boolean = taskContext.isInterrupted()
 
-  override def isRunningLocally(): Boolean = taskContext.isRunningLocally()
-
   override def addTaskCompletionListener(listener: TaskCompletionListener): this.type = {
     taskContext.addTaskCompletionListener(listener)
     this
@@ -234,4 +232,7 @@ object BarrierTaskContext {
   @Experimental
   @Since("2.4.0")
   def get(): BarrierTaskContext = TaskContext.get().asInstanceOf[BarrierTaskContext]
+
+  private val timer = new Timer("Barrier task timer for barrier() calls.")
+
 }
diff --git a/core/src/main/scala/org/apache/spark/HeartbeatReceiver.scala b/core/src/main/scala/org/apache/spark/HeartbeatReceiver.scala
index bcbc8df0d5865..ab0ae55ed357d 100644
--- a/core/src/main/scala/org/apache/spark/HeartbeatReceiver.scala
+++ b/core/src/main/scala/org/apache/spark/HeartbeatReceiver.scala
@@ -22,6 +22,7 @@ import java.util.concurrent.{ScheduledFuture, TimeUnit}
 import scala.collection.mutable
 import scala.concurrent.Future
 
+import org.apache.spark.executor.ExecutorMetrics
 import org.apache.spark.internal.Logging
 import org.apache.spark.rpc.{RpcCallContext, RpcEnv, ThreadSafeRpcEndpoint}
 import org.apache.spark.scheduler._
@@ -37,7 +38,8 @@ import org.apache.spark.util._
 private[spark] case class Heartbeat(
     executorId: String,
     accumUpdates: Array[(Long, Seq[AccumulatorV2[_, _]])], // taskId -> accumulator updates
-    blockManagerId: BlockManagerId)
+    blockManagerId: BlockManagerId,
+    executorUpdates: ExecutorMetrics) // executor level updates
 
 /**
  * An event that SparkContext uses to notify HeartbeatReceiver that SparkContext.taskScheduler is
@@ -119,14 +121,14 @@ private[spark] class HeartbeatReceiver(sc: SparkContext, clock: Clock)
       context.reply(true)
 
     // Messages received from executors
-    case heartbeat @ Heartbeat(executorId, accumUpdates, blockManagerId) =>
+    case heartbeat @ Heartbeat(executorId, accumUpdates, blockManagerId, executorMetrics) =>
       if (scheduler != null) {
         if (executorLastSeen.contains(executorId)) {
           executorLastSeen(executorId) = clock.getTimeMillis()
           eventLoopThread.submit(new Runnable {
             override def run(): Unit = Utils.tryLogNonFatalError {
               val unknownExecutor = !scheduler.executorHeartbeatReceived(
-                executorId, accumUpdates, blockManagerId)
+                executorId, accumUpdates, blockManagerId, executorMetrics)
               val response = HeartbeatResponse(reregisterBlockManager = unknownExecutor)
               context.reply(response)
             }
diff --git a/core/src/main/scala/org/apache/spark/Heartbeater.scala b/core/src/main/scala/org/apache/spark/Heartbeater.scala
new file mode 100644
index 0000000000000..84091eef04306
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/Heartbeater.scala
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark
+
+import java.util.concurrent.TimeUnit
+
+import org.apache.spark.executor.ExecutorMetrics
+import org.apache.spark.internal.Logging
+import org.apache.spark.memory.MemoryManager
+import org.apache.spark.metrics.ExecutorMetricType
+import org.apache.spark.util.{ThreadUtils, Utils}
+
+/**
+ * Creates a heartbeat thread which will call the specified reportHeartbeat function at
+ * intervals of intervalMs.
+ *
+ * @param memoryManager the memory manager for execution and storage memory.
+ * @param reportHeartbeat the heartbeat reporting function to call.
+ * @param name the thread name for the heartbeater.
+ * @param intervalMs the interval between heartbeats.
+ */
+private[spark] class Heartbeater(
+    memoryManager: MemoryManager,
+    reportHeartbeat: () => Unit,
+    name: String,
+    intervalMs: Long) extends Logging {
+  // Executor for the heartbeat task
+  private val heartbeater = ThreadUtils.newDaemonSingleThreadScheduledExecutor(name)
+
+  /** Schedules a task to report a heartbeat. */
+  def start(): Unit = {
+    // Wait a random interval so the heartbeats don't end up in sync
+    val initialDelay = intervalMs + (math.random * intervalMs).asInstanceOf[Int]
+
+    val heartbeatTask = new Runnable() {
+      override def run(): Unit = Utils.logUncaughtExceptions(reportHeartbeat())
+    }
+    heartbeater.scheduleAtFixedRate(heartbeatTask, initialDelay, intervalMs, TimeUnit.MILLISECONDS)
+  }
+
+  /** Stops the heartbeat thread. */
+  def stop(): Unit = {
+    heartbeater.shutdown()
+    heartbeater.awaitTermination(10, TimeUnit.SECONDS)
+  }
+
+  /**
+   * Get the current executor level metrics. These are returned as an array, with the index
+   * determined by ExecutorMetricType.values
+   */
+  def getCurrentMetrics(): ExecutorMetrics = {
+    val metrics = ExecutorMetricType.values.map(_.getMetricValue(memoryManager)).toArray
+    new ExecutorMetrics(metrics)
+  }
+}
+
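
The Heartbeater.start() method above staggers the first report by a random fraction of the interval so that many executors (and the driver) do not all heartbeat at the same instant. The same jittered-start idea, as a tiny standalone sketch using plain timers rather than Spark code, with reportHeartbeat as a placeholder callback:

    // Sketch: periodic reporting with a randomized initial delay.
    // initialDelay falls in [intervalMs, 2 * intervalMs), mirroring Heartbeater.start().
    function startHeartbeater(reportHeartbeat, intervalMs) {
        var initialDelay = intervalMs + Math.floor(Math.random() * intervalMs);
        setTimeout(function () {
            reportHeartbeat();                        // first, jittered report
            setInterval(reportHeartbeat, intervalMs); // then fixed-rate reports
        }, initialDelay);
    }
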
diff --git a/core/src/main/scala/org/apache/spark/MapOutputStatistics.scala b/core/src/main/scala/org/apache/spark/MapOutputStatistics.scala
index ff85e11409e35..f8a6f1d0d8cbb 100644
--- a/core/src/main/scala/org/apache/spark/MapOutputStatistics.scala
+++ b/core/src/main/scala/org/apache/spark/MapOutputStatistics.scala
@@ -23,9 +23,5 @@ package org.apache.spark
  * @param shuffleId ID of the shuffle
  * @param bytesByPartitionId approximate number of output bytes for each map output partition
  *   (may be inexact due to use of compressed map statuses)
- * @param recordsByPartitionId number of output records for each map output partition
  */
-private[spark] class MapOutputStatistics(
-    val shuffleId: Int,
-    val bytesByPartitionId: Array[Long],
-    val recordsByPartitionId: Array[Long])
+private[spark] class MapOutputStatistics(val shuffleId: Int, val bytesByPartitionId: Array[Long])
diff --git a/core/src/main/scala/org/apache/spark/MapOutputTracker.scala b/core/src/main/scala/org/apache/spark/MapOutputTracker.scala
index 41575ce4e6e3d..1c4fa4bc6541f 100644
--- a/core/src/main/scala/org/apache/spark/MapOutputTracker.scala
+++ b/core/src/main/scala/org/apache/spark/MapOutputTracker.scala
@@ -522,19 +522,16 @@ private[spark] class MapOutputTrackerMaster(
   def getStatistics(dep: ShuffleDependency[_, _, _]): MapOutputStatistics = {
     shuffleStatuses(dep.shuffleId).withMapStatuses { statuses =>
       val totalSizes = new Array[Long](dep.partitioner.numPartitions)
-      val recordsByMapTask = new Array[Long](statuses.length)
-
       val parallelAggThreshold = conf.get(
         SHUFFLE_MAP_OUTPUT_PARALLEL_AGGREGATION_THRESHOLD)
       val parallelism = math.min(
         Runtime.getRuntime.availableProcessors(),
         statuses.length.toLong * totalSizes.length / parallelAggThreshold + 1).toInt
       if (parallelism <= 1) {
-        statuses.zipWithIndex.foreach { case (s, index) =>
+        for (s <- statuses) {
           for (i <- 0 until totalSizes.length) {
             totalSizes(i) += s.getSizeForBlock(i)
           }
-          recordsByMapTask(index) = s.numberOfOutput
         }
       } else {
         val threadPool = ThreadUtils.newDaemonFixedThreadPool(parallelism, "map-output-aggregate")
@@ -551,11 +548,8 @@ private[spark] class MapOutputTrackerMaster(
         } finally {
           threadPool.shutdown()
         }
-        statuses.zipWithIndex.foreach { case (s, index) =>
-          recordsByMapTask(index) = s.numberOfOutput
-        }
       }
-      new MapOutputStatistics(dep.shuffleId, totalSizes, recordsByMapTask)
+      new MapOutputStatistics(dep.shuffleId, totalSizes)
     }
   }
 
diff --git a/core/src/main/scala/org/apache/spark/SparkConf.scala b/core/src/main/scala/org/apache/spark/SparkConf.scala
index 6c4c5c94cfa28..21c5cbc04d813 100644
--- a/core/src/main/scala/org/apache/spark/SparkConf.scala
+++ b/core/src/main/scala/org/apache/spark/SparkConf.scala
@@ -25,9 +25,9 @@ import scala.collection.mutable.LinkedHashSet
 
 import org.apache.avro.{Schema, SchemaNormalization}
 
-import org.apache.spark.deploy.history.config._
 import org.apache.spark.internal.Logging
 import org.apache.spark.internal.config._
+import org.apache.spark.internal.config.History._
 import org.apache.spark.serializer.KryoSerializer
 import org.apache.spark.util.Utils
 
@@ -609,13 +609,14 @@ class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging with Seria
     require(!encryptionEnabled || get(NETWORK_AUTH_ENABLED),
       s"${NETWORK_AUTH_ENABLED.key} must be enabled when enabling encryption.")
 
-    val executorTimeoutThreshold = getTimeAsSeconds("spark.network.timeout", "120s")
-    val executorHeartbeatInterval = getTimeAsSeconds("spark.executor.heartbeatInterval", "10s")
+    val executorTimeoutThresholdMs =
+      getTimeAsSeconds("spark.network.timeout", "120s") * 1000
+    val executorHeartbeatIntervalMs = get(EXECUTOR_HEARTBEAT_INTERVAL)
     // If spark.executor.heartbeatInterval bigger than spark.network.timeout,
     // it will almost always cause ExecutorLostFailure. See SPARK-22754.
-    require(executorTimeoutThreshold > executorHeartbeatInterval, "The value of " +
-      s"spark.network.timeout=${executorTimeoutThreshold}s must be no less than the value of " +
-      s"spark.executor.heartbeatInterval=${executorHeartbeatInterval}s.")
+    require(executorTimeoutThresholdMs > executorHeartbeatIntervalMs, "The value of " +
+      s"spark.network.timeout=${executorTimeoutThresholdMs}ms must be greater than the value of " +
+      s"spark.executor.heartbeatInterval=${executorHeartbeatIntervalMs}ms.")
   }
 
   /**
@@ -726,7 +727,13 @@ private[spark] object SparkConf extends Logging {
     DRIVER_MEMORY_OVERHEAD.key -> Seq(
       AlternateConfig("spark.yarn.driver.memoryOverhead", "2.3")),
     EXECUTOR_MEMORY_OVERHEAD.key -> Seq(
-      AlternateConfig("spark.yarn.executor.memoryOverhead", "2.3"))
+      AlternateConfig("spark.yarn.executor.memoryOverhead", "2.3")),
+    KEYTAB.key -> Seq(
+      AlternateConfig("spark.yarn.keytab", "3.0")),
+    PRINCIPAL.key -> Seq(
+      AlternateConfig("spark.yarn.principal", "3.0")),
+    KERBEROS_RELOGIN_PERIOD.key -> Seq(
+      AlternateConfig("spark.yarn.kerberos.relogin.period", "3.0"))
   )
 
   /**
diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala
index e5b1e0ecd1586..845a3d5f6d6f9 100644
--- a/core/src/main/scala/org/apache/spark/SparkContext.scala
+++ b/core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -25,7 +25,6 @@ import java.util.concurrent.atomic.{AtomicBoolean, AtomicInteger, AtomicReferenc
 
 import scala.collection.JavaConverters._
 import scala.collection.Map
-import scala.collection.generic.Growable
 import scala.collection.mutable.HashMap
 import scala.language.implicitConversions
 import scala.reflect.{classTag, ClassTag}
@@ -51,14 +50,15 @@ import org.apache.spark.partial.{ApproximateEvaluator, PartialResult}
 import org.apache.spark.rdd._
 import org.apache.spark.rpc.RpcEndpointRef
 import org.apache.spark.scheduler._
-import org.apache.spark.scheduler.cluster.{CoarseGrainedSchedulerBackend, StandaloneSchedulerBackend}
+import org.apache.spark.scheduler.cluster.StandaloneSchedulerBackend
 import org.apache.spark.scheduler.local.LocalSchedulerBackend
-import org.apache.spark.status.AppStatusStore
+import org.apache.spark.status.{AppStatusSource, AppStatusStore}
 import org.apache.spark.status.api.v1.ThreadStackTrace
 import org.apache.spark.storage._
 import org.apache.spark.storage.BlockManagerMessages.TriggerThreadDump
 import org.apache.spark.ui.{ConsoleProgressBar, SparkUI}
 import org.apache.spark.util._
+import org.apache.spark.util.logging.DriverLogger
 
 /**
  * Main entry point for Spark functionality. A SparkContext represents the connection to a Spark
@@ -206,6 +206,7 @@ class SparkContext(config: SparkConf) extends Logging {
   private var _applicationId: String = _
   private var _applicationAttemptId: Option[String] = None
   private var _eventLogger: Option[EventLoggingListener] = None
+  private var _driverLogger: Option[DriverLogger] = None
   private var _executorAllocationManager: Option[ExecutorAllocationManager] = None
   private var _cleaner: Option[ContextCleaner] = None
   private var _listenerBusStarted: Boolean = false
@@ -213,6 +214,7 @@ class SparkContext(config: SparkConf) extends Logging {
   private var _files: Seq[String] = _
   private var _shutdownHookRef: AnyRef = _
   private var _statusStore: AppStatusStore = _
+  private var _heartbeater: Heartbeater = _
 
   /* ------------------------------------------------------------------------------------- *
    | Accessors and public fields. These provide access to the internal state of the        |
@@ -371,6 +373,8 @@ class SparkContext(config: SparkConf) extends Logging {
       throw new SparkException("An application name must be set in your configuration")
     }
 
+    _driverLogger = DriverLogger(_conf)
+
     // log out spark.app.name in the Spark driver logs
     logInfo(s"Submitted application: $appName")
 
@@ -417,7 +421,8 @@ class SparkContext(config: SparkConf) extends Logging {
 
     // Initialize the app status store and listener before SparkEnv is created so that it gets
     // all events.
-    _statusStore = AppStatusStore.createLiveStore(conf)
+    val appStatusSource = AppStatusSource.createSource(conf)
+    _statusStore = AppStatusStore.createLiveStore(conf, appStatusSource)
     listenerBus.addToStatusQueue(_statusStore.listener.get)
 
     // Create the Spark execution environment (cache, map output tracker, etc)
@@ -496,6 +501,13 @@ class SparkContext(config: SparkConf) extends Logging {
     _dagScheduler = new DAGScheduler(this)
     _heartbeatReceiver.ask[Boolean](TaskSchedulerIsSet)
 
+    // create and start the heartbeater for collecting memory metrics
+    _heartbeater = new Heartbeater(env.memoryManager,
+      () => SparkContext.this.reportHeartBeat(),
+      "driver-heartbeater",
+      conf.get(EXECUTOR_HEARTBEAT_INTERVAL))
+    _heartbeater.start()
+
     // start TaskScheduler after taskScheduler sets DAGScheduler reference in DAGScheduler's
     // constructor
     _taskScheduler.start()
@@ -563,7 +575,7 @@ class SparkContext(config: SparkConf) extends Logging {
     _executorAllocationManager.foreach { e =>
       _env.metricsSystem.registerSource(e.executorAllocationManagerSource)
     }
-
+    appStatusSource.foreach(_env.metricsSystem.registerSource(_))
     // Make sure the context is stopped if the user forgets about it. This avoids leaving
     // unfinished event logs around after the JVM exits cleanly. It doesn't help if the JVM
     // is killed, though.
@@ -1330,76 +1342,6 @@ class SparkContext(config: SparkConf) extends Logging {
 
   // Methods for creating shared variables
 
-  /**
-   * Create an [[org.apache.spark.Accumulator]] variable of a given type, which tasks can "add"
-   * values to using the `+=` method. Only the driver can access the accumulator's `value`.
-   */
-  @deprecated("use AccumulatorV2", "2.0.0")
-  def accumulator[T](initialValue: T)(implicit param: AccumulatorParam[T]): Accumulator[T] = {
-    val acc = new Accumulator(initialValue, param)
-    cleaner.foreach(_.registerAccumulatorForCleanup(acc.newAcc))
-    acc
-  }
-
-  /**
-   * Create an [[org.apache.spark.Accumulator]] variable of a given type, with a name for display
-   * in the Spark UI. Tasks can "add" values to the accumulator using the `+=` method. Only the
-   * driver can access the accumulator's `value`.
-   */
-  @deprecated("use AccumulatorV2", "2.0.0")
-  def accumulator[T](initialValue: T, name: String)(implicit param: AccumulatorParam[T])
-    : Accumulator[T] = {
-    val acc = new Accumulator(initialValue, param, Option(name))
-    cleaner.foreach(_.registerAccumulatorForCleanup(acc.newAcc))
-    acc
-  }
-
-  /**
-   * Create an [[org.apache.spark.Accumulable]] shared variable, to which tasks can add values
-   * with `+=`. Only the driver can access the accumulable's `value`.
-   * @tparam R accumulator result type
-   * @tparam T type that can be added to the accumulator
-   */
-  @deprecated("use AccumulatorV2", "2.0.0")
-  def accumulable[R, T](initialValue: R)(implicit param: AccumulableParam[R, T])
-    : Accumulable[R, T] = {
-    val acc = new Accumulable(initialValue, param)
-    cleaner.foreach(_.registerAccumulatorForCleanup(acc.newAcc))
-    acc
-  }
-
-  /**
-   * Create an [[org.apache.spark.Accumulable]] shared variable, with a name for display in the
-   * Spark UI. Tasks can add values to the accumulable using the `+=` operator. Only the driver can
-   * access the accumulable's `value`.
-   * @tparam R accumulator result type
-   * @tparam T type that can be added to the accumulator
-   */
-  @deprecated("use AccumulatorV2", "2.0.0")
-  def accumulable[R, T](initialValue: R, name: String)(implicit param: AccumulableParam[R, T])
-    : Accumulable[R, T] = {
-    val acc = new Accumulable(initialValue, param, Option(name))
-    cleaner.foreach(_.registerAccumulatorForCleanup(acc.newAcc))
-    acc
-  }
-
-  /**
-   * Create an accumulator from a "mutable collection" type.
-   *
-   * Growable and TraversableOnce are the standard APIs that guarantee += and ++=, implemented by
-   * standard mutable collections. So you can use this with mutable Map, Set, etc.
-   */
-  @deprecated("use AccumulatorV2", "2.0.0")
-  def accumulableCollection[R <% Growable[T] with TraversableOnce[T] with Serializable: ClassTag, T]
-      (initialValue: R): Accumulable[R, T] = {
-    // TODO the context bound (<%) above should be replaced with simple type bound and implicit
-    // conversion but is a breaking change. This should be fixed in Spark 3.x.
-    val param = new GrowableAccumulableParam[R, T]
-    val acc = new Accumulable(initialValue, param)
-    cleaner.foreach(_.registerAccumulatorForCleanup(acc.newAcc))
-    acc
-  }
-
   /**
    * Register the given accumulator.
    *
@@ -1930,6 +1872,9 @@ class SparkContext(config: SparkConf) extends Logging {
     Utils.tryLogNonFatalError {
       postApplicationEnd()
     }
+    Utils.tryLogNonFatalError {
+      _driverLogger.foreach(_.stop())
+    }
     Utils.tryLogNonFatalError {
       _ui.foreach(_.stop())
     }
@@ -1959,6 +1904,12 @@ class SparkContext(config: SparkConf) extends Logging {
     Utils.tryLogNonFatalError {
       _eventLogger.foreach(_.stop())
     }
+    if (_heartbeater != null) {
+      Utils.tryLogNonFatalError {
+        _heartbeater.stop()
+      }
+      _heartbeater = null
+    }
     if (env != null && _heartbeatReceiver != null) {
       Utils.tryLogNonFatalError {
         env.rpcEnv.stop(_heartbeatReceiver)
@@ -2409,6 +2360,7 @@ class SparkContext(config: SparkConf) extends Logging {
     // the cluster manager to get an application ID (in case the cluster manager provides one).
     listenerBus.post(SparkListenerApplicationStart(appName, Some(applicationId),
       startTime, sparkUser, applicationAttemptId, schedulerBackend.getDriverLogUrls))
+    _driverLogger.foreach(_.startSync(_hadoopConfiguration))
   }
 
   /** Post the application end event */
@@ -2429,6 +2381,14 @@ class SparkContext(config: SparkConf) extends Logging {
     }
   }
 
+  /** Reports heartbeat metrics for the driver. */
+  private def reportHeartBeat(): Unit = {
+    val driverUpdates = _heartbeater.getCurrentMetrics()
+    val accumUpdates = new Array[(Long, Int, Int, Seq[AccumulableInfo])](0)
+    listenerBus.post(SparkListenerExecutorMetricsUpdate("driver", accumUpdates,
+      Some(driverUpdates)))
+  }
+
   // In order to prevent multiple SparkContexts from being active at the same time, mark this
   // context as having finished construction.
   // NOTE: this must be placed at the end of the SparkContext constructor.
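 
Note for readers: the new driver-side Heartbeater periodically posts a SparkListenerExecutorMetricsUpdate whose executor id is "driver" (see reportHeartBeat above). A hedged sketch of a listener that observes those events; the listener class name is invented for the example.

    import org.apache.spark.scheduler.{SparkListener, SparkListenerExecutorMetricsUpdate}

    // Sketch only: log the driver's periodic metric updates posted by reportHeartBeat().
    class DriverHeartbeatLogger extends SparkListener {
      override def onExecutorMetricsUpdate(update: SparkListenerExecutorMetricsUpdate): Unit = {
        if (update.execId == "driver") {
          // The event carries the memory metrics collected by the driver-side Heartbeater.
          println(s"Driver heartbeat metrics update: $update")
        }
      }
    }

    // Register it on an existing SparkContext:
    //   sc.addSparkListener(new DriverHeartbeatLogger)
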
diff --git a/core/src/main/scala/org/apache/spark/SparkEnv.scala b/core/src/main/scala/org/apache/spark/SparkEnv.scala
index 72123f2232532..66038eeaea54f 100644
--- a/core/src/main/scala/org/apache/spark/SparkEnv.scala
+++ b/core/src/main/scala/org/apache/spark/SparkEnv.scala
@@ -261,7 +261,7 @@ object SparkEnv extends Logging {
       // SparkConf, then one taking no arguments
       try {
         cls.getConstructor(classOf[SparkConf], java.lang.Boolean.TYPE)
-          .newInstance(conf, new java.lang.Boolean(isDriver))
+          .newInstance(conf, java.lang.Boolean.valueOf(isDriver))
           .asInstanceOf[T]
       } catch {
         case _: NoSuchMethodException =>
diff --git a/core/src/main/scala/org/apache/spark/TaskContext.scala b/core/src/main/scala/org/apache/spark/TaskContext.scala
index 2b939dabb1105..959f246f3f9f6 100644
--- a/core/src/main/scala/org/apache/spark/TaskContext.scala
+++ b/core/src/main/scala/org/apache/spark/TaskContext.scala
@@ -96,13 +96,6 @@ abstract class TaskContext extends Serializable {
    */
   def isInterrupted(): Boolean
 
-  /**
-   * Returns true if the task is running locally in the driver program.
-   * @return false
-   */
-  @deprecated("Local execution was removed, so this always returns false", "2.0.0")
-  def isRunningLocally(): Boolean
-
   /**
    * Adds a (Java friendly) listener to be executed on task completion.
    * This will be called in all situations - success, failure, or cancellation. Adding a listener
diff --git a/core/src/main/scala/org/apache/spark/TaskContextImpl.scala b/core/src/main/scala/org/apache/spark/TaskContextImpl.scala
index 89730424e5acf..76296c5d0abd3 100644
--- a/core/src/main/scala/org/apache/spark/TaskContextImpl.scala
+++ b/core/src/main/scala/org/apache/spark/TaskContextImpl.scala
@@ -157,8 +157,6 @@ private[spark] class TaskContextImpl(
   @GuardedBy("this")
   override def isCompleted(): Boolean = synchronized(completed)
 
-  override def isRunningLocally(): Boolean = false
-
   override def isInterrupted(): Boolean = reasonIfKilled.isDefined
 
   override def getLocalProperty(key: String): String = localProperties.getProperty(key)
diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala b/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala
index 9544475ff0428..50ed8d9bd3f68 100644
--- a/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala
+++ b/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala
@@ -19,7 +19,7 @@ package org.apache.spark.api.java
 
 import java.{lang => jl}
 import java.lang.{Iterable => JIterable}
-import java.util.{Comparator, List => JList}
+import java.util.{Comparator, Iterator => JIterator, List => JList}
 
 import scala.collection.JavaConverters._
 import scala.language.implicitConversions
@@ -34,7 +34,8 @@ import org.apache.spark.{HashPartitioner, Partitioner}
 import org.apache.spark.Partitioner._
 import org.apache.spark.api.java.JavaSparkContext.fakeClassTag
 import org.apache.spark.api.java.JavaUtils.mapAsSerializableJavaMap
-import org.apache.spark.api.java.function.{Function => JFunction, Function2 => JFunction2, PairFunction}
+import org.apache.spark.api.java.function.{FlatMapFunction, Function => JFunction,
+  Function2 => JFunction2, PairFunction}
 import org.apache.spark.partial.{BoundedDouble, PartialResult}
 import org.apache.spark.rdd.{OrderedRDDFunctions, RDD}
 import org.apache.spark.rdd.RDD.rddToPairRDDFunctions
@@ -674,8 +675,8 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
    * Pass each value in the key-value pair RDD through a flatMap function without changing the
    * keys; this also retains the original RDD's partitioning.
    */
-  def flatMapValues[U](f: JFunction[V, java.lang.Iterable[U]]): JavaPairRDD[K, U] = {
-    def fn: (V) => Iterable[U] = (x: V) => f.call(x).asScala
+  def flatMapValues[U](f: FlatMapFunction[V, U]): JavaPairRDD[K, U] = {
+    def fn: (V) => Iterator[U] = (x: V) => f.call(x).asScala
     implicit val ctag: ClassTag[U] = fakeClassTag
     fromRDD(rdd.flatMapValues(fn))
   }
@@ -951,7 +952,7 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
    *
    * The algorithm used is based on streamlib's implementation of "HyperLogLog in Practice:
    * Algorithmic Engineering of a State of The Art Cardinality Estimation Algorithm", available
-   * <a href="http://dx.doi.org/10.1145/2452376.2452456">here</a>.
+   * <a href="https://doi.org/10.1145/2452376.2452456">here</a>.
    *
    * @param relativeSD Relative accuracy. Smaller values create counters that require more space.
    *                   It must be greater than 0.000017.
@@ -968,7 +969,7 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
    *
    * The algorithm used is based on streamlib's implementation of "HyperLogLog in Practice:
    * Algorithmic Engineering of a State of The Art Cardinality Estimation Algorithm", available
-   * <a href="http://dx.doi.org/10.1145/2452376.2452456">here</a>.
+   * <a href="https://doi.org/10.1145/2452376.2452456">here</a>.
    *
    * @param relativeSD Relative accuracy. Smaller values create counters that require more space.
    *                   It must be greater than 0.000017.
@@ -984,7 +985,7 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
    *
    * The algorithm used is based on streamlib's implementation of "HyperLogLog in Practice:
    * Algorithmic Engineering of a State of The Art Cardinality Estimation Algorithm", available
-   * <a href="http://dx.doi.org/10.1145/2452376.2452456">here</a>.
+   * <a href="https://doi.org/10.1145/2452376.2452456">here</a>.
    *
    * @param relativeSD Relative accuracy. Smaller values create counters that require more space.
    *                   It must be greater than 0.000017.
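
Note for readers: JavaPairRDD.flatMapValues now takes a FlatMapFunction, whose call() returns a java.util.Iterator rather than an Iterable, so callers change shape slightly. A hedged usage sketch from Scala; `pairs` is assumed to be any existing JavaPairRDD[String, String].

    import java.util.{Arrays => JArrays, Iterator => JIterator}

    import org.apache.spark.api.java.JavaPairRDD
    import org.apache.spark.api.java.function.FlatMapFunction

    object FlatMapValuesSketch {
      // Sketch only: split each value into words, keeping the keys and partitioning.
      def splitValues(pairs: JavaPairRDD[String, String]): JavaPairRDD[String, String] = {
        pairs.flatMapValues(new FlatMapFunction[String, String] {
          override def call(value: String): JIterator[String] =
            JArrays.asList(value.split(" "): _*).iterator()
        })
      }
    }
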
diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala b/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala
index 91ae1002abd21..5ba821935ac69 100644
--- a/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala
+++ b/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala
@@ -685,7 +685,7 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable {
    *
    * The algorithm used is based on streamlib's implementation of "HyperLogLog in Practice:
    * Algorithmic Engineering of a State of The Art Cardinality Estimation Algorithm", available
-   * <a href="http://dx.doi.org/10.1145/2452376.2452456">here</a>.
+   * <a href="https://doi.org/10.1145/2452376.2452456">here</a>.
    *
    * @param relativeSD Relative accuracy. Smaller values create counters that require more space.
    *                   It must be greater than 0.000017.
diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala b/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala
index 09c83849e26b2..03f259d73e975 100644
--- a/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala
+++ b/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala
@@ -21,6 +21,7 @@ import java.io.Closeable
 import java.util
 import java.util.{Map => JMap}
 
+import scala.annotation.varargs
 import scala.collection.JavaConverters._
 import scala.language.implicitConversions
 import scala.reflect.ClassTag
@@ -30,11 +31,10 @@ import org.apache.hadoop.mapred.{InputFormat, JobConf}
 import org.apache.hadoop.mapreduce.{InputFormat => NewInputFormat}
 
 import org.apache.spark._
-import org.apache.spark.AccumulatorParam._
 import org.apache.spark.api.java.JavaSparkContext.fakeClassTag
 import org.apache.spark.broadcast.Broadcast
 import org.apache.spark.input.PortableDataStream
-import org.apache.spark.rdd.{EmptyRDD, HadoopRDD, NewHadoopRDD, RDD}
+import org.apache.spark.rdd.{EmptyRDD, HadoopRDD, NewHadoopRDD}
 
 /**
  * A Java-friendly version of [[org.apache.spark.SparkContext]] that returns
@@ -43,8 +43,7 @@ import org.apache.spark.rdd.{EmptyRDD, HadoopRDD, NewHadoopRDD, RDD}
  * Only one SparkContext may be active per JVM.  You must `stop()` the active SparkContext before
  * creating a new one.  This limitation may eventually be removed; see SPARK-2243 for more details.
  */
-class JavaSparkContext(val sc: SparkContext)
-  extends JavaSparkContextVarargsWorkaround with Closeable {
+class JavaSparkContext(val sc: SparkContext) extends Closeable {
 
   /**
    * Create a JavaSparkContext that loads settings from system properties (for instance, when
@@ -507,141 +506,31 @@ class JavaSparkContext(val sc: SparkContext)
     new JavaNewHadoopRDD(rdd.asInstanceOf[NewHadoopRDD[K, V]])
   }
 
-  /** Build the union of two or more RDDs. */
-  override def union[T](first: JavaRDD[T], rest: java.util.List[JavaRDD[T]]): JavaRDD[T] = {
-    val rdds: Seq[RDD[T]] = (Seq(first) ++ rest.asScala).map(_.rdd)
-    implicit val ctag: ClassTag[T] = first.classTag
-    sc.union(rdds)
+  /** Build the union of JavaRDDs. */
+  @varargs
+  def union[T](rdds: JavaRDD[T]*): JavaRDD[T] = {
+    require(rdds.nonEmpty, "Union called on no RDDs")
+    implicit val ctag: ClassTag[T] = rdds.head.classTag
+    sc.union(rdds.map(_.rdd))
   }
 
-  /** Build the union of two or more RDDs. */
-  override def union[K, V](first: JavaPairRDD[K, V], rest: java.util.List[JavaPairRDD[K, V]])
-      : JavaPairRDD[K, V] = {
-    val rdds: Seq[RDD[(K, V)]] = (Seq(first) ++ rest.asScala).map(_.rdd)
-    implicit val ctag: ClassTag[(K, V)] = first.classTag
-    implicit val ctagK: ClassTag[K] = first.kClassTag
-    implicit val ctagV: ClassTag[V] = first.vClassTag
-    new JavaPairRDD(sc.union(rdds))
+  /** Build the union of JavaPairRDDs. */
+  @varargs
+  def union[K, V](rdds: JavaPairRDD[K, V]*): JavaPairRDD[K, V] = {
+    require(rdds.nonEmpty, "Union called on no RDDs")
+    implicit val ctag: ClassTag[(K, V)] = rdds.head.classTag
+    implicit val ctagK: ClassTag[K] = rdds.head.kClassTag
+    implicit val ctagV: ClassTag[V] = rdds.head.vClassTag
+    new JavaPairRDD(sc.union(rdds.map(_.rdd)))
   }
 
-  /** Build the union of two or more RDDs. */
-  override def union(first: JavaDoubleRDD, rest: java.util.List[JavaDoubleRDD]): JavaDoubleRDD = {
-    val rdds: Seq[RDD[Double]] = (Seq(first) ++ rest.asScala).map(_.srdd)
-    new JavaDoubleRDD(sc.union(rdds))
+  /** Build the union of JavaDoubleRDDs. */
+  @varargs
+  def union(rdds: JavaDoubleRDD*): JavaDoubleRDD = {
+    require(rdds.nonEmpty, "Union called on no RDDs")
+    new JavaDoubleRDD(sc.union(rdds.map(_.srdd)))
   }
 
-  /**
-   * Create an [[org.apache.spark.Accumulator]] integer variable, which tasks can "add" values
-   * to using the `add` method. Only the master can access the accumulator's `value`.
-   */
-  @deprecated("use sc().longAccumulator()", "2.0.0")
-  def intAccumulator(initialValue: Int): Accumulator[java.lang.Integer] =
-    sc.accumulator(initialValue)(IntAccumulatorParam).asInstanceOf[Accumulator[java.lang.Integer]]
-
-  /**
-   * Create an [[org.apache.spark.Accumulator]] integer variable, which tasks can "add" values
-   * to using the `add` method. Only the master can access the accumulator's `value`.
-   *
-   * This version supports naming the accumulator for display in Spark's web UI.
-   */
-  @deprecated("use sc().longAccumulator(String)", "2.0.0")
-  def intAccumulator(initialValue: Int, name: String): Accumulator[java.lang.Integer] =
-    sc.accumulator(initialValue, name)(IntAccumulatorParam)
-      .asInstanceOf[Accumulator[java.lang.Integer]]
-
-  /**
-   * Create an [[org.apache.spark.Accumulator]] double variable, which tasks can "add" values
-   * to using the `add` method. Only the master can access the accumulator's `value`.
-   */
-  @deprecated("use sc().doubleAccumulator()", "2.0.0")
-  def doubleAccumulator(initialValue: Double): Accumulator[java.lang.Double] =
-    sc.accumulator(initialValue)(DoubleAccumulatorParam).asInstanceOf[Accumulator[java.lang.Double]]
-
-  /**
-   * Create an [[org.apache.spark.Accumulator]] double variable, which tasks can "add" values
-   * to using the `add` method. Only the master can access the accumulator's `value`.
-   *
-   * This version supports naming the accumulator for display in Spark's web UI.
-   */
-  @deprecated("use sc().doubleAccumulator(String)", "2.0.0")
-  def doubleAccumulator(initialValue: Double, name: String): Accumulator[java.lang.Double] =
-    sc.accumulator(initialValue, name)(DoubleAccumulatorParam)
-      .asInstanceOf[Accumulator[java.lang.Double]]
-
-  /**
-   * Create an [[org.apache.spark.Accumulator]] integer variable, which tasks can "add" values
-   * to using the `add` method. Only the master can access the accumulator's `value`.
-   */
-  @deprecated("use sc().longAccumulator()", "2.0.0")
-  def accumulator(initialValue: Int): Accumulator[java.lang.Integer] = intAccumulator(initialValue)
-
-  /**
-   * Create an [[org.apache.spark.Accumulator]] integer variable, which tasks can "add" values
-   * to using the `add` method. Only the master can access the accumulator's `value`.
-   *
-   * This version supports naming the accumulator for display in Spark's web UI.
-   */
-  @deprecated("use sc().longAccumulator(String)", "2.0.0")
-  def accumulator(initialValue: Int, name: String): Accumulator[java.lang.Integer] =
-    intAccumulator(initialValue, name)
-
-  /**
-   * Create an [[org.apache.spark.Accumulator]] double variable, which tasks can "add" values
-   * to using the `add` method. Only the master can access the accumulator's `value`.
-   */
-  @deprecated("use sc().doubleAccumulator()", "2.0.0")
-  def accumulator(initialValue: Double): Accumulator[java.lang.Double] =
-    doubleAccumulator(initialValue)
-
-
-  /**
-   * Create an [[org.apache.spark.Accumulator]] double variable, which tasks can "add" values
-   * to using the `add` method. Only the master can access the accumulator's `value`.
-   *
-   * This version supports naming the accumulator for display in Spark's web UI.
-   */
-  @deprecated("use sc().doubleAccumulator(String)", "2.0.0")
-  def accumulator(initialValue: Double, name: String): Accumulator[java.lang.Double] =
-    doubleAccumulator(initialValue, name)
-
-  /**
-   * Create an [[org.apache.spark.Accumulator]] variable of a given type, which tasks can "add"
-   * values to using the `add` method. Only the master can access the accumulator's `value`.
-   */
-  @deprecated("use AccumulatorV2", "2.0.0")
-  def accumulator[T](initialValue: T, accumulatorParam: AccumulatorParam[T]): Accumulator[T] =
-    sc.accumulator(initialValue)(accumulatorParam)
-
-  /**
-   * Create an [[org.apache.spark.Accumulator]] variable of a given type, which tasks can "add"
-   * values to using the `add` method. Only the master can access the accumulator's `value`.
-   *
-   * This version supports naming the accumulator for display in Spark's web UI.
-   */
-  @deprecated("use AccumulatorV2", "2.0.0")
-  def accumulator[T](initialValue: T, name: String, accumulatorParam: AccumulatorParam[T])
-      : Accumulator[T] =
-    sc.accumulator(initialValue, name)(accumulatorParam)
-
-  /**
-   * Create an [[org.apache.spark.Accumulable]] shared variable of the given type, to which tasks
-   * can "add" values with `add`. Only the master can access the accumulable's `value`.
-   */
-  @deprecated("use AccumulatorV2", "2.0.0")
-  def accumulable[T, R](initialValue: T, param: AccumulableParam[T, R]): Accumulable[T, R] =
-    sc.accumulable(initialValue)(param)
-
-  /**
-   * Create an [[org.apache.spark.Accumulable]] shared variable of the given type, to which tasks
-   * can "add" values with `add`. Only the master can access the accumulable's `value`.
-   *
-   * This version supports naming the accumulator for display in Spark's web UI.
-   */
-  @deprecated("use AccumulatorV2", "2.0.0")
-  def accumulable[T, R](initialValue: T, name: String, param: AccumulableParam[T, R])
-      : Accumulable[T, R] =
-    sc.accumulable(initialValue, name)(param)
-
   /**
    * Broadcast a read-only variable to the cluster, returning a
    * [[org.apache.spark.broadcast.Broadcast]] object for reading it in distributed functions.
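 
Note for readers: with JavaSparkContextVarargsWorkaround removed, union is a plain @varargs method, so any number of RDDs can be passed directly. A small usage sketch; `jsc`, `a`, `b`, and `c` are assumed to exist.

    import org.apache.spark.api.java.{JavaRDD, JavaSparkContext}

    object UnionSketch {
      // Sketch only: the varargs overload replaces the old (first, java.util.List) form.
      def unionThree[T](jsc: JavaSparkContext, a: JavaRDD[T], b: JavaRDD[T], c: JavaRDD[T]): JavaRDD[T] =
        jsc.union(a, b, c)
    }
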
diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonHadoopUtil.scala b/core/src/main/scala/org/apache/spark/api/python/PythonHadoopUtil.scala
index 6259bead3ea88..2ab8add63efae 100644
--- a/core/src/main/scala/org/apache/spark/api/python/PythonHadoopUtil.scala
+++ b/core/src/main/scala/org/apache/spark/api/python/PythonHadoopUtil.scala
@@ -43,7 +43,8 @@ private[python] object Converter extends Logging {
                   defaultConverter: Converter[Any, Any]): Converter[Any, Any] = {
     converterClass.map { cc =>
       Try {
-        val c = Utils.classForName(cc).newInstance().asInstanceOf[Converter[Any, Any]]
+        val c = Utils.classForName(cc).getConstructor().
+          newInstance().asInstanceOf[Converter[Any, Any]]
         logInfo(s"Loaded converter: $cc")
         c
       } match {
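 
Note for readers: the converter change above replaces Class.newInstance(), deprecated since Java 9 partly because it can propagate undeclared checked exceptions, with an explicit no-arg constructor call. A minimal sketch of the pattern outside Spark's Utils helper; the name is invented for the example.

    object ReflectSketch {
      // Sketch only: instantiate a class by name via its no-arg constructor.
      def instantiate[T](className: String): T =
        Class.forName(className).getConstructor().newInstance().asInstanceOf[T]
    }
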
diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala
index e639a842754bd..5ed5070558af7 100644
--- a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala
+++ b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala
@@ -24,8 +24,10 @@ import java.util.{ArrayList => JArrayList, List => JList, Map => JMap}
 
 import scala.collection.JavaConverters._
 import scala.collection.mutable
+import scala.concurrent.Promise
+import scala.concurrent.duration.Duration
 import scala.language.existentials
-import scala.util.control.NonFatal
+import scala.util.Try
 
 import org.apache.hadoop.conf.Configuration
 import org.apache.hadoop.io.compress.CompressionCodec
@@ -37,6 +39,7 @@ import org.apache.spark.api.java.{JavaPairRDD, JavaRDD, JavaSparkContext}
 import org.apache.spark.broadcast.Broadcast
 import org.apache.spark.input.PortableDataStream
 import org.apache.spark.internal.Logging
+import org.apache.spark.network.util.JavaUtils
 import org.apache.spark.rdd.RDD
 import org.apache.spark.security.SocketAuthHelper
 import org.apache.spark.util._
@@ -169,27 +172,34 @@ private[spark] object PythonRDD extends Logging {
 
   def readRDDFromFile(sc: JavaSparkContext, filename: String, parallelism: Int):
   JavaRDD[Array[Byte]] = {
-    val file = new DataInputStream(new FileInputStream(filename))
+    readRDDFromInputStream(sc.sc, new FileInputStream(filename), parallelism)
+  }
+
+  def readRDDFromInputStream(
+      sc: SparkContext,
+      in: InputStream,
+      parallelism: Int): JavaRDD[Array[Byte]] = {
+    val din = new DataInputStream(in)
     try {
       val objs = new mutable.ArrayBuffer[Array[Byte]]
       try {
         while (true) {
-          val length = file.readInt()
+          val length = din.readInt()
           val obj = new Array[Byte](length)
-          file.readFully(obj)
+          din.readFully(obj)
           objs += obj
         }
       } catch {
         case eof: EOFException => // No-op
       }
-      JavaRDD.fromRDD(sc.sc.parallelize(objs, parallelism))
+      JavaRDD.fromRDD(sc.parallelize(objs, parallelism))
     } finally {
-      file.close()
+      din.close()
     }
   }
 
-  def readBroadcastFromFile(sc: JavaSparkContext, path: String): Broadcast[PythonBroadcast] = {
-    sc.broadcast(new PythonBroadcast(path))
+  def setupBroadcast(path: String): PythonBroadcast = {
+    new PythonBroadcast(path)
   }
 
   def writeIteratorToStream[T](iter: Iterator[T], dataOut: DataOutputStream) {
@@ -419,34 +429,15 @@ private[spark] object PythonRDD extends Logging {
    */
   private[spark] def serveToStream(
       threadName: String)(writeFunc: OutputStream => Unit): Array[Any] = {
-    val serverSocket = new ServerSocket(0, 1, InetAddress.getByName("localhost"))
-    // Close the socket if no connection in 15 seconds
-    serverSocket.setSoTimeout(15000)
-
-    new Thread(threadName) {
-      setDaemon(true)
-      override def run() {
-        try {
-          val sock = serverSocket.accept()
-          authHelper.authClient(sock)
-
-          val out = new BufferedOutputStream(sock.getOutputStream)
-          Utils.tryWithSafeFinally {
-            writeFunc(out)
-          } {
-            out.close()
-            sock.close()
-          }
-        } catch {
-          case NonFatal(e) =>
-            logError(s"Error while sending iterator", e)
-        } finally {
-          serverSocket.close()
-        }
+    val (port, secret) = PythonServer.setupOneConnectionServer(authHelper, threadName) { s =>
+      val out = new BufferedOutputStream(s.getOutputStream())
+      Utils.tryWithSafeFinally {
+        writeFunc(out)
+      } {
+        out.close()
       }
-    }.start()
-
-    Array(serverSocket.getLocalPort, authHelper.secret)
+    }
+    Array(port, secret)
   }
 
   private def getMergedConf(confAsMap: java.util.HashMap[String, String],
@@ -664,13 +655,12 @@ private[spark] class PythonAccumulatorV2(
   }
 }
 
-/**
- * A Wrapper for Python Broadcast, which is written into disk by Python. It also will
- * write the data into disk after deserialization, then Python can read it from disks.
- */
 // scalastyle:off no.finalize
 private[spark] class PythonBroadcast(@transient var path: String) extends Serializable
-  with Logging {
+    with Logging {
+
+  private var encryptionServer: PythonServer[Unit] = null
+  private var decryptionServer: PythonServer[Unit] = null
 
   /**
    * Read data from disks, then copy it to `out`
@@ -713,5 +703,255 @@ private[spark] class PythonBroadcast(@transient var path: String) extends Serial
     }
     super.finalize()
   }
+
+  def setupEncryptionServer(): Array[Any] = {
+    encryptionServer = new PythonServer[Unit]("broadcast-encrypt-server") {
+      override def handleConnection(sock: Socket): Unit = {
+        val env = SparkEnv.get
+        val in = sock.getInputStream()
+        val abspath = new File(path).getAbsolutePath
+        val out = env.serializerManager.wrapForEncryption(new FileOutputStream(abspath))
+        DechunkedInputStream.dechunkAndCopyToOutput(in, out)
+      }
+    }
+    Array(encryptionServer.port, encryptionServer.secret)
+  }
+
+  def setupDecryptionServer(): Array[Any] = {
+    decryptionServer = new PythonServer[Unit]("broadcast-decrypt-server-for-driver") {
+      override def handleConnection(sock: Socket): Unit = {
+        val out = new DataOutputStream(new BufferedOutputStream(sock.getOutputStream()))
+        Utils.tryWithSafeFinally {
+          val in = SparkEnv.get.serializerManager.wrapForEncryption(new FileInputStream(path))
+          Utils.tryWithSafeFinally {
+            Utils.copyStream(in, out, false)
+          } {
+            in.close()
+          }
+          out.flush()
+        } {
+          JavaUtils.closeQuietly(out)
+        }
+      }
+    }
+    Array(decryptionServer.port, decryptionServer.secret)
+  }
+
+  def waitTillBroadcastDataSent(): Unit = decryptionServer.getResult()
+
+  def waitTillDataReceived(): Unit = encryptionServer.getResult()
 }
 // scalastyle:on no.finalize
+
+/**
+ * The inverse of pyspark's ChunkedStream for sending data of unknown size.
+ *
+ * We might be serializing a really large object from python -- we don't want
+ * python to buffer the whole thing in memory, nor can it write to a file,
+ * so we don't know the length in advance.  So python writes it in chunks, each chunk
+ * preceded by a length, until we get a "length" of -1, which serves as EOF.
+ *
+ * Tested from python tests.
+ */
+private[spark] class DechunkedInputStream(wrapped: InputStream) extends InputStream with Logging {
+  private val din = new DataInputStream(wrapped)
+  private var remainingInChunk = din.readInt()
+
+  override def read(): Int = {
+    val into = new Array[Byte](1)
+    val n = read(into, 0, 1)
+    if (n == -1) {
+      -1
+    } else {
+      // if you just cast a byte to an int, then anything > 127 is negative, which is interpreted
+      // as an EOF
+      val b = into(0)
+      if (b < 0) {
+        256 + b
+      } else {
+        b
+      }
+    }
+  }
+
+  override def read(dest: Array[Byte], off: Int, len: Int): Int = {
+    if (remainingInChunk == -1) {
+      return -1
+    }
+    var destSpace = len
+    var destPos = off
+    while (destSpace > 0 && remainingInChunk != -1) {
+      val toCopy = math.min(remainingInChunk, destSpace)
+      val read = din.read(dest, destPos, toCopy)
+      destPos += read
+      destSpace -= read
+      remainingInChunk -= read
+      if (remainingInChunk == 0) {
+        remainingInChunk = din.readInt()
+      }
+    }
+    assert(destSpace == 0 || remainingInChunk == -1)
+    return destPos - off
+  }
+
+  override def close(): Unit = wrapped.close()
+}
+
+private[spark] object DechunkedInputStream {
+
+  /**
+   * Dechunks the input, copies to output, and closes both input and the output safely.
+   */
+  def dechunkAndCopyToOutput(chunked: InputStream, out: OutputStream): Unit = {
+    val dechunked = new DechunkedInputStream(chunked)
+    Utils.tryWithSafeFinally {
+      Utils.copyStream(dechunked, out)
+    } {
+      JavaUtils.closeQuietly(out)
+      JavaUtils.closeQuietly(dechunked)
+    }
+  }
+}
+
+/**
+ * Creates a server in the jvm to communicate with python for handling one batch of data, with
+ * authentication and error handling.
+ */
+private[spark] abstract class PythonServer[T](
+    authHelper: SocketAuthHelper,
+    threadName: String) {
+
+  def this(env: SparkEnv, threadName: String) = this(new SocketAuthHelper(env.conf), threadName)
+  def this(threadName: String) = this(SparkEnv.get, threadName)
+
+  val (port, secret) = PythonServer.setupOneConnectionServer(authHelper, threadName) { sock =>
+    promise.complete(Try(handleConnection(sock)))
+  }
+
+  /**
+   * Handle a connection which has already been authenticated.  Any error from this function
+   * will clean up this connection and the entire server, and get propagated to [[getResult]].
+   */
+  def handleConnection(sock: Socket): T
+
+  val promise = Promise[T]()
+
+  /**
+   * Blocks indefinitely for [[handleConnection]] to finish, and returns that result.  If
+   * handleConnection throws an exception, this will throw an exception which includes the original
+   * exception as a cause.
+   */
+  def getResult(): T = {
+    getResult(Duration.Inf)
+  }
+
+  def getResult(wait: Duration): T = {
+    ThreadUtils.awaitResult(promise.future, wait)
+  }
+
+}
+
+private[spark] object PythonServer {
+
+  /**
+   * Create a socket server and run user function on the socket in a background thread.
+   *
+   * The socket server accepts only one connection, and closes if no connection is made
+   * within 15 seconds.
+   *
+   * The thread terminates after the supplied user function returns, or if it throws an exception.
+   *
+   * If you need the result of the supplied function, create a subclass of [[PythonServer]].
+   *
+   * @return The port number of a local socket and the secret for authentication.
+   */
+  def setupOneConnectionServer(
+      authHelper: SocketAuthHelper,
+      threadName: String)
+      (func: Socket => Unit): (Int, String) = {
+    val serverSocket = new ServerSocket(0, 1, InetAddress.getByAddress(Array(127, 0, 0, 1)))
+    // Close the socket if no connection in 15 seconds
+    serverSocket.setSoTimeout(15000)
+
+    new Thread(threadName) {
+      setDaemon(true)
+      override def run(): Unit = {
+        var sock: Socket = null
+        try {
+          sock = serverSocket.accept()
+          authHelper.authClient(sock)
+          func(sock)
+        } finally {
+          JavaUtils.closeQuietly(serverSocket)
+          JavaUtils.closeQuietly(sock)
+        }
+      }
+    }.start()
+    (serverSocket.getLocalPort, authHelper.secret)
+  }
+}
+
+/**
+ * Sends decrypted broadcast data to python worker.  See [[PythonRunner]] for entire protocol.
+ */
+private[spark] class EncryptedPythonBroadcastServer(
+    val env: SparkEnv,
+    val idsAndFiles: Seq[(Long, String)])
+    extends PythonServer[Unit]("broadcast-decrypt-server") with Logging {
+
+  override def handleConnection(socket: Socket): Unit = {
+    val out = new DataOutputStream(new BufferedOutputStream(socket.getOutputStream()))
+    var socketIn: InputStream = null
+    // send the broadcast id, then the decrypted data.  We don't need to send the length, since
+    // the python pickle module just needs a stream.
+    Utils.tryWithSafeFinally {
+      (idsAndFiles).foreach { case (id, path) =>
+        out.writeLong(id)
+        val in = env.serializerManager.wrapForEncryption(new FileInputStream(path))
+        Utils.tryWithSafeFinally {
+          Utils.copyStream(in, out, false)
+        } {
+          in.close()
+        }
+      }
+      logTrace("waiting for python to accept broadcast data over socket")
+      out.flush()
+      socketIn = socket.getInputStream()
+      socketIn.read()
+      logTrace("done serving broadcast data")
+    } {
+      JavaUtils.closeQuietly(socketIn)
+      JavaUtils.closeQuietly(out)
+    }
+  }
+
+  def waitTillBroadcastDataSent(): Unit = {
+    getResult()
+  }
+}
+
+/**
+ * Helper for making RDD[Array[Byte]] from some python data, by reading the data from python
+ * over a socket.  This is used in preference to writing data to a file when encryption is enabled.
+ */
+private[spark] abstract class PythonRDDServer
+    extends PythonServer[JavaRDD[Array[Byte]]]("pyspark-parallelize-server") {
+
+  def handleConnection(sock: Socket): JavaRDD[Array[Byte]] = {
+    val in = sock.getInputStream()
+    val dechunkedInput: InputStream = new DechunkedInputStream(in)
+    streamToRDD(dechunkedInput)
+  }
+
+  protected def streamToRDD(input: InputStream): RDD[Array[Byte]]
+
+}
+
+private[spark] class PythonParallelizeServer(sc: SparkContext, parallelism: Int)
+    extends PythonRDDServer {
+
+  override protected def streamToRDD(input: InputStream): RDD[Array[Byte]] = {
+    PythonRDD.readRDDFromInputStream(sc, input, parallelism)
+  }
+}
+
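Note for readers: DechunkedInputStream above expects the framing described in its comment, namely that each chunk is preceded by its int length and a length of -1 marks end-of-stream. A hedged writer-side sketch of that framing (illustrative only, not the actual PySpark serializer):

    import java.io.DataOutputStream

    object ChunkedWriterSketch {
      // Sketch only: write length-prefixed chunks terminated by -1, the format
      // DechunkedInputStream reads on the JVM side.
      def writeChunked(out: DataOutputStream, chunks: Seq[Array[Byte]]): Unit = {
        chunks.foreach { chunk =>
          out.writeInt(chunk.length)
          out.write(chunk)
        }
        out.writeInt(-1) // EOF marker
        out.flush()
      }
    }
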
diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonRunner.scala b/core/src/main/scala/org/apache/spark/api/python/PythonRunner.scala
index 4c53bc269a104..f73e95eac8f79 100644
--- a/core/src/main/scala/org/apache/spark/api/python/PythonRunner.scala
+++ b/core/src/main/scala/org/apache/spark/api/python/PythonRunner.scala
@@ -106,15 +106,17 @@ private[spark] abstract class BasePythonRunner[IN, OUT](
       envVars.put("PYSPARK_EXECUTOR_MEMORY_MB", memoryMb.get.toString)
     }
     val worker: Socket = env.createPythonWorker(pythonExec, envVars.asScala.toMap)
-    // Whether is the worker released into idle pool
-    val released = new AtomicBoolean(false)
+    // Whether the worker has been released into the idle pool or closed. Any code that tries to
+    // release or close a worker should use `releasedOrClosed.compareAndSet` to flip the state,
+    // making sure there is only one winner that actually releases or closes the worker.
+    val releasedOrClosed = new AtomicBoolean(false)
 
     // Start a thread to feed the process input from our parent's iterator
     val writerThread = newWriterThread(env, worker, inputIterator, partitionIndex, context)
 
     context.addTaskCompletionListener[Unit] { _ =>
       writerThread.shutdownOnTaskCompletion()
-      if (!reuseWorker || !released.get) {
+      if (!reuseWorker || releasedOrClosed.compareAndSet(false, true)) {
         try {
           worker.close()
         } catch {
@@ -131,7 +133,7 @@ private[spark] abstract class BasePythonRunner[IN, OUT](
     val stream = new DataInputStream(new BufferedInputStream(worker.getInputStream, bufferSize))
 
     val stdoutIterator = newReaderIterator(
-      stream, writerThread, startTime, env, worker, released, context)
+      stream, writerThread, startTime, env, worker, releasedOrClosed, context)
     new InterruptibleIterator(context, stdoutIterator)
   }
 
@@ -148,7 +150,7 @@ private[spark] abstract class BasePythonRunner[IN, OUT](
       startTime: Long,
       env: SparkEnv,
       worker: Socket,
-      released: AtomicBoolean,
+      releasedOrClosed: AtomicBoolean,
       context: TaskContext): Iterator[OUT]
 
   /**
@@ -289,19 +291,51 @@ private[spark] abstract class BasePythonRunner[IN, OUT](
         val newBids = broadcastVars.map(_.id).toSet
         // number of different broadcasts
         val toRemove = oldBids.diff(newBids)
-        val cnt = toRemove.size + newBids.diff(oldBids).size
+        val addedBids = newBids.diff(oldBids)
+        val cnt = toRemove.size + addedBids.size
+        val needsDecryptionServer = env.serializerManager.encryptionEnabled && addedBids.nonEmpty
+        dataOut.writeBoolean(needsDecryptionServer)
         dataOut.writeInt(cnt)
-        for (bid <- toRemove) {
-          // remove the broadcast from worker
-          dataOut.writeLong(- bid - 1)  // bid >= 0
-          oldBids.remove(bid)
+        def sendBidsToRemove(): Unit = {
+          for (bid <- toRemove) {
+            // remove the broadcast from worker
+            dataOut.writeLong(-bid - 1) // bid >= 0
+            oldBids.remove(bid)
+          }
         }
-        for (broadcast <- broadcastVars) {
-          if (!oldBids.contains(broadcast.id)) {
+        if (needsDecryptionServer) {
+          // if there is encryption, we set up a server that reads the encrypted files and sends
+          // the decrypted data to python
+          val idsAndFiles = broadcastVars.flatMap { broadcast =>
+            if (!oldBids.contains(broadcast.id)) {
+              Some((broadcast.id, broadcast.value.path))
+            } else {
+              None
+            }
+          }
+          val server = new EncryptedPythonBroadcastServer(env, idsAndFiles)
+          dataOut.writeInt(server.port)
+          logTrace(s"broadcast decryption server setup on ${server.port}")
+          PythonRDD.writeUTF(server.secret, dataOut)
+          sendBidsToRemove()
+          idsAndFiles.foreach { case (id, _) =>
             // send new broadcast
-            dataOut.writeLong(broadcast.id)
-            PythonRDD.writeUTF(broadcast.value.path, dataOut)
-            oldBids.add(broadcast.id)
+            dataOut.writeLong(id)
+            oldBids.add(id)
+          }
+          dataOut.flush()
+          logTrace("waiting for python to read decrypted broadcast data from server")
+          server.waitTillBroadcastDataSent()
+          logTrace("done sending decrypted data to python")
+        } else {
+          sendBidsToRemove()
+          for (broadcast <- broadcastVars) {
+            if (!oldBids.contains(broadcast.id)) {
+              // send new broadcast
+              dataOut.writeLong(broadcast.id)
+              PythonRDD.writeUTF(broadcast.value.path, dataOut)
+              oldBids.add(broadcast.id)
+            }
           }
         }
         dataOut.flush()
@@ -360,7 +394,7 @@ private[spark] abstract class BasePythonRunner[IN, OUT](
       startTime: Long,
       env: SparkEnv,
       worker: Socket,
-      released: AtomicBoolean,
+      releasedOrClosed: AtomicBoolean,
       context: TaskContext)
     extends Iterator[OUT] {
 
@@ -431,9 +465,8 @@ private[spark] abstract class BasePythonRunner[IN, OUT](
       }
       // Check whether the worker is ready to be re-used.
       if (stream.readInt() == SpecialLengths.END_OF_STREAM) {
-        if (reuseWorker) {
+        if (reuseWorker && releasedOrClosed.compareAndSet(false, true)) {
           env.releasePythonWorker(pythonExec, envVars.asScala.toMap, worker)
-          released.set(true)
         }
       }
       eos = true
@@ -533,9 +566,9 @@ private[spark] class PythonRunner(funcs: Seq[ChainedPythonFunctions])
       startTime: Long,
       env: SparkEnv,
       worker: Socket,
-      released: AtomicBoolean,
+      releasedOrClosed: AtomicBoolean,
       context: TaskContext): Iterator[Array[Byte]] = {
-    new ReaderIterator(stream, writerThread, startTime, env, worker, released, context) {
+    new ReaderIterator(stream, writerThread, startTime, env, worker, releasedOrClosed, context) {
 
       protected override def read(): Array[Byte] = {
         if (writerThread.exception.isDefined) {
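 
Note for readers: the releasedOrClosed flag above relies on AtomicBoolean.compareAndSet returning true for exactly one caller, so the task-completion listener and the reader iterator cannot both act on the worker. A minimal sketch of that pattern in isolation; the names are invented for the example.

    import java.util.concurrent.atomic.AtomicBoolean

    object SingleWinnerSketch {
      private val releasedOrClosed = new AtomicBoolean(false)

      // Sketch only: whichever caller flips the flag first runs the action; later callers are no-ops.
      def releaseOrCloseOnce(action: () => Unit): Unit = {
        if (releasedOrClosed.compareAndSet(false, true)) {
          action()
        }
      }
    }
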
diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala b/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala
index 27a5e19f96a14..b6b0cac910d69 100644
--- a/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala
+++ b/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala
@@ -32,7 +32,8 @@ private[spark] object PythonUtils {
     val pythonPath = new ArrayBuffer[String]
     for (sparkHome <- sys.env.get("SPARK_HOME")) {
       pythonPath += Seq(sparkHome, "python", "lib", "pyspark.zip").mkString(File.separator)
-      pythonPath += Seq(sparkHome, "python", "lib", "py4j-0.10.7-src.zip").mkString(File.separator)
+      pythonPath +=
+        Seq(sparkHome, "python", "lib", "py4j-0.10.8.1-src.zip").mkString(File.separator)
     }
     pythonPath ++= SparkContext.jarOfObject(this)
     pythonPath.mkString(File.pathSeparator)
@@ -74,4 +75,8 @@ private[spark] object PythonUtils {
   def toScalaMap[K, V](jm: java.util.Map[K, V]): Map[K, V] = {
     jm.asScala.toMap
   }
+
+  def getEncryptionEnabled(sc: JavaSparkContext): Boolean = {
+    sc.conf.get(org.apache.spark.internal.config.IO_ENCRYPTION_ENABLED)
+  }
 }
diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonWorkerFactory.scala b/core/src/main/scala/org/apache/spark/api/python/PythonWorkerFactory.scala
index 6afa37aa36fd3..1f2f503a28d49 100644
--- a/core/src/main/scala/org/apache/spark/api/python/PythonWorkerFactory.scala
+++ b/core/src/main/scala/org/apache/spark/api/python/PythonWorkerFactory.scala
@@ -21,6 +21,7 @@ import java.io.{DataInputStream, DataOutputStream, EOFException, InputStream, Ou
 import java.net.{InetAddress, ServerSocket, Socket, SocketException}
 import java.nio.charset.StandardCharsets
 import java.util.Arrays
+import javax.annotation.concurrent.GuardedBy
 
 import scala.collection.JavaConverters._
 import scala.collection.mutable
@@ -31,7 +32,7 @@ import org.apache.spark.security.SocketAuthHelper
 import org.apache.spark.util.{RedirectThread, Utils}
 
 private[spark] class PythonWorkerFactory(pythonExec: String, envVars: Map[String, String])
-  extends Logging {
+  extends Logging { self =>
 
   import PythonWorkerFactory._
 
@@ -39,7 +40,7 @@ private[spark] class PythonWorkerFactory(pythonExec: String, envVars: Map[String
   // pyspark/daemon.py (by default) and tell it to fork new workers for our tasks. This daemon
   // currently only works on UNIX-based systems now because it uses signals for child management,
   // so we can also fall back to launching workers, pyspark/worker.py (by default) directly.
-  val useDaemon = {
+  private val useDaemon = {
     val useDaemonEnabled = SparkEnv.get.conf.getBoolean("spark.python.use.daemon", true)
 
     // This flag is ignored on Windows as it's unable to fork.
@@ -51,44 +52,52 @@ private[spark] class PythonWorkerFactory(pythonExec: String, envVars: Map[String
   // as expert-only option, and shouldn't be used before knowing what it means exactly.
 
   // This configuration indicates the module to run the daemon to execute its Python workers.
-  val daemonModule = SparkEnv.get.conf.getOption("spark.python.daemon.module").map { value =>
-    logInfo(
-      s"Python daemon module in PySpark is set to [$value] in 'spark.python.daemon.module', " +
-      "using this to start the daemon up. Note that this configuration only has an effect when " +
-      "'spark.python.use.daemon' is enabled and the platform is not Windows.")
-    value
-  }.getOrElse("pyspark.daemon")
+  private val daemonModule =
+    SparkEnv.get.conf.getOption("spark.python.daemon.module").map { value =>
+      logInfo(
+        s"Python daemon module in PySpark is set to [$value] in 'spark.python.daemon.module', " +
+        "using this to start the daemon up. Note that this configuration only has an effect when " +
+        "'spark.python.use.daemon' is enabled and the platform is not Windows.")
+      value
+    }.getOrElse("pyspark.daemon")
 
   // This configuration indicates the module to run each Python worker.
-  val workerModule = SparkEnv.get.conf.getOption("spark.python.worker.module").map { value =>
-    logInfo(
-      s"Python worker module in PySpark is set to [$value] in 'spark.python.worker.module', " +
-      "using this to start the worker up. Note that this configuration only has an effect when " +
-      "'spark.python.use.daemon' is disabled or the platform is Windows.")
-    value
-  }.getOrElse("pyspark.worker")
+  private val workerModule =
+    SparkEnv.get.conf.getOption("spark.python.worker.module").map { value =>
+      logInfo(
+        s"Python worker module in PySpark is set to [$value] in 'spark.python.worker.module', " +
+        "using this to start the worker up. Note that this configuration only has an effect when " +
+        "'spark.python.use.daemon' is disabled or the platform is Windows.")
+      value
+    }.getOrElse("pyspark.worker")
 
   private val authHelper = new SocketAuthHelper(SparkEnv.get.conf)
 
-  var daemon: Process = null
+  @GuardedBy("self")
+  private var daemon: Process = null
   val daemonHost = InetAddress.getByAddress(Array(127, 0, 0, 1))
-  var daemonPort: Int = 0
-  val daemonWorkers = new mutable.WeakHashMap[Socket, Int]()
-  val idleWorkers = new mutable.Queue[Socket]()
-  var lastActivity = 0L
+  @GuardedBy("self")
+  private var daemonPort: Int = 0
+  @GuardedBy("self")
+  private val daemonWorkers = new mutable.WeakHashMap[Socket, Int]()
+  @GuardedBy("self")
+  private val idleWorkers = new mutable.Queue[Socket]()
+  @GuardedBy("self")
+  private var lastActivity = 0L
   new MonitorThread().start()
 
-  var simpleWorkers = new mutable.WeakHashMap[Socket, Process]()
+  @GuardedBy("self")
+  private val simpleWorkers = new mutable.WeakHashMap[Socket, Process]()
 
-  val pythonPath = PythonUtils.mergePythonPaths(
+  private val pythonPath = PythonUtils.mergePythonPaths(
     PythonUtils.sparkPythonPath,
     envVars.getOrElse("PYTHONPATH", ""),
     sys.env.getOrElse("PYTHONPATH", ""))
 
   def create(): Socket = {
     if (useDaemon) {
-      synchronized {
-        if (idleWorkers.size > 0) {
+      self.synchronized {
+        if (idleWorkers.nonEmpty) {
           return idleWorkers.dequeue()
         }
       }
@@ -117,7 +126,7 @@ private[spark] class PythonWorkerFactory(pythonExec: String, envVars: Map[String
       socket
     }
 
-    synchronized {
+    self.synchronized {
       // Start the daemon if it hasn't been started
       startDaemon()
 
@@ -163,7 +172,9 @@ private[spark] class PythonWorkerFactory(pythonExec: String, envVars: Map[String
       try {
         val socket = serverSocket.accept()
         authHelper.authClient(socket)
-        simpleWorkers.put(socket, worker)
+        self.synchronized {
+          simpleWorkers.put(socket, worker)
+        }
         return socket
       } catch {
         case e: Exception =>
@@ -178,7 +189,7 @@ private[spark] class PythonWorkerFactory(pythonExec: String, envVars: Map[String
   }
 
   private def startDaemon() {
-    synchronized {
+    self.synchronized {
       // Is it already running?
       if (daemon != null) {
         return
@@ -278,7 +289,7 @@ private[spark] class PythonWorkerFactory(pythonExec: String, envVars: Map[String
 
     override def run() {
       while (true) {
-        synchronized {
+        self.synchronized {
           if (lastActivity + IDLE_WORKER_TIMEOUT_MS < System.currentTimeMillis()) {
             cleanupIdleWorkers()
             lastActivity = System.currentTimeMillis()
@@ -303,7 +314,7 @@ private[spark] class PythonWorkerFactory(pythonExec: String, envVars: Map[String
   }
 
   private def stopDaemon() {
-    synchronized {
+    self.synchronized {
       if (useDaemon) {
         cleanupIdleWorkers()
 
@@ -325,7 +336,7 @@ private[spark] class PythonWorkerFactory(pythonExec: String, envVars: Map[String
   }
 
   def stopWorker(worker: Socket) {
-    synchronized {
+    self.synchronized {
       if (useDaemon) {
         if (daemon != null) {
           daemonWorkers.get(worker).foreach { pid =>
@@ -345,7 +356,7 @@ private[spark] class PythonWorkerFactory(pythonExec: String, envVars: Map[String
 
   def releaseWorker(worker: Socket) {
     if (useDaemon) {
-      synchronized {
+      self.synchronized {
         lastActivity = System.currentTimeMillis()
         idleWorkers.enqueue(worker)
       }
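 
Note for readers: the PythonWorkerFactory changes above adopt an explicit lock object: mutable worker state is annotated with @GuardedBy("self") and every access goes through self.synchronized, which the inner MonitorThread shares. An illustrative sketch of the same convention; the class and members are invented for the example.

    import javax.annotation.concurrent.GuardedBy

    import scala.collection.mutable

    class WorkerRegistrySketch { self =>
      @GuardedBy("self")
      private val idle = mutable.Queue[Int]()

      def park(workerPort: Int): Unit = self.synchronized {
        idle.enqueue(workerPort)
      }

      def borrow(): Option[Int] = self.synchronized {
        if (idle.nonEmpty) Some(idle.dequeue()) else None
      }
    }
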
diff --git a/core/src/main/scala/org/apache/spark/api/r/RBackend.scala b/core/src/main/scala/org/apache/spark/api/r/RBackend.scala
index 7ce2581555014..50c8fdf5316d6 100644
--- a/core/src/main/scala/org/apache/spark/api/r/RBackend.scala
+++ b/core/src/main/scala/org/apache/spark/api/r/RBackend.scala
@@ -17,7 +17,7 @@
 
 package org.apache.spark.api.r
 
-import java.io.{DataInputStream, DataOutputStream, File, FileOutputStream, IOException}
+import java.io.{DataOutputStream, File, FileOutputStream, IOException}
 import java.net.{InetAddress, InetSocketAddress, ServerSocket, Socket}
 import java.util.concurrent.TimeUnit
 
@@ -32,8 +32,6 @@ import io.netty.handler.timeout.ReadTimeoutHandler
 
 import org.apache.spark.SparkConf
 import org.apache.spark.internal.Logging
-import org.apache.spark.network.util.JavaUtils
-import org.apache.spark.util.Utils
 
 /**
  * Netty-based backend server that is used to communicate between R and Java.
@@ -99,7 +97,7 @@ private[spark] class RBackend {
     if (bootstrap != null && bootstrap.config().group() != null) {
       bootstrap.config().group().shutdownGracefully()
     }
-    if (bootstrap != null && bootstrap.childGroup() != null) {
+    if (bootstrap != null && bootstrap.config().childGroup() != null) {
       bootstrap.config().childGroup().shutdownGracefully()
     }
     bootstrap = null
@@ -147,7 +145,7 @@ private[spark] object RBackend extends Logging {
       new Thread("wait for socket to close") {
         setDaemon(true)
         override def run(): Unit = {
-          // any un-catched exception will also shutdown JVM
+          // any uncaught exception will also shut down the JVM
           val buf = new Array[Byte](1024)
           // shutdown JVM if R does not connect back in 10 seconds
           serverSocket.setSoTimeout(10000)
diff --git a/core/src/main/scala/org/apache/spark/api/r/RRDD.scala b/core/src/main/scala/org/apache/spark/api/r/RRDD.scala
index 295355c7bf018..1dc61c7eef33c 100644
--- a/core/src/main/scala/org/apache/spark/api/r/RRDD.scala
+++ b/core/src/main/scala/org/apache/spark/api/r/RRDD.scala
@@ -17,7 +17,9 @@
 
 package org.apache.spark.api.r
 
-import java.io.File
+import java.io.{DataInputStream, File}
+import java.net.Socket
+import java.nio.charset.StandardCharsets.UTF_8
 import java.util.{Map => JMap}
 
 import scala.collection.JavaConverters._
@@ -25,10 +27,11 @@ import scala.reflect.ClassTag
 
 import org.apache.spark._
 import org.apache.spark.api.java.{JavaPairRDD, JavaRDD, JavaSparkContext}
-import org.apache.spark.api.python.PythonRDD
+import org.apache.spark.api.python.{PythonRDD, PythonServer}
 import org.apache.spark.broadcast.Broadcast
 import org.apache.spark.internal.Logging
 import org.apache.spark.rdd.RDD
+import org.apache.spark.security.SocketAuthHelper
 
 private abstract class BaseRRDD[T: ClassTag, U: ClassTag](
     parent: RDD[T],
@@ -163,3 +166,29 @@ private[r] object RRDD {
     PythonRDD.readRDDFromFile(jsc, fileName, parallelism)
   }
 }
+
+/**
+ * Helper for making RDD[Array[Byte]] from some R data, by reading the data from R
+ * over a socket. This is used in preference to writing data to a file when encryption is enabled.
+ */
+private[spark] class RParallelizeServer(sc: JavaSparkContext, parallelism: Int)
+    extends PythonServer[JavaRDD[Array[Byte]]](
+      new RSocketAuthHelper(), "sparkr-parallelize-server") {
+
+  override def handleConnection(sock: Socket): JavaRDD[Array[Byte]] = {
+    val in = sock.getInputStream()
+    PythonRDD.readRDDFromInputStream(sc.sc, in, parallelism)
+  }
+}
+
+private[spark] class RSocketAuthHelper extends SocketAuthHelper(SparkEnv.get.conf) {
+  override protected def readUtf8(s: Socket): String = {
+    val din = new DataInputStream(s.getInputStream())
+    val len = din.readInt()
+    val bytes = new Array[Byte](len)
+    din.readFully(bytes)
+    // The R code adds a null terminator to serialized strings, so ignore it here.
+    assert(bytes(bytes.length - 1) == 0) // sanity check.
+    new String(bytes, 0, bytes.length - 1, UTF_8)
+  }
+}
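 
Note for readers: RSocketAuthHelper.readUtf8 above expects a length-prefixed UTF-8 string whose byte count includes the trailing null terminator added by the R serializer, which the reader then strips. A hedged sketch of the matching writer-side framing (illustrative only; the real writer lives on the SparkR side).

    import java.io.DataOutputStream
    import java.nio.charset.StandardCharsets.UTF_8

    object RStringFramingSketch {
      // Sketch only: int length (including the null byte), the UTF-8 bytes, then a 0 terminator.
      def writeNullTerminated(out: DataOutputStream, s: String): Unit = {
        val bytes = s.getBytes(UTF_8)
        out.writeInt(bytes.length + 1)
        out.write(bytes)
        out.writeByte(0)
      }
    }
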
diff --git a/core/src/main/scala/org/apache/spark/api/r/RUtils.scala b/core/src/main/scala/org/apache/spark/api/r/RUtils.scala
index fdd8cf62f0e5f..9bf35af1da925 100644
--- a/core/src/main/scala/org/apache/spark/api/r/RUtils.scala
+++ b/core/src/main/scala/org/apache/spark/api/r/RUtils.scala
@@ -21,6 +21,8 @@ import java.io.File
 import java.util.Arrays
 
 import org.apache.spark.{SparkEnv, SparkException}
+import org.apache.spark.api.java.JavaSparkContext
+import org.apache.spark.api.python.PythonUtils
 
 private[spark] object RUtils {
   // Local path where R binary packages built from R source code contained in the spark
@@ -104,4 +106,6 @@ private[spark] object RUtils {
       case e: Exception => false
     }
   }
+
+  def getEncryptionEnabled(sc: JavaSparkContext): Boolean = PythonUtils.getEncryptionEnabled(sc)
 }
diff --git a/core/src/main/scala/org/apache/spark/api/r/SerDe.scala b/core/src/main/scala/org/apache/spark/api/r/SerDe.scala
index 537ab57f9664d..6e0a3f63988d4 100644
--- a/core/src/main/scala/org/apache/spark/api/r/SerDe.scala
+++ b/core/src/main/scala/org/apache/spark/api/r/SerDe.scala
@@ -74,9 +74,9 @@ private[spark] object SerDe {
       jvmObjectTracker: JVMObjectTracker): Object = {
     dataType match {
       case 'n' => null
-      case 'i' => new java.lang.Integer(readInt(dis))
-      case 'd' => new java.lang.Double(readDouble(dis))
-      case 'b' => new java.lang.Boolean(readBoolean(dis))
+      case 'i' => java.lang.Integer.valueOf(readInt(dis))
+      case 'd' => java.lang.Double.valueOf(readDouble(dis))
+      case 'b' => java.lang.Boolean.valueOf(readBoolean(dis))
       case 'c' => readString(dis)
       case 'e' => readMap(dis, jvmObjectTracker)
       case 'r' => readBytes(dis)
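
The SerDe change swaps the deprecated boxed-primitive constructors for the valueOf factory methods, which avoid allocating a fresh wrapper on every read and may return cached instances. A tiny illustration of the difference:

    // valueOf may reuse cached boxes for small values; the deprecated constructors never do.
    val a = java.lang.Integer.valueOf(42)
    val b = java.lang.Integer.valueOf(42)
    assert(a == b)   // value equality always holds
    assert(a eq b)   // reference equality too, for values in the JVM's Integer cache (-128..127)
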
diff --git a/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcast.scala b/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcast.scala
index cbd49e070f2eb..26ead57316e18 100644
--- a/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcast.scala
+++ b/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcast.scala
@@ -18,6 +18,7 @@
 package org.apache.spark.broadcast
 
 import java.io._
+import java.lang.ref.SoftReference
 import java.nio.ByteBuffer
 import java.util.zip.Adler32
 
@@ -61,9 +62,11 @@ private[spark] class TorrentBroadcast[T: ClassTag](obj: T, id: Long)
    * Value of the broadcast object on executors. This is reconstructed by [[readBroadcastBlock]],
    * which builds this value by reading blocks from the driver and/or other executors.
    *
-   * On the driver, if the value is required, it is read lazily from the block manager.
+   * On the driver, if the value is required, it is read lazily from the block manager. We hold
+   * a soft reference so that it can be garbage collected if required, as we can always reconstruct
+   * in the future.
    */
-  @transient private lazy val _value: T = readBroadcastBlock()
+  @transient private var _value: SoftReference[T] = _
 
   /** The compression codec to use, or None if compression is disabled */
   @transient private var compressionCodec: Option[CompressionCodec] = _
@@ -92,8 +95,15 @@ private[spark] class TorrentBroadcast[T: ClassTag](obj: T, id: Long)
   /** The checksum for all the blocks. */
   private var checksums: Array[Int] = _
 
-  override protected def getValue() = {
-    _value
+  override protected def getValue() = synchronized {
+    val memoized: T = if (_value == null) null.asInstanceOf[T] else _value.get
+    if (memoized != null) {
+      memoized
+    } else {
+      val newlyRead = readBroadcastBlock()
+      _value = new SoftReference[T](newlyRead)
+      newlyRead
+    }
   }
 
   private def calcChecksum(block: ByteBuffer): Int = {
@@ -205,8 +215,8 @@ private[spark] class TorrentBroadcast[T: ClassTag](obj: T, id: Long)
   }
 
   private def readBroadcastBlock(): T = Utils.tryOrIOException {
-    TorrentBroadcast.synchronized {
-      val broadcastCache = SparkEnv.get.broadcastManager.cachedValues
+    val broadcastCache = SparkEnv.get.broadcastManager.cachedValues
+    broadcastCache.synchronized {
 
       Option(broadcastCache.get(broadcastId)).map(_.asInstanceOf[T]).getOrElse {
         setConf(SparkEnv.get.conf)
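
The getValue() change above replaces a lazy val with SoftReference-based memoization, so a large broadcast value can be reclaimed under memory pressure and rebuilt on demand from the block manager. A generic sketch of that pattern, with a hypothetical load function standing in for readBroadcastBlock():

    import java.lang.ref.SoftReference

    // Minimal sketch of soft-reference memoization: recompute the value whenever
    // the GC has cleared the reference, otherwise return the cached instance.
    class SoftMemo[T <: AnyRef](load: () => T) {
      private var ref: SoftReference[T] = _

      def get: T = synchronized {
        val cached = if (ref == null) null.asInstanceOf[T] else ref.get()
        if (cached != null) {
          cached
        } else {
          val fresh = load()                 // potentially expensive, e.g. fetching blocks
          ref = new SoftReference[T](fresh)
          fresh
        }
      }
    }

Usage would look like new SoftMemo(() => readBroadcastBlock()).get, mirroring the synchronized read-or-reload logic in the diff.
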
diff --git a/core/src/main/scala/org/apache/spark/deploy/DependencyUtils.scala b/core/src/main/scala/org/apache/spark/deploy/DependencyUtils.scala
index 178bdcfccb603..5a17a6b6e169c 100644
--- a/core/src/main/scala/org/apache/spark/deploy/DependencyUtils.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/DependencyUtils.scala
@@ -61,11 +61,12 @@ private[deploy] object DependencyUtils extends Logging {
       hadoopConf: Configuration,
       secMgr: SecurityManager): String = {
     val targetDir = Utils.createTempDir()
+    val userJarName = userJar.split(File.separatorChar).last
     Option(jars)
       .map {
         resolveGlobPaths(_, hadoopConf)
           .split(",")
-          .filterNot(_.contains(userJar.split("/").last))
+          .filterNot(_.contains(userJarName))
           .mkString(",")
       }
       .filterNot(_ == "")
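
The DependencyUtils change filters the primary user jar out of the resolved --jars list by file name, splitting on File.separatorChar so the comparison also holds for Windows-style paths. A rough illustration with hypothetical paths (Unix separator assumed for the expected output):

    import java.io.File

    // Hypothetical inputs: the resolved --jars list and the primary resource.
    val jars = "/tmp/deps/lib-a.jar,/tmp/deps/app.jar,/tmp/deps/lib-b.jar"
    val userJar = "/tmp/deps/app.jar"

    val userJarName = userJar.split(File.separatorChar).last   // "app.jar"
    val filtered = jars.split(",").filterNot(_.contains(userJarName)).mkString(",")
    // filtered == "/tmp/deps/lib-a.jar,/tmp/deps/lib-b.jar"
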
diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala
index 4cc0063d010ef..7bb2a419107d6 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala
@@ -30,24 +30,20 @@ import scala.util.control.NonFatal
 
 import com.google.common.primitives.Longs
 import org.apache.hadoop.conf.Configuration
-import org.apache.hadoop.fs.{FileStatus, FileSystem, Path, PathFilter}
+import org.apache.hadoop.fs._
 import org.apache.hadoop.mapred.JobConf
 import org.apache.hadoop.security.{Credentials, UserGroupInformation}
 import org.apache.hadoop.security.token.{Token, TokenIdentifier}
 import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenIdentifier
 
 import org.apache.spark.{SparkConf, SparkException}
-import org.apache.spark.annotation.DeveloperApi
 import org.apache.spark.internal.Logging
-import org.apache.spark.internal.config._
 import org.apache.spark.util.Utils
 
 /**
- * :: DeveloperApi ::
  * Contains util methods to interact with Hadoop from Spark.
  */
-@DeveloperApi
-class SparkHadoopUtil extends Logging {
+private[spark] class SparkHadoopUtil extends Logging {
   private val sparkConf = new SparkConf(false).loadFromSystemProperties(true)
   val conf: Configuration = newConfiguration(sparkConf)
   UserGroupInformation.setConfiguration(conf)
@@ -387,7 +383,7 @@ class SparkHadoopUtil extends Logging {
 
 }
 
-object SparkHadoopUtil {
+private[spark] object SparkHadoopUtil {
 
   private lazy val instance = new SparkHadoopUtil
 
@@ -412,20 +408,6 @@ object SparkHadoopUtil {
 
   def get: SparkHadoopUtil = instance
 
-  /**
-   * Given an expiration date for the current set of credentials, calculate the time when new
-   * credentials should be created.
-   *
-   * @param expirationDate Drop-dead expiration date
-   * @param conf Spark configuration
-   * @return Timestamp when new credentials should be created.
-   */
-  private[spark] def nextCredentialRenewalTime(expirationDate: Long, conf: SparkConf): Long = {
-    val ct = System.currentTimeMillis
-    val ratio = conf.get(CREDENTIALS_RENEWAL_INTERVAL_RATIO)
-    (ct + (ratio * (expirationDate - ct))).toLong
-  }
-
   /**
    * Returns a Configuration object with Spark configuration applied on top. Unlike
    * the instance method, this will always return a Configuration instance, and not a
@@ -471,4 +453,33 @@ object SparkHadoopUtil {
       hadoopConf.set(key.substring("spark.hadoop.".length), value)
     }
   }
+
+  // scalastyle:off line.size.limit
+  /**
+   * Create a path that uses replication instead of erasure coding (EC), regardless of the default
+   * configuration in HDFS for the given path. This can be helpful as HDFS EC doesn't support
+   * hflush(), hsync(), or append():
+   * https://hadoop.apache.org/docs/r3.0.0/hadoop-project-dist/hadoop-hdfs/HDFSErasureCoding.html#Limitations
+   */
+  // scalastyle:on line.size.limit
+  def createNonECFile(fs: FileSystem, path: Path): FSDataOutputStream = {
+    try {
+      // Use reflection as this uses APIs only available in Hadoop 3
+      val builderMethod = fs.getClass().getMethod("createFile", classOf[Path])
+      val builder = builderMethod.invoke(fs, path)
+      val builderCls = builder.getClass()
+      // this may throw a NoSuchMethodException if the path is not on hdfs
+      val replicateMethod = builderCls.getMethod("replicate")
+      val buildMethod = builderCls.getMethod("build")
+      val b2 = replicateMethod.invoke(builder)
+      buildMethod.invoke(b2).asInstanceOf[FSDataOutputStream]
+    } catch {
+      case  _: NoSuchMethodException =>
+        // No createFile() method: we're using an older HDFS client, which doesn't give us control
+        // over EC vs. replication. Older HDFS doesn't have EC anyway, so just create a file with
+        // the old APIs.
+        fs.create(path)
+    }
+  }
+
 }
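
createNonECFile above resolves the Hadoop 3 createFile() builder via reflection so the same Spark build keeps working against Hadoop 2 clients. The same compile-against-old, probe-at-runtime idiom in isolation (hypothetical helper, not part of the patch):

    // Generic sketch of the reflection-with-fallback idiom used by createNonECFile above:
    // try a method that only exists in newer versions of a dependency, and fall back otherwise.
    def callIfPresent[T](target: AnyRef, method: String)(fallback: => T): T = {
      try {
        target.getClass.getMethod(method).invoke(target).asInstanceOf[T]
      } catch {
        case _: NoSuchMethodException => fallback   // older client: the method is not available
      }
    }

A call such as callIfPresent[AnyRef](builder, "replicate")(builder) would mirror the replicate() probe in the patch, with builder being whatever object the newer API exposes.
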
diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
index cf902db8709e7..324f6f8894d34 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
@@ -318,7 +318,7 @@ private[spark] class SparkSubmit extends Logging {
 
       if (!StringUtils.isBlank(resolvedMavenCoordinates)) {
         args.jars = mergeFileLists(args.jars, resolvedMavenCoordinates)
-        if (args.isPython) {
+        if (args.isPython || isInternal(args.primaryResource)) {
           args.pyFiles = mergeFileLists(args.pyFiles, resolvedMavenCoordinates)
         }
       }
@@ -335,7 +335,7 @@ private[spark] class SparkSubmit extends Logging {
     val targetDir = Utils.createTempDir()
 
     // assure a keytab is available from any place in a JVM
-    if (clusterManager == YARN || clusterManager == LOCAL || isMesosClient) {
+    if (clusterManager == YARN || clusterManager == LOCAL || isMesosClient || isKubernetesCluster) {
       if (args.principal != null) {
         if (args.keytab != null) {
           require(new File(args.keytab).exists(), s"Keytab file: ${args.keytab} does not exist")
@@ -520,6 +520,10 @@ private[spark] class SparkSubmit extends Logging {
         confKey = "spark.driver.extraJavaOptions"),
       OptionAssigner(args.driverExtraLibraryPath, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES,
         confKey = "spark.driver.extraLibraryPath"),
+      OptionAssigner(args.principal, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES,
+        confKey = PRINCIPAL.key),
+      OptionAssigner(args.keytab, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES,
+        confKey = KEYTAB.key),
 
       // Propagate attributes for dependency resolution at the driver side
       OptionAssigner(args.packages, STANDALONE | MESOS, CLUSTER, confKey = "spark.jars.packages"),
@@ -537,8 +541,6 @@ private[spark] class SparkSubmit extends Logging {
       OptionAssigner(args.jars, YARN, ALL_DEPLOY_MODES, confKey = "spark.yarn.dist.jars"),
       OptionAssigner(args.files, YARN, ALL_DEPLOY_MODES, confKey = "spark.yarn.dist.files"),
       OptionAssigner(args.archives, YARN, ALL_DEPLOY_MODES, confKey = "spark.yarn.dist.archives"),
-      OptionAssigner(args.principal, YARN, ALL_DEPLOY_MODES, confKey = "spark.yarn.principal"),
-      OptionAssigner(args.keytab, YARN, ALL_DEPLOY_MODES, confKey = "spark.yarn.keytab"),
 
       // Other options
       OptionAssigner(args.executorCores, STANDALONE | YARN | KUBERNETES, ALL_DEPLOY_MODES,
@@ -644,7 +646,8 @@ private[spark] class SparkSubmit extends Logging {
       }
     }
 
-    if (clusterManager == MESOS && UserGroupInformation.isSecurityEnabled) {
+    if ((clusterManager == MESOS || clusterManager == KUBERNETES)
+       && UserGroupInformation.isSecurityEnabled) {
       setRMPrincipal(sparkConf)
     }
 
@@ -760,8 +763,8 @@ private[spark] class SparkSubmit extends Logging {
   }
 
   // [SPARK-20328]. HadoopRDD calls into a Hadoop library that fetches delegation tokens with
-  // renewer set to the YARN ResourceManager.  Since YARN isn't configured in Mesos mode, we
-  // must trick it into thinking we're YARN.
+  // renewer set to the YARN ResourceManager.  Since YARN isn't configured in Mesos or Kubernetes
+  // mode, we must trick it into thinking we're YARN.
   private def setRMPrincipal(sparkConf: SparkConf): Unit = {
     val shortUserName = UserGroupInformation.getCurrentUser.getShortUserName
     val key = s"spark.hadoop.${YarnConfiguration.RM_PRINCIPAL}"
@@ -826,7 +829,7 @@ private[spark] class SparkSubmit extends Logging {
     }
 
     val app: SparkApplication = if (classOf[SparkApplication].isAssignableFrom(mainClass)) {
-      mainClass.newInstance().asInstanceOf[SparkApplication]
+      mainClass.getConstructor().newInstance().asInstanceOf[SparkApplication]
     } else {
       // SPARK-4170
       if (classOf[scala.App].isAssignableFrom(mainClass)) {
@@ -925,8 +928,6 @@ object SparkSubmit extends CommandLineUtils with Logging {
         } catch {
           case e: SparkUserAppException =>
             exitFn(e.exitCode)
-          case e: SparkException =>
-            printErrorAndExit(e.getMessage())
         }
       }
 
@@ -991,9 +992,9 @@ private[spark] object SparkSubmitUtils {
 
   // Exposed for testing.
   // These components are used to make the default exclusion rules for Spark dependencies.
-  // We need to specify each component explicitly, otherwise we miss spark-streaming-kafka-0-8 and
-  // other spark-streaming utility components. Underscore is there to differentiate between
-  // spark-streaming_2.1x and spark-streaming-kafka-0-8-assembly_2.1x
+  // We need to specify each component explicitly, otherwise we miss
+  // spark-streaming utility components. Underscore is there to differentiate between
+  // spark-streaming_2.1x and spark-streaming-kafka-0-10-assembly_2.1x
   val IVY_DEFAULT_EXCLUDES = Seq("catalyst_", "core_", "graphx_", "kvstore_", "launcher_", "mllib_",
     "mllib-local_", "network-common_", "network-shuffle_", "repl_", "sketch_", "sql_", "streaming_",
     "tags_", "unsafe_")
diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
index 0998757715457..4cf08a7980f55 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
@@ -199,8 +199,14 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S
     numExecutors = Option(numExecutors)
       .getOrElse(sparkProperties.get("spark.executor.instances").orNull)
     queue = Option(queue).orElse(sparkProperties.get("spark.yarn.queue")).orNull
-    keytab = Option(keytab).orElse(sparkProperties.get("spark.yarn.keytab")).orNull
-    principal = Option(principal).orElse(sparkProperties.get("spark.yarn.principal")).orNull
+    keytab = Option(keytab)
+      .orElse(sparkProperties.get("spark.kerberos.keytab"))
+      .orElse(sparkProperties.get("spark.yarn.keytab"))
+      .orNull
+    principal = Option(principal)
+      .orElse(sparkProperties.get("spark.kerberos.principal"))
+      .orElse(sparkProperties.get("spark.yarn.principal"))
+      .orNull
     dynamicAllocationEnabled =
       sparkProperties.get("spark.dynamicAllocation.enabled").exists("true".equalsIgnoreCase)
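
The keytab and principal resolution above prefers the command-line flag, then the new spark.kerberos.* property, and finally the legacy spark.yarn.* property. The precedence chain in isolation, with hypothetical values:

    // Hypothetical properties map; shows the same orElse precedence used above.
    val sparkProperties = Map(
      "spark.yarn.keytab" -> "/keytabs/legacy.keytab",
      "spark.kerberos.keytab" -> "/keytabs/new.keytab")

    val cliKeytab: String = null            // --keytab was not passed on the command line
    val keytab = Option(cliKeytab)
      .orElse(sparkProperties.get("spark.kerberos.keytab"))
      .orElse(sparkProperties.get("spark.yarn.keytab"))
      .orNull
    // keytab == "/keytabs/new.keytab": the new key wins over the legacy spark.yarn.* key
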
 
diff --git a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
index 44d23908146c7..da6e5f03aabb5 100644
--- a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
@@ -19,7 +19,6 @@ package org.apache.spark.deploy.history
 
 import java.io.{File, FileNotFoundException, IOException}
 import java.nio.file.Files
-import java.nio.file.attribute.PosixFilePermissions
 import java.util.{Date, ServiceLoader}
 import java.util.concurrent.{ConcurrentHashMap, ExecutorService, Future, TimeUnit}
 import java.util.zip.{ZipEntry, ZipOutputStream}
@@ -35,7 +34,7 @@ import com.fasterxml.jackson.annotation.JsonIgnore
 import com.google.common.io.ByteStreams
 import com.google.common.util.concurrent.MoreExecutors
 import org.apache.hadoop.fs.{FileStatus, FileSystem, Path}
-import org.apache.hadoop.hdfs.DistributedFileSystem
+import org.apache.hadoop.hdfs.{DFSInputStream, DistributedFileSystem}
 import org.apache.hadoop.hdfs.protocol.HdfsConstants
 import org.apache.hadoop.security.AccessControlException
 import org.fusesource.leveldbjni.internal.NativeDB
@@ -43,13 +42,15 @@ import org.fusesource.leveldbjni.internal.NativeDB
 import org.apache.spark.{SecurityManager, SparkConf, SparkException}
 import org.apache.spark.deploy.SparkHadoopUtil
 import org.apache.spark.internal.Logging
+import org.apache.spark.internal.config.DRIVER_LOG_DFS_DIR
+import org.apache.spark.internal.config.History._
+import org.apache.spark.internal.config.Status._
 import org.apache.spark.io.CompressionCodec
 import org.apache.spark.scheduler._
 import org.apache.spark.scheduler.ReplayListenerBus._
 import org.apache.spark.status._
 import org.apache.spark.status.KVUtils._
 import org.apache.spark.status.api.v1.{ApplicationAttemptInfo, ApplicationInfo}
-import org.apache.spark.status.config._
 import org.apache.spark.ui.SparkUI
 import org.apache.spark.util.{Clock, SystemClock, ThreadUtils, Utils}
 import org.apache.spark.util.kvstore._
@@ -87,7 +88,6 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
     this(conf, new SystemClock())
   }
 
-  import config._
   import FsHistoryProvider._
 
   // Interval between safemode checks.
@@ -98,7 +98,7 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
   private val UPDATE_INTERVAL_S = conf.getTimeAsSeconds("spark.history.fs.update.interval", "10s")
 
   // Interval between each cleaner checks for event logs to delete
-  private val CLEAN_INTERVAL_S = conf.getTimeAsSeconds("spark.history.fs.cleaner.interval", "1d")
+  private val CLEAN_INTERVAL_S = conf.get(CLEANER_INTERVAL_S)
 
   // Number of threads used to replay event logs.
   private val NUM_PROCESSING_THREADS = conf.getInt(SPARK_HISTORY_FS_NUM_REPLAY_THREADS,
@@ -133,9 +133,8 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
 
   // Visible for testing.
   private[history] val listing: KVStore = storePath.map { path =>
-    val perms = PosixFilePermissions.fromString("rwx------")
-    val dbPath = Files.createDirectories(new File(path, "listing.ldb").toPath(),
-      PosixFilePermissions.asFileAttribute(perms)).toFile()
+    val dbPath = Files.createDirectories(new File(path, "listing.ldb").toPath()).toFile()
+    Utils.chmod700(dbPath)
 
     val metadata = new FsHistoryProviderMetadata(CURRENT_LISTING_VERSION,
       AppStatusStore.CURRENT_VERSION, logDir.toString())
@@ -276,11 +275,18 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
       pool.scheduleWithFixedDelay(
         getRunner(() => checkForLogs()), 0, UPDATE_INTERVAL_S, TimeUnit.SECONDS)
 
-      if (conf.getBoolean("spark.history.fs.cleaner.enabled", false)) {
+      if (conf.get(CLEANER_ENABLED)) {
         // A task that periodically cleans event logs on disk.
         pool.scheduleWithFixedDelay(
           getRunner(() => cleanLogs()), 0, CLEAN_INTERVAL_S, TimeUnit.SECONDS)
       }
+
+      if (conf.contains(DRIVER_LOG_DFS_DIR) && conf.get(DRIVER_LOG_CLEANER_ENABLED)) {
+        pool.scheduleWithFixedDelay(getRunner(() => cleanDriverLogs()),
+          0,
+          conf.get(DRIVER_LOG_CLEANER_INTERVAL),
+          TimeUnit.SECONDS)
+      }
     } else {
       logDebug("Background update thread disabled for testing")
     }
@@ -451,10 +457,32 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
               listing.write(info.copy(lastProcessed = newLastScanTime, fileSize = entry.getLen()))
             }
 
-            if (info.fileSize < entry.getLen()) {
+            if (shouldReloadLog(info, entry)) {
               if (info.appId.isDefined && fastInProgressParsing) {
                 // When fast in-progress parsing is on, we don't need to re-parse when the
                 // size changes, but we do need to invalidate any existing UIs.
+                // Also, we need to update the `lastUpdated` time to display the updated time in
+                // the History UI and to avoid cleaning up the in-progress app while it is running.
+                val appInfo = listing.read(classOf[ApplicationInfoWrapper], info.appId.get)
+
+                val attemptList = appInfo.attempts.map { attempt =>
+                  if (attempt.info.attemptId == info.attemptId) {
+                    new AttemptInfoWrapper(
+                      attempt.info.copy(lastUpdated = new Date(newLastScanTime)),
+                      attempt.logPath,
+                      attempt.fileSize,
+                      attempt.adminAcls,
+                      attempt.viewAcls,
+                      attempt.adminAclsGroups,
+                      attempt.viewAclsGroups)
+                  } else {
+                    attempt
+                  }
+                }
+
+                val updatedAppInfo = new ApplicationInfoWrapper(appInfo.info, attemptList)
+                listing.write(updatedAppInfo)
+
                 invalidateUI(info.appId.get, info.attemptId)
                 false
               } else {
@@ -468,8 +496,8 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
               // If the file is currently not being tracked by the SHS, add an entry for it and try
               // to parse it. This will allow the cleaner code to detect the file as stale later on
               // if it was not possible to parse it.
-              listing.write(LogInfo(entry.getPath().toString(), newLastScanTime, None, None,
-                entry.getLen()))
+              listing.write(LogInfo(entry.getPath().toString(), newLastScanTime, LogType.EventLogs,
+                None, None, entry.getLen()))
               entry.getLen() > 0
           }
         }
@@ -543,6 +571,24 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
     }
   }
 
+  private[history] def shouldReloadLog(info: LogInfo, entry: FileStatus): Boolean = {
+    var result = info.fileSize < entry.getLen
+    if (!result && info.logPath.endsWith(EventLoggingListener.IN_PROGRESS)) {
+      try {
+        result = Utils.tryWithResource(fs.open(entry.getPath)) { in =>
+          in.getWrappedStream match {
+            case dfsIn: DFSInputStream => info.fileSize < dfsIn.getFileLength
+            case _ => false
+          }
+        }
+      } catch {
+        case e: Exception =>
+          logDebug(s"Failed to check the length for the file : ${info.logPath}", e)
+      }
+    }
+    result
+  }
+
   private def cleanAppData(appId: String, attemptId: Option[String], logPath: String): Unit = {
     try {
       val app = load(appId)
@@ -708,7 +754,7 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
         // listing data is good.
         invalidateUI(app.info.id, app.attempts.head.info.attemptId)
         addListing(app)
-        listing.write(LogInfo(logPath.toString(), scanTime, Some(app.info.id),
+        listing.write(LogInfo(logPath.toString(), scanTime, LogType.EventLogs, Some(app.info.id),
           app.attempts.head.info.attemptId, fileStatus.getLen()))
 
         // For a finished log, remove the corresponding "in progress" entry from the listing DB if
@@ -737,7 +783,8 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
         // If the app hasn't written down its app ID to the logs, still record the entry in the
         // listing db, with an empty ID. This will make the log eligible for deletion if the app
         // does not make progress after the configured max log age.
-        listing.write(LogInfo(logPath.toString(), scanTime, None, None, fileStatus.getLen()))
+        listing.write(
+          LogInfo(logPath.toString(), scanTime, LogType.EventLogs, None, None, fileStatus.getLen()))
     }
   }
 
@@ -782,7 +829,7 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
         val logPath = new Path(logDir, attempt.logPath)
         listing.delete(classOf[LogInfo], logPath.toString())
         cleanAppData(app.id, attempt.info.attemptId, logPath.toString())
-        deleteLog(logPath)
+        deleteLog(fs, logPath)
       }
 
       if (remaining.isEmpty) {
@@ -796,11 +843,12 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
       .reverse()
       .first(maxTime)
       .asScala
+      .filter { l => l.logType == null || l.logType == LogType.EventLogs }
       .toList
     stale.foreach { log =>
       if (log.appId.isEmpty) {
         logInfo(s"Deleting invalid / corrupt event log ${log.logPath}")
-        deleteLog(new Path(log.logPath))
+        deleteLog(fs, new Path(log.logPath))
         listing.delete(classOf[LogInfo], log.logPath)
       }
     }
@@ -808,6 +856,61 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
     clearBlacklist(CLEAN_INTERVAL_S)
   }
 
+  /**
+   * Delete driver logs from the configured Spark DFS directory that exceed the configured max age.
+   */
+  private[history] def cleanDriverLogs(): Unit = Utils.tryLog {
+    val driverLogDir = conf.get(DRIVER_LOG_DFS_DIR).get
+    val driverLogFs = new Path(driverLogDir).getFileSystem(hadoopConf)
+    val currentTime = clock.getTimeMillis()
+    val maxTime = currentTime - conf.get(MAX_DRIVER_LOG_AGE_S) * 1000
+    val logFiles = driverLogFs.listLocatedStatus(new Path(driverLogDir))
+    while (logFiles.hasNext()) {
+      val f = logFiles.next()
+      // Do not rely on 'modtime' as it is not updated for all filesystems when files are written to
+      val deleteFile =
+        try {
+          val info = listing.read(classOf[LogInfo], f.getPath().toString())
+          // Update the lastProcessed time of the file if its length or modification time has changed
+          if (info.fileSize < f.getLen() || info.lastProcessed < f.getModificationTime()) {
+            listing.write(
+              info.copy(lastProcessed = currentTime, fileSize = f.getLen()))
+            false
+          } else if (info.lastProcessed > maxTime) {
+            false
+          } else {
+            true
+          }
+        } catch {
+          case e: NoSuchElementException =>
+            // For every new driver log file discovered, create a new entry in listing
+            listing.write(LogInfo(f.getPath().toString(), currentTime, LogType.DriverLogs, None,
+              None, f.getLen()))
+          false
+        }
+      if (deleteFile) {
+        logInfo(s"Deleting expired driver log for: ${f.getPath().getName()}")
+        listing.delete(classOf[LogInfo], f.getPath().toString())
+        deleteLog(driverLogFs, f.getPath())
+      }
+    }
+
+    // Delete driver log file entries that exceed the configured max age and
+    // may have been deleted on filesystem externally.
+    val stale = listing.view(classOf[LogInfo])
+      .index("lastProcessed")
+      .reverse()
+      .first(maxTime)
+      .asScala
+      .filter { l => l.logType != null && l.logType == LogType.DriverLogs }
+      .toList
+    stale.foreach { log =>
+      logInfo(s"Deleting invalid driver log ${log.logPath}")
+      listing.delete(classOf[LogInfo], log.logPath)
+      deleteLog(driverLogFs, new Path(log.logPath))
+    }
+  }
+
   /**
    * Rebuilds the application state store from its event log.
    */
@@ -964,7 +1067,7 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
       throw new NoSuchElementException(s"Cannot find attempt $attemptId of $appId."))
   }
 
-  private def deleteLog(log: Path): Unit = {
+  private def deleteLog(fs: FileSystem, log: Path): Unit = {
     if (isBlacklisted(log)) {
       logDebug(s"Skipping deleting $log as we don't have permissions on it.")
     } else {
@@ -1009,6 +1112,10 @@ private[history] case class FsHistoryProviderMetadata(
     uiVersion: Long,
     logDir: String)
 
+private[history] object LogType extends Enumeration {
+  val DriverLogs, EventLogs = Value
+}
+
 /**
  * Tracking info for event logs detected in the configured log directory. Tracks both valid and
  * invalid logs (e.g. unparseable logs, recorded as logs with no app ID) so that the cleaner
@@ -1017,6 +1124,7 @@ private[history] case class FsHistoryProviderMetadata(
 private[history] case class LogInfo(
     @KVIndexParam logPath: String,
     @KVIndexParam("lastProcessed") lastProcessed: Long,
+    logType: LogType.Value,
     appId: Option[String],
     attemptId: Option[String],
     fileSize: Long)
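
cleanDriverLogs() above deletes a driver log only once its lastProcessed timestamp falls behind a cutoff derived from the configured maximum age, refreshing lastProcessed whenever the file grows or its modification time changes. The cutoff arithmetic alone, with hypothetical numbers:

    // Hypothetical values illustrating the cutoff used by cleanDriverLogs().
    val currentTime = 1700000000000L            // "now", in milliseconds
    val maxDriverLogAgeS = 7L * 24 * 60 * 60    // e.g. a 7-day max age, in seconds
    val maxTime = currentTime - maxDriverLogAgeS * 1000

    // A log whose lastProcessed is at or before maxTime, and whose size and modification
    // time have not changed since the last scan, is treated as expired and deleted.
    def expired(lastProcessed: Long): Boolean = lastProcessed <= maxTime
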
diff --git a/core/src/main/scala/org/apache/spark/deploy/history/HistoryPage.scala b/core/src/main/scala/org/apache/spark/deploy/history/HistoryPage.scala
index 32667ddf5c7ea..00ca4efa4d266 100644
--- a/core/src/main/scala/org/apache/spark/deploy/history/HistoryPage.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/history/HistoryPage.scala
@@ -31,8 +31,8 @@ private[history] class HistoryPage(parent: HistoryServer) extends WebUIPage("")
     val requestedIncomplete =
       Option(UIUtils.stripXSS(request.getParameter("showIncomplete"))).getOrElse("false").toBoolean
 
-    val allAppsSize = parent.getApplicationList()
-      .count(isApplicationCompleted(_) != requestedIncomplete)
+    val displayApplications = parent.getApplicationList()
+      .exists(isApplicationCompleted(_) != requestedIncomplete)
     val eventLogsUnderProcessCount = parent.getEventLogsUnderProcess()
     val lastUpdatedTime = parent.getLastUpdatedTime()
     val providerConfig = parent.getProviderConfig()
@@ -63,9 +63,9 @@ private[history] class HistoryPage(parent: HistoryServer) extends WebUIPage("")
             }
 
             {
-            if (allAppsSize > 0) {
+            if (displayApplications) {
               <script src={UIUtils.prependBaseUri(
-                  request, "/static/dataTables.rowsGroup.js")}></script> ++
+                request, "/static/dataTables.rowsGroup.js")}></script> ++
                 <div id="history-summary" class="row-fluid"></div> ++
                 <script src={UIUtils.prependBaseUri(request, "/static/historypage.js")}></script> ++
                 <script>setAppLimit({parent.maxApplications})</script>
diff --git a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala
index 56f3f59504a7d..5856c7057b745 100644
--- a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala
@@ -28,9 +28,9 @@ import org.eclipse.jetty.servlet.{ServletContextHandler, ServletHolder}
 
 import org.apache.spark.{SecurityManager, SparkConf}
 import org.apache.spark.deploy.SparkHadoopUtil
-import org.apache.spark.deploy.history.config.HISTORY_SERVER_UI_PORT
 import org.apache.spark.internal.Logging
 import org.apache.spark.internal.config._
+import org.apache.spark.internal.config.History.HISTORY_SERVER_UI_PORT
 import org.apache.spark.status.api.v1.{ApiRootResource, ApplicationInfo, UIRoot}
 import org.apache.spark.ui.{SparkUI, UIUtils, WebUI}
 import org.apache.spark.ui.JettyUtils._
diff --git a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServerArguments.scala b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServerArguments.scala
index 080ba12c2f0d1..49f00cb10179e 100644
--- a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServerArguments.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServerArguments.scala
@@ -34,35 +34,21 @@ private[history] class HistoryServerArguments(conf: SparkConf, args: Array[Strin
 
   @tailrec
   private def parse(args: List[String]): Unit = {
-    if (args.length == 1) {
-      setLogDirectory(args.head)
-    } else {
-      args match {
-        case ("--dir" | "-d") :: value :: tail =>
-          setLogDirectory(value)
-          parse(tail)
+    args match {
+      case ("--help" | "-h") :: tail =>
+        printUsageAndExit(0)
 
-        case ("--help" | "-h") :: tail =>
-          printUsageAndExit(0)
+      case ("--properties-file") :: value :: tail =>
+        propertiesFile = value
+        parse(tail)
 
-        case ("--properties-file") :: value :: tail =>
-          propertiesFile = value
-          parse(tail)
+      case Nil =>
 
-        case Nil =>
-
-        case _ =>
-          printUsageAndExit(1)
-      }
+      case _ =>
+        printUsageAndExit(1)
     }
   }
 
-  private def setLogDirectory(value: String): Unit = {
-    logWarning("Setting log directory through the command line is deprecated as of " +
-      "Spark 1.1.0. Please set this through spark.history.fs.logDirectory instead.")
-    conf.set("spark.history.fs.logDirectory", value)
-  }
-
    // This mutates the SparkConf, so all accesses to it must be made after this line
    Utils.loadDefaultSparkProperties(conf, propertiesFile)
 
@@ -73,8 +59,6 @@ private[history] class HistoryServerArguments(conf: SparkConf, args: Array[Strin
       |Usage: HistoryServer [options]
       |
       |Options:
-      |  DIR                         Deprecated; set spark.history.fs.logDirectory directly
-      |  --dir DIR (-d DIR)          Deprecated; set spark.history.fs.logDirectory directly
       |  --properties-file FILE      Path to a custom Spark properties file.
       |                              Default is conf/spark-defaults.conf.
       |
diff --git a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServerDiskManager.scala b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServerDiskManager.scala
index c03a360b91ef8..0a1f33395ad62 100644
--- a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServerDiskManager.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServerDiskManager.scala
@@ -18,8 +18,6 @@
 package org.apache.spark.deploy.history
 
 import java.io.File
-import java.nio.file.Files
-import java.nio.file.attribute.PosixFilePermissions
 import java.util.concurrent.atomic.AtomicLong
 
 import scala.collection.JavaConverters._
@@ -29,6 +27,7 @@ import org.apache.commons.io.FileUtils
 
 import org.apache.spark.SparkConf
 import org.apache.spark.internal.Logging
+import org.apache.spark.internal.config.History._
 import org.apache.spark.status.KVUtils._
 import org.apache.spark.util.{Clock, Utils}
 import org.apache.spark.util.kvstore.KVStore
@@ -52,8 +51,6 @@ private class HistoryServerDiskManager(
     listing: KVStore,
     clock: Clock) extends Logging {
 
-  import config._
-
   private val appStoreDir = new File(path, "apps")
   if (!appStoreDir.isDirectory() && !appStoreDir.mkdir()) {
     throw new IllegalArgumentException(s"Failed to create app directory ($appStoreDir).")
@@ -107,9 +104,8 @@ private class HistoryServerDiskManager(
     val needed = approximateSize(eventLogSize, isCompressed)
     makeRoom(needed)
 
-    val perms = PosixFilePermissions.fromString("rwx------")
-    val tmp = Files.createTempDirectory(tmpStoreDir.toPath(), "appstore",
-      PosixFilePermissions.asFileAttribute(perms)).toFile()
+    val tmp = Utils.createTempDir(tmpStoreDir.getPath(), "appstore")
+    Utils.chmod700(tmp)
 
     updateUsage(needed)
     val current = currentUsage.get()
diff --git a/core/src/main/scala/org/apache/spark/deploy/rest/RestSubmissionClient.scala b/core/src/main/scala/org/apache/spark/deploy/rest/RestSubmissionClient.scala
index 31a8e3e60c067..afa413fe165df 100644
--- a/core/src/main/scala/org/apache/spark/deploy/rest/RestSubmissionClient.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/rest/RestSubmissionClient.scala
@@ -408,6 +408,10 @@ private[spark] class RestSubmissionClient(master: String) extends Logging {
 }
 
 private[spark] object RestSubmissionClient {
+
+  // SPARK_HOME and SPARK_CONF_DIR are filtered out because they are usually wrong
+  // on the remote machine (SPARK-12345) (SPARK-25934)
+  private val BLACKLISTED_SPARK_ENV_VARS = Set("SPARK_ENV_LOADED", "SPARK_HOME", "SPARK_CONF_DIR")
   private val REPORT_DRIVER_STATUS_INTERVAL = 1000
   private val REPORT_DRIVER_STATUS_MAX_TRIES = 10
   val PROTOCOL_VERSION = "v1"
@@ -417,9 +421,7 @@ private[spark] object RestSubmissionClient {
    */
   private[rest] def filterSystemEnvironment(env: Map[String, String]): Map[String, String] = {
     env.filterKeys { k =>
-      // SPARK_HOME is filtered out because it is usually wrong on the remote machine (SPARK-12345)
-      (k.startsWith("SPARK_") && k != "SPARK_ENV_LOADED" && k != "SPARK_HOME") ||
-        k.startsWith("MESOS_")
+      (k.startsWith("SPARK_") && !BLACKLISTED_SPARK_ENV_VARS.contains(k)) || k.startsWith("MESOS_")
     }
   }
 }
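
The change above moves the excluded variable names into a blacklist set, with SPARK_CONF_DIR joining SPARK_HOME and SPARK_ENV_LOADED. A rough illustration of the resulting filter over a hypothetical environment:

    val blacklisted = Set("SPARK_ENV_LOADED", "SPARK_HOME", "SPARK_CONF_DIR")

    // Hypothetical environment on the submitting machine.
    val env = Map(
      "SPARK_HOME" -> "/opt/spark",                          // dropped: wrong on the remote machine
      "SPARK_CONF_DIR" -> "/opt/spark/conf",                 // dropped for the same reason
      "SPARK_LOG_DIR" -> "/var/log/spark",                   // kept: SPARK_ prefix, not blacklisted
      "MESOS_NATIVE_JAVA_LIBRARY" -> "/usr/lib/libmesos.so", // kept: MESOS_ prefix
      "JAVA_HOME" -> "/usr/lib/jvm/java-8")                  // dropped: neither prefix matches

    val forwarded = env.filterKeys { k =>
      (k.startsWith("SPARK_") && !blacklisted.contains(k)) || k.startsWith("MESOS_")
    }
    // forwarded keeps only SPARK_LOG_DIR and MESOS_NATIVE_JAVA_LIBRARY
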
diff --git a/core/src/main/scala/org/apache/spark/deploy/rest/StandaloneRestServer.scala b/core/src/main/scala/org/apache/spark/deploy/rest/StandaloneRestServer.scala
index 22b65abce611a..afa1a5fbba792 100644
--- a/core/src/main/scala/org/apache/spark/deploy/rest/StandaloneRestServer.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/rest/StandaloneRestServer.scala
@@ -138,6 +138,16 @@ private[rest] class StandaloneSubmitRequestServlet(
     val driverExtraClassPath = sparkProperties.get("spark.driver.extraClassPath")
     val driverExtraLibraryPath = sparkProperties.get("spark.driver.extraLibraryPath")
     val superviseDriver = sparkProperties.get("spark.driver.supervise")
+    // The semantics of "spark.master" and the masterUrl are different. While the
+    // property "spark.master" could contain all registered masters, masterUrl
+    // contains only the active master. To make sure a Spark driver can recover
+    // in a multi-master setup, we use the "spark.master" property while submitting
+    // the driver.
+    val masters = sparkProperties.get("spark.master")
+    val (_, masterPort) = Utils.extractHostPortFromSparkUrl(masterUrl)
+    val masterRestPort = this.conf.getInt("spark.master.rest.port", 6066)
+    val updatedMasters = masters.map(
+      _.replace(s":$masterRestPort", s":$masterPort")).getOrElse(masterUrl)
     val appArgs = request.appArgs
     // Filter SPARK_LOCAL_(IP|HOSTNAME) environment variables from being set on the remote system.
     val environmentVariables =
@@ -146,7 +156,7 @@ private[rest] class StandaloneSubmitRequestServlet(
     // Construct driver description
     val conf = new SparkConf(false)
       .setAll(sparkProperties)
-      .set("spark.master", masterUrl)
+      .set("spark.master", updatedMasters)
     val extraClassPath = driverExtraClassPath.toSeq.flatMap(_.split(File.pathSeparator))
     val extraLibraryPath = driverExtraLibraryPath.toSeq.flatMap(_.split(File.pathSeparator))
     val extraJavaOpts = driverExtraJavaOptions.map(Utils.splitCommandString).getOrElse(Seq.empty)
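
The submission servlet above now rewrites spark.master so the launched driver keeps every registered master for recovery, substituting the cluster port of the active master for the REST port the client submitted against. A string-level illustration with hypothetical hosts and ports:

    // Hypothetical multi-master setup: the client submitted against the REST port (6066).
    val masters = Some("spark://master1:6066,spark://master2:6066")  // "spark.master" as submitted
    val masterUrl = "spark://master1:7077"                           // active master
    val masterPort = 7077      // from Utils.extractHostPortFromSparkUrl(masterUrl)
    val masterRestPort = 6066  // spark.master.rest.port

    val updatedMasters = masters
      .map(_.replace(s":$masterRestPort", s":$masterPort"))
      .getOrElse(masterUrl)
    // updatedMasters == "spark://master1:7077,spark://master2:7077"
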
diff --git a/core/src/main/scala/org/apache/spark/deploy/rest/SubmitRestProtocolMessage.scala b/core/src/main/scala/org/apache/spark/deploy/rest/SubmitRestProtocolMessage.scala
index ef5a7e35ad562..97b689cdadd5f 100644
--- a/core/src/main/scala/org/apache/spark/deploy/rest/SubmitRestProtocolMessage.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/rest/SubmitRestProtocolMessage.scala
@@ -36,7 +36,7 @@ import org.apache.spark.util.Utils
  *   (2) the Spark version of the client / server
  *   (3) an optional message
  */
-@JsonInclude(Include.NON_NULL)
+@JsonInclude(Include.NON_ABSENT)
 @JsonAutoDetect(getterVisibility = Visibility.ANY, setterVisibility = Visibility.ANY)
 @JsonPropertyOrder(alphabetic = true)
 private[rest] abstract class SubmitRestProtocolMessage {
diff --git a/core/src/main/scala/org/apache/spark/deploy/security/HadoopDelegationTokenManager.scala b/core/src/main/scala/org/apache/spark/deploy/security/HadoopDelegationTokenManager.scala
index ab8d8d96a9b08..126a6ab801369 100644
--- a/core/src/main/scala/org/apache/spark/deploy/security/HadoopDelegationTokenManager.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/security/HadoopDelegationTokenManager.scala
@@ -17,76 +17,158 @@
 
 package org.apache.spark.deploy.security
 
+import java.io.File
+import java.security.PrivilegedExceptionAction
+import java.util.concurrent.{ScheduledExecutorService, TimeUnit}
+import java.util.concurrent.atomic.AtomicReference
+
 import org.apache.hadoop.conf.Configuration
 import org.apache.hadoop.fs.FileSystem
-import org.apache.hadoop.security.Credentials
+import org.apache.hadoop.security.{Credentials, UserGroupInformation}
 
 import org.apache.spark.SparkConf
+import org.apache.spark.deploy.SparkHadoopUtil
 import org.apache.spark.internal.Logging
+import org.apache.spark.internal.config._
+import org.apache.spark.rpc.RpcEndpointRef
+import org.apache.spark.scheduler.cluster.CoarseGrainedClusterMessages.UpdateDelegationTokens
+import org.apache.spark.ui.UIUtils
+import org.apache.spark.util.ThreadUtils
 
 /**
- * Manages all the registered HadoopDelegationTokenProviders and offer APIs for other modules to
- * obtain delegation tokens and their renewal time. By default [[HadoopFSDelegationTokenProvider]],
- * [[HiveDelegationTokenProvider]] and [[HBaseDelegationTokenProvider]] will be loaded in if not
- * explicitly disabled.
+ * Manager for delegation tokens in a Spark application.
+ *
+ * This manager has two modes of operation:
+ *
+ * 1.  When configured with a principal and a keytab, it will make sure long-running apps can run
+ * without interruption while accessing secured services. It periodically logs in to the KDC with
+ * user-provided credentials, and contacts all the configured secure services to obtain delegation
+ * tokens to be distributed to the rest of the application.
+ *
+ * Because the Hadoop UGI API does not expose the TTL of the TGT, a configuration controls how often
+ * to check that a relogin is necessary. This is done reasonably often since the check is a no-op
+ * when the relogin is not yet needed. The check period can be overridden in the configuration.
  *
- * Also, each HadoopDelegationTokenProvider is controlled by
- * spark.security.credentials.{service}.enabled, and will not be loaded if this config is set to
- * false. For example, Hive's delegation token provider [[HiveDelegationTokenProvider]] can be
- * enabled/disabled by the configuration spark.security.credentials.hive.enabled.
+ * New delegation tokens are created once 75% of the renewal interval of the original tokens has
+ * elapsed. The new tokens are sent to the Spark driver endpoint once it's registered with the AM.
+ * The driver is tasked with distributing the tokens to other processes that might need them.
  *
- * @param sparkConf Spark configuration
- * @param hadoopConf Hadoop configuration
- * @param fileSystems Delegation tokens will be fetched for these Hadoop filesystems.
+ * 2. When operating without an explicit principal and keytab, token renewal will not be available.
+ * Starting the manager will distribute an initial set of delegation tokens to the provided Spark
+ * driver, but the app will not get new tokens when those expire.
+ *
+ * It can also be used just to create delegation tokens, by calling the `obtainDelegationTokens`
+ * method. This option does not require calling the `start` method, but leaves it up to the
+ * caller to distribute the tokens that were generated.
  */
 private[spark] class HadoopDelegationTokenManager(
-    sparkConf: SparkConf,
-    hadoopConf: Configuration,
-    fileSystems: Configuration => Set[FileSystem])
-  extends Logging {
+    protected val sparkConf: SparkConf,
+    protected val hadoopConf: Configuration) extends Logging {
 
   private val deprecatedProviderEnabledConfigs = List(
     "spark.yarn.security.tokens.%s.enabled",
     "spark.yarn.security.credentials.%s.enabled")
   private val providerEnabledConfig = "spark.security.credentials.%s.enabled"
 
-  // Maintain all the registered delegation token providers
-  private val delegationTokenProviders = getDelegationTokenProviders
+  private val principal = sparkConf.get(PRINCIPAL).orNull
+  private val keytab = sparkConf.get(KEYTAB).orNull
+
+  require((principal == null) == (keytab == null),
+    "Both principal and keytab must be defined, or neither.")
+  require(keytab == null || new File(keytab).isFile(), s"Cannot find keytab at $keytab.")
+
+  private val delegationTokenProviders = loadProviders()
   logDebug("Using the following builtin delegation token providers: " +
     s"${delegationTokenProviders.keys.mkString(", ")}.")
 
-  /** Construct a [[HadoopDelegationTokenManager]] for the default Hadoop filesystem */
-  def this(sparkConf: SparkConf, hadoopConf: Configuration) = {
-    this(
-      sparkConf,
-      hadoopConf,
-      hadoopConf => Set(FileSystem.get(hadoopConf).getHomeDirectory.getFileSystem(hadoopConf)))
+  private var renewalExecutor: ScheduledExecutorService = _
+  private val driverRef = new AtomicReference[RpcEndpointRef]()
+
+  /** Set the endpoint used to send tokens to the driver. */
+  def setDriverRef(ref: RpcEndpointRef): Unit = {
+    driverRef.set(ref)
   }
 
-  private def getDelegationTokenProviders: Map[String, HadoopDelegationTokenProvider] = {
-    val providers = Seq(new HadoopFSDelegationTokenProvider(fileSystems)) ++
-      safeCreateProvider(new HiveDelegationTokenProvider) ++
-      safeCreateProvider(new HBaseDelegationTokenProvider)
+  /** @return Whether delegation token renewal is enabled. */
+  def renewalEnabled: Boolean = principal != null
 
-    // Filter out providers for which spark.security.credentials.{service}.enabled is false.
-    providers
-      .filter { p => isServiceEnabled(p.serviceName) }
-      .map { p => (p.serviceName, p) }
-      .toMap
+  /**
+   * Start the token renewer. Requires a principal and keytab. Upon start, the renewer will:
+   *
+   * - log in the configured principal, and set up a task to keep that user's ticket renewed
+   * - obtain delegation tokens from all available providers
+   * - send the tokens to the driver, if it's already registered
+   * - schedule a periodic task to update the tokens when needed.
+   *
+   * @return The newly logged in user.
+   */
+  def start(): UserGroupInformation = {
+    require(renewalEnabled, "Token renewal must be enabled to start the renewer.")
+    renewalExecutor =
+      ThreadUtils.newDaemonSingleThreadScheduledExecutor("Credential Renewal Thread")
+
+    val originalCreds = UserGroupInformation.getCurrentUser().getCredentials()
+    val ugi = doLogin()
+
+    val tgtRenewalTask = new Runnable() {
+      override def run(): Unit = {
+        ugi.checkTGTAndReloginFromKeytab()
+      }
+    }
+    val tgtRenewalPeriod = sparkConf.get(KERBEROS_RELOGIN_PERIOD)
+    renewalExecutor.scheduleAtFixedRate(tgtRenewalTask, tgtRenewalPeriod, tgtRenewalPeriod,
+      TimeUnit.SECONDS)
+
+    val creds = obtainTokensAndScheduleRenewal(ugi)
+    ugi.addCredentials(creds)
+
+    val driver = driverRef.get()
+    if (driver != null) {
+      val tokens = SparkHadoopUtil.get.serialize(creds)
+      driver.send(UpdateDelegationTokens(tokens))
+    }
+
+    // Transfer the original user's tokens to the new user, since it may contain needed tokens
+    // (such as those used to connect to YARN). Explicitly avoid overwriting tokens that already
+    // exist in the current user's credentials, since those were freshly obtained above
+    // (see SPARK-23361).
+    val existing = ugi.getCredentials()
+    existing.mergeAll(originalCreds)
+    ugi.addCredentials(existing)
+    ugi
   }
 
-  private def safeCreateProvider(
-      createFn: => HadoopDelegationTokenProvider): Option[HadoopDelegationTokenProvider] = {
-    try {
-      Some(createFn)
-    } catch {
-      case t: Throwable =>
-        logDebug(s"Failed to load built in provider.", t)
-        None
+  def stop(): Unit = {
+    if (renewalExecutor != null) {
+      renewalExecutor.shutdown()
     }
   }
 
-  def isServiceEnabled(serviceName: String): Boolean = {
+  /**
+   * Fetch new delegation tokens for configured services, storing them in the given credentials.
+   * Tokens are fetched for the current logged in user.
+   *
+   * @param creds Credentials object in which to store the delegation tokens.
+   * @return The time by which the tokens must be renewed.
+   */
+  def obtainDelegationTokens(creds: Credentials): Long = {
+    delegationTokenProviders.values.flatMap { provider =>
+      if (provider.delegationTokensRequired(sparkConf, hadoopConf)) {
+        provider.obtainDelegationTokens(hadoopConf, sparkConf, creds)
+      } else {
+        logDebug(s"Service ${provider.serviceName} does not require a token." +
+          s" Check your configuration to see if security is disabled or not.")
+        None
+      }
+    }.foldLeft(Long.MaxValue)(math.min)
+  }
+
+  // Visible for testing.
+  def isProviderLoaded(serviceName: String): Boolean = {
+    delegationTokenProviders.contains(serviceName)
+  }
+
+  protected def isServiceEnabled(serviceName: String): Boolean = {
     val key = providerEnabledConfig.format(serviceName)
 
     deprecatedProviderEnabledConfigs.foreach { pattern =>
@@ -110,32 +192,107 @@ private[spark] class HadoopDelegationTokenManager(
   }
 
   /**
-   * Get delegation token provider for the specified service.
+   * List of file systems for which to obtain delegation tokens. The base implementation
+   * returns just the default file system in the given Hadoop configuration.
    */
-  def getServiceDelegationTokenProvider(service: String): Option[HadoopDelegationTokenProvider] = {
-    delegationTokenProviders.get(service)
+  protected def fileSystemsToAccess(): Set[FileSystem] = {
+    Set(FileSystem.get(hadoopConf))
+  }
+
+  private def scheduleRenewal(delay: Long): Unit = {
+    val _delay = math.max(0, delay)
+    logInfo(s"Scheduling login from keytab in ${UIUtils.formatDuration(delay)}.")
+
+    val renewalTask = new Runnable() {
+      override def run(): Unit = {
+        updateTokensTask()
+      }
+    }
+    renewalExecutor.schedule(renewalTask, _delay, TimeUnit.MILLISECONDS)
   }
 
   /**
-   * Writes delegation tokens to creds.  Delegation tokens are fetched from all registered
-   * providers.
-   *
-   * @param hadoopConf hadoop Configuration
-   * @param creds Credentials that will be updated in place (overwritten)
-   * @return Time after which the fetched delegation tokens should be renewed.
+   * Periodic task to login to the KDC and create new delegation tokens. Re-schedules itself
+   * to fetch the next set of tokens when needed.
    */
-  def obtainDelegationTokens(
-      hadoopConf: Configuration,
-      creds: Credentials): Long = {
-    delegationTokenProviders.values.flatMap { provider =>
-      if (provider.delegationTokensRequired(sparkConf, hadoopConf)) {
-        provider.obtainDelegationTokens(hadoopConf, sparkConf, creds)
+  private def updateTokensTask(): Unit = {
+    try {
+      val freshUGI = doLogin()
+      val creds = obtainTokensAndScheduleRenewal(freshUGI)
+      val tokens = SparkHadoopUtil.get.serialize(creds)
+
+      val driver = driverRef.get()
+      if (driver != null) {
+        logInfo("Updating delegation tokens.")
+        driver.send(UpdateDelegationTokens(tokens))
       } else {
-        logDebug(s"Service ${provider.serviceName} does not require a token." +
-          s" Check your configuration to see if security is disabled or not.")
-        None
+        // This shouldn't really happen, since the driver should register way before tokens expire.
+        logWarning("Delegation tokens close to expiration but no driver has registered yet.")
+        SparkHadoopUtil.get.addDelegationTokens(tokens, sparkConf)
       }
-    }.foldLeft(Long.MaxValue)(math.min)
+    } catch {
+      case e: Exception =>
+        val delay = TimeUnit.SECONDS.toMillis(sparkConf.get(CREDENTIALS_RENEWAL_RETRY_WAIT))
+        logWarning(s"Failed to update tokens, will try again in ${UIUtils.formatDuration(delay)}!" +
+          " If this happens too often tasks will fail.", e)
+        scheduleRenewal(delay)
+    }
   }
-}
 
+  /**
+   * Obtain new delegation tokens from the available providers. Schedules a new task to fetch
+   * new tokens before the new set expires.
+   *
+   * @return Credentials containing the new tokens.
+   */
+  private def obtainTokensAndScheduleRenewal(ugi: UserGroupInformation): Credentials = {
+    ugi.doAs(new PrivilegedExceptionAction[Credentials]() {
+      override def run(): Credentials = {
+        val creds = new Credentials()
+        val nextRenewal = obtainDelegationTokens(creds)
+
+        // Calculate the time when new credentials should be created, based on the configured
+        // ratio.
+        val now = System.currentTimeMillis
+        val ratio = sparkConf.get(CREDENTIALS_RENEWAL_INTERVAL_RATIO)
+        val delay = (ratio * (nextRenewal - now)).toLong
+        scheduleRenewal(delay)
+        creds
+      }
+    })
+  }
+
+  private def doLogin(): UserGroupInformation = {
+    logInfo(s"Attempting to login to KDC using principal: $principal")
+    val ugi = UserGroupInformation.loginUserFromKeytabAndReturnUGI(principal, keytab)
+    logInfo("Successfully logged into KDC.")
+    ugi
+  }
+
+  private def loadProviders(): Map[String, HadoopDelegationTokenProvider] = {
+    val providers = Seq(
+      new HadoopFSDelegationTokenProvider(
+        () => HadoopDelegationTokenManager.this.fileSystemsToAccess())) ++
+      safeCreateProvider(new HiveDelegationTokenProvider) ++
+      safeCreateProvider(new HBaseDelegationTokenProvider) ++
+      safeCreateProvider(new KafkaDelegationTokenProvider)
+
+    // Filter out providers for which spark.security.credentials.{service}.enabled is false.
+    providers
+      .filter { p => isServiceEnabled(p.serviceName) }
+      .map { p => (p.serviceName, p) }
+      .toMap
+  }
+
+  private def safeCreateProvider(
+      createFn: => HadoopDelegationTokenProvider): Option[HadoopDelegationTokenProvider] = {
+    try {
+      Some(createFn)
+    } catch {
+      case t: Throwable =>
+        logDebug(s"Failed to load built in provider.", t)
+        None
+    }
+  }
+
+}
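
obtainTokensAndScheduleRenewal above schedules the next token fetch a configurable fraction of the way through the current tokens' lifetime (75% by default, per CREDENTIALS_RENEWAL_INTERVAL_RATIO and the class comment above). The delay arithmetic alone, with hypothetical numbers:

    // Hypothetical values illustrating the renewal delay computed above.
    val now = System.currentTimeMillis
    val nextRenewal = now + 24L * 60 * 60 * 1000   // tokens valid for another 24 hours
    val ratio = 0.75                               // default renewal interval ratio

    val delay = (ratio * (nextRenewal - now)).toLong
    // delay == 64800000L, i.e. a new set of tokens is fetched after ~18 hours,
    // leaving a 6-hour margin before the current ones expire
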
diff --git a/core/src/main/scala/org/apache/spark/deploy/security/HadoopFSDelegationTokenProvider.scala b/core/src/main/scala/org/apache/spark/deploy/security/HadoopFSDelegationTokenProvider.scala
index 21ca669ea98f0..767b5521e8d7b 100644
--- a/core/src/main/scala/org/apache/spark/deploy/security/HadoopFSDelegationTokenProvider.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/security/HadoopFSDelegationTokenProvider.scala
@@ -30,7 +30,7 @@ import org.apache.spark.{SparkConf, SparkException}
 import org.apache.spark.internal.Logging
 import org.apache.spark.internal.config._
 
-private[deploy] class HadoopFSDelegationTokenProvider(fileSystems: Configuration => Set[FileSystem])
+private[deploy] class HadoopFSDelegationTokenProvider(fileSystems: () => Set[FileSystem])
     extends HadoopDelegationTokenProvider with Logging {
 
   // This tokenRenewalInterval will be set in the first call to obtainDelegationTokens.
@@ -44,8 +44,7 @@ private[deploy] class HadoopFSDelegationTokenProvider(fileSystems: Configuration
       hadoopConf: Configuration,
       sparkConf: SparkConf,
       creds: Credentials): Option[Long] = {
-
-    val fsToGetTokens = fileSystems(hadoopConf)
+    val fsToGetTokens = fileSystems()
     val fetchCreds = fetchDelegationTokens(getTokenRenewer(hadoopConf), fsToGetTokens, creds)
 
     // Get the token renewal interval if it is not set. It will only be called once.
diff --git a/core/src/main/scala/org/apache/spark/deploy/security/KafkaDelegationTokenProvider.scala b/core/src/main/scala/org/apache/spark/deploy/security/KafkaDelegationTokenProvider.scala
new file mode 100644
index 0000000000000..45995be630cc5
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/deploy/security/KafkaDelegationTokenProvider.scala
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.deploy.security
+
+import scala.language.existentials
+import scala.util.control.NonFatal
+
+import org.apache.hadoop.conf.Configuration
+import org.apache.hadoop.security.Credentials
+import org.apache.kafka.common.security.auth.SecurityProtocol.{SASL_PLAINTEXT, SASL_SSL, SSL}
+
+import org.apache.spark.SparkConf
+import org.apache.spark.internal.Logging
+import org.apache.spark.internal.config._
+
+private[security] class KafkaDelegationTokenProvider
+  extends HadoopDelegationTokenProvider with Logging {
+
+  override def serviceName: String = "kafka"
+
+  override def obtainDelegationTokens(
+      hadoopConf: Configuration,
+      sparkConf: SparkConf,
+      creds: Credentials): Option[Long] = {
+    try {
+      logDebug("Attempting to fetch Kafka security token.")
+      val (token, nextRenewalDate) = KafkaTokenUtil.obtainToken(sparkConf)
+      creds.addToken(token.getService, token)
+      return Some(nextRenewalDate)
+    } catch {
+      case NonFatal(e) =>
+        logInfo(s"Failed to get token from service $serviceName", e)
+    }
+    None
+  }
+
+  override def delegationTokensRequired(
+      sparkConf: SparkConf,
+      hadoopConf: Configuration): Boolean = {
+    val protocol = sparkConf.get(Kafka.SECURITY_PROTOCOL)
+    sparkConf.contains(Kafka.BOOTSTRAP_SERVERS) &&
+      (protocol == SASL_SSL.name ||
+        protocol == SSL.name ||
+        protocol == SASL_PLAINTEXT.name)
+  }
+}
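
The delegationTokensRequired check above only triggers token fetching when bootstrap servers are configured and the security protocol actually authenticates to the brokers. A small sketch of that gating logic against a SparkConf, with a made-up broker address:

    import org.apache.spark.SparkConf

    object KafkaTokenRequiredSketch {
      def main(args: Array[String]): Unit = {
        val conf = new SparkConf(loadDefaults = false)
          .set("spark.kafka.bootstrap.servers", "broker1:9093")
          .set("spark.kafka.security.protocol", "SASL_SSL")
        // Tokens are only needed for protocols that authenticate to the brokers.
        val tokenProtocols = Set("SASL_SSL", "SSL", "SASL_PLAINTEXT")
        val required = conf.contains("spark.kafka.bootstrap.servers") &&
          tokenProtocols(conf.get("spark.kafka.security.protocol", "SASL_SSL"))
        println(s"delegation token required: $required")
      }
    }
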
diff --git a/core/src/main/scala/org/apache/spark/deploy/security/KafkaTokenUtil.scala b/core/src/main/scala/org/apache/spark/deploy/security/KafkaTokenUtil.scala
new file mode 100644
index 0000000000000..c890cee59ffe0
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/deploy/security/KafkaTokenUtil.scala
@@ -0,0 +1,202 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.deploy.security
+
+import java.{ util => ju }
+import java.text.SimpleDateFormat
+
+import scala.util.control.NonFatal
+
+import org.apache.hadoop.io.Text
+import org.apache.hadoop.security.token.{Token, TokenIdentifier}
+import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenIdentifier
+import org.apache.kafka.clients.CommonClientConfigs
+import org.apache.kafka.clients.admin.{AdminClient, CreateDelegationTokenOptions}
+import org.apache.kafka.common.config.SaslConfigs
+import org.apache.kafka.common.security.JaasContext
+import org.apache.kafka.common.security.auth.SecurityProtocol.{SASL_PLAINTEXT, SASL_SSL, SSL}
+import org.apache.kafka.common.security.token.delegation.DelegationToken
+
+import org.apache.spark.SparkConf
+import org.apache.spark.internal.Logging
+import org.apache.spark.internal.config._
+
+private[spark] object KafkaTokenUtil extends Logging {
+  val TOKEN_KIND = new Text("KAFKA_DELEGATION_TOKEN")
+  val TOKEN_SERVICE = new Text("kafka.server.delegation.token")
+
+  private[spark] class KafkaDelegationTokenIdentifier extends AbstractDelegationTokenIdentifier {
+    override def getKind: Text = TOKEN_KIND
+  }
+
+  private[security] def obtainToken(sparkConf: SparkConf): (Token[_ <: TokenIdentifier], Long) = {
+    val adminClient = AdminClient.create(createAdminClientProperties(sparkConf))
+    val createDelegationTokenOptions = new CreateDelegationTokenOptions()
+    val createResult = adminClient.createDelegationToken(createDelegationTokenOptions)
+    val token = createResult.delegationToken().get()
+    printToken(token)
+
+    (new Token[KafkaDelegationTokenIdentifier](
+      token.tokenInfo.tokenId.getBytes,
+      token.hmacAsBase64String.getBytes,
+      TOKEN_KIND,
+      TOKEN_SERVICE
+    ), token.tokenInfo.expiryTimestamp)
+  }
+
+  private[security] def createAdminClientProperties(sparkConf: SparkConf): ju.Properties = {
+    val adminClientProperties = new ju.Properties
+
+    val bootstrapServers = sparkConf.get(Kafka.BOOTSTRAP_SERVERS)
+    require(bootstrapServers.nonEmpty, s"Tried to obtain kafka delegation token but bootstrap " +
+      "servers not configured.")
+    adminClientProperties.put(CommonClientConfigs.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers.get)
+
+    val protocol = sparkConf.get(Kafka.SECURITY_PROTOCOL)
+    adminClientProperties.put(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG, protocol)
+    protocol match {
+      case SASL_SSL.name =>
+        setTrustStoreProperties(sparkConf, adminClientProperties)
+
+      case SSL.name =>
+        setTrustStoreProperties(sparkConf, adminClientProperties)
+        setKeyStoreProperties(sparkConf, adminClientProperties)
+        logWarning("Obtaining kafka delegation token with SSL protocol. Please " +
+          "configure 2-way authentication on the broker side.")
+
+      case SASL_PLAINTEXT.name =>
+        logWarning("Obtaining kafka delegation token through plain communication channel. Please " +
+          "consider the security impact.")
+    }
+
+    // There are multiple ways to log in, applied in the following order:
+    // - JVM global security configuration provided -> try to log in with the JVM global security
+    //   configuration, which can be set for example with 'java.security.auth.login.config'.
+    //   No additional parameters are needed for this.
+    // - Keytab provided -> try to log in with the Kerberos module and keytab using Kafka's
+    //   dynamic JAAS configuration.
+    // - Keytab not provided -> try to log in with the Kerberos module and ticket cache using
+    //   Kafka's dynamic JAAS configuration.
+    // The Kafka client cannot reuse a subject that is already logged in to the KDC from the JVM
+    // (see KAFKA-7677).
+    if (isGlobalJaasConfigurationProvided) {
+      logDebug("JVM global security configuration detected, using it for login.")
+    } else {
+      adminClientProperties.put(SaslConfigs.SASL_MECHANISM, SaslConfigs.GSSAPI_MECHANISM)
+      if (sparkConf.contains(KEYTAB)) {
+        logDebug("Keytab detected, using it for login.")
+        val jaasParams = getKeytabJaasParams(sparkConf)
+        adminClientProperties.put(SaslConfigs.SASL_JAAS_CONFIG, jaasParams)
+      } else {
+        logDebug("Using ticket cache for login.")
+        val jaasParams = getTicketCacheJaasParams(sparkConf)
+        adminClientProperties.put(SaslConfigs.SASL_JAAS_CONFIG, jaasParams)
+      }
+    }
+
+    adminClientProperties
+  }
+
+  def isGlobalJaasConfigurationProvided: Boolean = {
+    try {
+      JaasContext.loadClientContext(ju.Collections.emptyMap[String, Object]())
+      true
+    } catch {
+      case NonFatal(_) => false
+    }
+  }
+
+  private def setTrustStoreProperties(sparkConf: SparkConf, properties: ju.Properties): Unit = {
+    sparkConf.get(Kafka.TRUSTSTORE_LOCATION).foreach { truststoreLocation =>
+      properties.put("ssl.truststore.location", truststoreLocation)
+    }
+    sparkConf.get(Kafka.TRUSTSTORE_PASSWORD).foreach { truststorePassword =>
+      properties.put("ssl.truststore.password", truststorePassword)
+    }
+  }
+
+  private def setKeyStoreProperties(sparkConf: SparkConf, properties: ju.Properties): Unit = {
+    sparkConf.get(Kafka.KEYSTORE_LOCATION).foreach { keystoreLocation =>
+      properties.put("ssl.keystore.location", keystoreLocation)
+    }
+    sparkConf.get(Kafka.KEYSTORE_PASSWORD).foreach { keystorePassword =>
+      properties.put("ssl.keystore.password", keystorePassword)
+    }
+    sparkConf.get(Kafka.KEY_PASSWORD).foreach { keyPassword =>
+      properties.put("ssl.key.password", keyPassword)
+    }
+  }
+
+  private[security] def getKeytabJaasParams(sparkConf: SparkConf): String = {
+    val serviceName = sparkConf.get(Kafka.KERBEROS_SERVICE_NAME)
+    require(serviceName.nonEmpty, "Kerberos service name must be defined")
+
+    val params =
+      s"""
+      |${getKrb5LoginModuleName} required
+      | useKeyTab=true
+      | serviceName="${serviceName.get}"
+      | keyTab="${sparkConf.get(KEYTAB).get}"
+      | principal="${sparkConf.get(PRINCIPAL).get}";
+      """.stripMargin.replace("\n", "")
+    logDebug(s"Krb keytab JAAS params: $params")
+    params
+  }
+
+  def getTicketCacheJaasParams(sparkConf: SparkConf): String = {
+    val serviceName = sparkConf.get(Kafka.KERBEROS_SERVICE_NAME)
+    require(serviceName.nonEmpty, "Kerberos service name must be defined")
+
+    val params =
+      s"""
+      |${getKrb5LoginModuleName} required
+      | useTicketCache=true
+      | serviceName="${serviceName.get}";
+      """.stripMargin.replace("\n", "")
+    logDebug(s"Krb ticket cache JAAS params: $params")
+    params
+  }
+
+  /**
+   * The Krb5LoginModule package varies across JVMs.
+   * Please see Hadoop UserGroupInformation for further details.
+   */
+  private def getKrb5LoginModuleName(): String = {
+    if (System.getProperty("java.vendor").contains("IBM")) {
+      "com.ibm.security.auth.module.Krb5LoginModule"
+    } else {
+      "com.sun.security.auth.module.Krb5LoginModule"
+    }
+  }
+
+  private def printToken(token: DelegationToken): Unit = {
+    if (log.isDebugEnabled) {
+      val dateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm")
+      logDebug("%-15s %-30s %-15s %-25s %-15s %-15s %-15s".format(
+        "TOKENID", "HMAC", "OWNER", "RENEWERS", "ISSUEDATE", "EXPIRYDATE", "MAXDATE"))
+      val tokenInfo = token.tokenInfo
+      logDebug("%-15s [hidden] %-15s %-25s %-15s %-15s %-15s".format(
+        tokenInfo.tokenId,
+        tokenInfo.owner,
+        tokenInfo.renewersAsString,
+        dateFormat.format(tokenInfo.issueTimestamp),
+        dateFormat.format(tokenInfo.expiryTimestamp),
+        dateFormat.format(tokenInfo.maxTimestamp)))
+    }
+  }
+}
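
To make the JAAS handling above concrete, here is a standalone sketch that builds the same single-line ticket-cache JAAS string as getTicketCacheJaasParams; the service name "kafka" is an assumed value rather than something mandated by the patch:

    object TicketCacheJaasSketch {
      def main(args: Array[String]): Unit = {
        // Mirrors getKrb5LoginModuleName: the login module package differs on IBM JVMs.
        val krb5Module =
          if (System.getProperty("java.vendor").contains("IBM")) {
            "com.ibm.security.auth.module.Krb5LoginModule"
          } else {
            "com.sun.security.auth.module.Krb5LoginModule"
          }
        val params =
          s"""
          |$krb5Module required
          | useTicketCache=true
          | serviceName="kafka";
          """.stripMargin.replace("\n", "")
        // The result is a one-line string suitable for Kafka's sasl.jaas.config property.
        println(params)
      }
    }
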
diff --git a/core/src/main/scala/org/apache/spark/executor/Executor.scala b/core/src/main/scala/org/apache/spark/executor/Executor.scala
index 86b19578037df..a30a501e5d4a1 100644
--- a/core/src/main/scala/org/apache/spark/executor/Executor.scala
+++ b/core/src/main/scala/org/apache/spark/executor/Executor.scala
@@ -28,6 +28,7 @@ import javax.annotation.concurrent.GuardedBy
 
 import scala.collection.JavaConverters._
 import scala.collection.mutable.{ArrayBuffer, HashMap, Map}
+import scala.concurrent.duration._
 import scala.util.control.NonFatal
 
 import com.google.common.util.concurrent.ThreadFactoryBuilder
@@ -38,7 +39,7 @@ import org.apache.spark.internal.Logging
 import org.apache.spark.internal.config._
 import org.apache.spark.memory.{SparkOutOfMemoryError, TaskMemoryManager}
 import org.apache.spark.rpc.RpcTimeout
-import org.apache.spark.scheduler.{DirectTaskResult, IndirectTaskResult, Task, TaskDescription}
+import org.apache.spark.scheduler._
 import org.apache.spark.shuffle.FetchFailedException
 import org.apache.spark.storage.{StorageLevel, TaskResultBlockId}
 import org.apache.spark.util._
@@ -120,7 +121,7 @@ private[spark] class Executor(
   }
 
   // Whether to load classes in user jars before those in Spark jars
-  private val userClassPathFirst = conf.getBoolean("spark.executor.userClassPathFirst", false)
+  private val userClassPathFirst = conf.get(EXECUTOR_USER_CLASS_PATH_FIRST)
 
   // Whether to monitor killed / interrupted tasks
   private val taskReaperEnabled = conf.getBoolean("spark.task.reaper.enabled", false)
@@ -136,6 +137,29 @@ private[spark] class Executor(
   // for fetching remote cached RDD blocks, so need to make sure it uses the right classloader too.
   env.serializerManager.setDefaultClassLoader(replClassLoader)
 
+  private val executorPlugins: Seq[ExecutorPlugin] = {
+    val pluginNames = conf.get(EXECUTOR_PLUGINS)
+    if (pluginNames.nonEmpty) {
+      logDebug(s"Initializing the following plugins: ${pluginNames.mkString(", ")}")
+
+      // Plugins need to load using a class loader that includes the executor's user classpath
+      val pluginList: Seq[ExecutorPlugin] =
+        Utils.withContextClassLoader(replClassLoader) {
+          val plugins = Utils.loadExtensions(classOf[ExecutorPlugin], pluginNames, conf)
+          plugins.foreach { plugin =>
+            plugin.init()
+            logDebug(s"Successfully loaded plugin " + plugin.getClass().getCanonicalName())
+          }
+          plugins
+        }
+
+      logDebug("Finished initializing plugins")
+      pluginList
+    } else {
+      Nil
+    }
+  }
+
   // Max size of direct result. If task result is bigger than this, we use the block manager
   // to send the result back.
   private val maxDirectResultSize = Math.min(
@@ -147,19 +171,34 @@ private[spark] class Executor(
   // Maintains the list of running tasks.
   private val runningTasks = new ConcurrentHashMap[Long, TaskRunner]
 
-  // Executor for the heartbeat task.
-  private val heartbeater = ThreadUtils.newDaemonSingleThreadScheduledExecutor("driver-heartbeater")
-
-  // must be initialized before running startDriverHeartbeat()
-  private val heartbeatReceiverRef =
-    RpcUtils.makeDriverRef(HeartbeatReceiver.ENDPOINT_NAME, conf, env.rpcEnv)
-
   /**
    * When an executor is unable to send heartbeats to the driver more than `HEARTBEAT_MAX_FAILURES`
    * times, it should kill itself. The default value is 60. It means we will retry to send
    * heartbeats about 10 minutes because the heartbeat interval is 10s.
    */
-  private val HEARTBEAT_MAX_FAILURES = conf.getInt("spark.executor.heartbeat.maxFailures", 60)
+  private val HEARTBEAT_MAX_FAILURES = conf.get(EXECUTOR_HEARTBEAT_MAX_FAILURES)
+
+  /**
+   * Whether to drop empty accumulators from heartbeats sent to the driver. Including the empty
+   * accumulators (that satisfy isZero) can make the size of the heartbeat message very large.
+   */
+  private val HEARTBEAT_DROP_ZEROES = conf.get(EXECUTOR_HEARTBEAT_DROP_ZERO_ACCUMULATOR_UPDATES)
+
+  /**
+   * Interval to send heartbeats, in milliseconds
+   */
+  private val HEARTBEAT_INTERVAL_MS = conf.get(EXECUTOR_HEARTBEAT_INTERVAL)
+
+  // Executor for the heartbeat task.
+  private val heartbeater = new Heartbeater(
+    env.memoryManager,
+    () => Executor.this.reportHeartBeat(),
+    "executor-heartbeater",
+    HEARTBEAT_INTERVAL_MS)
+
+  // must be initialized before running startDriverHeartbeat()
+  private val heartbeatReceiverRef =
+    RpcUtils.makeDriverRef(HeartbeatReceiver.ENDPOINT_NAME, conf, env.rpcEnv)
 
   /**
    * Count the failure times of heartbeat. It should only be accessed in the heartbeat thread. Each
@@ -167,7 +206,7 @@ private[spark] class Executor(
    */
   private var heartbeatFailures = 0
 
-  startDriverHeartbeater()
+  heartbeater.start()
 
   private[executor] def numRunningTasks: Int = runningTasks.size()
 
@@ -216,9 +255,25 @@ private[spark] class Executor(
 
   def stop(): Unit = {
     env.metricsSystem.report()
-    heartbeater.shutdown()
-    heartbeater.awaitTermination(10, TimeUnit.SECONDS)
+    try {
+      heartbeater.stop()
+    } catch {
+      case NonFatal(e) =>
+        logWarning("Unable to stop heartbeater", e)
+     }
     threadPool.shutdown()
+
+    // Notify plugins that executor is shutting down so they can terminate cleanly
+    Utils.withContextClassLoader(replClassLoader) {
+      executorPlugins.foreach { plugin =>
+        try {
+          plugin.shutdown()
+        } catch {
+          case e: Exception =>
+            logWarning("Plugin " + plugin.getClass().getCanonicalName() + " shutdown failed", e)
+        }
+      }
+    }
     if (!isLocal) {
       env.stop()
     }
@@ -464,7 +519,7 @@ private[spark] class Executor(
         executorSource.METRIC_OUTPUT_BYTES_WRITTEN
           .inc(task.metrics.outputMetrics.bytesWritten)
         executorSource.METRIC_OUTPUT_RECORDS_WRITTEN
-          .inc(task.metrics.inputMetrics.recordsRead)
+          .inc(task.metrics.outputMetrics.recordsWritten)
         executorSource.METRIC_RESULT_SIZE.inc(task.metrics.resultSize)
         executorSource.METRIC_DISK_BYTES_SPILLED.inc(task.metrics.diskBytesSpilled)
         executorSource.METRIC_MEMORY_BYTES_SPILLED.inc(task.metrics.memoryBytesSpilled)
@@ -787,18 +842,28 @@ private[spark] class Executor(
     val accumUpdates = new ArrayBuffer[(Long, Seq[AccumulatorV2[_, _]])]()
     val curGCTime = computeTotalGcTime()
 
+    // get executor level memory metrics
+    val executorUpdates = heartbeater.getCurrentMetrics()
+
     for (taskRunner <- runningTasks.values().asScala) {
       if (taskRunner.task != null) {
         taskRunner.task.metrics.mergeShuffleReadMetrics()
         taskRunner.task.metrics.setJvmGCTime(curGCTime - taskRunner.startGCTime)
-        accumUpdates += ((taskRunner.taskId, taskRunner.task.metrics.accumulators()))
+        val accumulatorsToReport =
+          if (HEARTBEAT_DROP_ZEROES) {
+            taskRunner.task.metrics.accumulators().filterNot(_.isZero)
+          } else {
+            taskRunner.task.metrics.accumulators()
+          }
+        accumUpdates += ((taskRunner.taskId, accumulatorsToReport))
       }
     }
 
-    val message = Heartbeat(executorId, accumUpdates.toArray, env.blockManager.blockManagerId)
+    val message = Heartbeat(executorId, accumUpdates.toArray, env.blockManager.blockManagerId,
+      executorUpdates)
     try {
       val response = heartbeatReceiverRef.askSync[HeartbeatResponse](
-          message, RpcTimeout(conf, "spark.executor.heartbeatInterval", "10s"))
+        message, new RpcTimeout(HEARTBEAT_INTERVAL_MS.millis, EXECUTOR_HEARTBEAT_INTERVAL.key))
       if (response.reregisterBlockManager) {
         logInfo("Told to re-register on heartbeat")
         env.blockManager.reregister()
@@ -815,21 +880,6 @@ private[spark] class Executor(
         }
     }
   }
-
-  /**
-   * Schedules a task to report heartbeat and partial metrics for active tasks to driver.
-   */
-  private def startDriverHeartbeater(): Unit = {
-    val intervalMs = conf.getTimeAsMs("spark.executor.heartbeatInterval", "10s")
-
-    // Wait a random interval so the heartbeats don't end up in sync
-    val initialDelay = intervalMs + (math.random * intervalMs).asInstanceOf[Int]
-
-    val heartbeatTask = new Runnable() {
-      override def run(): Unit = Utils.logUncaughtExceptions(reportHeartBeat())
-    }
-    heartbeater.scheduleAtFixedRate(heartbeatTask, initialDelay, intervalMs, TimeUnit.MILLISECONDS)
-  }
 }
 
 private[spark] object Executor {
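
The plugin-loading block added to the executor above instantiates the classes named in spark.executor.plugins with the executor's user classloader, calls init() on each, and calls shutdown() when the executor stops. A minimal sketch of such a plugin, assuming the ExecutorPlugin interface shipped alongside this change lives at org.apache.spark.ExecutorPlugin; the package and class names here are hypothetical:

    package com.example.plugins  // hypothetical package

    import org.apache.spark.ExecutorPlugin

    class AuditingExecutorPlugin extends ExecutorPlugin {
      // Called once per executor JVM right after the plugin is loaded.
      override def init(): Unit = println("executor plugin initialized")

      // Called from Executor.stop() so the plugin can release its resources.
      override def shutdown(): Unit = println("executor plugin shut down")
    }

It would be enabled with something like --conf spark.executor.plugins=com.example.plugins.AuditingExecutorPlugin, provided the class is on the executor classpath.
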
diff --git a/core/src/main/scala/org/apache/spark/executor/ExecutorMetrics.scala b/core/src/main/scala/org/apache/spark/executor/ExecutorMetrics.scala
new file mode 100644
index 0000000000000..1befd27de1cba
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/executor/ExecutorMetrics.scala
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.executor
+
+import org.apache.spark.annotation.DeveloperApi
+import org.apache.spark.metrics.ExecutorMetricType
+
+/**
+ * :: DeveloperApi ::
+ * Metrics tracked for executors and the driver.
+ *
+ * Executor-level metrics are sent from each executor to the driver as part of the Heartbeat.
+ */
+@DeveloperApi
+class ExecutorMetrics private[spark] extends Serializable {
+
+  // Metrics are indexed by ExecutorMetricType.values
+  private val metrics = new Array[Long](ExecutorMetricType.values.length)
+
+  // the first element is initialized to -1, indicating that the values for the array
+  // haven't been set yet.
+  metrics(0) = -1
+
+  /** Returns the value for the specified metricType. */
+  def getMetricValue(metricType: ExecutorMetricType): Long = {
+    metrics(ExecutorMetricType.metricIdxMap(metricType))
+  }
+
+  /** Returns true if the values for the metrics have been set, false otherwise. */
+  def isSet(): Boolean = metrics(0) > -1
+
+  private[spark] def this(metrics: Array[Long]) {
+    this()
+    Array.copy(metrics, 0, this.metrics, 0, Math.min(metrics.size, this.metrics.size))
+  }
+
+  /**
+   * Constructor: create the ExecutorMetrics with the values specified.
+   *
+   * @param executorMetrics map of executor metric name to value
+   */
+  private[spark] def this(executorMetrics: Map[String, Long]) {
+    this()
+    (0 until ExecutorMetricType.values.length).foreach { idx =>
+      metrics(idx) = executorMetrics.getOrElse(ExecutorMetricType.values(idx).name, 0L)
+    }
+  }
+
+  /**
+   * Compare the specified executor metrics values with the current executor metric values,
+   * and update the value for any metrics where the new value for the metric is larger.
+   *
+   * @param executorMetrics the executor metrics to compare
+   * @return if there is a new peak value for any metric
+   */
+  private[spark] def compareAndUpdatePeakValues(executorMetrics: ExecutorMetrics): Boolean = {
+    var updated = false
+
+    (0 until ExecutorMetricType.values.length).foreach { idx =>
+       if (executorMetrics.metrics(idx) > metrics(idx)) {
+        updated = true
+        metrics(idx) = executorMetrics.metrics(idx)
+      }
+    }
+    updated
+  }
+}
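
compareAndUpdatePeakValues above keeps a running per-metric maximum and reports whether anything grew. The same idea on a plain Array[Long], as a self-contained sketch (the real class keeps one slot per ExecutorMetricType):

    object PeakUpdateSketch {
      def compareAndUpdatePeaks(peaks: Array[Long], current: Array[Long]): Boolean = {
        var updated = false
        current.indices.foreach { idx =>
          if (current(idx) > peaks(idx)) {
            peaks(idx) = current(idx)
            updated = true
          }
        }
        updated
      }

      def main(args: Array[String]): Unit = {
        val peaks = Array(100L, 50L, 0L)
        println(compareAndUpdatePeaks(peaks, Array(90L, 75L, 10L)))  // true: two peaks rose
        println(peaks.mkString(", "))                                // 100, 75, 10
      }
    }
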
diff --git a/core/src/main/scala/org/apache/spark/executor/ShuffleReadMetrics.scala b/core/src/main/scala/org/apache/spark/executor/ShuffleReadMetrics.scala
index 4be395c8358b2..12c4b8f67f71c 100644
--- a/core/src/main/scala/org/apache/spark/executor/ShuffleReadMetrics.scala
+++ b/core/src/main/scala/org/apache/spark/executor/ShuffleReadMetrics.scala
@@ -18,6 +18,7 @@
 package org.apache.spark.executor
 
 import org.apache.spark.annotation.DeveloperApi
+import org.apache.spark.shuffle.ShuffleReadMetricsReporter
 import org.apache.spark.util.LongAccumulator
 
 
@@ -123,12 +124,13 @@ class ShuffleReadMetrics private[spark] () extends Serializable {
   }
 }
 
+
 /**
  * A temporary shuffle read metrics holder that is used to collect shuffle read metrics for each
  * shuffle dependency, and all temporary metrics will be merged into the [[ShuffleReadMetrics]] at
  * last.
  */
-private[spark] class TempShuffleReadMetrics {
+private[spark] class TempShuffleReadMetrics extends ShuffleReadMetricsReporter {
   private[this] var _remoteBlocksFetched = 0L
   private[this] var _localBlocksFetched = 0L
   private[this] var _remoteBytesRead = 0L
@@ -137,13 +139,13 @@ private[spark] class TempShuffleReadMetrics {
   private[this] var _fetchWaitTime = 0L
   private[this] var _recordsRead = 0L
 
-  def incRemoteBlocksFetched(v: Long): Unit = _remoteBlocksFetched += v
-  def incLocalBlocksFetched(v: Long): Unit = _localBlocksFetched += v
-  def incRemoteBytesRead(v: Long): Unit = _remoteBytesRead += v
-  def incRemoteBytesReadToDisk(v: Long): Unit = _remoteBytesReadToDisk += v
-  def incLocalBytesRead(v: Long): Unit = _localBytesRead += v
-  def incFetchWaitTime(v: Long): Unit = _fetchWaitTime += v
-  def incRecordsRead(v: Long): Unit = _recordsRead += v
+  override def incRemoteBlocksFetched(v: Long): Unit = _remoteBlocksFetched += v
+  override def incLocalBlocksFetched(v: Long): Unit = _localBlocksFetched += v
+  override def incRemoteBytesRead(v: Long): Unit = _remoteBytesRead += v
+  override def incRemoteBytesReadToDisk(v: Long): Unit = _remoteBytesReadToDisk += v
+  override def incLocalBytesRead(v: Long): Unit = _localBytesRead += v
+  override def incFetchWaitTime(v: Long): Unit = _fetchWaitTime += v
+  override def incRecordsRead(v: Long): Unit = _recordsRead += v
 
   def remoteBlocksFetched: Long = _remoteBlocksFetched
   def localBlocksFetched: Long = _localBlocksFetched
diff --git a/core/src/main/scala/org/apache/spark/executor/ShuffleWriteMetrics.scala b/core/src/main/scala/org/apache/spark/executor/ShuffleWriteMetrics.scala
index ada2e1bc08593..d0b0e7da079c9 100644
--- a/core/src/main/scala/org/apache/spark/executor/ShuffleWriteMetrics.scala
+++ b/core/src/main/scala/org/apache/spark/executor/ShuffleWriteMetrics.scala
@@ -18,6 +18,7 @@
 package org.apache.spark.executor
 
 import org.apache.spark.annotation.DeveloperApi
+import org.apache.spark.shuffle.ShuffleWriteMetricsReporter
 import org.apache.spark.util.LongAccumulator
 
 
@@ -27,7 +28,7 @@ import org.apache.spark.util.LongAccumulator
  * Operations are not thread-safe.
  */
 @DeveloperApi
-class ShuffleWriteMetrics private[spark] () extends Serializable {
+class ShuffleWriteMetrics private[spark] () extends ShuffleWriteMetricsReporter with Serializable {
   private[executor] val _bytesWritten = new LongAccumulator
   private[executor] val _recordsWritten = new LongAccumulator
   private[executor] val _writeTime = new LongAccumulator
@@ -47,23 +48,13 @@ class ShuffleWriteMetrics private[spark] () extends Serializable {
    */
   def writeTime: Long = _writeTime.sum
 
-  private[spark] def incBytesWritten(v: Long): Unit = _bytesWritten.add(v)
-  private[spark] def incRecordsWritten(v: Long): Unit = _recordsWritten.add(v)
-  private[spark] def incWriteTime(v: Long): Unit = _writeTime.add(v)
-  private[spark] def decBytesWritten(v: Long): Unit = {
+  private[spark] override def incBytesWritten(v: Long): Unit = _bytesWritten.add(v)
+  private[spark] override def incRecordsWritten(v: Long): Unit = _recordsWritten.add(v)
+  private[spark] override def incWriteTime(v: Long): Unit = _writeTime.add(v)
+  private[spark] override def decBytesWritten(v: Long): Unit = {
     _bytesWritten.setValue(bytesWritten - v)
   }
-  private[spark] def decRecordsWritten(v: Long): Unit = {
+  private[spark] override def decRecordsWritten(v: Long): Unit = {
     _recordsWritten.setValue(recordsWritten - v)
   }
-
-  // Legacy methods for backward compatibility.
-  // TODO: remove these once we make this class private.
-  @deprecated("use bytesWritten instead", "2.0.0")
-  def shuffleBytesWritten: Long = bytesWritten
-  @deprecated("use writeTime instead", "2.0.0")
-  def shuffleWriteTime: Long = writeTime
-  @deprecated("use recordsWritten instead", "2.0.0")
-  def shuffleRecordsWritten: Long = recordsWritten
-
 }
diff --git a/core/src/main/scala/org/apache/spark/input/PortableDataStream.scala b/core/src/main/scala/org/apache/spark/input/PortableDataStream.scala
index ab020aaf6fa4f..5b33c110154d6 100644
--- a/core/src/main/scala/org/apache/spark/input/PortableDataStream.scala
+++ b/core/src/main/scala/org/apache/spark/input/PortableDataStream.scala
@@ -52,6 +52,18 @@ private[spark] abstract class StreamFileInputFormat[T]
     val totalBytes = files.filterNot(_.isDirectory).map(_.getLen + openCostInBytes).sum
     val bytesPerCore = totalBytes / defaultParallelism
     val maxSplitSize = Math.min(defaultMaxSplitBytes, Math.max(openCostInBytes, bytesPerCore))
+
+    // For small files we need to ensure the min split size per node & rack <= maxSplitSize
+    val jobConfig = context.getConfiguration
+    val minSplitSizePerNode = jobConfig.getLong(CombineFileInputFormat.SPLIT_MINSIZE_PERNODE, 0L)
+    val minSplitSizePerRack = jobConfig.getLong(CombineFileInputFormat.SPLIT_MINSIZE_PERRACK, 0L)
+
+    if (maxSplitSize < minSplitSizePerNode) {
+      super.setMinSplitSizeNode(maxSplitSize)
+    }
+    if (maxSplitSize < minSplitSizePerRack) {
+      super.setMinSplitSizeRack(maxSplitSize)
+    }
     super.setMaxSplitSize(maxSplitSize)
   }
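
The clamping added above exists because CombineFileInputFormat rejects configurations where the per-node or per-rack minimum split size exceeds the maximum split size, which can happen for small inputs. A small arithmetic sketch with made-up byte counts:

    object SplitSizeClampSketch {
      def main(args: Array[String]): Unit = {
        val defaultMaxSplitBytes = 128L * 1024 * 1024
        val openCostInBytes = 4L * 1024 * 1024
        val totalBytes = 40L * 1024 * 1024   // a handful of small files
        val defaultParallelism = 8
        val bytesPerCore = totalBytes / defaultParallelism
        val maxSplitSize = math.min(defaultMaxSplitBytes, math.max(openCostInBytes, bytesPerCore))

        // If the configured per-node minimum exceeds maxSplitSize, it is lowered to match.
        val minSplitSizePerNode = 64L * 1024 * 1024
        val effectiveMinPerNode = math.min(minSplitSizePerNode, maxSplitSize)
        println(s"maxSplitSize=$maxSplitSize, effectiveMinPerNode=$effectiveMinPerNode")
      }
    }
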
 
diff --git a/core/src/main/scala/org/apache/spark/internal/Logging.scala b/core/src/main/scala/org/apache/spark/internal/Logging.scala
index c0d709ad25f29..00db9af846ab9 100644
--- a/core/src/main/scala/org/apache/spark/internal/Logging.scala
+++ b/core/src/main/scala/org/apache/spark/internal/Logging.scala
@@ -17,7 +17,11 @@
 
 package org.apache.spark.internal
 
-import org.apache.log4j.{Level, LogManager, PropertyConfigurator}
+import java.util.concurrent.ConcurrentHashMap
+
+import scala.collection.JavaConverters._
+
+import org.apache.log4j._
 import org.slf4j.{Logger, LoggerFactory}
 import org.slf4j.impl.StaticLoggerBinder
 
@@ -143,13 +147,25 @@ trait Logging {
         // overriding the root logger's config if they're different.
         val replLogger = LogManager.getLogger(logName)
         val replLevel = Option(replLogger.getLevel()).getOrElse(Level.WARN)
+        // Update the consoleAppender threshold to replLevel
         if (replLevel != rootLogger.getEffectiveLevel()) {
           if (!silent) {
             System.err.printf("Setting default log level to \"%s\".\n", replLevel)
             System.err.println("To adjust logging level use sc.setLogLevel(newLevel). " +
               "For SparkR, use setLogLevel(newLevel).")
           }
-          rootLogger.setLevel(replLevel)
+          rootLogger.getAllAppenders().asScala.foreach {
+            case ca: ConsoleAppender =>
+              Option(ca.getThreshold()) match {
+                case Some(t) =>
+                  Logging.consoleAppenderToThreshold.put(ca, t)
+                  if (!t.isGreaterOrEqual(replLevel)) {
+                    ca.setThreshold(replLevel)
+                  }
+                case None => ca.setThreshold(replLevel)
+              }
+            case _ => // no-op
+          }
         }
       }
       // scalastyle:on println
@@ -166,6 +182,7 @@ private[spark] object Logging {
   @volatile private var initialized = false
   @volatile private var defaultRootLevel: Level = null
   @volatile private var defaultSparkLog4jConfig = false
+  private val consoleAppenderToThreshold = new ConcurrentHashMap[ConsoleAppender, Priority]()
 
   val initLock = new Object()
   try {
@@ -192,7 +209,13 @@ private[spark] object Logging {
         defaultSparkLog4jConfig = false
         LogManager.resetConfiguration()
       } else {
-        LogManager.getRootLogger().setLevel(defaultRootLevel)
+        val rootLogger = LogManager.getRootLogger()
+        rootLogger.setLevel(defaultRootLevel)
+        rootLogger.getAllAppenders().asScala.foreach {
+          case ca: ConsoleAppender =>
+            ca.setThreshold(consoleAppenderToThreshold.get(ca))
+          case _ => // no-op
+        }
       }
     }
     this.initialized = false
diff --git a/core/src/main/scala/org/apache/spark/deploy/history/config.scala b/core/src/main/scala/org/apache/spark/internal/config/History.scala
similarity index 77%
rename from core/src/main/scala/org/apache/spark/deploy/history/config.scala
rename to core/src/main/scala/org/apache/spark/internal/config/History.scala
index 25ba9edb9e014..b7d8061d26d21 100644
--- a/core/src/main/scala/org/apache/spark/deploy/history/config.scala
+++ b/core/src/main/scala/org/apache/spark/internal/config/History.scala
@@ -15,14 +15,13 @@
  * limitations under the License.
  */
 
-package org.apache.spark.deploy.history
+package org.apache.spark.internal.config
 
 import java.util.concurrent.TimeUnit
 
-import org.apache.spark.internal.config.ConfigBuilder
 import org.apache.spark.network.util.ByteUnit
 
-private[spark] object config {
+private[spark] object History {
 
   val DEFAULT_LOG_DIR = "file:/tmp/spark-events"
 
@@ -30,6 +29,14 @@ private[spark] object config {
     .stringConf
     .createWithDefault(DEFAULT_LOG_DIR)
 
+  val CLEANER_ENABLED = ConfigBuilder("spark.history.fs.cleaner.enabled")
+    .booleanConf
+    .createWithDefault(false)
+
+  val CLEANER_INTERVAL_S = ConfigBuilder("spark.history.fs.cleaner.interval")
+    .timeConf(TimeUnit.SECONDS)
+    .createWithDefaultString("1d")
+
   val MAX_LOG_AGE_S = ConfigBuilder("spark.history.fs.cleaner.maxAge")
     .timeConf(TimeUnit.SECONDS)
     .createWithDefaultString("7d")
@@ -64,4 +71,12 @@ private[spark] object config {
       .bytesConf(ByteUnit.BYTE)
       .createWithDefaultString("1m")
 
+  val DRIVER_LOG_CLEANER_ENABLED = ConfigBuilder("spark.history.fs.driverlog.cleaner.enabled")
+    .fallbackConf(CLEANER_ENABLED)
+
+  val DRIVER_LOG_CLEANER_INTERVAL = ConfigBuilder("spark.history.fs.driverlog.cleaner.interval")
+    .fallbackConf(CLEANER_INTERVAL_S)
+
+  val MAX_DRIVER_LOG_AGE_S = ConfigBuilder("spark.history.fs.driverlog.cleaner.maxAge")
+    .fallbackConf(MAX_LOG_AGE_S)
 }
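
The three driver-log cleaner entries above use fallbackConf, so they default to the corresponding event-log cleaner settings unless set explicitly. A sketch that imitates that resolution with plain SparkConf lookups (the real fallback is handled inside Spark's ConfigEntry machinery):

    import org.apache.spark.SparkConf

    object DriverLogCleanerFallbackSketch {
      def main(args: Array[String]): Unit = {
        val conf = new SparkConf(loadDefaults = false)
          .set("spark.history.fs.cleaner.enabled", "true")
        // Fall back from the driver-log key to the generic cleaner key, then to the default.
        val effective = conf.getOption("spark.history.fs.driverlog.cleaner.enabled")
          .orElse(conf.getOption("spark.history.fs.cleaner.enabled"))
          .getOrElse("false")
        println(s"driver log cleaner enabled: $effective")  // true, via fallback
      }
    }
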
diff --git a/core/src/main/scala/org/apache/spark/internal/config/Kafka.scala b/core/src/main/scala/org/apache/spark/internal/config/Kafka.scala
new file mode 100644
index 0000000000000..85d74c27142ad
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/internal/config/Kafka.scala
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.internal.config
+
+private[spark] object Kafka {
+
+  val BOOTSTRAP_SERVERS =
+    ConfigBuilder("spark.kafka.bootstrap.servers")
+      .doc("A list of comma-separated host/port pairs to use for establishing the initial " +
+        "connection to the Kafka cluster. For further details please see kafka documentation. " +
+        "Only used to obtain delegation token.")
+      .stringConf
+      .createOptional
+
+  val SECURITY_PROTOCOL =
+    ConfigBuilder("spark.kafka.security.protocol")
+      .doc("Protocol used to communicate with brokers. For further details please see kafka " +
+        "documentation. Only used to obtain delegation token.")
+      .stringConf
+      .createWithDefault("SASL_SSL")
+
+  val KERBEROS_SERVICE_NAME =
+    ConfigBuilder("spark.kafka.sasl.kerberos.service.name")
+      .doc("The Kerberos principal name that Kafka runs as. This can be defined either in " +
+        "Kafka's JAAS config or in Kafka's config. For further details please see kafka " +
+        "documentation. Only used to obtain delegation token.")
+      .stringConf
+      .createOptional
+
+  val TRUSTSTORE_LOCATION =
+    ConfigBuilder("spark.kafka.ssl.truststore.location")
+      .doc("The location of the trust store file. For further details please see kafka " +
+        "documentation. Only used to obtain delegation token.")
+      .stringConf
+      .createOptional
+
+  val TRUSTSTORE_PASSWORD =
+    ConfigBuilder("spark.kafka.ssl.truststore.password")
+      .doc("The store password for the trust store file. This is optional for client and only " +
+        "needed if ssl.truststore.location is configured. For further details please see kafka " +
+        "documentation. Only used to obtain delegation token.")
+      .stringConf
+      .createOptional
+
+  val KEYSTORE_LOCATION =
+    ConfigBuilder("spark.kafka.ssl.keystore.location")
+      .doc("The location of the key store file. This is optional for client and can be used for " +
+        "two-way authentication for client. For further details please see kafka documentation. " +
+        "Only used to obtain delegation token.")
+      .stringConf
+      .createOptional
+
+  val KEYSTORE_PASSWORD =
+    ConfigBuilder("spark.kafka.ssl.keystore.password")
+      .doc("The store password for the key store file. This is optional for client and only " +
+        "needed if ssl.keystore.location is configured. For further details please see kafka " +
+        "documentation. Only used to obtain delegation token.")
+      .stringConf
+      .createOptional
+
+  val KEY_PASSWORD =
+    ConfigBuilder("spark.kafka.ssl.key.password")
+      .doc("The password of the private key in the key store file. This is optional for client. " +
+        "For further details please see kafka documentation. Only used to obtain delegation token.")
+      .stringConf
+      .createOptional
+}
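
For reference, these entries would typically be set together when obtaining Kafka delegation tokens; the broker addresses, path, and password below are placeholders, not values from the patch:

    import org.apache.spark.SparkConf

    object KafkaTokenConfSketch {
      def main(args: Array[String]): Unit = {
        val conf = new SparkConf(loadDefaults = false)
          .set("spark.kafka.bootstrap.servers", "broker1:9093,broker2:9093")
          .set("spark.kafka.security.protocol", "SASL_SSL")
          .set("spark.kafka.sasl.kerberos.service.name", "kafka")
          .set("spark.kafka.ssl.truststore.location", "/etc/security/kafka.truststore.jks")
          .set("spark.kafka.ssl.truststore.password", "changeit")
        // Print the effective spark.kafka.* settings.
        conf.getAll.filter(_._1.startsWith("spark.kafka.")).sortBy(_._1).foreach(println)
      }
    }
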
diff --git a/core/src/main/scala/org/apache/spark/status/config.scala b/core/src/main/scala/org/apache/spark/internal/config/Status.scala
similarity index 83%
rename from core/src/main/scala/org/apache/spark/status/config.scala
rename to core/src/main/scala/org/apache/spark/internal/config/Status.scala
index 67801b8f046f4..c56157227f8fc 100644
--- a/core/src/main/scala/org/apache/spark/status/config.scala
+++ b/core/src/main/scala/org/apache/spark/internal/config/Status.scala
@@ -15,13 +15,11 @@
  * limitations under the License.
  */
 
-package org.apache.spark.status
+package org.apache.spark.internal.config
 
 import java.util.concurrent.TimeUnit
 
-import org.apache.spark.internal.config._
-
-private[spark] object config {
+private[spark] object Status {
 
   val ASYNC_TRACKING_ENABLED = ConfigBuilder("spark.appStateStore.asyncTracking.enable")
     .booleanConf
@@ -51,4 +49,10 @@ private[spark] object config {
     .intConf
     .createWithDefault(Int.MaxValue)
 
+  val APP_STATUS_METRICS_ENABLED =
+    ConfigBuilder("spark.app.status.metrics.enabled")
+      .doc("Whether Dropwizard/Codahale metrics " +
+        "will be reported for the status of the running spark app.")
+      .booleanConf
+      .createWithDefault(false)
 }
diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala
index 319e664a19677..646b3881a79b0 100644
--- a/core/src/main/scala/org/apache/spark/internal/config/package.scala
+++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala
@@ -21,6 +21,7 @@ import java.util.concurrent.TimeUnit
 
 import org.apache.spark.launcher.SparkLauncher
 import org.apache.spark.network.util.ByteUnit
+import org.apache.spark.unsafe.array.ByteArrayMethods
 import org.apache.spark.util.Utils
 
 package object config {
@@ -48,6 +49,19 @@ package object config {
     .bytesConf(ByteUnit.MiB)
     .createOptional
 
+  private[spark] val DRIVER_LOG_DFS_DIR =
+    ConfigBuilder("spark.driver.log.dfsDir").stringConf.createOptional
+
+  private[spark] val DRIVER_LOG_LAYOUT =
+    ConfigBuilder("spark.driver.log.layout")
+      .stringConf
+      .createOptional
+
+  private[spark] val DRIVER_LOG_PERSISTTODFS =
+    ConfigBuilder("spark.driver.log.persistToDfs.enabled")
+      .booleanConf
+      .createWithDefault(false)
+
   private[spark] val EVENT_LOG_COMPRESS =
     ConfigBuilder("spark.eventLog.compress")
       .booleanConf
@@ -58,6 +72,11 @@ package object config {
       .booleanConf
       .createWithDefault(false)
 
+  private[spark] val EVENT_LOG_ALLOW_EC =
+    ConfigBuilder("spark.eventLog.allowErasureCoding")
+      .booleanConf
+      .createWithDefault(false)
+
   private[spark] val EVENT_LOG_TESTING =
     ConfigBuilder("spark.eventLog.testing")
       .internal()
@@ -69,15 +88,34 @@ package object config {
     .bytesConf(ByteUnit.KiB)
     .createWithDefaultString("100k")
 
+  private[spark] val EVENT_LOG_STAGE_EXECUTOR_METRICS =
+    ConfigBuilder("spark.eventLog.logStageExecutorMetrics.enabled")
+      .booleanConf
+      .createWithDefault(false)
+
   private[spark] val EVENT_LOG_OVERWRITE =
     ConfigBuilder("spark.eventLog.overwrite").booleanConf.createWithDefault(false)
 
-  private[spark] val EVENT_LOG_CALLSITE_FORM =
-    ConfigBuilder("spark.eventLog.callsite").stringConf.createWithDefault("short")
+  private[spark] val EVENT_LOG_CALLSITE_LONG_FORM =
+    ConfigBuilder("spark.eventLog.longForm.enabled").booleanConf.createWithDefault(false)
 
   private[spark] val EXECUTOR_CLASS_PATH =
     ConfigBuilder(SparkLauncher.EXECUTOR_EXTRA_CLASSPATH).stringConf.createOptional
 
+  private[spark] val EXECUTOR_HEARTBEAT_DROP_ZERO_ACCUMULATOR_UPDATES =
+    ConfigBuilder("spark.executor.heartbeat.dropZeroAccumulatorUpdates")
+      .internal()
+      .booleanConf
+      .createWithDefault(true)
+
+  private[spark] val EXECUTOR_HEARTBEAT_INTERVAL =
+    ConfigBuilder("spark.executor.heartbeatInterval")
+      .timeConf(TimeUnit.MILLISECONDS)
+      .createWithDefaultString("10s")
+
+  private[spark] val EXECUTOR_HEARTBEAT_MAX_FAILURES =
+    ConfigBuilder("spark.executor.heartbeat.maxFailures").internal().intConf.createWithDefault(60)
+
   private[spark] val EXECUTOR_JAVA_OPTIONS =
     ConfigBuilder(SparkLauncher.EXECUTOR_EXTRA_JAVA_OPTIONS).stringConf.createOptional
 
@@ -147,14 +185,18 @@ package object config {
   private[spark] val SHUFFLE_SERVICE_PORT =
     ConfigBuilder("spark.shuffle.service.port").intConf.createWithDefault(7337)
 
-  private[spark] val KEYTAB = ConfigBuilder("spark.yarn.keytab")
+  private[spark] val KEYTAB = ConfigBuilder("spark.kerberos.keytab")
     .doc("Location of user's keytab.")
     .stringConf.createOptional
 
-  private[spark] val PRINCIPAL = ConfigBuilder("spark.yarn.principal")
+  private[spark] val PRINCIPAL = ConfigBuilder("spark.kerberos.principal")
     .doc("Name of the Kerberos principal.")
     .stringConf.createOptional
 
+  private[spark] val KERBEROS_RELOGIN_PERIOD = ConfigBuilder("spark.kerberos.relogin.period")
+    .timeConf(TimeUnit.SECONDS)
+    .createWithDefaultString("1m")
+
   private[spark] val EXECUTOR_INSTANCES = ConfigBuilder("spark.executor.instances")
     .intConf
     .createOptional
@@ -239,7 +281,7 @@ package object config {
   private[spark] val LISTENER_BUS_EVENT_QUEUE_CAPACITY =
     ConfigBuilder("spark.scheduler.listenerbus.eventqueue.capacity")
       .intConf
-      .checkValue(_ > 0, "The capacity of listener bus event queue must not be negative")
+      .checkValue(_ > 0, "The capacity of listener bus event queue must be positive")
       .createWithDefault(10000)
 
   private[spark] val LISTENER_BUS_METRICS_MAX_LISTENER_CLASSES_TIMED =
@@ -387,8 +429,9 @@ package object config {
       .internal()
       .doc("The chunk size in bytes during writing out the bytes of ChunkedByteBuffer.")
       .bytesConf(ByteUnit.BYTE)
-      .checkValue(_ <= Int.MaxValue, "The chunk size during writing out the bytes of" +
-        " ChunkedByteBuffer should not larger than Int.MaxValue.")
+      .checkValue(_ <= ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH,
+        "The chunk size during writing out the bytes of ChunkedByteBuffer should" +
+          s" be less than or equal to ${ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH}.")
       .createWithDefault(64 * 1024 * 1024)
 
   private[spark] val CHECKPOINT_COMPRESS =
@@ -459,8 +502,9 @@ package object config {
         "otherwise specified. These buffers reduce the number of disk seeks and system calls " +
         "made in creating intermediate shuffle files.")
       .bytesConf(ByteUnit.KiB)
-      .checkValue(v => v > 0 && v <= Int.MaxValue / 1024,
-        s"The file buffer size must be greater than 0 and less than ${Int.MaxValue / 1024}.")
+      .checkValue(v => v > 0 && v <= ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH / 1024,
+        s"The file buffer size must be positive and less than or equal to" +
+          s" ${ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH / 1024}.")
       .createWithDefaultString("32k")
 
   private[spark] val SHUFFLE_UNSAFE_FILE_OUTPUT_BUFFER_SIZE =
@@ -468,16 +512,18 @@ package object config {
       .doc("The file system for this buffer size after each partition " +
         "is written in unsafe shuffle writer. In KiB unless otherwise specified.")
       .bytesConf(ByteUnit.KiB)
-      .checkValue(v => v > 0 && v <= Int.MaxValue / 1024,
-        s"The buffer size must be greater than 0 and less than ${Int.MaxValue / 1024}.")
+      .checkValue(v => v > 0 && v <= ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH / 1024,
+        s"The buffer size must be positive and less than or equal to" +
+          s" ${ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH / 1024}.")
       .createWithDefaultString("32k")
 
   private[spark] val SHUFFLE_DISK_WRITE_BUFFER_SIZE =
     ConfigBuilder("spark.shuffle.spill.diskWriteBufferSize")
       .doc("The buffer size, in bytes, to use when writing the sorted records to an on-disk file.")
       .bytesConf(ByteUnit.BYTE)
-      .checkValue(v => v > 0 && v <= Int.MaxValue,
-        s"The buffer size must be greater than 0 and less than ${Int.MaxValue}.")
+      .checkValue(v => v > 12 && v <= ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH,
+        s"The buffer size must be greater than 12 and less than or equal to " +
+          s"${ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH}.")
       .createWithDefault(1024 * 1024)
 
   private[spark] val UNROLL_MEMORY_CHECK_PERIOD =
@@ -524,6 +570,12 @@ package object config {
       .stringConf
       .createOptional
 
+  private[spark] val UI_REQUEST_HEADER_SIZE =
+    ConfigBuilder("spark.ui.requestHeaderSize")
+      .doc("Value for HTTP request header size in bytes.")
+      .bytesConf(ByteUnit.BYTE)
+      .createWithDefaultString("8k")
+
   private[spark] val EXTRA_LISTENERS = ConfigBuilder("spark.extraListeners")
     .doc("Class names of listeners to add to SparkContext during initialization.")
     .stringConf
@@ -580,7 +632,7 @@ package object config {
       .internal()
       .doc("For testing only, controls the size of chunks when memory mapping a file")
       .bytesConf(ByteUnit.BYTE)
-      .createWithDefault(Int.MaxValue)
+      .createWithDefault(ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH)
 
   private[spark] val BARRIER_SYNC_TIMEOUT =
     ConfigBuilder("spark.barrier.sync.timeout")
@@ -592,6 +644,14 @@ package object config {
       .checkValue(v => v > 0, "The value should be a positive time value.")
       .createWithDefaultString("365d")
 
+  private[spark] val UNSCHEDULABLE_TASKSET_TIMEOUT =
+    ConfigBuilder("spark.scheduler.blacklist.unschedulableTaskSetTimeout")
+      .doc("The timeout in seconds to wait to acquire a new executor and schedule a task " +
+        "before aborting a TaskSet which is unschedulable because of being completely blacklisted.")
+      .timeConf(TimeUnit.SECONDS)
+      .checkValue(v => v >= 0, "The value should be a non negative time value.")
+      .createWithDefault(120)
+
   private[spark] val BARRIER_MAX_CONCURRENT_TASKS_CHECK_INTERVAL =
     ConfigBuilder("spark.scheduler.barrier.maxConcurrentTasksCheck.interval")
       .doc("Time in seconds to wait between a max concurrent tasks check failure and the next " +
@@ -618,4 +678,14 @@ package object config {
       .intConf
       .checkValue(v => v > 0, "The max failures should be a positive value.")
       .createWithDefault(40)
+
+  private[spark] val EXECUTOR_PLUGINS =
+    ConfigBuilder("spark.executor.plugins")
+      .doc("Comma-separated list of class names for \"plugins\" implementing " +
+        "org.apache.spark.ExecutorPlugin.  Plugins have the same privileges as any task " +
+        "in a Spark executor.  They can also interfere with task execution and fail in " +
+        "unexpected ways.  So be sure to only use this for trusted plugins.")
+      .stringConf
+      .toSequence
+      .createWithDefault(Nil)
 }
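
Several of the changes above tighten checkValue bounds from Int.MaxValue to ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH. A standalone sketch of the validate-then-default pattern those entries rely on (the builder API itself is Spark-internal; the constant below mirrors MAX_ROUNDED_ARRAY_LENGTH, defined as Integer.MAX_VALUE - 15):

    object CheckValueSketch {
      final case class Entry(key: String, default: Long, check: Long => Boolean, message: String) {
        def resolve(raw: Option[Long]): Long = {
          val value = raw.getOrElse(default)
          require(check(value), s"$key: $message")
          value
        }
      }

      def main(args: Array[String]): Unit = {
        val maxRoundedArrayLength = Int.MaxValue - 15
        val diskWriteBufferSize = Entry(
          "spark.shuffle.spill.diskWriteBufferSize",
          default = 1024L * 1024,
          check = v => v > 12 && v <= maxRoundedArrayLength,
          message = "must be greater than 12 and at most the max rounded array length")
        println(diskWriteBufferSize.resolve(None))       // 1048576 (the default)
        println(diskWriteBufferSize.resolve(Some(64L)))  // 64
      }
    }
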
diff --git a/core/src/main/scala/org/apache/spark/internal/io/HadoopMapReduceCommitProtocol.scala b/core/src/main/scala/org/apache/spark/internal/io/HadoopMapReduceCommitProtocol.scala
index 3e60c50ada59b..7477e03bfaa76 100644
--- a/core/src/main/scala/org/apache/spark/internal/io/HadoopMapReduceCommitProtocol.scala
+++ b/core/src/main/scala/org/apache/spark/internal/io/HadoopMapReduceCommitProtocol.scala
@@ -91,7 +91,7 @@ class HadoopMapReduceCommitProtocol(
   private def stagingDir = new Path(path, ".spark-staging-" + jobId)
 
   protected def setupCommitter(context: TaskAttemptContext): OutputCommitter = {
-    val format = context.getOutputFormatClass.newInstance()
+    val format = context.getOutputFormatClass.getConstructor().newInstance()
     // If OutputFormat is Configurable, we should set conf to it.
     format match {
       case c: Configurable => c.setConf(context.getConfiguration)
diff --git a/core/src/main/scala/org/apache/spark/internal/io/SparkHadoopWriter.scala b/core/src/main/scala/org/apache/spark/internal/io/SparkHadoopWriter.scala
index 9ebd0aa301592..3a58ea816937b 100644
--- a/core/src/main/scala/org/apache/spark/internal/io/SparkHadoopWriter.scala
+++ b/core/src/main/scala/org/apache/spark/internal/io/SparkHadoopWriter.scala
@@ -256,7 +256,7 @@ class HadoopMapRedWriteConfigUtil[K, V: ClassTag](conf: SerializableJobConf)
   private def getOutputFormat(): OutputFormat[K, V] = {
     require(outputFormat != null, "Must call initOutputFormat first.")
 
-    outputFormat.newInstance()
+    outputFormat.getConstructor().newInstance()
   }
 
   // --------------------------------------------------------------------------
@@ -379,7 +379,7 @@ class HadoopMapReduceWriteConfigUtil[K, V: ClassTag](conf: SerializableConfigura
   private def getOutputFormat(): NewOutputFormat[K, V] = {
     require(outputFormat != null, "Must call initOutputFormat first.")
 
-    outputFormat.newInstance()
+    outputFormat.getConstructor().newInstance()
   }
 
   // --------------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala b/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala
index 7722db56ee297..0664c5ac752c1 100644
--- a/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala
+++ b/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala
@@ -154,72 +154,19 @@ class LZFCompressionCodec(conf: SparkConf) extends CompressionCodec {
  */
 @DeveloperApi
 class SnappyCompressionCodec(conf: SparkConf) extends CompressionCodec {
-  val version = SnappyCompressionCodec.version
 
-  override def compressedOutputStream(s: OutputStream): OutputStream = {
-    val blockSize = conf.getSizeAsBytes("spark.io.compression.snappy.blockSize", "32k").toInt
-    new SnappyOutputStreamWrapper(new SnappyOutputStream(s, blockSize))
-  }
-
-  override def compressedInputStream(s: InputStream): InputStream = new SnappyInputStream(s)
-}
-
-/**
- * Object guards against memory leak bug in snappy-java library:
- * (https://github.com/xerial/snappy-java/issues/131).
- * Before a new version of the library, we only call the method once and cache the result.
- */
-private final object SnappyCompressionCodec {
-  private lazy val version: String = try {
+  try {
     Snappy.getNativeLibraryVersion
   } catch {
     case e: Error => throw new IllegalArgumentException(e)
   }
-}
 
-/**
- * Wrapper over `SnappyOutputStream` which guards against write-after-close and double-close
- * issues. See SPARK-7660 for more details. This wrapping can be removed if we upgrade to a version
- * of snappy-java that contains the fix for https://github.com/xerial/snappy-java/issues/107.
- */
-private final class SnappyOutputStreamWrapper(os: SnappyOutputStream) extends OutputStream {
-
-  private[this] var closed: Boolean = false
-
-  override def write(b: Int): Unit = {
-    if (closed) {
-      throw new IOException("Stream is closed")
-    }
-    os.write(b)
-  }
-
-  override def write(b: Array[Byte]): Unit = {
-    if (closed) {
-      throw new IOException("Stream is closed")
-    }
-    os.write(b)
-  }
-
-  override def write(b: Array[Byte], off: Int, len: Int): Unit = {
-    if (closed) {
-      throw new IOException("Stream is closed")
-    }
-    os.write(b, off, len)
-  }
-
-  override def flush(): Unit = {
-    if (closed) {
-      throw new IOException("Stream is closed")
-    }
-    os.flush()
+  override def compressedOutputStream(s: OutputStream): OutputStream = {
+    val blockSize = conf.getSizeAsBytes("spark.io.compression.snappy.blockSize", "32k").toInt
+    new SnappyOutputStream(s, blockSize)
   }
 
-  override def close(): Unit = {
-    if (!closed) {
-      closed = true
-      os.close()
-    }
-  }
+  override def compressedInputStream(s: InputStream): InputStream = new SnappyInputStream(s)
 }
 
 /**
diff --git a/core/src/main/scala/org/apache/spark/memory/MemoryManager.scala b/core/src/main/scala/org/apache/spark/memory/MemoryManager.scala
index 0641adc2ab699..4fde2d0beaa71 100644
--- a/core/src/main/scala/org/apache/spark/memory/MemoryManager.scala
+++ b/core/src/main/scala/org/apache/spark/memory/MemoryManager.scala
@@ -180,6 +180,34 @@ private[spark] abstract class MemoryManager(
     onHeapStorageMemoryPool.memoryUsed + offHeapStorageMemoryPool.memoryUsed
   }
 
+  /**
+   *  On heap execution memory currently in use, in bytes.
+   */
+  final def onHeapExecutionMemoryUsed: Long = synchronized {
+    onHeapExecutionMemoryPool.memoryUsed
+  }
+
+  /**
+   *  Off heap execution memory currently in use, in bytes.
+   */
+  final def offHeapExecutionMemoryUsed: Long = synchronized {
+    offHeapExecutionMemoryPool.memoryUsed
+  }
+
+  /**
+   *  On heap storage memory currently in use, in bytes.
+   */
+  final def onHeapStorageMemoryUsed: Long = synchronized {
+    onHeapStorageMemoryPool.memoryUsed
+  }
+
+  /**
+   *  Off heap storage memory currently in use, in bytes.
+   */
+  final def offHeapStorageMemoryUsed: Long = synchronized {
+    offHeapStorageMemoryPool.memoryUsed
+  }
+
   /**
    * Returns the execution memory consumption, in bytes, for the given task.
    */
diff --git a/core/src/main/scala/org/apache/spark/metrics/ExecutorMetricType.scala b/core/src/main/scala/org/apache/spark/metrics/ExecutorMetricType.scala
new file mode 100644
index 0000000000000..cd10dad25e87b
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/metrics/ExecutorMetricType.scala
@@ -0,0 +1,104 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.metrics
+
+import java.lang.management.{BufferPoolMXBean, ManagementFactory}
+import javax.management.ObjectName
+
+import org.apache.spark.memory.MemoryManager
+
+/**
+ * Executor metric types for executor-level metrics stored in ExecutorMetrics.
+ */
+sealed trait ExecutorMetricType {
+  private[spark] def getMetricValue(memoryManager: MemoryManager): Long
+  private[spark] val name = getClass().getName().stripSuffix("$").split("""\.""").last
+}
+
+private[spark] abstract class MemoryManagerExecutorMetricType(
+    f: MemoryManager => Long) extends ExecutorMetricType {
+  override private[spark] def getMetricValue(memoryManager: MemoryManager): Long = {
+    f(memoryManager)
+  }
+}
+
+private[spark] abstract class MBeanExecutorMetricType(mBeanName: String)
+  extends ExecutorMetricType {
+  private val bean = ManagementFactory.newPlatformMXBeanProxy(
+    ManagementFactory.getPlatformMBeanServer,
+    new ObjectName(mBeanName).toString, classOf[BufferPoolMXBean])
+
+  override private[spark] def getMetricValue(memoryManager: MemoryManager): Long = {
+    bean.getMemoryUsed
+  }
+}
+
+case object JVMHeapMemory extends ExecutorMetricType {
+  override private[spark] def getMetricValue(memoryManager: MemoryManager): Long = {
+    ManagementFactory.getMemoryMXBean.getHeapMemoryUsage().getUsed()
+  }
+}
+
+case object JVMOffHeapMemory extends ExecutorMetricType {
+  override private[spark] def getMetricValue(memoryManager: MemoryManager): Long = {
+    ManagementFactory.getMemoryMXBean.getNonHeapMemoryUsage().getUsed()
+  }
+}
+
+case object OnHeapExecutionMemory extends MemoryManagerExecutorMetricType(
+  _.onHeapExecutionMemoryUsed)
+
+case object OffHeapExecutionMemory extends MemoryManagerExecutorMetricType(
+  _.offHeapExecutionMemoryUsed)
+
+case object OnHeapStorageMemory extends MemoryManagerExecutorMetricType(
+  _.onHeapStorageMemoryUsed)
+
+case object OffHeapStorageMemory extends MemoryManagerExecutorMetricType(
+  _.offHeapStorageMemoryUsed)
+
+case object OnHeapUnifiedMemory extends MemoryManagerExecutorMetricType(
+  (m => m.onHeapExecutionMemoryUsed + m.onHeapStorageMemoryUsed))
+
+case object OffHeapUnifiedMemory extends MemoryManagerExecutorMetricType(
+  (m => m.offHeapExecutionMemoryUsed + m.offHeapStorageMemoryUsed))
+
+case object DirectPoolMemory extends MBeanExecutorMetricType(
+  "java.nio:type=BufferPool,name=direct")
+
+case object MappedPoolMemory extends MBeanExecutorMetricType(
+  "java.nio:type=BufferPool,name=mapped")
+
+private[spark] object ExecutorMetricType {
+  // List of all executor metric types
+  val values = IndexedSeq(
+    JVMHeapMemory,
+    JVMOffHeapMemory,
+    OnHeapExecutionMemory,
+    OffHeapExecutionMemory,
+    OnHeapStorageMemory,
+    OffHeapStorageMemory,
+    OnHeapUnifiedMemory,
+    OffHeapUnifiedMemory,
+    DirectPoolMemory,
+    MappedPoolMemory
+  )
+
+  // Map of executor metric type to its index in values.
+  val metricIdxMap =
+    Map[ExecutorMetricType, Int](ExecutorMetricType.values.zipWithIndex: _*)
+}
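
As a rough illustration of how the pieces above fit together, a metrics sample can be built by mapping over ExecutorMetricType.values, and metricIdxMap recovers a single metric's slot. This is a sketch only and, like the trait itself, would have to live inside the org.apache.spark package because getMetricValue is private[spark]:

  // Sketch: one Long per metric type, ordered exactly like ExecutorMetricType.values.
  def sampleAll(mm: org.apache.spark.memory.MemoryManager): Array[Long] =
    ExecutorMetricType.values.map(_.getMetricValue(mm)).toArray

  // A single metric can then be looked up via its index, e.g.
  //   sampleAll(mm)(ExecutorMetricType.metricIdxMap(JVMHeapMemory))
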
diff --git a/core/src/main/scala/org/apache/spark/metrics/MetricsSystem.scala b/core/src/main/scala/org/apache/spark/metrics/MetricsSystem.scala
index 3457a2632277d..bb7b434e9a113 100644
--- a/core/src/main/scala/org/apache/spark/metrics/MetricsSystem.scala
+++ b/core/src/main/scala/org/apache/spark/metrics/MetricsSystem.scala
@@ -179,7 +179,7 @@ private[spark] class MetricsSystem private (
     sourceConfigs.foreach { kv =>
       val classPath = kv._2.getProperty("class")
       try {
-        val source = Utils.classForName(classPath).newInstance()
+        val source = Utils.classForName(classPath).getConstructor().newInstance()
         registerSource(source.asInstanceOf[Source])
       } catch {
         case e: Exception => logError("Source class " + classPath + " cannot be instantiated", e)
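
The newInstance() replacement above (repeated for the RDD input formats below) follows the usual migration away from Class.newInstance, which is deprecated since Java 9 and propagates checked constructor exceptions unchecked; getConstructor().newInstance() wraps them in InvocationTargetException instead. A standalone sketch of the pattern:

  // Sketch: reflective no-arg instantiation without the deprecated Class.newInstance.
  import java.lang.reflect.InvocationTargetException

  def instantiate[T](cls: Class[T]): T =
    try cls.getConstructor().newInstance()
    catch { case e: InvocationTargetException => throw e.getCause }
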
diff --git a/core/src/main/scala/org/apache/spark/metrics/sink/StatsdSink.scala b/core/src/main/scala/org/apache/spark/metrics/sink/StatsdSink.scala
index 859a2f6bcd456..61e74e05169cc 100644
--- a/core/src/main/scala/org/apache/spark/metrics/sink/StatsdSink.scala
+++ b/core/src/main/scala/org/apache/spark/metrics/sink/StatsdSink.scala
@@ -17,7 +17,7 @@
 
 package org.apache.spark.metrics.sink
 
-import java.util.Properties
+import java.util.{Locale, Properties}
 import java.util.concurrent.TimeUnit
 
 import com.codahale.metrics.MetricRegistry
@@ -52,7 +52,8 @@ private[spark] class StatsdSink(
 
   val pollPeriod = property.getProperty(STATSD_KEY_PERIOD, STATSD_DEFAULT_PERIOD).toInt
   val pollUnit =
-    TimeUnit.valueOf(property.getProperty(STATSD_KEY_UNIT, STATSD_DEFAULT_UNIT).toUpperCase)
+    TimeUnit.valueOf(
+      property.getProperty(STATSD_KEY_UNIT, STATSD_DEFAULT_UNIT).toUpperCase(Locale.ROOT))
 
   val prefix = property.getProperty(STATSD_KEY_PREFIX, STATSD_DEFAULT_PREFIX)
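
The Locale.ROOT change above guards the unit parsing against locale-sensitive case mapping; under the Turkish locale, for instance, the dotted capital I breaks TimeUnit.valueOf. A quick illustration (sketch only):

  import java.util.Locale
  import java.util.concurrent.TimeUnit

  TimeUnit.valueOf("minutes".toUpperCase(Locale.ROOT))      // MINUTES, in every locale
  TimeUnit.valueOf("minutes".toUpperCase(new Locale("tr"))) // throws: "MİNUTES" is not a constant
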
 
diff --git a/core/src/main/scala/org/apache/spark/network/BlockTransferService.scala b/core/src/main/scala/org/apache/spark/network/BlockTransferService.scala
index 1d8a266d0079c..a58c8fa2e763f 100644
--- a/core/src/main/scala/org/apache/spark/network/BlockTransferService.scala
+++ b/core/src/main/scala/org/apache/spark/network/BlockTransferService.scala
@@ -26,8 +26,8 @@ import scala.reflect.ClassTag
 
 import org.apache.spark.internal.Logging
 import org.apache.spark.network.buffer.{FileSegmentManagedBuffer, ManagedBuffer, NioManagedBuffer}
-import org.apache.spark.network.shuffle.{BlockFetchingListener, ShuffleClient, TempFileManager}
-import org.apache.spark.storage.{BlockId, StorageLevel}
+import org.apache.spark.network.shuffle.{BlockFetchingListener, DownloadFileManager, ShuffleClient}
+import org.apache.spark.storage.{BlockId, EncryptedManagedBuffer, StorageLevel}
 import org.apache.spark.util.ThreadUtils
 
 private[spark]
@@ -68,7 +68,7 @@ abstract class BlockTransferService extends ShuffleClient with Closeable with Lo
       execId: String,
       blockIds: Array[String],
       listener: BlockFetchingListener,
-      tempFileManager: TempFileManager): Unit
+      tempFileManager: DownloadFileManager): Unit
 
   /**
    * Upload a single block to a remote node, available only after [[init]] is invoked.
@@ -92,7 +92,7 @@ abstract class BlockTransferService extends ShuffleClient with Closeable with Lo
       port: Int,
       execId: String,
       blockId: String,
-      tempFileManager: TempFileManager): ManagedBuffer = {
+      tempFileManager: DownloadFileManager): ManagedBuffer = {
     // A monitor for the thread to wait on.
     val result = Promise[ManagedBuffer]()
     fetchBlocks(host, port, execId, Array(blockId),
@@ -104,6 +104,8 @@ abstract class BlockTransferService extends ShuffleClient with Closeable with Lo
           data match {
             case f: FileSegmentManagedBuffer =>
               result.success(f)
+            case e: EncryptedManagedBuffer =>
+              result.success(e)
             case _ =>
               val ret = ByteBuffer.allocate(data.size.toInt)
               ret.put(data.nioByteBuffer())
diff --git a/core/src/main/scala/org/apache/spark/network/netty/NettyBlockTransferService.scala b/core/src/main/scala/org/apache/spark/network/netty/NettyBlockTransferService.scala
index 1905632a936d3..dc55685b1e7bd 100644
--- a/core/src/main/scala/org/apache/spark/network/netty/NettyBlockTransferService.scala
+++ b/core/src/main/scala/org/apache/spark/network/netty/NettyBlockTransferService.scala
@@ -33,7 +33,7 @@ import org.apache.spark.network.buffer.{ManagedBuffer, NioManagedBuffer}
 import org.apache.spark.network.client.{RpcResponseCallback, TransportClientBootstrap, TransportClientFactory}
 import org.apache.spark.network.crypto.{AuthClientBootstrap, AuthServerBootstrap}
 import org.apache.spark.network.server._
-import org.apache.spark.network.shuffle.{BlockFetchingListener, OneForOneBlockFetcher, RetryingBlockFetcher, TempFileManager}
+import org.apache.spark.network.shuffle.{BlockFetchingListener, DownloadFileManager, OneForOneBlockFetcher, RetryingBlockFetcher}
 import org.apache.spark.network.shuffle.protocol.{UploadBlock, UploadBlockStream}
 import org.apache.spark.network.util.JavaUtils
 import org.apache.spark.serializer.JavaSerializer
@@ -106,7 +106,7 @@ private[spark] class NettyBlockTransferService(
       execId: String,
       blockIds: Array[String],
       listener: BlockFetchingListener,
-      tempFileManager: TempFileManager): Unit = {
+      tempFileManager: DownloadFileManager): Unit = {
     logTrace(s"Fetch blocks from $host:$port (executor id $execId)")
     try {
       val blockFetchStarter = new RetryingBlockFetcher.BlockFetchStarter {
diff --git a/core/src/main/scala/org/apache/spark/package.scala b/core/src/main/scala/org/apache/spark/package.scala
index 8058a4d5dbdea..5d0639e92c36a 100644
--- a/core/src/main/scala/org/apache/spark/package.scala
+++ b/core/src/main/scala/org/apache/spark/package.scala
@@ -19,6 +19,8 @@ package org.apache
 
 import java.util.Properties
 
+import org.apache.spark.util.VersionUtils
+
 /**
  * Core Spark functionality. [[org.apache.spark.SparkContext]] serves as the main entry point to
  * Spark, while [[org.apache.spark.rdd.RDD]] is the data type representing a distributed collection,
@@ -89,6 +91,7 @@ package object spark {
   }
 
   val SPARK_VERSION = SparkBuildInfo.spark_version
+  val SPARK_VERSION_SHORT = VersionUtils.shortVersion(SparkBuildInfo.spark_version)
   val SPARK_BRANCH = SparkBuildInfo.spark_branch
   val SPARK_REVISION = SparkBuildInfo.spark_revision
   val SPARK_BUILD_USER = SparkBuildInfo.spark_build_user
diff --git a/core/src/main/scala/org/apache/spark/rdd/BinaryFileRDD.scala b/core/src/main/scala/org/apache/spark/rdd/BinaryFileRDD.scala
index a14bad47dfe10..039dbcbd5e035 100644
--- a/core/src/main/scala/org/apache/spark/rdd/BinaryFileRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/BinaryFileRDD.scala
@@ -41,7 +41,7 @@ private[spark] class BinaryFileRDD[T](
     // traversing a large number of directories and files. Parallelize it.
     conf.setIfUnset(FileInputFormat.LIST_STATUS_NUM_THREADS,
       Runtime.getRuntime.availableProcessors().toString)
-    val inputFormat = inputFormatClass.newInstance
+    val inputFormat = inputFormatClass.getConstructor().newInstance()
     inputFormat match {
       case configurable: Configurable =>
         configurable.setConf(conf)
diff --git a/core/src/main/scala/org/apache/spark/rdd/CoGroupedRDD.scala b/core/src/main/scala/org/apache/spark/rdd/CoGroupedRDD.scala
index 4574c3724962e..7e76731f5e454 100644
--- a/core/src/main/scala/org/apache/spark/rdd/CoGroupedRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/CoGroupedRDD.scala
@@ -143,8 +143,10 @@ class CoGroupedRDD[K: ClassTag](
 
       case shuffleDependency: ShuffleDependency[_, _, _] =>
         // Read map outputs of shuffle
+        val metrics = context.taskMetrics().createTempShuffleReadMetrics()
         val it = SparkEnv.get.shuffleManager
-          .getReader(shuffleDependency.shuffleHandle, split.index, split.index + 1, context)
+          .getReader(
+            shuffleDependency.shuffleHandle, split.index, split.index + 1, context, metrics)
           .read()
         rddIterators += ((it, depNum))
     }
diff --git a/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala b/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala
index 2d66d25ba39fa..483de28d92ab7 100644
--- a/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala
@@ -120,7 +120,7 @@ class NewHadoopRDD[K, V](
   }
 
   override def getPartitions: Array[Partition] = {
-    val inputFormat = inputFormatClass.newInstance
+    val inputFormat = inputFormatClass.getConstructor().newInstance()
     inputFormat match {
       case configurable: Configurable =>
         configurable.setConf(_conf)
@@ -183,7 +183,7 @@ class NewHadoopRDD[K, V](
         }
       }
 
-      private val format = inputFormatClass.newInstance
+      private val format = inputFormatClass.getConstructor().newInstance()
       format match {
         case configurable: Configurable =>
           configurable.setConf(conf)
diff --git a/core/src/main/scala/org/apache/spark/rdd/OrderedRDDFunctions.scala b/core/src/main/scala/org/apache/spark/rdd/OrderedRDDFunctions.scala
index a5992022d0832..5b1c024257529 100644
--- a/core/src/main/scala/org/apache/spark/rdd/OrderedRDDFunctions.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/OrderedRDDFunctions.scala
@@ -35,7 +35,8 @@ import org.apache.spark.internal.Logging
  *
  *   val rdd: RDD[(String, Int)] = ...
  *   implicit val caseInsensitiveOrdering = new Ordering[String] {
- *     override def compare(a: String, b: String) = a.toLowerCase.compare(b.toLowerCase)
+ *     override def compare(a: String, b: String) =
+ *       a.toLowerCase(Locale.ROOT).compare(b.toLowerCase(Locale.ROOT))
  *   }
  *
  *   // Sort by key, using the above case insensitive ordering.
diff --git a/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala b/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala
index e68c6b1366c7f..4bf4f082d0382 100644
--- a/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala
@@ -394,7 +394,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)])
    *
    * The algorithm used is based on streamlib's implementation of "HyperLogLog in Practice:
    * Algorithmic Engineering of a State of The Art Cardinality Estimation Algorithm", available
-   * <a href="http://dx.doi.org/10.1145/2452376.2452456">here</a>.
+   * <a href="https://doi.org/10.1145/2452376.2452456">here</a>.
    *
    * The relative accuracy is approximately `1.054 / sqrt(2^p)`. Setting a nonzero (`sp` is
    * greater than `p`) would trigger sparse representation of registers, which may reduce the
@@ -436,7 +436,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)])
    *
    * The algorithm used is based on streamlib's implementation of "HyperLogLog in Practice:
    * Algorithmic Engineering of a State of The Art Cardinality Estimation Algorithm", available
-   * <a href="http://dx.doi.org/10.1145/2452376.2452456">here</a>.
+   * <a href="https://doi.org/10.1145/2452376.2452456">here</a>.
    *
    * @param relativeSD Relative accuracy. Smaller values create counters that require more space.
    *                   It must be greater than 0.000017.
@@ -456,7 +456,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)])
    *
    * The algorithm used is based on streamlib's implementation of "HyperLogLog in Practice:
    * Algorithmic Engineering of a State of The Art Cardinality Estimation Algorithm", available
-   * <a href="http://dx.doi.org/10.1145/2452376.2452456">here</a>.
+   * <a href="https://doi.org/10.1145/2452376.2452456">here</a>.
    *
    * @param relativeSD Relative accuracy. Smaller values create counters that require more space.
    *                   It must be greater than 0.000017.
@@ -473,7 +473,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)])
    *
    * The algorithm used is based on streamlib's implementation of "HyperLogLog in Practice:
    * Algorithmic Engineering of a State of The Art Cardinality Estimation Algorithm", available
-   * <a href="http://dx.doi.org/10.1145/2452376.2452456">here</a>.
+   * <a href="https://doi.org/10.1145/2452376.2452456">here</a>.
    *
    * @param relativeSD Relative accuracy. Smaller values create counters that require more space.
    *                   It must be greater than 0.000017.
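
For reference, the HyperLogLog-based estimators documented above are driven entirely by relativeSD (or the p/sp precision variants). A small usage sketch, assuming sc is a SparkContext:

  // Sketch: approximate distinct counts with roughly 5% relative error.
  val pairs = sc.parallelize(Seq(("a", 1), ("a", 2), ("b", 1)))
  pairs.countApproxDistinctByKey(relativeSD = 0.05).collect()   // ~ (a, 2), (b, 1)
  pairs.keys.countApproxDistinct(relativeSD = 0.05)             // ~ 2
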
diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
index 61ad6dfdb2215..6a25ee20b2c68 100644
--- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
@@ -42,7 +42,8 @@ import org.apache.spark.partial.GroupedCountEvaluator
 import org.apache.spark.partial.PartialResult
 import org.apache.spark.storage.{RDDBlockId, StorageLevel}
 import org.apache.spark.util.{BoundedPriorityQueue, Utils}
-import org.apache.spark.util.collection.{OpenHashMap, Utils => collectionUtils}
+import org.apache.spark.util.collection.{ExternalAppendOnlyMap, OpenHashMap,
+  Utils => collectionUtils}
 import org.apache.spark.util.random.{BernoulliCellSampler, BernoulliSampler, PoissonSampler,
   SamplingUtils}
 
@@ -396,7 +397,20 @@ abstract class RDD[T: ClassTag](
    * Return a new RDD containing the distinct elements in this RDD.
    */
   def distinct(numPartitions: Int)(implicit ord: Ordering[T] = null): RDD[T] = withScope {
-    map(x => (x, null)).reduceByKey((x, y) => x, numPartitions).map(_._1)
+    def removeDuplicatesInPartition(partition: Iterator[T]): Iterator[T] = {
+      // Create an instance of external append only map which ignores values.
+      val map = new ExternalAppendOnlyMap[T, Null, Null](
+        createCombiner = value => null,
+        mergeValue = (a, b) => a,
+        mergeCombiners = (a, b) => a)
+      map.insertAll(partition.map(_ -> null))
+      map.iterator.map(_._1)
+    }
+    partitioner match {
+      case Some(p) if numPartitions == partitions.length =>
+        mapPartitions(removeDuplicatesInPartition, preservesPartitioning = true)
+      case _ => map(x => (x, null)).reduceByKey((x, y) => x, numPartitions).map(_._1)
+    }
   }
 
   /**
@@ -1244,7 +1258,7 @@ abstract class RDD[T: ClassTag](
    *
    * The algorithm used is based on streamlib's implementation of "HyperLogLog in Practice:
    * Algorithmic Engineering of a State of The Art Cardinality Estimation Algorithm", available
-   * <a href="http://dx.doi.org/10.1145/2452376.2452456">here</a>.
+   * <a href="https://doi.org/10.1145/2452376.2452456">here</a>.
    *
    * The relative accuracy is approximately `1.054 / sqrt(2^p)`. Setting a nonzero (`sp` is greater
    * than `p`) would trigger sparse representation of registers, which may reduce the memory
@@ -1276,7 +1290,7 @@ abstract class RDD[T: ClassTag](
    *
    * The algorithm used is based on streamlib's implementation of "HyperLogLog in Practice:
    * Algorithmic Engineering of a State of The Art Cardinality Estimation Algorithm", available
-   * <a href="http://dx.doi.org/10.1145/2452376.2452456">here</a>.
+   * <a href="https://doi.org/10.1145/2452376.2452456">here</a>.
    *
    * @param relativeSD Relative accuracy. Smaller values create counters that require more space.
    *                   It must be greater than 0.000017.
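
The new distinct(numPartitions) branch earlier in this file's diff avoids a shuffle when the RDD already has a partitioner and the requested partition count matches, deduplicating inside each partition with a spillable ExternalAppendOnlyMap instead. A sketch of the case that hits the fast path (sc assumed to be a SparkContext):

  import org.apache.spark.HashPartitioner

  val byKey = sc.parallelize(1 to 1000000)
    .map(x => (x % 1000, x))
    .partitionBy(new HashPartitioner(8))
  // Equal pairs share a key, so they are already co-located; with 8 partitions requested
  // and a partitioner present, distinct dedups per partition without another shuffle.
  byKey.distinct(8)
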
diff --git a/core/src/main/scala/org/apache/spark/rdd/RDDOperationScope.scala b/core/src/main/scala/org/apache/spark/rdd/RDDOperationScope.scala
index 53d69ba26811f..3abb2d8a11f35 100644
--- a/core/src/main/scala/org/apache/spark/rdd/RDDOperationScope.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/RDDOperationScope.scala
@@ -41,7 +41,7 @@ import org.apache.spark.internal.Logging
  * There is no particular relationship between an operation scope and a stage or a job.
  * A scope may live inside one stage (e.g. map) or span across multiple jobs (e.g. take).
  */
-@JsonInclude(Include.NON_NULL)
+@JsonInclude(Include.NON_ABSENT)
 @JsonPropertyOrder(Array("id", "name", "parent"))
 private[spark] class RDDOperationScope(
     val name: String,
diff --git a/core/src/main/scala/org/apache/spark/rdd/ShuffledRDD.scala b/core/src/main/scala/org/apache/spark/rdd/ShuffledRDD.scala
index e8f9b27b7eb55..5ec99b7f4f3ab 100644
--- a/core/src/main/scala/org/apache/spark/rdd/ShuffledRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/ShuffledRDD.scala
@@ -101,7 +101,9 @@ class ShuffledRDD[K: ClassTag, V: ClassTag, C: ClassTag](
 
   override def compute(split: Partition, context: TaskContext): Iterator[(K, C)] = {
     val dep = dependencies.head.asInstanceOf[ShuffleDependency[K, V, C]]
-    SparkEnv.get.shuffleManager.getReader(dep.shuffleHandle, split.index, split.index + 1, context)
+    val metrics = context.taskMetrics().createTempShuffleReadMetrics()
+    SparkEnv.get.shuffleManager.getReader(
+      dep.shuffleHandle, split.index, split.index + 1, context, metrics)
       .read()
       .asInstanceOf[Iterator[(K, C)]]
   }
diff --git a/core/src/main/scala/org/apache/spark/rdd/SubtractedRDD.scala b/core/src/main/scala/org/apache/spark/rdd/SubtractedRDD.scala
index a733eaa5d7e53..42d190377f104 100644
--- a/core/src/main/scala/org/apache/spark/rdd/SubtractedRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/SubtractedRDD.scala
@@ -107,9 +107,14 @@ private[spark] class SubtractedRDD[K: ClassTag, V: ClassTag, W: ClassTag](
             .asInstanceOf[Iterator[Product2[K, V]]].foreach(op)
 
         case shuffleDependency: ShuffleDependency[_, _, _] =>
+          val metrics = context.taskMetrics().createTempShuffleReadMetrics()
           val iter = SparkEnv.get.shuffleManager
             .getReader(
-              shuffleDependency.shuffleHandle, partition.index, partition.index + 1, context)
+              shuffleDependency.shuffleHandle,
+              partition.index,
+              partition.index + 1,
+              context,
+              metrics)
             .read()
           iter.foreach(op)
       }
diff --git a/core/src/main/scala/org/apache/spark/rdd/WholeTextFileRDD.scala b/core/src/main/scala/org/apache/spark/rdd/WholeTextFileRDD.scala
index 9f3d0745c33c9..eada762b99c8e 100644
--- a/core/src/main/scala/org/apache/spark/rdd/WholeTextFileRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/WholeTextFileRDD.scala
@@ -44,7 +44,7 @@ private[spark] class WholeTextFileRDD(
     // traversing a large number of directories and files. Parallelize it.
     conf.setIfUnset(FileInputFormat.LIST_STATUS_NUM_THREADS,
       Runtime.getRuntime.availableProcessors().toString)
-    val inputFormat = inputFormatClass.newInstance
+    val inputFormat = inputFormatClass.getConstructor().newInstance()
     inputFormat match {
       case configurable: Configurable =>
         configurable.setConf(conf)
diff --git a/core/src/main/scala/org/apache/spark/scheduler/AccumulableInfo.scala b/core/src/main/scala/org/apache/spark/scheduler/AccumulableInfo.scala
index 0a5fe5a1d3ee1..bd0fe90b1f3b6 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/AccumulableInfo.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/AccumulableInfo.scala
@@ -22,7 +22,7 @@ import org.apache.spark.annotation.DeveloperApi
 
 /**
  * :: DeveloperApi ::
- * Information about an [[org.apache.spark.Accumulable]] modified during a task or stage.
+ * Information about an [[org.apache.spark.util.AccumulatorV2]] modified during a task or stage.
  *
  * @param id accumulator ID
  * @param name accumulator name
@@ -47,33 +47,3 @@ case class AccumulableInfo private[spark] (
     private[spark] val countFailedValues: Boolean,
     // TODO: use this to identify internal task metrics instead of encoding it in the name
     private[spark] val metadata: Option[String] = None)
-
-
-/**
- * A collection of deprecated constructors. This will be removed soon.
- */
-object AccumulableInfo {
-
-  @deprecated("do not create AccumulableInfo", "2.0.0")
-  def apply(
-      id: Long,
-      name: String,
-      update: Option[String],
-      value: String,
-      internal: Boolean): AccumulableInfo = {
-    new AccumulableInfo(
-      id, Option(name), update, Option(value), internal, countFailedValues = false)
-  }
-
-  @deprecated("do not create AccumulableInfo", "2.0.0")
-  def apply(id: Long, name: String, update: Option[String], value: String): AccumulableInfo = {
-    new AccumulableInfo(
-      id, Option(name), update, Option(value), internal = false, countFailedValues = false)
-  }
-
-  @deprecated("do not create AccumulableInfo", "2.0.0")
-  def apply(id: Long, name: String, value: String): AccumulableInfo = {
-    new AccumulableInfo(
-      id, Option(name), None, Option(value), internal = false, countFailedValues = false)
-  }
-}
diff --git a/core/src/main/scala/org/apache/spark/scheduler/AsyncEventQueue.scala b/core/src/main/scala/org/apache/spark/scheduler/AsyncEventQueue.scala
index e2b6df4600590..7cd2b862216ee 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/AsyncEventQueue.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/AsyncEventQueue.scala
@@ -169,7 +169,8 @@ private class AsyncEventQueue(
           val prevLastReportTimestamp = lastReportTimestamp
           lastReportTimestamp = System.currentTimeMillis()
           val previous = new java.util.Date(prevLastReportTimestamp)
-          logWarning(s"Dropped $droppedCount events from $name since $previous.")
+          logWarning(s"Dropped $droppedCount events from $name since " +
+            s"${if (prevLastReportTimestamp == 0) "the application started" else s"$previous"}.")
         }
       }
     }
diff --git a/core/src/main/scala/org/apache/spark/scheduler/BlacklistTracker.scala b/core/src/main/scala/org/apache/spark/scheduler/BlacklistTracker.scala
index 980fbbe516b91..ef6d02d85c27b 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/BlacklistTracker.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/BlacklistTracker.scala
@@ -146,21 +146,31 @@ private[scheduler] class BlacklistTracker (
     nextExpiryTime = math.min(execMinExpiry, nodeMinExpiry)
   }
 
+  private def killExecutor(exec: String, msg: String): Unit = {
+    allocationClient match {
+      case Some(a) =>
+        logInfo(msg)
+        a.killExecutors(Seq(exec), adjustTargetNumExecutors = false, countFailures = false,
+          force = true)
+      case None =>
+        logInfo(s"Not attempting to kill blacklisted executor id $exec " +
+          s"since allocation client is not defined.")
+    }
+  }
+
   private def killBlacklistedExecutor(exec: String): Unit = {
     if (conf.get(config.BLACKLIST_KILL_ENABLED)) {
-      allocationClient match {
-        case Some(a) =>
-          logInfo(s"Killing blacklisted executor id $exec " +
-            s"since ${config.BLACKLIST_KILL_ENABLED.key} is set.")
-          a.killExecutors(Seq(exec), adjustTargetNumExecutors = false, countFailures = false,
-            force = true)
-        case None =>
-          logWarning(s"Not attempting to kill blacklisted executor id $exec " +
-            s"since allocation client is not defined.")
-      }
+      killExecutor(exec,
+        s"Killing blacklisted executor id $exec since ${config.BLACKLIST_KILL_ENABLED.key} is set.")
     }
   }
 
+  private[scheduler] def killBlacklistedIdleExecutor(exec: String): Unit = {
+    killExecutor(exec,
+      s"Killing blacklisted idle executor id $exec because of task unschedulability and trying " +
+        "to acquire a new executor.")
+  }
+
   private def killExecutorsOnBlacklistedNode(node: String): Unit = {
     if (conf.get(config.BLACKLIST_KILL_ENABLED)) {
       allocationClient match {
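
The extracted killExecutor helper above is shared by the existing kill-on-blacklist path (still gated by config.BLACKLIST_KILL_ENABLED) and the new killBlacklistedIdleExecutor used when a taskSet is completely blacklisted. As a configuration sketch, with the string key names written out as an assumption rather than taken from this diff:

  // Sketch: enable blacklisting and allow Spark to kill blacklisted executors
  // (key names assumed to correspond to config.BLACKLIST_KILL_ENABLED and friends).
  val conf = new org.apache.spark.SparkConf()
    .set("spark.blacklist.enabled", "true")
    .set("spark.blacklist.killBlacklistedExecutors", "true")
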
diff --git a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala
index 50c91da8b13d1..06966e77db81e 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala
@@ -35,7 +35,7 @@ import org.apache.commons.lang3.SerializationUtils
 
 import org.apache.spark._
 import org.apache.spark.broadcast.Broadcast
-import org.apache.spark.executor.TaskMetrics
+import org.apache.spark.executor.{ExecutorMetrics, TaskMetrics}
 import org.apache.spark.internal.Logging
 import org.apache.spark.internal.config
 import org.apache.spark.network.util.JavaUtils
@@ -264,8 +264,11 @@ private[spark] class DAGScheduler(
       execId: String,
       // (taskId, stageId, stageAttemptId, accumUpdates)
       accumUpdates: Array[(Long, Int, Int, Seq[AccumulableInfo])],
-      blockManagerId: BlockManagerId): Boolean = {
-    listenerBus.post(SparkListenerExecutorMetricsUpdate(execId, accumUpdates))
+      blockManagerId: BlockManagerId,
+      // executor metrics indexed by ExecutorMetricType.values
+      executorUpdates: ExecutorMetrics): Boolean = {
+    listenerBus.post(SparkListenerExecutorMetricsUpdate(execId, accumUpdates,
+      Some(executorUpdates)))
     blockManagerMaster.driverEndpoint.askSync[Boolean](
       BlockManagerHeartbeat(blockManagerId), new RpcTimeout(600 seconds, "BlockManagerHeartbeat"))
   }
@@ -1242,9 +1245,10 @@ private[spark] class DAGScheduler(
   private def updateAccumulators(event: CompletionEvent): Unit = {
     val task = event.task
     val stage = stageIdToStage(task.stageId)
-    try {
-      event.accumUpdates.foreach { updates =>
-        val id = updates.id
+
+    event.accumUpdates.foreach { updates =>
+      val id = updates.id
+      try {
         // Find the corresponding accumulator on the driver and update it
         val acc: AccumulatorV2[Any, Any] = AccumulatorContext.get(id) match {
           case Some(accum) => accum.asInstanceOf[AccumulatorV2[Any, Any]]
@@ -1258,10 +1262,17 @@ private[spark] class DAGScheduler(
           event.taskInfo.setAccumulables(
             acc.toInfo(Some(updates.value), Some(acc.value)) +: event.taskInfo.accumulables)
         }
+      } catch {
+        case NonFatal(e) =>
+          // Log the class name to make it easy to find the bad implementation
+          val accumClassName = AccumulatorContext.get(id) match {
+            case Some(accum) => accum.getClass.getName
+            case None => "Unknown class"
+          }
+          logError(
+            s"Failed to update accumulator $id ($accumClassName) for task ${task.partitionId}",
+            e)
       }
-    } catch {
-      case NonFatal(e) =>
-        logError(s"Failed to update accumulators for task ${task.partitionId}", e)
     }
   }
 
@@ -1284,6 +1295,27 @@ private[spark] class DAGScheduler(
       Utils.getFormattedClassName(event.task), event.reason, event.taskInfo, taskMetrics))
   }
 
+  /**
+   * Check [[SparkContext.SPARK_JOB_INTERRUPT_ON_CANCEL]] in job properties to see if we should
+   * interrupt running tasks. Returns `false` if the property value cannot be parsed as a boolean.
+   */
+  private def shouldInterruptTaskThread(job: ActiveJob): Boolean = {
+    if (job.properties == null) {
+      false
+    } else {
+      val shouldInterruptThread =
+        job.properties.getProperty(SparkContext.SPARK_JOB_INTERRUPT_ON_CANCEL, "false")
+      try {
+        shouldInterruptThread.toBoolean
+      } catch {
+        case e: IllegalArgumentException =>
+          logWarning(s"${SparkContext.SPARK_JOB_INTERRUPT_ON_CANCEL} in Job ${job.jobId} " +
+            s"is invalid: $shouldInterruptThread. Using 'false' instead", e)
+          false
+      }
+    }
+  }
+
   /**
    * Responds to a task finishing. This is called inside the event loop so it assumes that it can
    * modify the scheduler's internal state. Use taskEnded() to post a task end event from outside.
@@ -1353,6 +1385,21 @@ private[spark] class DAGScheduler(
                   if (job.numFinished == job.numPartitions) {
                     markStageAsFinished(resultStage)
                     cleanupStateForJobAndIndependentStages(job)
+                    try {
+                      // killAllTaskAttempts will fail if a SchedulerBackend does not implement
+                      // killTask.
+                      logInfo(s"Job ${job.jobId} is finished. Cancelling potential speculative " +
+                        "or zombie tasks for this job")
+                      // ResultStage is only used by this job. It's safe to kill speculative or
+                      // zombie tasks in this stage.
+                      taskScheduler.killAllTaskAttempts(
+                        stageId,
+                        shouldInterruptTaskThread(job),
+                        reason = "Stage finished")
+                    } catch {
+                      case e: UnsupportedOperationException =>
+                        logWarning(s"Could not cancel tasks for stage $stageId", e)
+                    }
                     listenerBus.post(
                       SparkListenerJobEnd(job.jobId, clock.getTimeMillis(), JobSucceeded))
                   }
@@ -1362,7 +1409,7 @@ private[spark] class DAGScheduler(
                   try {
                     job.listener.taskSucceeded(rt.outputId, event.result)
                   } catch {
-                    case e: Exception =>
+                    case e: Throwable if !Utils.isFatalError(e) =>
                       // TODO: Perhaps we want to mark the resultStage as failed?
                       job.listener.jobFailed(new SparkDriverExecutionException(e))
                   }
@@ -1879,10 +1926,6 @@ private[spark] class DAGScheduler(
     val error = new SparkException(failureReason, exception.getOrElse(null))
     var ableToCancelStages = true
 
-    val shouldInterruptThread =
-      if (job.properties == null) false
-      else job.properties.getProperty(SparkContext.SPARK_JOB_INTERRUPT_ON_CANCEL, "false").toBoolean
-
     // Cancel all independent, running stages.
     val stages = jobIdToStageIds(job.jobId)
     if (stages.isEmpty) {
@@ -1902,12 +1945,12 @@ private[spark] class DAGScheduler(
           val stage = stageIdToStage(stageId)
           if (runningStages.contains(stage)) {
             try { // cancelTasks will fail if a SchedulerBackend does not implement killTask
-              taskScheduler.cancelTasks(stageId, shouldInterruptThread)
+              taskScheduler.cancelTasks(stageId, shouldInterruptTaskThread(job))
               markStageAsFinished(stage, Some(failureReason))
             } catch {
               case e: UnsupportedOperationException =>
-                logInfo(s"Could not cancel tasks for stage $stageId", e)
-              ableToCancelStages = false
+                logWarning(s"Could not cancel tasks for stage $stageId", e)
+                ableToCancelStages = false
             }
           }
         }
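
The shouldInterruptTaskThread helper above reads the interrupt-on-cancel job property, which applications normally set through the job-group API; when it is true, the new cleanup of speculative and zombie tasks after a job finishes may interrupt their threads. A usage sketch, assuming sc is a SparkContext:

  // Sketch: opt a group of jobs into thread interruption on cancellation.
  sc.setJobGroup("nightly-etl", "nightly load", interruptOnCancel = true)
  // ... trigger actions ...
  sc.cancelJobGroup("nightly-etl")
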
diff --git a/core/src/main/scala/org/apache/spark/scheduler/EventLoggingListener.scala b/core/src/main/scala/org/apache/spark/scheduler/EventLoggingListener.scala
index 69bc51c1ecf90..5f697fe99258d 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/EventLoggingListener.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/EventLoggingListener.scala
@@ -20,22 +20,19 @@ package org.apache.spark.scheduler
 import java.io._
 import java.net.URI
 import java.nio.charset.StandardCharsets
-import java.util.EnumSet
 import java.util.Locale
 
-import scala.collection.mutable
-import scala.collection.mutable.ArrayBuffer
+import scala.collection.mutable.{ArrayBuffer, Map}
 
 import org.apache.hadoop.conf.Configuration
 import org.apache.hadoop.fs.{FileSystem, FSDataOutputStream, Path}
 import org.apache.hadoop.fs.permission.FsPermission
-import org.apache.hadoop.hdfs.DFSOutputStream
-import org.apache.hadoop.hdfs.client.HdfsDataOutputStream.SyncFlag
 import org.json4s.JsonAST.JValue
 import org.json4s.jackson.JsonMethods._
 
 import org.apache.spark.{SPARK_VERSION, SparkConf}
 import org.apache.spark.deploy.SparkHadoopUtil
+import org.apache.spark.executor.ExecutorMetrics
 import org.apache.spark.internal.Logging
 import org.apache.spark.internal.config._
 import org.apache.spark.io.CompressionCodec
@@ -51,6 +48,7 @@ import org.apache.spark.util.{JsonProtocol, Utils}
  *   spark.eventLog.overwrite - Whether to overwrite any existing files.
  *   spark.eventLog.dir - Path to the directory in which events are logged.
  *   spark.eventLog.buffer.kb - Buffer size to use when writing to output streams
+ *   spark.eventLog.logStageExecutorMetrics.enabled - Whether to log stage executor metrics
  */
 private[spark] class EventLoggingListener(
     appId: String,
@@ -69,6 +67,8 @@ private[spark] class EventLoggingListener(
   private val shouldCompress = sparkConf.get(EVENT_LOG_COMPRESS)
   private val shouldOverwrite = sparkConf.get(EVENT_LOG_OVERWRITE)
   private val shouldLogBlockUpdates = sparkConf.get(EVENT_LOG_BLOCK_UPDATES)
+  private val shouldAllowECLogs = sparkConf.get(EVENT_LOG_ALLOW_EC)
+  private val shouldLogStageExecutorMetrics = sparkConf.get(EVENT_LOG_STAGE_EXECUTOR_METRICS)
   private val testing = sparkConf.get(EVENT_LOG_TESTING)
   private val outputBufferSize = sparkConf.get(EVENT_LOG_OUTPUT_BUFFER_SIZE).toInt
   private val fileSystem = Utils.getHadoopFileSystem(logBaseDir, hadoopConf)
@@ -93,6 +93,9 @@ private[spark] class EventLoggingListener(
   // Visible for tests only.
   private[scheduler] val logPath = getLogPath(logBaseDir, appId, appAttemptId, compressionCodecName)
 
+  // map from (stageId, stageAttempt) to the peak executor metrics seen for that stage
+  private val liveStageExecutorMetrics = Map.empty[(Int, Int), Map[String, ExecutorMetrics]]
+
   /**
    * Creates the log file in the configured log directory.
    */
@@ -117,7 +120,11 @@ private[spark] class EventLoggingListener(
       if ((isDefaultLocal && uri.getScheme == null) || uri.getScheme == "file") {
         new FileOutputStream(uri.getPath)
       } else {
-        hadoopDataStream = Some(fileSystem.create(path))
+        hadoopDataStream = Some(if (shouldAllowECLogs) {
+          fileSystem.create(path)
+        } else {
+          SparkHadoopUtil.createNonECFile(fileSystem, path)
+        })
         hadoopDataStream.get
       }
 
@@ -144,10 +151,7 @@ private[spark] class EventLoggingListener(
     // scalastyle:on println
     if (flushLogger) {
       writer.foreach(_.flush())
-      hadoopDataStream.foreach(ds => ds.getWrappedStream match {
-        case wrapped: DFSOutputStream => wrapped.hsync(EnumSet.of(SyncFlag.UPDATE_LENGTH))
-        case _ => ds.hflush()
-      })
+      hadoopDataStream.foreach(_.hflush())
     }
     if (testing) {
       loggedEvents += eventJson
@@ -155,7 +159,14 @@ private[spark] class EventLoggingListener(
   }
 
   // Events that do not trigger a flush
-  override def onStageSubmitted(event: SparkListenerStageSubmitted): Unit = logEvent(event)
+  override def onStageSubmitted(event: SparkListenerStageSubmitted): Unit = {
+    logEvent(event)
+    if (shouldLogStageExecutorMetrics) {
+      // record the peak metrics for the new stage
+      liveStageExecutorMetrics.put((event.stageInfo.stageId, event.stageInfo.attemptNumber()),
+        Map.empty[String, ExecutorMetrics])
+    }
+  }
 
   override def onTaskStart(event: SparkListenerTaskStart): Unit = logEvent(event)
 
@@ -169,6 +180,26 @@ private[spark] class EventLoggingListener(
 
   // Events that trigger a flush
   override def onStageCompleted(event: SparkListenerStageCompleted): Unit = {
+    if (shouldLogStageExecutorMetrics) {
+      // clear out any previous attempts, that did not have a stage completed event
+      val prevAttemptId = event.stageInfo.attemptNumber() - 1
+      for (attemptId <- 0 to prevAttemptId) {
+        liveStageExecutorMetrics.remove((event.stageInfo.stageId, attemptId))
+      }
+
+      // log the peak executor metrics for the stage, for each live executor,
+      // whether or not the executor is running tasks for the stage
+      val executorOpt = liveStageExecutorMetrics.remove(
+        (event.stageInfo.stageId, event.stageInfo.attemptNumber()))
+      executorOpt.foreach { execMap =>
+        execMap.foreach { case (executorId, peakExecutorMetrics) =>
+            logEvent(new SparkListenerStageExecutorMetrics(executorId, event.stageInfo.stageId,
+              event.stageInfo.attemptNumber(), peakExecutorMetrics))
+        }
+      }
+    }
+
+    // log stage completed event
     logEvent(event, flushLogger = true)
   }
 
@@ -234,8 +265,18 @@ private[spark] class EventLoggingListener(
     }
   }
 
-  // No-op because logging every update would be overkill
-  override def onExecutorMetricsUpdate(event: SparkListenerExecutorMetricsUpdate): Unit = { }
+  override def onExecutorMetricsUpdate(event: SparkListenerExecutorMetricsUpdate): Unit = {
+    if (shouldLogStageExecutorMetrics) {
+      // For the active stages, record any new peak values for the memory metrics for the executor
+      event.executorUpdates.foreach { executorUpdates =>
+        liveStageExecutorMetrics.values.foreach { peakExecutorMetrics =>
+          val peakMetrics = peakExecutorMetrics.getOrElseUpdate(
+            event.execId, new ExecutorMetrics())
+          peakMetrics.compareAndUpdatePeakValues(executorUpdates)
+        }
+      }
+    }
+  }
 
   override def onOtherEvent(event: SparkListenerEvent): Unit = {
     if (event.logEvent) {
@@ -296,7 +337,7 @@ private[spark] object EventLoggingListener extends Logging {
   private val LOG_FILE_PERMISSIONS = new FsPermission(Integer.parseInt("770", 8).toShort)
 
   // A cache for compression codecs to avoid creating the same codec many times
-  private val codecMap = new mutable.HashMap[String, CompressionCodec]
+  private val codecMap = Map.empty[String, CompressionCodec]
 
   /**
    * Write metadata about an event log to the given stream.
@@ -341,19 +382,15 @@ private[spark] object EventLoggingListener extends Logging {
       appId: String,
       appAttemptId: Option[String],
       compressionCodecName: Option[String] = None): String = {
-    val base = new Path(logBaseDir).toString.stripSuffix("/") + "/" + sanitize(appId)
+    val base = new Path(logBaseDir).toString.stripSuffix("/") + "/" + Utils.sanitizeDirName(appId)
     val codec = compressionCodecName.map("." + _).getOrElse("")
     if (appAttemptId.isDefined) {
-      base + "_" + sanitize(appAttemptId.get) + codec
+      base + "_" + Utils.sanitizeDirName(appAttemptId.get) + codec
     } else {
       base + codec
     }
   }
 
-  private def sanitize(str: String): String = {
-    str.replaceAll("[ :/]", "-").replaceAll("[.${}'\"]", "_").toLowerCase(Locale.ROOT)
-  }
-
   /**
    * Opens an event log file and returns an input stream that contains the event data.
    *
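
Per the header comment added above, the per-stage peak executor metrics are only written when the corresponding event-log flag is on. A configuration sketch using the key named in that comment (the other event-log keys are the standard ones, shown only for context):

  // Sketch: enable event logging plus the new per-stage peak executor metrics.
  val conf = new org.apache.spark.SparkConf()
    .set("spark.eventLog.enabled", "true")
    .set("spark.eventLog.dir", "hdfs:///spark-logs")
    .set("spark.eventLog.logStageExecutorMetrics.enabled", "true")
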
diff --git a/core/src/main/scala/org/apache/spark/scheduler/MapStatus.scala b/core/src/main/scala/org/apache/spark/scheduler/MapStatus.scala
index 7e1d75fe723d6..64f0a060a247c 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/MapStatus.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/MapStatus.scala
@@ -20,7 +20,6 @@ package org.apache.spark.scheduler
 import java.io.{Externalizable, ObjectInput, ObjectOutput}
 
 import scala.collection.mutable
-import scala.collection.mutable.ArrayBuffer
 
 import org.roaringbitmap.RoaringBitmap
 
@@ -31,8 +30,7 @@ import org.apache.spark.util.Utils
 
 /**
  * Result returned by a ShuffleMapTask to a scheduler. Includes the block manager address that the
- * task ran on, the sizes of outputs for each reducer, and the number of outputs of the map task,
- * for passing on to the reduce tasks.
+ * task ran on as well as the sizes of outputs for each reducer, for passing on to the reduce tasks.
  */
 private[spark] sealed trait MapStatus {
   /** Location where this task was run. */
@@ -45,23 +43,24 @@ private[spark] sealed trait MapStatus {
    * necessary for correctness, since block fetchers are allowed to skip zero-size blocks.
    */
   def getSizeForBlock(reduceId: Int): Long
-
-  /**
-   * The number of outputs for the map task.
-   */
-  def numberOfOutput: Long
 }
 
 
 private[spark] object MapStatus {
 
-  def apply(loc: BlockManagerId, uncompressedSizes: Array[Long], numOutput: Long): MapStatus = {
-    if (uncompressedSizes.length >  Option(SparkEnv.get)
-      .map(_.conf.get(config.SHUFFLE_MIN_NUM_PARTS_TO_HIGHLY_COMPRESS))
-      .getOrElse(config.SHUFFLE_MIN_NUM_PARTS_TO_HIGHLY_COMPRESS.defaultValue.get)) {
-      HighlyCompressedMapStatus(loc, uncompressedSizes, numOutput)
+  /**
+   * Min partition number to use [[HighlyCompressedMapStatus]]. A bit ugly here because in test
+   * code we can't assume SparkEnv.get exists.
+   */
+  private lazy val minPartitionsToUseHighlyCompressMapStatus = Option(SparkEnv.get)
+    .map(_.conf.get(config.SHUFFLE_MIN_NUM_PARTS_TO_HIGHLY_COMPRESS))
+    .getOrElse(config.SHUFFLE_MIN_NUM_PARTS_TO_HIGHLY_COMPRESS.defaultValue.get)
+
+  def apply(loc: BlockManagerId, uncompressedSizes: Array[Long]): MapStatus = {
+    if (uncompressedSizes.length > minPartitionsToUseHighlyCompressMapStatus) {
+      HighlyCompressedMapStatus(loc, uncompressedSizes)
     } else {
-      new CompressedMapStatus(loc, uncompressedSizes, numOutput)
+      new CompressedMapStatus(loc, uncompressedSizes)
     }
   }
 
@@ -104,34 +103,29 @@ private[spark] object MapStatus {
  */
 private[spark] class CompressedMapStatus(
     private[this] var loc: BlockManagerId,
-    private[this] var compressedSizes: Array[Byte],
-    private[this] var numOutput: Long)
+    private[this] var compressedSizes: Array[Byte])
   extends MapStatus with Externalizable {
 
-  protected def this() = this(null, null.asInstanceOf[Array[Byte]], -1)  // For deserialization only
+  protected def this() = this(null, null.asInstanceOf[Array[Byte]])  // For deserialization only
 
-  def this(loc: BlockManagerId, uncompressedSizes: Array[Long], numOutput: Long) {
-    this(loc, uncompressedSizes.map(MapStatus.compressSize), numOutput)
+  def this(loc: BlockManagerId, uncompressedSizes: Array[Long]) {
+    this(loc, uncompressedSizes.map(MapStatus.compressSize))
   }
 
   override def location: BlockManagerId = loc
 
-  override def numberOfOutput: Long = numOutput
-
   override def getSizeForBlock(reduceId: Int): Long = {
     MapStatus.decompressSize(compressedSizes(reduceId))
   }
 
   override def writeExternal(out: ObjectOutput): Unit = Utils.tryOrIOException {
     loc.writeExternal(out)
-    out.writeLong(numOutput)
     out.writeInt(compressedSizes.length)
     out.write(compressedSizes)
   }
 
   override def readExternal(in: ObjectInput): Unit = Utils.tryOrIOException {
     loc = BlockManagerId(in)
-    numOutput = in.readLong()
     val len = in.readInt()
     compressedSizes = new Array[Byte](len)
     in.readFully(compressedSizes)
@@ -154,20 +148,17 @@ private[spark] class HighlyCompressedMapStatus private (
     private[this] var numNonEmptyBlocks: Int,
     private[this] var emptyBlocks: RoaringBitmap,
     private[this] var avgSize: Long,
-    private var hugeBlockSizes: Map[Int, Byte],
-    private[this] var numOutput: Long)
+    private[this] var hugeBlockSizes: scala.collection.Map[Int, Byte])
   extends MapStatus with Externalizable {
 
   // loc could be null when the default constructor is called during deserialization
   require(loc == null || avgSize > 0 || hugeBlockSizes.size > 0 || numNonEmptyBlocks == 0,
     "Average size can only be zero for map stages that produced no output")
 
-  protected def this() = this(null, -1, null, -1, null, -1)  // For deserialization only
+  protected def this() = this(null, -1, null, -1, null)  // For deserialization only
 
   override def location: BlockManagerId = loc
 
-  override def numberOfOutput: Long = numOutput
-
   override def getSizeForBlock(reduceId: Int): Long = {
     assert(hugeBlockSizes != null)
     if (emptyBlocks.contains(reduceId)) {
@@ -182,7 +173,6 @@ private[spark] class HighlyCompressedMapStatus private (
 
   override def writeExternal(out: ObjectOutput): Unit = Utils.tryOrIOException {
     loc.writeExternal(out)
-    out.writeLong(numOutput)
     emptyBlocks.writeExternal(out)
     out.writeLong(avgSize)
     out.writeInt(hugeBlockSizes.size)
@@ -194,26 +184,22 @@ private[spark] class HighlyCompressedMapStatus private (
 
   override def readExternal(in: ObjectInput): Unit = Utils.tryOrIOException {
     loc = BlockManagerId(in)
-    numOutput = in.readLong()
     emptyBlocks = new RoaringBitmap()
     emptyBlocks.readExternal(in)
     avgSize = in.readLong()
     val count = in.readInt()
-    val hugeBlockSizesArray = mutable.ArrayBuffer[Tuple2[Int, Byte]]()
+    val hugeBlockSizesImpl = mutable.Map.empty[Int, Byte]
     (0 until count).foreach { _ =>
       val block = in.readInt()
       val size = in.readByte()
-      hugeBlockSizesArray += Tuple2(block, size)
+      hugeBlockSizesImpl(block) = size
     }
-    hugeBlockSizes = hugeBlockSizesArray.toMap
+    hugeBlockSizes = hugeBlockSizesImpl
   }
 }
 
 private[spark] object HighlyCompressedMapStatus {
-  def apply(
-      loc: BlockManagerId,
-      uncompressedSizes: Array[Long],
-      numOutput: Long): HighlyCompressedMapStatus = {
+  def apply(loc: BlockManagerId, uncompressedSizes: Array[Long]): HighlyCompressedMapStatus = {
     // We must keep track of which blocks are empty so that we don't report a zero-sized
     // block as being non-empty (or vice-versa) when using the average block size.
     var i = 0
@@ -228,7 +214,7 @@ private[spark] object HighlyCompressedMapStatus {
     val threshold = Option(SparkEnv.get)
       .map(_.conf.get(config.SHUFFLE_ACCURATE_BLOCK_THRESHOLD))
       .getOrElse(config.SHUFFLE_ACCURATE_BLOCK_THRESHOLD.defaultValue.get)
-    val hugeBlockSizesArray = ArrayBuffer[Tuple2[Int, Byte]]()
+    val hugeBlockSizes = mutable.Map.empty[Int, Byte]
     while (i < totalNumBlocks) {
       val size = uncompressedSizes(i)
       if (size > 0) {
@@ -239,7 +225,7 @@ private[spark] object HighlyCompressedMapStatus {
           totalSmallBlockSize += size
           numSmallBlocks += 1
         } else {
-          hugeBlockSizesArray += Tuple2(i, MapStatus.compressSize(uncompressedSizes(i)))
+          hugeBlockSizes(i) = MapStatus.compressSize(uncompressedSizes(i))
         }
       } else {
         emptyBlocks.add(i)
@@ -254,6 +240,6 @@ private[spark] object HighlyCompressedMapStatus {
     emptyBlocks.trim()
     emptyBlocks.runOptimize()
     new HighlyCompressedMapStatus(loc, numNonEmptyBlocks, emptyBlocks, avgSize,
-      hugeBlockSizesArray.toMap, numOutput)
+      hugeBlockSizes)
   }
 }
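
The reworked apply() above still chooses between the two representations by partition count: one compressed size byte per reduce partition below the threshold, an average size plus an explicit map of huge blocks above it. A trivial sketch of that decision, with the default threshold of 2000 stated as an assumption (the real value comes from SHUFFLE_MIN_NUM_PARTS_TO_HIGHLY_COMPRESS):

  // Sketch: which MapStatus flavour a shuffle map task would get.
  def statusKind(numReducePartitions: Int, threshold: Int = 2000 /* assumed default */): String =
    if (numReducePartitions > threshold) "HighlyCompressedMapStatus" else "CompressedMapStatus"
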
diff --git a/core/src/main/scala/org/apache/spark/scheduler/ReplayListenerBus.scala b/core/src/main/scala/org/apache/spark/scheduler/ReplayListenerBus.scala
index 226c23733c870..4c6b0c1227b18 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/ReplayListenerBus.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/ReplayListenerBus.scala
@@ -118,6 +118,8 @@ private[spark] class ReplayListenerBus extends SparkListenerBus with Logging {
       case e: HaltReplayException =>
         // Just stop replay.
       case _: EOFException if maybeTruncated =>
+      case _: IOException if maybeTruncated =>
+        logWarning(s"Failed to read Spark event log: $sourceName")
       case ioe: IOException =>
         throw ioe
       case e: Exception =>
diff --git a/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala b/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala
index f2cd65fd523ab..5412717d61988 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala
@@ -95,7 +95,8 @@ private[spark] class ShuffleMapTask(
     var writer: ShuffleWriter[Any, Any] = null
     try {
       val manager = SparkEnv.get.shuffleManager
-      writer = manager.getWriter[Any, Any](dep.shuffleHandle, partitionId, context)
+      writer = manager.getWriter[Any, Any](
+        dep.shuffleHandle, partitionId, context, context.taskMetrics().shuffleWriteMetrics)
       writer.write(rdd.iterator(partition, context).asInstanceOf[Iterator[_ <: Product2[Any, Any]]])
       writer.stop(success = true).get
     } catch {
diff --git a/core/src/main/scala/org/apache/spark/scheduler/SparkListener.scala b/core/src/main/scala/org/apache/spark/scheduler/SparkListener.scala
index 8a112f6a37b96..e92b8a2718df0 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/SparkListener.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/SparkListener.scala
@@ -26,7 +26,7 @@ import com.fasterxml.jackson.annotation.JsonTypeInfo
 
 import org.apache.spark.{SparkConf, TaskEndReason}
 import org.apache.spark.annotation.DeveloperApi
-import org.apache.spark.executor.TaskMetrics
+import org.apache.spark.executor.{ExecutorMetrics, TaskMetrics}
 import org.apache.spark.scheduler.cluster.ExecutorInfo
 import org.apache.spark.storage.{BlockManagerId, BlockUpdatedInfo}
 import org.apache.spark.ui.SparkUI
@@ -160,11 +160,29 @@ case class SparkListenerBlockUpdated(blockUpdatedInfo: BlockUpdatedInfo) extends
  * Periodic updates from executors.
  * @param execId executor id
  * @param accumUpdates sequence of (taskId, stageId, stageAttemptId, accumUpdates)
+ * @param executorUpdates executor level metrics updates
  */
 @DeveloperApi
 case class SparkListenerExecutorMetricsUpdate(
     execId: String,
-    accumUpdates: Seq[(Long, Int, Int, Seq[AccumulableInfo])])
+    accumUpdates: Seq[(Long, Int, Int, Seq[AccumulableInfo])],
+    executorUpdates: Option[ExecutorMetrics] = None)
+  extends SparkListenerEvent
+
+/**
+ * Peak metric values for the executor for the stage, written to the history log at stage
+ * completion.
+ * @param execId executor id
+ * @param stageId stage id
+ * @param stageAttemptId stage attempt
+ * @param executorMetrics executor level metrics, indexed by ExecutorMetricType.values
+ */
+@DeveloperApi
+case class SparkListenerStageExecutorMetrics(
+    execId: String,
+    stageId: Int,
+    stageAttemptId: Int,
+    executorMetrics: ExecutorMetrics)
   extends SparkListenerEvent
 
 @DeveloperApi
@@ -264,6 +282,13 @@ private[spark] trait SparkListenerInterface {
    */
   def onExecutorMetricsUpdate(executorMetricsUpdate: SparkListenerExecutorMetricsUpdate): Unit
 
+  /**
+   * Called with the peak memory metrics for a given (executor, stage) combination. Note that this
+   * is only present when reading from the event log (as in the history server), and is never
+   * called in a live application.
+   */
+  def onStageExecutorMetrics(executorMetrics: SparkListenerStageExecutorMetrics): Unit
+
   /**
    * Called when the driver registers a new executor.
    */
@@ -361,6 +386,9 @@ abstract class SparkListener extends SparkListenerInterface {
   override def onExecutorMetricsUpdate(
       executorMetricsUpdate: SparkListenerExecutorMetricsUpdate): Unit = { }
 
+  override def onStageExecutorMetrics(
+      executorMetrics: SparkListenerStageExecutorMetrics): Unit = { }
+
   override def onExecutorAdded(executorAdded: SparkListenerExecutorAdded): Unit = { }
 
   override def onExecutorRemoved(executorRemoved: SparkListenerExecutorRemoved): Unit = { }
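
The new SparkListenerStageExecutorMetrics event and its callback above are only delivered when replaying event logs, e.g. in the history server. A minimal listener sketch that consumes them:

  import org.apache.spark.scheduler.{SparkListener, SparkListenerStageExecutorMetrics}

  class PeakMetricsLogger extends SparkListener {
    override def onStageExecutorMetrics(event: SparkListenerStageExecutorMetrics): Unit = {
      // executorMetrics is indexed by ExecutorMetricType.values, per the scaladoc above.
      println(s"peaks for executor ${event.execId} in stage ${event.stageId}.${event.stageAttemptId}")
    }
  }
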
diff --git a/core/src/main/scala/org/apache/spark/scheduler/SparkListenerBus.scala b/core/src/main/scala/org/apache/spark/scheduler/SparkListenerBus.scala
index ff19cc65552e0..8f6b7ad309602 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/SparkListenerBus.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/SparkListenerBus.scala
@@ -57,6 +57,8 @@ private[spark] trait SparkListenerBus
         listener.onApplicationEnd(applicationEnd)
       case metricsUpdate: SparkListenerExecutorMetricsUpdate =>
         listener.onExecutorMetricsUpdate(metricsUpdate)
+      case stageExecutorMetrics: SparkListenerStageExecutorMetrics =>
+        listener.onStageExecutorMetrics(stageExecutorMetrics)
       case executorAdded: SparkListenerExecutorAdded =>
         listener.onExecutorAdded(executorAdded)
       case executorRemoved: SparkListenerExecutorRemoved =>
diff --git a/core/src/main/scala/org/apache/spark/scheduler/StageInfo.scala b/core/src/main/scala/org/apache/spark/scheduler/StageInfo.scala
index 903e25b7986f2..33a68f24bd53a 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/StageInfo.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/StageInfo.scala
@@ -30,7 +30,7 @@ import org.apache.spark.storage.RDDInfo
 @DeveloperApi
 class StageInfo(
     val stageId: Int,
-    @deprecated("Use attemptNumber instead", "2.3.0") val attemptId: Int,
+    private val attemptId: Int,
     val name: String,
     val numTasks: Int,
     val rddInfos: Seq[RDDInfo],
@@ -56,6 +56,8 @@ class StageInfo(
     completionTime = Some(System.currentTimeMillis)
   }
 
+  // This would just be the second constructor arg, except we need to maintain this method
+  // with parentheses for compatibility
   def attemptNumber(): Int = attemptId
 
   private[spark] def getStatusString: String = {
diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskScheduler.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskScheduler.scala
index 95f7ae4fd39a2..94221eb0d5515 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/TaskScheduler.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/TaskScheduler.scala
@@ -17,6 +17,7 @@
 
 package org.apache.spark.scheduler
 
+import org.apache.spark.executor.ExecutorMetrics
 import org.apache.spark.scheduler.SchedulingMode.SchedulingMode
 import org.apache.spark.storage.BlockManagerId
 import org.apache.spark.util.AccumulatorV2
@@ -74,14 +75,15 @@ private[spark] trait TaskScheduler {
   def defaultParallelism(): Int
 
   /**
-   * Update metrics for in-progress tasks and let the master know that the BlockManager is still
-   * alive. Return true if the driver knows about the given block manager. Otherwise, return false,
-   * indicating that the block manager should re-register.
+   * Update metrics for in-progress tasks and executor metrics, and let the master know that the
+   * BlockManager is still alive. Return true if the driver knows about the given block manager.
+   * Otherwise, return false, indicating that the block manager should re-register.
    */
   def executorHeartbeatReceived(
       execId: String,
       accumUpdates: Array[(Long, Seq[AccumulatorV2[_, _]])],
-      blockManagerId: BlockManagerId): Boolean
+      blockManagerId: BlockManagerId,
+      executorUpdates: ExecutorMetrics): Boolean
 
   /**
    * Get an application ID associated with the job.
diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala
index 8b71170668639..61556ea642614 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala
@@ -28,13 +28,14 @@ import scala.util.Random
 
 import org.apache.spark._
 import org.apache.spark.TaskState.TaskState
+import org.apache.spark.executor.ExecutorMetrics
 import org.apache.spark.internal.Logging
 import org.apache.spark.internal.config
 import org.apache.spark.rpc.RpcEndpoint
 import org.apache.spark.scheduler.SchedulingMode.SchedulingMode
 import org.apache.spark.scheduler.TaskLocality.TaskLocality
 import org.apache.spark.storage.BlockManagerId
-import org.apache.spark.util.{AccumulatorV2, ThreadUtils, Utils}
+import org.apache.spark.util.{AccumulatorV2, SystemClock, ThreadUtils, Utils}
 
 /**
  * Schedules tasks for multiple types of clusters by acting through a SchedulerBackend.
@@ -116,6 +117,11 @@ private[spark] class TaskSchedulerImpl(
 
   protected val executorIdToHost = new HashMap[String, String]
 
+  private val abortTimer = new Timer(true)
+  private val clock = new SystemClock
+  // Exposed for testing
+  val unschedulableTaskSetToExpiryTime = new HashMap[TaskSetManager, Long]
+
   // Listener object to pass upcalls into
   var dagScheduler: DAGScheduler = null
 
@@ -414,9 +420,53 @@ private[spark] class TaskSchedulerImpl(
             launchedAnyTask |= launchedTaskAtCurrentMaxLocality
           } while (launchedTaskAtCurrentMaxLocality)
         }
+
         if (!launchedAnyTask) {
-          taskSet.abortIfCompletelyBlacklisted(hostToExecutors)
+          taskSet.getCompletelyBlacklistedTaskIfAny(hostToExecutors).foreach { taskIndex =>
+              // If the taskSet is unschedulable we try to find an existing idle blacklisted
+              // executor. If we cannot find one, we abort immediately. Otherwise we kill the
+              // idle executor and kick off an abortTimer which, if no task from the taskSet
+              // has been scheduled within the timeout, aborts the taskSet.
+              // Note 1: We keep track of schedulability on a per taskSet basis rather than on a per
+              // task basis.
+              // Note 2: The taskSet can still be aborted when there is more than one idle
+              // blacklisted executor and dynamic allocation is on. This can happen when a killed
+              // idle executor isn't replaced in time by ExecutorAllocationManager, as it relies
+              // on pending tasks and doesn't kill executors on idle timeouts, causing the abort
+              // timer to expire and abort the taskSet.
+              executorIdToRunningTaskIds.find(x => !isExecutorBusy(x._1)) match {
+                case Some ((executorId, _)) =>
+                  if (!unschedulableTaskSetToExpiryTime.contains(taskSet)) {
+                    blacklistTrackerOpt.foreach(blt => blt.killBlacklistedIdleExecutor(executorId))
+
+                    val timeout = conf.get(config.UNSCHEDULABLE_TASKSET_TIMEOUT) * 1000
+                    unschedulableTaskSetToExpiryTime(taskSet) = clock.getTimeMillis() + timeout
+                    logInfo(s"Waiting for $timeout ms for completely "
+                      + s"blacklisted task to be schedulable again before aborting $taskSet.")
+                    abortTimer.schedule(
+                      createUnschedulableTaskSetAbortTimer(taskSet, taskIndex), timeout)
+                  }
+                case None => // Abort Immediately
+                  logInfo("Cannot schedule any task because of complete blacklisting. No idle" +
+                    s" executors can be found to kill. Aborting $taskSet." )
+                  taskSet.abortSinceCompletelyBlacklisted(taskIndex)
+              }
+          }
+        } else {
+          // We want to defer killing any taskSets as long as we have a non-blacklisted executor
+          // which can be used to schedule a task from any active taskSets. This ensures that the
+          // job can make progress.
+          // Note: It is theoretically possible that a taskSet never gets scheduled on a
+          // non-blacklisted executor and the abort timer doesn't kick in because of a constant
+          // submission of new TaskSets. See the PR for more details.
+          if (unschedulableTaskSetToExpiryTime.nonEmpty) {
+            logInfo("Clearing the expiry times for all unschedulable taskSets as a task was " +
+              "recently scheduled.")
+            unschedulableTaskSetToExpiryTime.clear()
+          }
         }
+
         if (launchedAnyTask && taskSet.isBarrier) {
           // Check whether the barrier tasks are partially launched.
           // TODO SPARK-24818 handle the assert failure case (that can happen when some locality
@@ -452,6 +502,23 @@ private[spark] class TaskSchedulerImpl(
     return tasks
   }
 
+  private def createUnschedulableTaskSetAbortTimer(
+      taskSet: TaskSetManager,
+      taskIndex: Int): TimerTask = {
+    new TimerTask() {
+      override def run() {
+        if (unschedulableTaskSetToExpiryTime.contains(taskSet) &&
+            unschedulableTaskSetToExpiryTime(taskSet) <= clock.getTimeMillis()) {
+          logInfo("Cannot schedule any task because of complete blacklisting. " +
+            s"Wait time for scheduling expired. Aborting $taskSet.")
+          taskSet.abortSinceCompletelyBlacklisted(taskIndex)
+        } else {
+          this.cancel()
+        }
+      }
+    }
+  }
+
   /**
    * Shuffle offers around to avoid always placing tasks on the same workers.  Exposed to allow
    * overriding in tests, so it can be deterministic.
@@ -508,14 +575,15 @@ private[spark] class TaskSchedulerImpl(
   }
 
   /**
-   * Update metrics for in-progress tasks and let the master know that the BlockManager is still
-   * alive. Return true if the driver knows about the given block manager. Otherwise, return false,
-   * indicating that the block manager should re-register.
+   * Update metrics for in-progress tasks and executor metrics, and let the master know that the
+   * BlockManager is still alive. Return true if the driver knows about the given block manager.
+   * Otherwise, return false, indicating that the block manager should re-register.
    */
   override def executorHeartbeatReceived(
       execId: String,
       accumUpdates: Array[(Long, Seq[AccumulatorV2[_, _]])],
-      blockManagerId: BlockManagerId): Boolean = {
+      blockManagerId: BlockManagerId,
+      executorMetrics: ExecutorMetrics): Boolean = {
     // (taskId, stageId, stageAttemptId, accumUpdates)
     val accumUpdatesWithTaskIds: Array[(Long, Int, Int, Seq[AccumulableInfo])] = {
       accumUpdates.flatMap { case (id, updates) =>
@@ -525,7 +593,8 @@ private[spark] class TaskSchedulerImpl(
         }
       }
     }
-    dagScheduler.executorHeartbeatReceived(execId, accumUpdatesWithTaskIds, blockManagerId)
+    dagScheduler.executorHeartbeatReceived(execId, accumUpdatesWithTaskIds, blockManagerId,
+      executorMetrics)
   }
 
   def handleTaskGettingResult(taskSetManager: TaskSetManager, tid: Long): Unit = synchronized {
@@ -587,6 +656,7 @@ private[spark] class TaskSchedulerImpl(
       barrierCoordinator.stop()
     }
     starvationTimer.cancel()
+    abortTimer.cancel()
   }
 
   override def defaultParallelism(): Int = backend.defaultParallelism()
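
The abort path above boils down to: record an expiry time per unschedulable taskSet, schedule a TimerTask, and abort only if the entry is still present and expired when the timer fires; any successful launch clears all entries. A self-contained sketch of that pattern, with illustrative names rather than the real TaskSchedulerImpl members:

    import java.util.{Timer, TimerTask}
    import scala.collection.mutable.HashMap

    // Simplified, standalone sketch of the abort-timer pattern (names are illustrative).
    class UnschedulableTracker(timeoutMs: Long) {
      private val abortTimer = new Timer(true)                   // daemon timer thread
      private val expiryByTaskSet = new HashMap[String, Long]()  // taskSet id -> expiry time

      def onCompletelyBlacklisted(taskSetId: String)(abort: () => Unit): Unit = synchronized {
        if (!expiryByTaskSet.contains(taskSetId)) {
          expiryByTaskSet(taskSetId) = System.currentTimeMillis() + timeoutMs
          abortTimer.schedule(new TimerTask {
            override def run(): Unit = UnschedulableTracker.this.synchronized {
              // Abort only if the taskSet is still marked unschedulable and its timeout elapsed;
              // otherwise a later successful schedule cleared the entry and we just cancel.
              if (expiryByTaskSet.get(taskSetId).exists(_ <= System.currentTimeMillis())) abort()
              else cancel()
            }
          }, timeoutMs)
        }
      }

      // Called whenever any task is launched: clears all pending expiries, as in the real code.
      def onTaskLaunched(): Unit = synchronized { expiryByTaskSet.clear() }
    }
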
diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala
index d5e85a11cb279..6bf60dd8e9dfa 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala
@@ -623,8 +623,8 @@ private[spark] class TaskSetManager(
    *
    * It is possible that this taskset has become impossible to schedule *anywhere* due to the
    * blacklist.  The most common scenario would be if there are fewer executors than
-   * spark.task.maxFailures. We need to detect this so we can fail the task set, otherwise the job
-   * will hang.
+   * spark.task.maxFailures. We need to detect this so we can prevent the job from hanging.
+   * We try to acquire new executors by killing an existing idle blacklisted executor.
    *
    * There's a tradeoff here: we could make sure all tasks in the task set are schedulable, but that
    * would add extra time to each iteration of the scheduling loop. Here, we take the approach of
@@ -635,9 +635,9 @@ private[spark] class TaskSetManager(
    * failures (this is because the method picks one unscheduled task, and then iterates through each
    * executor until it finds one that the task isn't blacklisted on).
    */
-  private[scheduler] def abortIfCompletelyBlacklisted(
-      hostToExecutors: HashMap[String, HashSet[String]]): Unit = {
-    taskSetBlacklistHelperOpt.foreach { taskSetBlacklist =>
+  private[scheduler] def getCompletelyBlacklistedTaskIfAny(
+      hostToExecutors: HashMap[String, HashSet[String]]): Option[Int] = {
+    taskSetBlacklistHelperOpt.flatMap { taskSetBlacklist =>
       val appBlacklist = blacklistTracker.get
       // Only look for unschedulable tasks when at least one executor has registered. Otherwise,
       // task sets will be (unnecessarily) aborted in cases when no executors have registered yet.
@@ -658,11 +658,11 @@ private[spark] class TaskSetManager(
           }
         }
 
-        pendingTask.foreach { indexInTaskSet =>
+        pendingTask.find { indexInTaskSet =>
           // try to find some executor this task can run on.  It's possible that some *other*
           // task isn't schedulable anywhere, but we will discover that in some later call,
           // when that unschedulable task is the last task remaining.
-          val blacklistedEverywhere = hostToExecutors.forall { case (host, execsOnHost) =>
+          hostToExecutors.forall { case (host, execsOnHost) =>
             // Check if the task can run on the node
             val nodeBlacklisted =
               appBlacklist.isNodeBlacklisted(host) ||
@@ -679,22 +679,27 @@ private[spark] class TaskSetManager(
               }
             }
           }
-          if (blacklistedEverywhere) {
-            val partition = tasks(indexInTaskSet).partitionId
-            abort(s"""
-              |Aborting $taskSet because task $indexInTaskSet (partition $partition)
-              |cannot run anywhere due to node and executor blacklist.
-              |Most recent failure:
-              |${taskSetBlacklist.getLatestFailureReason}
-              |
-              |Blacklisting behavior can be configured via spark.blacklist.*.
-              |""".stripMargin)
-          }
         }
+      } else {
+        None
       }
     }
   }
 
+  private[scheduler] def abortSinceCompletelyBlacklisted(indexInTaskSet: Int): Unit = {
+    taskSetBlacklistHelperOpt.foreach { taskSetBlacklist =>
+      val partition = tasks(indexInTaskSet).partitionId
+      abort(s"""
+         |Aborting $taskSet because task $indexInTaskSet (partition $partition)
+         |cannot run anywhere due to node and executor blacklist.
+         |Most recent failure:
+         |${taskSetBlacklist.getLatestFailureReason}
+         |
+         |Blacklisting behavior can be configured via spark.blacklist.*.
+         |""".stripMargin)
+    }
+  }
+
   /**
    * Marks the task as getting result and notifies the DAG Scheduler
    */
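
The refactor above separates detection from the abort: getCompletelyBlacklistedTaskIfAny now returns Option[Int] by combining find over pending tasks with forall over hosts and executors, and the caller decides whether to abort. A tiny stand-alone illustration of that find/forall idiom, with made-up data and a stand-in predicate:

    // Illustrative only: pick the first task index that is blacklisted on every executor.
    val hostToExecutors = Map("host1" -> Set("exec1"), "host2" -> Set("exec2"))
    def isBlacklistedOn(taskIndex: Int, exec: String): Boolean = taskIndex == 0  // stand-in predicate

    val pendingTasks = Seq(0, 1)
    val completelyBlacklisted: Option[Int] = pendingTasks.find { idx =>
      hostToExecutors.forall { case (_, execs) => execs.forall(isBlacklistedOn(idx, _)) }
    }
    // completelyBlacklisted == Some(0): task 0 cannot run anywhere; task 1 can.
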
diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala
index de7c0d813ae65..329158a44d369 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala
@@ -18,13 +18,17 @@
 package org.apache.spark.scheduler.cluster
 
 import java.util.concurrent.TimeUnit
-import java.util.concurrent.atomic.AtomicInteger
+import java.util.concurrent.atomic.{AtomicInteger, AtomicReference}
 import javax.annotation.concurrent.GuardedBy
 
 import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet}
 import scala.concurrent.Future
 
+import org.apache.hadoop.security.UserGroupInformation
+
 import org.apache.spark.{ExecutorAllocationClient, SparkEnv, SparkException, TaskState}
+import org.apache.spark.deploy.SparkHadoopUtil
+import org.apache.spark.deploy.security.HadoopDelegationTokenManager
 import org.apache.spark.internal.Logging
 import org.apache.spark.rpc._
 import org.apache.spark.scheduler._
@@ -95,6 +99,12 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp
   // The num of current max ExecutorId used to re-register appMaster
   @volatile protected var currentExecutorIdCounter = 0
 
+  // Current set of delegation tokens to send to executors.
+  private val delegationTokens = new AtomicReference[Array[Byte]]()
+
+  // The token manager used to create security tokens.
+  private var delegationTokenManager: Option[HadoopDelegationTokenManager] = None
+
   private val reviveThread =
     ThreadUtils.newDaemonSingleThreadScheduledExecutor("driver-revive-thread")
 
@@ -152,6 +162,8 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp
         }
 
       case UpdateDelegationTokens(newDelegationTokens) =>
+        SparkHadoopUtil.get.addDelegationTokens(newDelegationTokens, conf)
+        delegationTokens.set(newDelegationTokens)
         executorDataMap.values.foreach { ed =>
           ed.executorEndpoint.send(UpdateDelegationTokens(newDelegationTokens))
         }
@@ -230,7 +242,7 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp
         val reply = SparkAppConfig(
           sparkProperties,
           SparkEnv.get.securityManager.getIOEncryptionKey(),
-          fetchHadoopDelegationTokens())
+          Option(delegationTokens.get()))
         context.reply(reply)
     }
 
@@ -390,6 +402,21 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp
 
     // TODO (prashant) send conf instead of properties
     driverEndpoint = createDriverEndpointRef(properties)
+
+    if (UserGroupInformation.isSecurityEnabled()) {
+      delegationTokenManager = createTokenManager()
+      delegationTokenManager.foreach { dtm =>
+        dtm.setDriverRef(driverEndpoint)
+        val creds = if (dtm.renewalEnabled) {
+          dtm.start().getCredentials()
+        } else {
+          val creds = UserGroupInformation.getCurrentUser().getCredentials()
+          dtm.obtainDelegationTokens(creds)
+          creds
+        }
+        delegationTokens.set(SparkHadoopUtil.get.serialize(creds))
+      }
+    }
   }
 
   protected def createDriverEndpointRef(
@@ -416,6 +443,7 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp
   override def stop() {
     reviveThread.shutdownNow()
     stopExecutors()
+    delegationTokenManager.foreach(_.stop())
     try {
       if (driverEndpoint != null) {
         driverEndpoint.askSync[Boolean](StopDriver)
@@ -684,7 +712,13 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp
     true
   }
 
-  protected def fetchHadoopDelegationTokens(): Option[Array[Byte]] = { None }
+  /**
+   * Create the delegation token manager to be used for the application. This method is called
+   * once during the start of the scheduler backend (so after the object has already been
+   * fully constructed), only if security is enabled in the Hadoop configuration.
+   */
+  protected def createTokenManager(): Option[HadoopDelegationTokenManager] = None
+
 }
 
 private[spark] object CoarseGrainedSchedulerBackend {
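
With this change a cluster-manager backend no longer overrides fetchHadoopDelegationTokens; it supplies a token manager instead, and the base class runs the renewal-vs-one-shot logic shown above. A rough sketch of what a subclass might look like; the backend class and the HadoopDelegationTokenManager constructor arguments here are assumptions, not this branch's exact API:

    import org.apache.hadoop.conf.Configuration

    import org.apache.spark.deploy.security.HadoopDelegationTokenManager
    import org.apache.spark.rpc.RpcEnv
    import org.apache.spark.scheduler.TaskSchedulerImpl
    import org.apache.spark.scheduler.cluster.CoarseGrainedSchedulerBackend

    // Hypothetical backend for some cluster manager. createTokenManager() is only
    // invoked from start() when UserGroupInformation.isSecurityEnabled() is true.
    private[spark] class MyClusterSchedulerBackend(
        scheduler: TaskSchedulerImpl,
        rpcEnv: RpcEnv,
        hadoopConf: Configuration)
      extends CoarseGrainedSchedulerBackend(scheduler, rpcEnv) {

      override protected def createTokenManager(): Option[HadoopDelegationTokenManager] = {
        // Assumed constructor (the backend's SparkConf plus the Hadoop Configuration);
        // check the class in this branch for the real signature.
        Some(new HadoopDelegationTokenManager(conf, hadoopConf))
      }
    }
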
diff --git a/core/src/main/scala/org/apache/spark/security/CryptoStreamUtils.scala b/core/src/main/scala/org/apache/spark/security/CryptoStreamUtils.scala
index 00621976b77f4..18b735b8035ab 100644
--- a/core/src/main/scala/org/apache/spark/security/CryptoStreamUtils.scala
+++ b/core/src/main/scala/org/apache/spark/security/CryptoStreamUtils.scala
@@ -16,7 +16,7 @@
  */
 package org.apache.spark.security
 
-import java.io.{InputStream, OutputStream}
+import java.io.{Closeable, InputStream, IOException, OutputStream}
 import java.nio.ByteBuffer
 import java.nio.channels.{ReadableByteChannel, WritableByteChannel}
 import java.util.Properties
@@ -54,8 +54,10 @@ private[spark] object CryptoStreamUtils extends Logging {
     val params = new CryptoParams(key, sparkConf)
     val iv = createInitializationVector(params.conf)
     os.write(iv)
-    new CryptoOutputStream(params.transformation, params.conf, os, params.keySpec,
-      new IvParameterSpec(iv))
+    new ErrorHandlingOutputStream(
+      new CryptoOutputStream(params.transformation, params.conf, os, params.keySpec,
+        new IvParameterSpec(iv)),
+      os)
   }
 
   /**
@@ -70,8 +72,10 @@ private[spark] object CryptoStreamUtils extends Logging {
     val helper = new CryptoHelperChannel(channel)
 
     helper.write(ByteBuffer.wrap(iv))
-    new CryptoOutputStream(params.transformation, params.conf, helper, params.keySpec,
-      new IvParameterSpec(iv))
+    new ErrorHandlingWritableChannel(
+      new CryptoOutputStream(params.transformation, params.conf, helper, params.keySpec,
+        new IvParameterSpec(iv)),
+      helper)
   }
 
   /**
@@ -84,8 +88,10 @@ private[spark] object CryptoStreamUtils extends Logging {
     val iv = new Array[Byte](IV_LENGTH_IN_BYTES)
     ByteStreams.readFully(is, iv)
     val params = new CryptoParams(key, sparkConf)
-    new CryptoInputStream(params.transformation, params.conf, is, params.keySpec,
-      new IvParameterSpec(iv))
+    new ErrorHandlingInputStream(
+      new CryptoInputStream(params.transformation, params.conf, is, params.keySpec,
+        new IvParameterSpec(iv)),
+      is)
   }
 
   /**
@@ -100,8 +106,10 @@ private[spark] object CryptoStreamUtils extends Logging {
     JavaUtils.readFully(channel, buf)
 
     val params = new CryptoParams(key, sparkConf)
-    new CryptoInputStream(params.transformation, params.conf, channel, params.keySpec,
-      new IvParameterSpec(iv))
+    new ErrorHandlingReadableChannel(
+      new CryptoInputStream(params.transformation, params.conf, channel, params.keySpec,
+        new IvParameterSpec(iv)),
+      channel)
   }
 
   def toCryptoConf(conf: SparkConf): Properties = {
@@ -157,6 +165,117 @@ private[spark] object CryptoStreamUtils extends Logging {
 
   }
 
+  /**
+   * SPARK-25535. The commons-crypto library will throw InternalError if something goes
+   * wrong, and leave bad state behind in the Java wrappers, so it's not safe to use them
+   * afterwards. This wrapper detects that situation and avoids further calls into the
+   * commons-crypto code, while still allowing the underlying streams to be closed.
+   *
+   * This should be removed once CRYPTO-141 is fixed (and Spark upgrades its commons-crypto
+   * dependency).
+   */
+  trait BaseErrorHandler extends Closeable {
+
+    private var closed = false
+
+    /** The encrypted stream that may get into an unhealthy state. */
+    protected def cipherStream: Closeable
+
+    /**
+     * The underlying stream that is being wrapped by the encrypted stream, so that it can be
+     * closed even if there's an error in the crypto layer.
+     */
+    protected def original: Closeable
+
+    protected def safeCall[T](fn: => T): T = {
+      if (closed) {
+        throw new IOException("Cipher stream is closed.")
+      }
+      try {
+        fn
+      } catch {
+        case ie: InternalError =>
+          closed = true
+          original.close()
+          throw ie
+      }
+    }
+
+    override def close(): Unit = {
+      if (!closed) {
+        cipherStream.close()
+      }
+    }
+
+  }
+
+  // Visible for testing.
+  class ErrorHandlingReadableChannel(
+      protected val cipherStream: ReadableByteChannel,
+      protected val original: ReadableByteChannel)
+    extends ReadableByteChannel with BaseErrorHandler {
+
+    override def read(src: ByteBuffer): Int = safeCall {
+      cipherStream.read(src)
+    }
+
+    override def isOpen(): Boolean = cipherStream.isOpen()
+
+  }
+
+  private class ErrorHandlingInputStream(
+      protected val cipherStream: InputStream,
+      protected val original: InputStream)
+    extends InputStream with BaseErrorHandler {
+
+    override def read(b: Array[Byte]): Int = safeCall {
+      cipherStream.read(b)
+    }
+
+    override def read(b: Array[Byte], off: Int, len: Int): Int = safeCall {
+      cipherStream.read(b, off, len)
+    }
+
+    override def read(): Int = safeCall {
+      cipherStream.read()
+    }
+  }
+
+  private class ErrorHandlingWritableChannel(
+      protected val cipherStream: WritableByteChannel,
+      protected val original: WritableByteChannel)
+    extends WritableByteChannel with BaseErrorHandler {
+
+    override def write(src: ByteBuffer): Int = safeCall {
+      cipherStream.write(src)
+    }
+
+    override def isOpen(): Boolean = cipherStream.isOpen()
+
+  }
+
+  private class ErrorHandlingOutputStream(
+      protected val cipherStream: OutputStream,
+      protected val original: OutputStream)
+    extends OutputStream with BaseErrorHandler {
+
+    override def flush(): Unit = safeCall {
+      cipherStream.flush()
+    }
+
+    override def write(b: Array[Byte]): Unit = safeCall {
+      cipherStream.write(b)
+    }
+
+    override def write(b: Array[Byte], off: Int, len: Int): Unit = safeCall {
+      cipherStream.write(b, off, len)
+    }
+
+    override def write(b: Int): Unit = safeCall {
+      cipherStream.write(b)
+    }
+  }
+
   private class CryptoParams(key: Array[Byte], sparkConf: SparkConf) {
 
     val keySpec = new SecretKeySpec(key, "AES")
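
The contract of the wrappers above: the first InternalError out of commons-crypto closes the underlying stream and is rethrown, and every later call fails fast with an IOException instead of re-entering the broken crypto code. A sketch of that behaviour using the test-visible readable-channel wrapper (this assumes Spark-internal code, since CryptoStreamUtils is private[spark]; the simulated broken channel is made up for illustration):

    import java.io.{ByteArrayInputStream, IOException}
    import java.nio.ByteBuffer
    import java.nio.channels.{Channels, ReadableByteChannel}

    import org.apache.spark.security.CryptoStreamUtils

    val raw: ReadableByteChannel = Channels.newChannel(new ByteArrayInputStream(Array[Byte](1, 2, 3)))
    // Stand-in for a commons-crypto stream that has hit CRYPTO-141.
    val broken: ReadableByteChannel = new ReadableByteChannel {
      override def read(dst: ByteBuffer): Int = throw new InternalError("simulated crypto failure")
      override def isOpen(): Boolean = true
      override def close(): Unit = ()
    }

    val wrapped = new CryptoStreamUtils.ErrorHandlingReadableChannel(broken, raw)
    try wrapped.read(ByteBuffer.allocate(8)) catch {
      case _: InternalError => // propagated once; `raw` has been closed as a side effect
    }
    try wrapped.read(ByteBuffer.allocate(8)) catch {
      case _: IOException => // "Cipher stream is closed." -- no further calls reach the crypto layer
    }
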
diff --git a/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala b/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala
index 72427dd6ce4d4..1e1c27c477877 100644
--- a/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala
+++ b/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala
@@ -30,6 +30,7 @@ import scala.util.control.NonFatal
 import com.esotericsoftware.kryo.{Kryo, KryoException, Serializer => KryoClassSerializer}
 import com.esotericsoftware.kryo.io.{Input => KryoInput, Output => KryoOutput}
 import com.esotericsoftware.kryo.io.{UnsafeInput => KryoUnsafeInput, UnsafeOutput => KryoUnsafeOutput}
+import com.esotericsoftware.kryo.pool.{KryoCallback, KryoFactory, KryoPool}
 import com.esotericsoftware.kryo.serializers.{JavaSerializer => KryoJavaSerializer}
 import com.twitter.chill.{AllScalaRegistrar, EmptyScalaKryoInstantiator}
 import org.apache.avro.generic.{GenericData, GenericRecord}
@@ -41,7 +42,7 @@ import org.apache.spark.internal.Logging
 import org.apache.spark.network.util.ByteUnit
 import org.apache.spark.scheduler.{CompressedMapStatus, HighlyCompressedMapStatus}
 import org.apache.spark.storage._
-import org.apache.spark.util.{BoundedPriorityQueue, SerializableConfiguration, SerializableJobConf, Utils}
+import org.apache.spark.util.{BoundedPriorityQueue, ByteBufferInputStream, SerializableConfiguration, SerializableJobConf, Utils}
 import org.apache.spark.util.collection.CompactBuffer
 
 /**
@@ -84,6 +85,7 @@ class KryoSerializer(conf: SparkConf)
   private val avroSchemas = conf.getAvroSchema
   // whether to use unsafe based IO for serialization
   private val useUnsafe = conf.getBoolean("spark.kryo.unsafe", false)
+  private val usePool = conf.getBoolean("spark.kryo.pool", true)
 
   def newKryoOutput(): KryoOutput =
     if (useUnsafe) {
@@ -92,6 +94,36 @@ class KryoSerializer(conf: SparkConf)
       new KryoOutput(bufferSize, math.max(bufferSize, maxBufferSize))
     }
 
+  @transient
+  private lazy val factory: KryoFactory = new KryoFactory() {
+    override def create: Kryo = {
+      newKryo()
+    }
+  }
+
+  private class PoolWrapper extends KryoPool {
+    private var pool: KryoPool = getPool
+
+    override def borrow(): Kryo = pool.borrow()
+
+    override def release(kryo: Kryo): Unit = pool.release(kryo)
+
+    override def run[T](kryoCallback: KryoCallback[T]): T = pool.run(kryoCallback)
+
+    def reset(): Unit = {
+      pool = getPool
+    }
+
+    private def getPool: KryoPool = {
+      new KryoPool.Builder(factory).softReferences.build
+    }
+  }
+
+  @transient
+  private lazy val internalPool = new PoolWrapper
+
+  def pool: KryoPool = internalPool
+
   def newKryo(): Kryo = {
     val instantiator = new EmptyScalaKryoInstantiator
     val kryo = instantiator.newKryo()
@@ -132,7 +164,8 @@ class KryoSerializer(conf: SparkConf)
         .foreach { className => kryo.register(Class.forName(className, true, classLoader)) }
       // Allow the user to register their own classes by setting spark.kryo.registrator.
       userRegistrators
-        .map(Class.forName(_, true, classLoader).newInstance().asInstanceOf[KryoRegistrator])
+        .map(Class.forName(_, true, classLoader).getConstructor().
+          newInstance().asInstanceOf[KryoRegistrator])
         .foreach { reg => reg.registerClasses(kryo) }
       // scalastyle:on classforname
     } catch {
@@ -182,6 +215,12 @@ class KryoSerializer(conf: SparkConf)
     // We can't load those classes directly in order to avoid unnecessary jar dependencies.
     // We load them safely, ignoring them if the classes are not found.
     Seq(
+      "org.apache.spark.ml.attribute.Attribute",
+      "org.apache.spark.ml.attribute.AttributeGroup",
+      "org.apache.spark.ml.attribute.BinaryAttribute",
+      "org.apache.spark.ml.attribute.NominalAttribute",
+      "org.apache.spark.ml.attribute.NumericAttribute",
+
       "org.apache.spark.ml.feature.Instance",
       "org.apache.spark.ml.feature.LabeledPoint",
       "org.apache.spark.ml.feature.OffsetInstance",
@@ -191,6 +230,7 @@ class KryoSerializer(conf: SparkConf)
       "org.apache.spark.ml.linalg.SparseMatrix",
       "org.apache.spark.ml.linalg.SparseVector",
       "org.apache.spark.ml.linalg.Vector",
+      "org.apache.spark.ml.stat.distribution.MultivariateGaussian",
       "org.apache.spark.ml.tree.impl.TreePoint",
       "org.apache.spark.mllib.clustering.VectorWithNorm",
       "org.apache.spark.mllib.linalg.DenseMatrix",
@@ -199,7 +239,8 @@ class KryoSerializer(conf: SparkConf)
       "org.apache.spark.mllib.linalg.SparseMatrix",
       "org.apache.spark.mllib.linalg.SparseVector",
       "org.apache.spark.mllib.linalg.Vector",
-      "org.apache.spark.mllib.regression.LabeledPoint"
+      "org.apache.spark.mllib.regression.LabeledPoint",
+      "org.apache.spark.mllib.stat.distribution.MultivariateGaussian"
     ).foreach { name =>
       try {
         val clazz = Utils.classForName(name)
@@ -214,8 +255,14 @@ class KryoSerializer(conf: SparkConf)
     kryo
   }
 
+  override def setDefaultClassLoader(classLoader: ClassLoader): Serializer = {
+    super.setDefaultClassLoader(classLoader)
+    internalPool.reset()
+    this
+  }
+
   override def newInstance(): SerializerInstance = {
-    new KryoSerializerInstance(this, useUnsafe)
+    new KryoSerializerInstance(this, useUnsafe, usePool)
   }
 
   private[spark] override lazy val supportsRelocationOfSerializedObjects: Boolean = {
@@ -298,7 +345,8 @@ class KryoDeserializationStream(
   }
 }
 
-private[spark] class KryoSerializerInstance(ks: KryoSerializer, useUnsafe: Boolean)
+private[spark] class KryoSerializerInstance(
+   ks: KryoSerializer, useUnsafe: Boolean, usePool: Boolean)
   extends SerializerInstance {
   /**
    * A re-used [[Kryo]] instance. Methods will borrow this instance by calling `borrowKryo()`, do
@@ -306,22 +354,29 @@ private[spark] class KryoSerializerInstance(ks: KryoSerializer, useUnsafe: Boole
    * pool of size one. SerializerInstances are not thread-safe, hence accesses to this field are
    * not synchronized.
    */
-  @Nullable private[this] var cachedKryo: Kryo = borrowKryo()
+  @Nullable private[this] var cachedKryo: Kryo = if (usePool) null else borrowKryo()
 
   /**
    * Borrows a [[Kryo]] instance. If possible, this tries to re-use a cached Kryo instance;
    * otherwise, it allocates a new instance.
    */
   private[serializer] def borrowKryo(): Kryo = {
-    if (cachedKryo != null) {
-      val kryo = cachedKryo
-      // As a defensive measure, call reset() to clear any Kryo state that might have been modified
-      // by the last operation to borrow this instance (see SPARK-7766 for discussion of this issue)
+    if (usePool) {
+      val kryo = ks.pool.borrow()
       kryo.reset()
-      cachedKryo = null
       kryo
     } else {
-      ks.newKryo()
+      if (cachedKryo != null) {
+        val kryo = cachedKryo
+        // As a defensive measure, call reset() to clear any Kryo state that might have
+        // been modified by the last operation to borrow this instance
+        // (see SPARK-7766 for discussion of this issue)
+        kryo.reset()
+        cachedKryo = null
+        kryo
+      } else {
+        ks.newKryo()
+      }
     }
   }
 
@@ -331,8 +386,12 @@ private[spark] class KryoSerializerInstance(ks: KryoSerializer, useUnsafe: Boole
    * re-use.
    */
   private[serializer] def releaseKryo(kryo: Kryo): Unit = {
-    if (cachedKryo == null) {
-      cachedKryo = kryo
+    if (usePool) {
+      ks.pool.release(kryo)
+    } else {
+      if (cachedKryo == null) {
+        cachedKryo = kryo
+      }
     }
   }
 
@@ -358,7 +417,12 @@ private[spark] class KryoSerializerInstance(ks: KryoSerializer, useUnsafe: Boole
   override def deserialize[T: ClassTag](bytes: ByteBuffer): T = {
     val kryo = borrowKryo()
     try {
-      input.setBuffer(bytes.array(), bytes.arrayOffset() + bytes.position(), bytes.remaining())
+      if (bytes.hasArray) {
+        input.setBuffer(bytes.array(), bytes.arrayOffset() + bytes.position(), bytes.remaining())
+      } else {
+        input.setBuffer(new Array[Byte](4096))
+        input.setInputStream(new ByteBufferInputStream(bytes))
+      }
       kryo.readClassAndObject(input).asInstanceOf[T]
     } finally {
       releaseKryo(kryo)
@@ -370,7 +434,12 @@ private[spark] class KryoSerializerInstance(ks: KryoSerializer, useUnsafe: Boole
     val oldClassLoader = kryo.getClassLoader
     try {
       kryo.setClassLoader(loader)
-      input.setBuffer(bytes.array(), bytes.arrayOffset() + bytes.position(), bytes.remaining())
+      if (bytes.hasArray) {
+        input.setBuffer(bytes.array(), bytes.arrayOffset() + bytes.position(), bytes.remaining())
+      } else {
+        input.setBuffer(new Array[Byte](4096))
+        input.setInputStream(new ByteBufferInputStream(bytes))
+      }
       kryo.readClassAndObject(input).asInstanceOf[T]
     } finally {
       kryo.setClassLoader(oldClassLoader)
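
The new pool is what KryoSerializerInstance borrows from when spark.kryo.pool is enabled (the default above); instances are held through soft references, so they can be reclaimed under memory pressure. A minimal sketch of the borrow/release cycle against this Spark-internal API:

    import org.apache.spark.SparkConf
    import org.apache.spark.serializer.KryoSerializer

    val ser = new KryoSerializer(new SparkConf(false))
    val kryo = ser.pool.borrow()   // reuses a pooled Kryo, or builds one via newKryo()
    try {
      // ... use `kryo` to serialize or deserialize ...
    } finally {
      ser.pool.release(kryo)       // returns the instance to the soft-reference-backed pool
    }
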
diff --git a/core/src/main/scala/org/apache/spark/shuffle/BlockStoreShuffleReader.scala b/core/src/main/scala/org/apache/spark/shuffle/BlockStoreShuffleReader.scala
index 74b0e0b3a741a..27e2f98c58f0c 100644
--- a/core/src/main/scala/org/apache/spark/shuffle/BlockStoreShuffleReader.scala
+++ b/core/src/main/scala/org/apache/spark/shuffle/BlockStoreShuffleReader.scala
@@ -33,6 +33,7 @@ private[spark] class BlockStoreShuffleReader[K, C](
     startPartition: Int,
     endPartition: Int,
     context: TaskContext,
+    readMetrics: ShuffleReadMetricsReporter,
     serializerManager: SerializerManager = SparkEnv.get.serializerManager,
     blockManager: BlockManager = SparkEnv.get.blockManager,
     mapOutputTracker: MapOutputTracker = SparkEnv.get.mapOutputTracker)
@@ -53,7 +54,8 @@ private[spark] class BlockStoreShuffleReader[K, C](
       SparkEnv.get.conf.getInt("spark.reducer.maxReqsInFlight", Int.MaxValue),
       SparkEnv.get.conf.get(config.REDUCER_MAX_BLOCKS_IN_FLIGHT_PER_ADDRESS),
       SparkEnv.get.conf.get(config.MAX_REMOTE_BLOCK_SIZE_FETCH_TO_MEM),
-      SparkEnv.get.conf.getBoolean("spark.shuffle.detectCorrupt", true))
+      SparkEnv.get.conf.getBoolean("spark.shuffle.detectCorrupt", true),
+      readMetrics)
 
     val serializerInstance = dep.serializer.newInstance()
 
@@ -66,7 +68,6 @@ private[spark] class BlockStoreShuffleReader[K, C](
     }
 
     // Update the context task metrics for each record read.
-    val readMetrics = context.taskMetrics.createTempShuffleReadMetrics()
     val metricIter = CompletionIterator[(Any, Any), Iterator[(Any, Any)]](
       recordIter.map { record =>
         readMetrics.incRecordsRead(1)
diff --git a/core/src/main/scala/org/apache/spark/shuffle/ShuffleManager.scala b/core/src/main/scala/org/apache/spark/shuffle/ShuffleManager.scala
index 4ea8a7120a9cc..18a743fbfa6fc 100644
--- a/core/src/main/scala/org/apache/spark/shuffle/ShuffleManager.scala
+++ b/core/src/main/scala/org/apache/spark/shuffle/ShuffleManager.scala
@@ -38,7 +38,11 @@ private[spark] trait ShuffleManager {
       dependency: ShuffleDependency[K, V, C]): ShuffleHandle
 
   /** Get a writer for a given partition. Called on executors by map tasks. */
-  def getWriter[K, V](handle: ShuffleHandle, mapId: Int, context: TaskContext): ShuffleWriter[K, V]
+  def getWriter[K, V](
+      handle: ShuffleHandle,
+      mapId: Int,
+      context: TaskContext,
+      metrics: ShuffleWriteMetricsReporter): ShuffleWriter[K, V]
 
   /**
    * Get a reader for a range of reduce partitions (startPartition to endPartition-1, inclusive).
@@ -48,7 +52,8 @@ private[spark] trait ShuffleManager {
       handle: ShuffleHandle,
       startPartition: Int,
       endPartition: Int,
-      context: TaskContext): ShuffleReader[K, C]
+      context: TaskContext,
+      metrics: ShuffleReadMetricsReporter): ShuffleReader[K, C]
 
   /**
    * Remove a shuffle's metadata from the ShuffleManager.
diff --git a/core/src/main/scala/org/apache/spark/shuffle/metrics.scala b/core/src/main/scala/org/apache/spark/shuffle/metrics.scala
new file mode 100644
index 0000000000000..33be677bc90cb
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/shuffle/metrics.scala
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.shuffle
+
+/**
+ * An interface for reporting shuffle read metrics, for each shuffle. This interface assumes
+ * all the methods are called from a single thread, i.e. concrete implementations would not
+ * need to synchronize.
+ *
+ * All methods have an additional Spark visibility modifier so that public, concrete
+ * implementations can still keep these methods marked as private[spark].
+ */
+private[spark] trait ShuffleReadMetricsReporter {
+  private[spark] def incRemoteBlocksFetched(v: Long): Unit
+  private[spark] def incLocalBlocksFetched(v: Long): Unit
+  private[spark] def incRemoteBytesRead(v: Long): Unit
+  private[spark] def incRemoteBytesReadToDisk(v: Long): Unit
+  private[spark] def incLocalBytesRead(v: Long): Unit
+  private[spark] def incFetchWaitTime(v: Long): Unit
+  private[spark] def incRecordsRead(v: Long): Unit
+}
+
+
+/**
+ * An interface for reporting shuffle write metrics. This interface assumes all the methods are
+ * called from a single thread, i.e. concrete implementations would not need to synchronize.
+ *
+ * All methods have an additional Spark visibility modifier so that public, concrete
+ * implementations can still keep these methods marked as private[spark].
+ */
+private[spark] trait ShuffleWriteMetricsReporter {
+  private[spark] def incBytesWritten(v: Long): Unit
+  private[spark] def incRecordsWritten(v: Long): Unit
+  private[spark] def incWriteTime(v: Long): Unit
+  private[spark] def decBytesWritten(v: Long): Unit
+  private[spark] def decRecordsWritten(v: Long): Unit
+}
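
Because every method on these traits is private[spark], a concrete reporter has to live under the org.apache.spark package. A minimal sketch of a write-side reporter under that assumption, using plain vars since all calls come from the single task thread:

    package org.apache.spark.shuffle

    // Sketch only: records the raw counters; no synchronization needed because all
    // calls come from the single thread running the shuffle write.
    private[spark] class RecordingShuffleWriteMetrics extends ShuffleWriteMetricsReporter {
      private var _bytesWritten = 0L
      private var _recordsWritten = 0L
      private var _writeTime = 0L

      def bytesWritten: Long = _bytesWritten
      def recordsWritten: Long = _recordsWritten
      def writeTime: Long = _writeTime

      private[spark] override def incBytesWritten(v: Long): Unit = _bytesWritten += v
      private[spark] override def incRecordsWritten(v: Long): Unit = _recordsWritten += v
      private[spark] override def incWriteTime(v: Long): Unit = _writeTime += v
      private[spark] override def decBytesWritten(v: Long): Unit = _bytesWritten -= v
      private[spark] override def decRecordsWritten(v: Long): Unit = _recordsWritten -= v
    }
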
diff --git a/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala b/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala
index 0caf84c6050a8..b51a843a31c31 100644
--- a/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala
+++ b/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala
@@ -114,16 +114,19 @@ private[spark] class SortShuffleManager(conf: SparkConf) extends ShuffleManager
       handle: ShuffleHandle,
       startPartition: Int,
       endPartition: Int,
-      context: TaskContext): ShuffleReader[K, C] = {
+      context: TaskContext,
+      metrics: ShuffleReadMetricsReporter): ShuffleReader[K, C] = {
     new BlockStoreShuffleReader(
-      handle.asInstanceOf[BaseShuffleHandle[K, _, C]], startPartition, endPartition, context)
+      handle.asInstanceOf[BaseShuffleHandle[K, _, C]],
+      startPartition, endPartition, context, metrics)
   }
 
   /** Get a writer for a given partition. Called on executors by map tasks. */
   override def getWriter[K, V](
       handle: ShuffleHandle,
       mapId: Int,
-      context: TaskContext): ShuffleWriter[K, V] = {
+      context: TaskContext,
+      metrics: ShuffleWriteMetricsReporter): ShuffleWriter[K, V] = {
     numMapsForShuffle.putIfAbsent(
       handle.shuffleId, handle.asInstanceOf[BaseShuffleHandle[_, _, _]].numMaps)
     val env = SparkEnv.get
@@ -136,15 +139,16 @@ private[spark] class SortShuffleManager(conf: SparkConf) extends ShuffleManager
           unsafeShuffleHandle,
           mapId,
           context,
-          env.conf)
+          env.conf,
+          metrics)
       case bypassMergeSortHandle: BypassMergeSortShuffleHandle[K @unchecked, V @unchecked] =>
         new BypassMergeSortShuffleWriter(
           env.blockManager,
           shuffleBlockResolver.asInstanceOf[IndexShuffleBlockResolver],
           bypassMergeSortHandle,
           mapId,
-          context,
-          env.conf)
+          env.conf,
+          metrics)
       case other: BaseShuffleHandle[K @unchecked, V @unchecked, _] =>
         new SortShuffleWriter(shuffleBlockResolver, other, mapId, context)
     }
diff --git a/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleWriter.scala b/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleWriter.scala
index 91fc26762e533..274399b9cc1f3 100644
--- a/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleWriter.scala
+++ b/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleWriter.scala
@@ -70,8 +70,7 @@ private[spark] class SortShuffleWriter[K, V, C](
       val blockId = ShuffleBlockId(dep.shuffleId, mapId, IndexShuffleBlockResolver.NOOP_REDUCE_ID)
       val partitionLengths = sorter.writePartitionedFile(blockId, tmp)
       shuffleBlockResolver.writeIndexFileAndCommit(dep.shuffleId, mapId, partitionLengths, tmp)
-      mapStatus = MapStatus(blockManager.shuffleServerId, partitionLengths,
-        writeMetrics.recordsWritten)
+      mapStatus = MapStatus(blockManager.shuffleServerId, partitionLengths)
     } finally {
       if (tmp.exists() && !tmp.delete()) {
         logError(s"Error while deleting temp file ${tmp.getAbsolutePath}")
diff --git a/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala b/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala
index 91b75e4852999..bd3f58b6182c0 100644
--- a/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala
+++ b/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala
@@ -25,8 +25,9 @@ import scala.collection.JavaConverters._
 import scala.collection.mutable.HashMap
 
 import org.apache.spark._
-import org.apache.spark.executor.TaskMetrics
+import org.apache.spark.executor.{ExecutorMetrics, TaskMetrics}
 import org.apache.spark.internal.Logging
+import org.apache.spark.internal.config.Status._
 import org.apache.spark.scheduler._
 import org.apache.spark.status.api.v1
 import org.apache.spark.storage._
@@ -44,10 +45,9 @@ private[spark] class AppStatusListener(
     kvstore: ElementTrackingStore,
     conf: SparkConf,
     live: Boolean,
+    appStatusSource: Option[AppStatusSource] = None,
     lastUpdateTime: Option[Long] = None) extends SparkListener with Logging {
 
-  import config._
-
   private var sparkVersion = SPARK_VERSION
   private var appInfo: v1.ApplicationInfo = null
   private var appSummary = new AppSummary(0, 0)
@@ -66,6 +66,7 @@ private[spark] class AppStatusListener(
   private val liveStages = new ConcurrentHashMap[(Int, Int), LiveStage]()
   private val liveJobs = new HashMap[Int, LiveJob]()
   private val liveExecutors = new HashMap[String, LiveExecutor]()
+  private val deadExecutors = new HashMap[String, LiveExecutor]()
   private val liveTasks = new HashMap[Long, LiveTask]()
   private val liveRDDs = new HashMap[Int, LiveRDD]()
   private val pools = new HashMap[String, SchedulerPool]()
@@ -204,6 +205,19 @@ private[spark] class AppStatusListener(
           update(rdd, now)
         }
       }
+      if (isExecutorActiveForLiveStages(exec)) {
+        // the executor was running for a currently active stage, so save it for now in
+        // deadExecutors, and remove when there are no active stages overlapping with the
+        // executor.
+        deadExecutors.put(event.executorId, exec)
+      }
+    }
+  }
+
+  /** Was the specified executor active for any currently live stages? */
+  private def isExecutorActiveForLiveStages(exec: LiveExecutor): Boolean = {
+    liveStages.values.asScala.exists { stage =>
+      stage.info.submissionTime.getOrElse(0L) < exec.removeTime.getTime
     }
   }
 
@@ -266,6 +280,11 @@ private[spark] class AppStatusListener(
   private def updateBlackListStatus(execId: String, blacklisted: Boolean): Unit = {
     liveExecutors.get(execId).foreach { exec =>
       exec.isBlacklisted = blacklisted
+      if (blacklisted) {
+        appStatusSource.foreach(_.BLACKLISTED_EXECUTORS.inc())
+      } else {
+        appStatusSource.foreach(_.UNBLACKLISTED_EXECUTORS.inc())
+      }
       liveUpdate(exec, System.nanoTime())
     }
   }
@@ -368,16 +387,40 @@ private[spark] class AppStatusListener(
       }
 
       job.status = event.jobResult match {
-        case JobSucceeded => JobExecutionStatus.SUCCEEDED
-        case JobFailed(_) => JobExecutionStatus.FAILED
+        case JobSucceeded =>
+          appStatusSource.foreach{_.SUCCEEDED_JOBS.inc()}
+          JobExecutionStatus.SUCCEEDED
+        case JobFailed(_) =>
+          appStatusSource.foreach{_.FAILED_JOBS.inc()}
+          JobExecutionStatus.FAILED
       }
 
       job.completionTime = if (event.time > 0) Some(new Date(event.time)) else None
+
+      for {
+        source <- appStatusSource
+        submissionTime <- job.submissionTime
+        completionTime <- job.completionTime
+      } {
+        source.JOB_DURATION.value.set(completionTime.getTime() - submissionTime.getTime())
+      }
+
+      // update global app status counters
+      appStatusSource.foreach { source =>
+        source.COMPLETED_STAGES.inc(job.completedStages.size)
+        source.FAILED_STAGES.inc(job.failedStages)
+        source.COMPLETED_TASKS.inc(job.completedTasks)
+        source.FAILED_TASKS.inc(job.failedTasks)
+        source.KILLED_TASKS.inc(job.killedTasks)
+        source.SKIPPED_TASKS.inc(job.skippedTasks)
+        source.SKIPPED_STAGES.inc(job.skippedStages.size)
+      }
       update(job, now, last = true)
+      if (job.status == JobExecutionStatus.SUCCEEDED) {
+        appSummary = new AppSummary(appSummary.numCompletedJobs + 1, appSummary.numCompletedStages)
+        kvstore.write(appSummary)
+      }
     }
-
-    appSummary = new AppSummary(appSummary.numCompletedJobs + 1, appSummary.numCompletedStages)
-    kvstore.write(appSummary)
   }
 
   override def onStageSubmitted(event: SparkListenerStageSubmitted): Unit = {
@@ -430,6 +473,7 @@ private[spark] class AppStatusListener(
       val locality = event.taskInfo.taskLocality.toString()
       val count = stage.localitySummary.getOrElse(locality, 0L) + 1L
       stage.localitySummary = stage.localitySummary ++ Map(locality -> count)
+      stage.activeTasksPerExecutor(event.taskInfo.executorId) += 1
       maybeUpdate(stage, now)
 
       stage.jobs.foreach { job =>
@@ -515,6 +559,7 @@ private[spark] class AppStatusListener(
       if (killedDelta > 0) {
         stage.killedSummary = killedTasksSummary(event.reason, stage.killedSummary)
       }
+      stage.activeTasksPerExecutor(event.taskInfo.executorId) -= 1
       // [SPARK-24415] Wait for all tasks to finish before removing stage from live list
       val removeStage =
         stage.activeTasks == 0 &&
@@ -539,7 +584,11 @@ private[spark] class AppStatusListener(
         if (killedDelta > 0) {
           job.killedSummary = killedTasksSummary(event.reason, job.killedSummary)
         }
-        conditionalLiveUpdate(job, now, removeStage)
+        if (removeStage) {
+          update(job, now)
+        } else {
+          maybeUpdate(job, now)
+        }
       }
 
       val esummary = stage.executorSummary(event.taskInfo.executorId)
@@ -550,7 +599,16 @@ private[spark] class AppStatusListener(
       if (metricsDelta != null) {
         esummary.metrics = LiveEntityHelpers.addMetrics(esummary.metrics, metricsDelta)
       }
-      conditionalLiveUpdate(esummary, now, removeStage)
+
+      val isLastTask = stage.activeTasksPerExecutor(event.taskInfo.executorId) == 0
+
+      // If the last task of the executor finished, then update the esummary
+      // for both live and history events.
+      if (isLastTask) {
+        update(esummary, now)
+      } else {
+        maybeUpdate(esummary, now)
+      }
 
       if (!stage.cleaning && stage.savedTasks.get() > maxTasksPerStage) {
         stage.cleaning = true
@@ -583,9 +641,14 @@ private[spark] class AppStatusListener(
         }
       }
 
-      // Force an update on live applications when the number of active tasks reaches 0. This is
-      // checked in some tests (e.g. SQLTestUtilsBase) so it needs to be reliably up to date.
-      conditionalLiveUpdate(exec, now, exec.activeTasks == 0)
+      // Force an update on both live and history applications when the number of active tasks
+      // reaches 0. This is checked in some tests (e.g. SQLTestUtilsBase) so it needs to be
+      // reliably up to date.
+      if (exec.activeTasks == 0) {
+        update(exec, now)
+      } else {
+        maybeUpdate(exec, now)
+      }
     }
   }
 
@@ -639,10 +702,14 @@ private[spark] class AppStatusListener(
       if (removeStage) {
         liveStages.remove((event.stageInfo.stageId, event.stageInfo.attemptNumber))
       }
+      if (stage.status == v1.StageStatus.COMPLETE) {
+        appSummary = new AppSummary(appSummary.numCompletedJobs, appSummary.numCompletedStages + 1)
+        kvstore.write(appSummary)
+      }
     }
 
-    appSummary = new AppSummary(appSummary.numCompletedJobs, appSummary.numCompletedStages + 1)
-    kvstore.write(appSummary)
+    // remove any dead executors that were not running for any currently active stages
+    deadExecutors.retain((execId, exec) => isExecutorActiveForLiveStages(exec))
   }
 
   private def removeBlackListedStageFrom(exec: LiveExecutor, stageId: Int, now: Long) = {
@@ -669,7 +736,37 @@ private[spark] class AppStatusListener(
   }
 
   override def onUnpersistRDD(event: SparkListenerUnpersistRDD): Unit = {
-    liveRDDs.remove(event.rddId)
+    liveRDDs.remove(event.rddId).foreach { liveRDD =>
+      val storageLevel = liveRDD.info.storageLevel
+
+      // Use RDD partition info to update executor block info.
+      liveRDD.getPartitions().foreach { case (_, part) =>
+        part.executors.foreach { executorId =>
+          liveExecutors.get(executorId).foreach { exec =>
+            exec.rddBlocks = exec.rddBlocks - 1
+          }
+        }
+      }
+
+      val now = System.nanoTime()
+
+      // Use RDD distribution to update executor memory and disk usage info.
+      liveRDD.getDistributions().foreach { case (executorId, rddDist) =>
+        liveExecutors.get(executorId).foreach { exec =>
+          if (exec.hasMemoryInfo) {
+            if (storageLevel.useOffHeap) {
+              exec.usedOffHeap = addDeltaToValue(exec.usedOffHeap, -rddDist.offHeapUsed)
+            } else {
+              exec.usedOnHeap = addDeltaToValue(exec.usedOnHeap, -rddDist.onHeapUsed)
+            }
+          }
+          exec.memoryUsed = addDeltaToValue(exec.memoryUsed, -rddDist.memoryUsed)
+          exec.diskUsed = addDeltaToValue(exec.diskUsed, -rddDist.diskUsed)
+          maybeUpdate(exec, now)
+        }
+      }
+    }
+
     kvstore.delete(classOf[RDDStorageInfoWrapper], event.rddId)
   }
 
@@ -692,6 +789,31 @@ private[spark] class AppStatusListener(
         }
       }
     }
+
+    // check if there is a new peak value for any of the executor level memory metrics
+    // for the live UI. SparkListenerExecutorMetricsUpdate events are only processed
+    // for the live UI.
+    event.executorUpdates.foreach { updates =>
+      liveExecutors.get(event.execId).foreach { exec =>
+        if (exec.peakExecutorMetrics.compareAndUpdatePeakValues(updates)) {
+          maybeUpdate(exec, now)
+        }
+      }
+    }
+  }
+
+  override def onStageExecutorMetrics(executorMetrics: SparkListenerStageExecutorMetrics): Unit = {
+    val now = System.nanoTime()
+
+    // check if there is a new peak value for any of the executor level memory metrics,
+    // while reading from the log. SparkListenerStageExecutorMetrics are only processed
+    // when reading logs.
+    liveExecutors.get(executorMetrics.execId)
+      .orElse(deadExecutors.get(executorMetrics.execId)).map { exec =>
+      if (exec.peakExecutorMetrics.compareAndUpdatePeakValues(executorMetrics.executorMetrics)) {
+        update(exec, now)
+      }
+    }
   }
 
   override def onBlockUpdated(event: SparkListenerBlockUpdated): Unit = {
@@ -728,6 +850,11 @@ private[spark] class AppStatusListener(
       .sortBy(_.stageId)
   }
 
+  /**
+   * Apply a delta to a value, but ensure that it doesn't go negative.
+   */
+  private def addDeltaToValue(old: Long, delta: Long): Long = math.max(0, old + delta)
+
   private def updateRDDBlock(event: SparkListenerBlockUpdated, block: RDDBlockId): Unit = {
     val now = System.nanoTime()
     val executorId = event.blockUpdatedInfo.blockManagerId.executorId
@@ -737,9 +864,6 @@ private[spark] class AppStatusListener(
     val diskDelta = event.blockUpdatedInfo.diskSize * (if (storageLevel.useDisk) 1 else -1)
     val memoryDelta = event.blockUpdatedInfo.memSize * (if (storageLevel.useMemory) 1 else -1)
 
-    // Function to apply a delta to a value, but ensure that it doesn't go negative.
-    def newValue(old: Long, delta: Long): Long = math.max(0, old + delta)
-
     val updatedStorageLevel = if (storageLevel.isValid) {
       Some(storageLevel.description)
     } else {
@@ -756,13 +880,13 @@ private[spark] class AppStatusListener(
     maybeExec.foreach { exec =>
       if (exec.hasMemoryInfo) {
         if (storageLevel.useOffHeap) {
-          exec.usedOffHeap = newValue(exec.usedOffHeap, memoryDelta)
+          exec.usedOffHeap = addDeltaToValue(exec.usedOffHeap, memoryDelta)
         } else {
-          exec.usedOnHeap = newValue(exec.usedOnHeap, memoryDelta)
+          exec.usedOnHeap = addDeltaToValue(exec.usedOnHeap, memoryDelta)
         }
       }
-      exec.memoryUsed = newValue(exec.memoryUsed, memoryDelta)
-      exec.diskUsed = newValue(exec.diskUsed, diskDelta)
+      exec.memoryUsed = addDeltaToValue(exec.memoryUsed, memoryDelta)
+      exec.diskUsed = addDeltaToValue(exec.diskUsed, diskDelta)
     }
 
     // Update the block entry in the RDD info, keeping track of the deltas above so that we
@@ -790,8 +914,8 @@ private[spark] class AppStatusListener(
       // Only update the partition if it's still stored in some executor, otherwise get rid of it.
       if (executors.nonEmpty) {
         partition.update(executors, rdd.storageLevel,
-          newValue(partition.memoryUsed, memoryDelta),
-          newValue(partition.diskUsed, diskDelta))
+          addDeltaToValue(partition.memoryUsed, memoryDelta),
+          addDeltaToValue(partition.diskUsed, diskDelta))
       } else {
         rdd.removePartition(block.name)
       }
@@ -799,14 +923,14 @@ private[spark] class AppStatusListener(
       maybeExec.foreach { exec =>
         if (exec.rddBlocks + rddBlocksDelta > 0) {
           val dist = rdd.distribution(exec)
-          dist.memoryUsed = newValue(dist.memoryUsed, memoryDelta)
-          dist.diskUsed = newValue(dist.diskUsed, diskDelta)
+          dist.memoryUsed = addDeltaToValue(dist.memoryUsed, memoryDelta)
+          dist.diskUsed = addDeltaToValue(dist.diskUsed, diskDelta)
 
           if (exec.hasMemoryInfo) {
             if (storageLevel.useOffHeap) {
-              dist.offHeapUsed = newValue(dist.offHeapUsed, memoryDelta)
+              dist.offHeapUsed = addDeltaToValue(dist.offHeapUsed, memoryDelta)
             } else {
-              dist.onHeapUsed = newValue(dist.onHeapUsed, memoryDelta)
+              dist.onHeapUsed = addDeltaToValue(dist.onHeapUsed, memoryDelta)
             }
           }
           dist.lastUpdate = null
@@ -825,8 +949,8 @@ private[spark] class AppStatusListener(
         }
       }
 
-      rdd.memoryUsed = newValue(rdd.memoryUsed, memoryDelta)
-      rdd.diskUsed = newValue(rdd.diskUsed, diskDelta)
+      rdd.memoryUsed = addDeltaToValue(rdd.memoryUsed, memoryDelta)
+      rdd.diskUsed = addDeltaToValue(rdd.diskUsed, diskDelta)
       update(rdd, now)
     }
 
@@ -905,14 +1029,6 @@ private[spark] class AppStatusListener(
     }
   }
 
-  private def conditionalLiveUpdate(entity: LiveEntity, now: Long, condition: Boolean): Unit = {
-    if (condition) {
-      liveUpdate(entity, now)
-    } else {
-      maybeUpdate(entity, now)
-    }
-  }
-
   private def cleanupExecutors(count: Long): Unit = {
     // Because the limit is on the number of *dead* executors, we need to calculate whether
     // there are actually enough dead executors to be deleted.
@@ -968,16 +1084,6 @@ private[spark] class AppStatusListener(
         kvstore.delete(e.getClass(), e.id)
       }
 
-      val tasks = kvstore.view(classOf[TaskDataWrapper])
-        .index("stage")
-        .first(key)
-        .last(key)
-        .asScala
-
-      tasks.foreach { t =>
-        kvstore.delete(t.getClass(), t.taskId)
-      }
-
       // Check whether there are remaining attempts for the same stage. If there aren't, then
       // also delete the RDD graph data.
       val remainingAttempts = kvstore.view(classOf[StageDataWrapper])
@@ -1000,6 +1106,15 @@ private[spark] class AppStatusListener(
 
       cleanupCachedQuantiles(key)
     }
+
+    // Delete tasks for all stages in one pass, as deleting them for each stage individually is slow
+    val tasks = kvstore.view(classOf[TaskDataWrapper]).asScala
+    val keys = stages.map { s => (s.info.stageId, s.info.attemptId) }.toSet
+    tasks.foreach { t =>
+      if (keys.contains((t.stageId, t.stageAttemptId))) {
+        kvstore.delete(t.getClass(), t.taskId)
+      }
+    }
   }
 
   private def cleanupTasks(stage: LiveStage): Unit = {
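
The addDeltaToValue helper that replaces the local newValue closure simply clamps at zero, so out-of-order or duplicate removal events can never drive memory or disk usage negative:

    // Same one-liner as above, shown with concrete values.
    def addDeltaToValue(old: Long, delta: Long): Long = math.max(0, old + delta)

    addDeltaToValue(100L, -30L)   // 70 -- normal decrement
    addDeltaToValue(10L, -30L)    // 0  -- clamped instead of going negative
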
diff --git a/core/src/main/scala/org/apache/spark/status/AppStatusSource.scala b/core/src/main/scala/org/apache/spark/status/AppStatusSource.scala
new file mode 100644
index 0000000000000..f6a21578ff499
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/status/AppStatusSource.scala
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.status
+
+import java.util.concurrent.atomic.AtomicLong
+
+import AppStatusSource.getCounter
+import com.codahale.metrics.{Counter, Gauge, MetricRegistry}
+
+import org.apache.spark.SparkConf
+import org.apache.spark.internal.config.Status.APP_STATUS_METRICS_ENABLED
+import org.apache.spark.metrics.source.Source
+
+private [spark] class JobDuration(val value: AtomicLong) extends Gauge[Long] {
+  override def getValue: Long = value.get()
+}
+
+private[spark] class AppStatusSource extends Source {
+
+  override implicit val metricRegistry = new MetricRegistry()
+
+  override val sourceName = "appStatus"
+
+  val jobDuration = new JobDuration(new AtomicLong(0L))
+
+  // Duration of each job in milliseconds
+  val JOB_DURATION = metricRegistry
+    .register(MetricRegistry.name("jobDuration"), jobDuration)
+
+  val FAILED_STAGES = getCounter("stages", "failedStages")
+
+  val SKIPPED_STAGES = getCounter("stages", "skippedStages")
+
+  val COMPLETED_STAGES = getCounter("stages", "completedStages")
+
+  val SUCCEEDED_JOBS = getCounter("jobs", "succeededJobs")
+
+  val FAILED_JOBS = getCounter("jobs", "failedJobs")
+
+  val COMPLETED_TASKS = getCounter("tasks", "completedTasks")
+
+  val FAILED_TASKS = getCounter("tasks", "failedTasks")
+
+  val KILLED_TASKS = getCounter("tasks", "killedTasks")
+
+  val SKIPPED_TASKS = getCounter("tasks", "skippedTasks")
+
+  val BLACKLISTED_EXECUTORS = getCounter("tasks", "blackListedExecutors")
+
+  val UNBLACKLISTED_EXECUTORS = getCounter("tasks", "unblackListedExecutors")
+}
+
+private[spark] object AppStatusSource {
+
+  def getCounter(prefix: String, name: String)(implicit metricRegistry: MetricRegistry): Counter = {
+    metricRegistry.counter(MetricRegistry.name(prefix, name))
+  }
+
+  def createSource(conf: SparkConf): Option[AppStatusSource] = {
+    Option(conf.get(APP_STATUS_METRICS_ENABLED))
+      .filter(identity)
+      .map { _ => new AppStatusSource() }
+  }
+}
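
The new AppStatusSource builds on Dropwizard metrics: counters are fetched by name from a shared MetricRegistry and a Gauge exposes the latest job duration. The sketch below shows the same registry wiring in isolation; the metric names and the standalone object are illustrative, not part of Spark:

    import java.util.concurrent.atomic.AtomicLong

    import com.codahale.metrics.{Counter, Gauge, MetricRegistry}

    object MetricsSketch {
      def main(args: Array[String]): Unit = {
        val registry = new MetricRegistry()

        // Counters are created on demand and cached by name.
        val failedJobs: Counter = registry.counter(MetricRegistry.name("jobs", "failedJobs"))
        failedJobs.inc()

        // A gauge is read lazily whenever a metrics sink polls it.
        val lastJobDuration = new AtomicLong(0L)
        registry.register(MetricRegistry.name("jobDuration"), new Gauge[Long] {
          override def getValue: Long = lastJobDuration.get()
        })
        lastJobDuration.set(1234L)

        println(registry.getCounters().keySet())
        println(registry.getGauges().get("jobDuration").getValue)
      }
    }
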
diff --git a/core/src/main/scala/org/apache/spark/status/AppStatusStore.scala b/core/src/main/scala/org/apache/spark/status/AppStatusStore.scala
index e237281c552b1..5c0ed4d5d8f4c 100644
--- a/core/src/main/scala/org/apache/spark/status/AppStatusStore.scala
+++ b/core/src/main/scala/org/apache/spark/status/AppStatusStore.scala
@@ -112,10 +112,12 @@ private[spark] class AppStatusStore(
     }
   }
 
-  def stageAttempt(stageId: Int, stageAttemptId: Int, details: Boolean = false): v1.StageData = {
+  def stageAttempt(stageId: Int, stageAttemptId: Int,
+      details: Boolean = false): (v1.StageData, Seq[Int]) = {
     val stageKey = Array(stageId, stageAttemptId)
-    val stage = store.read(classOf[StageDataWrapper], stageKey).info
-    if (details) stageWithDetails(stage) else stage
+    val stageDataWrapper = store.read(classOf[StageDataWrapper], stageKey)
+    val stage = if (details) stageWithDetails(stageDataWrapper.info) else stageDataWrapper.info
+    (stage, stageDataWrapper.jobIds.toSeq)
   }
 
   def taskCount(stageId: Int, stageAttemptId: Int): Long = {
@@ -349,7 +351,9 @@ private[spark] class AppStatusStore(
   def taskList(stageId: Int, stageAttemptId: Int, maxTasks: Int): Seq[v1.TaskData] = {
     val stageKey = Array(stageId, stageAttemptId)
     store.view(classOf[TaskDataWrapper]).index("stage").first(stageKey).last(stageKey).reverse()
-      .max(maxTasks).asScala.map(_.toApi).toSeq.reverse
+      .max(maxTasks).asScala.map { taskDataWrapper =>
+      constructTaskData(taskDataWrapper)
+    }.toSeq.reverse
   }
 
   def taskList(
@@ -388,7 +392,9 @@ private[spark] class AppStatusStore(
     }
 
     val ordered = if (ascending) indexed else indexed.reverse()
-    ordered.skip(offset).max(length).asScala.map(_.toApi).toSeq
+    ordered.skip(offset).max(length).asScala.map { taskDataWrapper =>
+      constructTaskData(taskDataWrapper)
+    }.toSeq
   }
 
   def executorSummary(stageId: Int, attemptId: Int): Map[String, v1.ExecutorStageSummary] = {
@@ -494,6 +500,24 @@ private[spark] class AppStatusStore(
     store.close()
   }
 
+  def constructTaskData(taskDataWrapper: TaskDataWrapper) : v1.TaskData = {
+    val taskDataOld: v1.TaskData = taskDataWrapper.toApi
+    val executorLogs: Option[Map[String, String]] = try {
+      Some(executorSummary(taskDataOld.executorId).executorLogs)
+    } catch {
+      case e: NoSuchElementException => e.getMessage
+        None
+    }
+    new v1.TaskData(taskDataOld.taskId, taskDataOld.index,
+      taskDataOld.attempt, taskDataOld.launchTime, taskDataOld.resultFetchStart,
+      taskDataOld.duration, taskDataOld.executorId, taskDataOld.host, taskDataOld.status,
+      taskDataOld.taskLocality, taskDataOld.speculative, taskDataOld.accumulatorUpdates,
+      taskDataOld.errorMessage, taskDataOld.taskMetrics,
+      executorLogs.getOrElse(Map[String, String]()),
+      AppStatusUtils.schedulerDelay(taskDataOld),
+      AppStatusUtils.gettingResultTime(taskDataOld))
+  }
+
 }
 
 private[spark] object AppStatusStore {
@@ -503,10 +527,11 @@ private[spark] object AppStatusStore {
   /**
    * Create an in-memory store for a live application.
    */
-  def createLiveStore(conf: SparkConf): AppStatusStore = {
+  def createLiveStore(
+      conf: SparkConf,
+      appStatusSource: Option[AppStatusSource] = None): AppStatusStore = {
     val store = new ElementTrackingStore(new InMemoryStore(), conf)
-    val listener = new AppStatusListener(store, conf, true)
+    val listener = new AppStatusListener(store, conf, true, appStatusSource)
     new AppStatusStore(store, listener = Some(listener))
   }
-
 }
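
constructTaskData above enriches each stored task row with the owning executor's log links plus the derived scheduler delay and getting-result time, and degrades to an empty log map when the executor entry has already been evicted. A hedged sketch of that lookup-with-fallback shape, with a hypothetical lookup in place of the real store call:

    object EnrichSketch {
      // Hypothetical executor-summary lookup that throws when the executor was evicted,
      // mirroring how AppStatusStore.executorSummary behaves for unknown ids.
      private val executorLogs = Map("exec-1" -> Map("stdout" -> "http://host:8042/stdout"))

      private def lookupLogs(executorId: String): Map[String, String] =
        executorLogs.getOrElse(executorId, throw new NoSuchElementException(executorId))

      def safeLogs(executorId: String): Map[String, String] =
        try {
          lookupLogs(executorId)
        } catch {
          // The executor may be cleaned up before its tasks; degrade to no log links.
          case _: NoSuchElementException => Map.empty
        }

      def main(args: Array[String]): Unit = {
        println(safeLogs("exec-1")) // Map(stdout -> http://host:8042/stdout)
        println(safeLogs("exec-9")) // Map()
      }
    }
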
diff --git a/core/src/main/scala/org/apache/spark/status/ElementTrackingStore.scala b/core/src/main/scala/org/apache/spark/status/ElementTrackingStore.scala
index 863b0967f765e..5ec7d90bfaaba 100644
--- a/core/src/main/scala/org/apache/spark/status/ElementTrackingStore.scala
+++ b/core/src/main/scala/org/apache/spark/status/ElementTrackingStore.scala
@@ -24,6 +24,7 @@ import scala.collection.mutable.{HashMap, ListBuffer}
 import com.google.common.util.concurrent.MoreExecutors
 
 import org.apache.spark.SparkConf
+import org.apache.spark.internal.config.Status._
 import org.apache.spark.util.{ThreadUtils, Utils}
 import org.apache.spark.util.kvstore._
 
@@ -45,8 +46,6 @@ import org.apache.spark.util.kvstore._
  */
 private[spark] class ElementTrackingStore(store: KVStore, conf: SparkConf) extends KVStore {
 
-  import config._
-
   private val triggers = new HashMap[Class[_], Seq[Trigger[_]]]()
   private val flushTriggers = new ListBuffer[() => Unit]()
   private val executor = if (conf.get(ASYNC_TRACKING_ENABLED)) {
diff --git a/core/src/main/scala/org/apache/spark/status/KVUtils.scala b/core/src/main/scala/org/apache/spark/status/KVUtils.scala
index 99b1843d8e1c0..45348be5c98b9 100644
--- a/core/src/main/scala/org/apache/spark/status/KVUtils.scala
+++ b/core/src/main/scala/org/apache/spark/status/KVUtils.scala
@@ -42,7 +42,7 @@ private[spark] object KVUtils extends Logging {
   private[spark] class KVStoreScalaSerializer extends KVStoreSerializer {
 
     mapper.registerModule(DefaultScalaModule)
-    mapper.setSerializationInclusion(JsonInclude.Include.NON_NULL)
+    mapper.setSerializationInclusion(JsonInclude.Include.NON_ABSENT)
 
   }
 
diff --git a/core/src/main/scala/org/apache/spark/status/LiveEntity.scala b/core/src/main/scala/org/apache/spark/status/LiveEntity.scala
index 79e3f13b826ce..47e45a66ecccb 100644
--- a/core/src/main/scala/org/apache/spark/status/LiveEntity.scala
+++ b/core/src/main/scala/org/apache/spark/status/LiveEntity.scala
@@ -26,14 +26,13 @@ import scala.collection.mutable.HashMap
 import com.google.common.collect.Interners
 
 import org.apache.spark.JobExecutionStatus
-import org.apache.spark.executor.TaskMetrics
+import org.apache.spark.executor.{ExecutorMetrics, TaskMetrics}
 import org.apache.spark.scheduler.{AccumulableInfo, StageInfo, TaskInfo}
 import org.apache.spark.status.api.v1
 import org.apache.spark.storage.RDDInfo
 import org.apache.spark.ui.SparkUI
 import org.apache.spark.util.AccumulatorContext
 import org.apache.spark.util.collection.OpenHashSet
-import org.apache.spark.util.kvstore.KVStore
 
 /**
  * A mutable representation of a live entity in Spark (jobs, stages, tasks, et al). Every live
@@ -62,7 +61,7 @@ private[spark] abstract class LiveEntity {
 private class LiveJob(
     val jobId: Int,
     name: String,
-    submissionTime: Option[Date],
+    val submissionTime: Option[Date],
     val stageIds: Seq[Int],
     jobGroup: Option[String],
     numTasks: Int) extends LiveEntity {
@@ -268,6 +267,9 @@ private class LiveExecutor(val executorId: String, _addTime: Long) extends LiveE
 
   def hasMemoryInfo: Boolean = totalOnHeap >= 0L
 
+  // peak values for executor level metrics
+  val peakExecutorMetrics = new ExecutorMetrics()
+
   def hostname: String = if (host != null) host else hostPort.split(":")(0)
 
   override protected def doUpdate(): Any = {
@@ -302,10 +304,10 @@ private class LiveExecutor(val executorId: String, _addTime: Long) extends LiveE
       Option(removeReason),
       executorLogs,
       memoryMetrics,
-      blacklistedInStages)
+      blacklistedInStages,
+      Some(peakExecutorMetrics).filter(_.isSet))
     new ExecutorSummaryWrapper(info)
   }
-
 }
 
 private class LiveExecutorStageSummary(
@@ -374,6 +376,8 @@ private class LiveStage extends LiveEntity {
 
   val executorSummaries = new HashMap[String, LiveExecutorStageSummary]()
 
+  val activeTasksPerExecutor = new HashMap[String, Int]().withDefaultValue(0)
+
   var blackListedExecutors = new HashSet[String]()
 
   // Used for cleanup of tasks after they reach the configured limit. Not written to the store.
@@ -538,6 +542,10 @@ private class LiveRDD(val info: RDDInfo) extends LiveEntity {
     distributions.get(exec.executorId)
   }
 
+  def getPartitions(): scala.collection.Map[String, LiveRDDPartition] = partitions
+
+  def getDistributions(): scala.collection.Map[String, LiveRDDDistribution] = distributions
+
   override protected def doUpdate(): Any = {
     val dists = if (distributions.nonEmpty) {
       Some(distributions.values.map(_.toApi()).toSeq)
@@ -581,8 +589,7 @@ private object LiveEntityHelpers {
       .filter { acc =>
         // We don't need to store internal or SQL accumulables as their values will be shown in
         // other places, so drop them to reduce the memory usage.
-        !acc.internal && (!acc.metadata.isDefined ||
-          acc.metadata.get != Some(AccumulatorContext.SQL_ACCUM_IDENTIFIER))
+        !acc.internal && acc.metadata != Some(AccumulatorContext.SQL_ACCUM_IDENTIFIER)
       }
       .map { acc =>
         new v1.AccumulableInfo(
diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/JacksonMessageWriter.scala b/core/src/main/scala/org/apache/spark/status/api/v1/JacksonMessageWriter.scala
index 4560d300cb0c8..50a286d0d3b0f 100644
--- a/core/src/main/scala/org/apache/spark/status/api/v1/JacksonMessageWriter.scala
+++ b/core/src/main/scala/org/apache/spark/status/api/v1/JacksonMessageWriter.scala
@@ -49,7 +49,7 @@ private[v1] class JacksonMessageWriter extends MessageBodyWriter[Object]{
   }
   mapper.registerModule(com.fasterxml.jackson.module.scala.DefaultScalaModule)
   mapper.enable(SerializationFeature.INDENT_OUTPUT)
-  mapper.setSerializationInclusion(JsonInclude.Include.NON_NULL)
+  mapper.setSerializationInclusion(JsonInclude.Include.NON_ABSENT)
   mapper.setDateFormat(JacksonMessageWriter.makeISODateFormat)
 
   override def isWriteable(
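
Switching the inclusion rule from NON_NULL to NON_ABSENT (here and in KVUtils above) also suppresses fields whose Scala Option is None, not just literal nulls. A small illustration with jackson-module-scala; the Payload case class is hypothetical:

    import com.fasterxml.jackson.annotation.JsonInclude
    import com.fasterxml.jackson.databind.ObjectMapper
    import com.fasterxml.jackson.module.scala.DefaultScalaModule

    // Hypothetical payload with an Option field that is sometimes absent.
    case class Payload(id: Int, note: Option[String])

    object InclusionSketch {
      def main(args: Array[String]): Unit = {
        val mapper = new ObjectMapper()
        mapper.registerModule(DefaultScalaModule)
        // NON_ABSENT drops nulls and also "absent" references such as None.
        mapper.setSerializationInclusion(JsonInclude.Include.NON_ABSENT)

        println(mapper.writeValueAsString(Payload(1, Some("kept")))) // {"id":1,"note":"kept"}
        println(mapper.writeValueAsString(Payload(2, None)))         // {"id":2}
      }
    }
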
diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/OneApplicationResource.scala b/core/src/main/scala/org/apache/spark/status/api/v1/OneApplicationResource.scala
index 32100c5704538..1f4082cac8f75 100644
--- a/core/src/main/scala/org/apache/spark/status/api/v1/OneApplicationResource.scala
+++ b/core/src/main/scala/org/apache/spark/status/api/v1/OneApplicationResource.scala
@@ -175,7 +175,7 @@ private[v1] class OneApplicationAttemptResource extends AbstractApplicationResou
   def getAttempt(): ApplicationAttemptInfo = {
     uiRoot.getApplicationInfo(appId)
       .flatMap { app =>
-        app.attempts.filter(_.attemptId == attemptId).headOption
+        app.attempts.find(_.attemptId.contains(attemptId))
       }
       .getOrElse {
         throw new NotFoundException(s"unknown app $appId, attempt $attemptId")
diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/StagesResource.scala b/core/src/main/scala/org/apache/spark/status/api/v1/StagesResource.scala
index 96249e4bfd5fa..f81892734c2de 100644
--- a/core/src/main/scala/org/apache/spark/status/api/v1/StagesResource.scala
+++ b/core/src/main/scala/org/apache/spark/status/api/v1/StagesResource.scala
@@ -16,15 +16,16 @@
  */
 package org.apache.spark.status.api.v1
 
-import java.util.{List => JList}
+import java.util.{HashMap, List => JList, Locale}
 import javax.ws.rs._
-import javax.ws.rs.core.MediaType
+import javax.ws.rs.core.{Context, MediaType, MultivaluedMap, UriInfo}
 
 import org.apache.spark.SparkException
 import org.apache.spark.scheduler.StageInfo
 import org.apache.spark.status.api.v1.StageStatus._
 import org.apache.spark.status.api.v1.TaskSorting._
 import org.apache.spark.ui.SparkUI
+import org.apache.spark.ui.jobs.ApiHelper._
 
 @Produces(Array(MediaType.APPLICATION_JSON))
 private[v1] class StagesResource extends BaseAppResource {
@@ -56,7 +57,7 @@ private[v1] class StagesResource extends BaseAppResource {
       @PathParam("stageAttemptId") stageAttemptId: Int,
       @QueryParam("details") @DefaultValue("true") details: Boolean): StageData = withUI { ui =>
     try {
-      ui.store.stageAttempt(stageId, stageAttemptId, details = details)
+      ui.store.stageAttempt(stageId, stageAttemptId, details = details)._1
     } catch {
       case _: NoSuchElementException =>
         // Change the message depending on whether there are any attempts for the requested stage.
@@ -102,4 +103,120 @@ private[v1] class StagesResource extends BaseAppResource {
     withUI(_.store.taskList(stageId, stageAttemptId, offset, length, sortBy))
   }
 
+  // This API needs to stay formatted exactly as it is below, since it is being used by the
+  // datatables for the stages page.
+  @GET
+  @Path("{stageId: \\d+}/{stageAttemptId: \\d+}/taskTable")
+  def taskTable(
+    @PathParam("stageId") stageId: Int,
+    @PathParam("stageAttemptId") stageAttemptId: Int,
+    @QueryParam("details") @DefaultValue("true") details: Boolean,
+    @Context uriInfo: UriInfo):
+  HashMap[String, Object] = {
+    withUI { ui =>
+      val uriQueryParameters = uriInfo.getQueryParameters(true)
+      val totalRecords = uriQueryParameters.getFirst("numTasks")
+      var isSearch = false
+      var searchValue: String = null
+      var filteredRecords = totalRecords
+      // The datatables client API sends a list of query parameters to the server. These contain
+      // information such as the columns to be sorted, the search value typed by the user in the
+      // search box, the pagination index, etc. For more information on these query parameters,
+      // see https://datatables.net/manual/server-side.
+      if (uriQueryParameters.getFirst("search[value]") != null &&
+        uriQueryParameters.getFirst("search[value]").length > 0) {
+        isSearch = true
+        searchValue = uriQueryParameters.getFirst("search[value]")
+      }
+      val _tasksToShow: Seq[TaskData] = doPagination(uriQueryParameters, stageId, stageAttemptId,
+        isSearch, totalRecords.toInt)
+      val ret = new HashMap[String, Object]()
+      if (_tasksToShow.nonEmpty) {
+        // Performs server-side search based on input from user
+        if (isSearch) {
+          val filteredTaskList = filterTaskList(_tasksToShow, searchValue)
+          filteredRecords = filteredTaskList.length.toString
+          if (filteredTaskList.length > 0) {
+            val pageStartIndex = uriQueryParameters.getFirst("start").toInt
+            val pageLength = uriQueryParameters.getFirst("length").toInt
+            ret.put("aaData", filteredTaskList.slice(
+              pageStartIndex, pageStartIndex + pageLength))
+          } else {
+            ret.put("aaData", filteredTaskList)
+          }
+        } else {
+          ret.put("aaData", _tasksToShow)
+        }
+      } else {
+        ret.put("aaData", _tasksToShow)
+      }
+      ret.put("recordsTotal", totalRecords)
+      ret.put("recordsFiltered", filteredRecords)
+      ret
+    }
+  }
+
+  // Performs pagination on the server side
+  def doPagination(queryParameters: MultivaluedMap[String, String], stageId: Int,
+    stageAttemptId: Int, isSearch: Boolean, totalRecords: Int): Seq[TaskData] = {
+    var columnNameToSort = queryParameters.getFirst("columnNameToSort")
+    // Sorting on Logs column will default to Index column sort
+    if (columnNameToSort.equalsIgnoreCase("Logs")) {
+      columnNameToSort = "Index"
+    }
+    val isAscendingStr = queryParameters.getFirst("order[0][dir]")
+    var pageStartIndex = 0
+    var pageLength = totalRecords
+    // We fetch only the desired rows up to the specified page length for all cases except when a
+    // search query is present; in that case, we need to fetch all the rows to perform the search
+    // on the entire table.
+    if (!isSearch) {
+      pageStartIndex = queryParameters.getFirst("start").toInt
+      pageLength = queryParameters.getFirst("length").toInt
+    }
+    withUI(_.store.taskList(stageId, stageAttemptId, pageStartIndex, pageLength,
+      indexName(columnNameToSort), isAscendingStr.equalsIgnoreCase("asc")))
+  }
+
+  // Filters task list based on search parameter
+  def filterTaskList(
+    taskDataList: Seq[TaskData],
+    searchValue: String): Seq[TaskData] = {
+    val defaultOptionString: String = "d"
+    val searchValueLowerCase = searchValue.toLowerCase(Locale.ROOT)
+    val containsValue = (taskDataParams: Any) => taskDataParams.toString.toLowerCase(
+      Locale.ROOT).contains(searchValueLowerCase)
+    val taskMetricsContainsValue = (task: TaskData) => task.taskMetrics match {
+      case None => false
+      case Some(metrics) =>
+        (containsValue(task.taskMetrics.get.executorDeserializeTime)
+        || containsValue(task.taskMetrics.get.executorRunTime)
+        || containsValue(task.taskMetrics.get.jvmGcTime)
+        || containsValue(task.taskMetrics.get.resultSerializationTime)
+        || containsValue(task.taskMetrics.get.memoryBytesSpilled)
+        || containsValue(task.taskMetrics.get.diskBytesSpilled)
+        || containsValue(task.taskMetrics.get.peakExecutionMemory)
+        || containsValue(task.taskMetrics.get.inputMetrics.bytesRead)
+        || containsValue(task.taskMetrics.get.inputMetrics.recordsRead)
+        || containsValue(task.taskMetrics.get.outputMetrics.bytesWritten)
+        || containsValue(task.taskMetrics.get.outputMetrics.recordsWritten)
+        || containsValue(task.taskMetrics.get.shuffleReadMetrics.fetchWaitTime)
+        || containsValue(task.taskMetrics.get.shuffleReadMetrics.recordsRead)
+        || containsValue(task.taskMetrics.get.shuffleWriteMetrics.bytesWritten)
+        || containsValue(task.taskMetrics.get.shuffleWriteMetrics.recordsWritten)
+        || containsValue(task.taskMetrics.get.shuffleWriteMetrics.writeTime))
+    }
+    val filteredTaskDataSequence: Seq[TaskData] = taskDataList.filter(f =>
+      (containsValue(f.taskId) || containsValue(f.index) || containsValue(f.attempt)
+        || containsValue(f.launchTime)
+        || containsValue(f.resultFetchStart.getOrElse(defaultOptionString))
+        || containsValue(f.duration.getOrElse(defaultOptionString))
+        || containsValue(f.executorId) || containsValue(f.host) || containsValue(f.status)
+        || containsValue(f.taskLocality) || containsValue(f.speculative)
+        || containsValue(f.errorMessage.getOrElse(defaultOptionString))
+        || taskMetricsContainsValue(f)
+        || containsValue(f.schedulerDelay) || containsValue(f.gettingResultTime)))
+    filteredTaskDataSequence
+  }
+
 }
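
The new taskTable endpoint speaks the datatables server-side protocol: the client sends start, length and search[value], and the server replies with aaData, recordsTotal and recordsFiltered. A compact sketch of that contract over an in-memory list, with a hypothetical row type standing in for v1.TaskData:

    // Hypothetical row type; the real endpoint pages over v1.TaskData.
    case class Row(taskId: Long, host: String, status: String)

    object DataTablesSketch {
      /** Apply an optional case-insensitive search, then slice a single page. */
      def page(rows: Seq[Row], start: Int, length: Int, search: Option[String])
          : (Seq[Row], Int, Int) = {
        val filtered = search.map(_.toLowerCase) match {
          case Some(s) => rows.filter(_.productIterator.exists(_.toString.toLowerCase.contains(s)))
          case None => rows
        }
        // recordsTotal is the unfiltered count, recordsFiltered the post-search count.
        (filtered.slice(start, start + length), rows.size, filtered.size)
      }

      def main(args: Array[String]): Unit = {
        val rows = (1 to 25).map(i => Row(i, s"host-${i % 3}", if (i % 5 == 0) "FAILED" else "SUCCESS"))
        val (aaData, total, filteredCount) = page(rows, start = 0, length = 10, search = Some("failed"))
        println(s"aaData=${aaData.map(_.taskId)} recordsTotal=$total recordsFiltered=$filteredCount")
      }
    }
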
diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/api.scala b/core/src/main/scala/org/apache/spark/status/api/v1/api.scala
index 971d7e90fa7b8..aa21da2b66ab2 100644
--- a/core/src/main/scala/org/apache/spark/status/api/v1/api.scala
+++ b/core/src/main/scala/org/apache/spark/status/api/v1/api.scala
@@ -22,9 +22,14 @@ import java.util.Date
 import scala.xml.{NodeSeq, Text}
 
 import com.fasterxml.jackson.annotation.JsonIgnoreProperties
-import com.fasterxml.jackson.databind.annotation.JsonDeserialize
+import com.fasterxml.jackson.core.{JsonGenerator, JsonParser}
+import com.fasterxml.jackson.core.`type`.TypeReference
+import com.fasterxml.jackson.databind.{DeserializationContext, JsonDeserializer, JsonSerializer, SerializerProvider}
+import com.fasterxml.jackson.databind.annotation.{JsonDeserialize, JsonSerialize}
 
 import org.apache.spark.JobExecutionStatus
+import org.apache.spark.executor.ExecutorMetrics
+import org.apache.spark.metrics.ExecutorMetricType
 
 case class ApplicationInfo private[spark](
     id: String,
@@ -98,7 +103,10 @@ class ExecutorSummary private[spark](
     val removeReason: Option[String],
     val executorLogs: Map[String, String],
     val memoryMetrics: Option[MemoryMetrics],
-    val blacklistedInStages: Set[Int])
+    val blacklistedInStages: Set[Int],
+    @JsonSerialize(using = classOf[ExecutorMetricsJsonSerializer])
+    @JsonDeserialize(using = classOf[ExecutorMetricsJsonDeserializer])
+    val peakMemoryMetrics: Option[ExecutorMetrics])
 
 class MemoryMetrics private[spark](
     val usedOnHeapStorageMemory: Long,
@@ -106,6 +114,36 @@ class MemoryMetrics private[spark](
     val totalOnHeapStorageMemory: Long,
     val totalOffHeapStorageMemory: Long)
 
+/** deserializer for peakMemoryMetrics: convert map to ExecutorMetrics */
+private[spark] class ExecutorMetricsJsonDeserializer
+    extends JsonDeserializer[Option[ExecutorMetrics]] {
+  override def deserialize(
+      jsonParser: JsonParser,
+      deserializationContext: DeserializationContext): Option[ExecutorMetrics] = {
+    val metricsMap = jsonParser.readValueAs[Option[Map[String, Long]]](
+      new TypeReference[Option[Map[String, java.lang.Long]]] {})
+    metricsMap.map(metrics => new ExecutorMetrics(metrics))
+  }
+}
+/** serializer for peakMemoryMetrics: convert ExecutorMetrics to map with metric name as key */
+private[spark] class ExecutorMetricsJsonSerializer
+    extends JsonSerializer[Option[ExecutorMetrics]] {
+  override def serialize(
+      metrics: Option[ExecutorMetrics],
+      jsonGenerator: JsonGenerator,
+      serializerProvider: SerializerProvider): Unit = {
+    metrics.foreach { m: ExecutorMetrics =>
+      val metricsMap = ExecutorMetricType.values.map { metricType =>
+            metricType.name -> m.getMetricValue(metricType)
+      }.toMap
+      jsonGenerator.writeObject(metricsMap)
+    }
+  }
+
+  override def isEmpty(provider: SerializerProvider, value: Option[ExecutorMetrics]): Boolean =
+    value.isEmpty
+}
+
 class JobData private[spark](
     val jobId: Int,
     val name: String,
@@ -215,7 +253,10 @@ class TaskData private[spark](
     val speculative: Boolean,
     val accumulatorUpdates: Seq[AccumulableInfo],
     val errorMessage: Option[String] = None,
-    val taskMetrics: Option[TaskMetrics] = None)
+    val taskMetrics: Option[TaskMetrics] = None,
+    val executorLogs: Map[String, String],
+    val schedulerDelay: Long,
+    val gettingResultTime: Long)
 
 class TaskMetrics private[spark](
     val executorDeserializeTime: Long,
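
peakMemoryMetrics above is rendered through a paired custom serializer and deserializer so that an ExecutorMetrics value round-trips as a plain name-to-value JSON object and disappears entirely when absent. A simplified version of the serializer half of that pattern, with hypothetical Metrics and Summary classes in place of the Spark types:

    import com.fasterxml.jackson.core.JsonGenerator
    import com.fasterxml.jackson.databind.{JsonSerializer, ObjectMapper, SerializerProvider}
    import com.fasterxml.jackson.databind.annotation.JsonSerialize
    import com.fasterxml.jackson.module.scala.DefaultScalaModule

    // Hypothetical stand-in for ExecutorMetrics: an opaque holder of named longs.
    class Metrics(private val values: Map[String, Long]) {
      def asMap: Map[String, Long] = values
    }

    // Write Option[Metrics] as a bare JSON object, and let it be omitted when None.
    class MetricsOptionSerializer extends JsonSerializer[Option[Metrics]] {
      override def serialize(
          value: Option[Metrics],
          gen: JsonGenerator,
          provider: SerializerProvider): Unit = {
        value.foreach(m => gen.writeObject(m.asMap))
      }

      override def isEmpty(provider: SerializerProvider, value: Option[Metrics]): Boolean =
        value.isEmpty
    }

    class Summary(
        val id: String,
        @JsonSerialize(using = classOf[MetricsOptionSerializer])
        val peak: Option[Metrics])

    object SerializerSketch {
      def main(args: Array[String]): Unit = {
        val mapper = new ObjectMapper().registerModule(DefaultScalaModule)
        println(mapper.writeValueAsString(new Summary("exec-1", Some(new Metrics(Map("jvmHeap" -> 42L))))))
        println(mapper.writeValueAsString(new Summary("exec-2", None)))
      }
    }
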
diff --git a/core/src/main/scala/org/apache/spark/status/storeTypes.scala b/core/src/main/scala/org/apache/spark/status/storeTypes.scala
index 646cf25880e37..ef19e86f3135f 100644
--- a/core/src/main/scala/org/apache/spark/status/storeTypes.scala
+++ b/core/src/main/scala/org/apache/spark/status/storeTypes.scala
@@ -283,7 +283,10 @@ private[spark] class TaskDataWrapper(
       speculative,
       accumulatorUpdates,
       errorMessage,
-      metrics)
+      metrics,
+      executorLogs = null,
+      schedulerDelay = 0L,
+      gettingResultTime = 0L)
   }
 
   @JsonIgnore @KVIndex(TaskIndexNames.STAGE)
diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala
index f5c69ad241e3a..1dfbc6effb346 100644
--- a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala
+++ b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala
@@ -35,7 +35,7 @@ import scala.util.control.NonFatal
 import com.codahale.metrics.{MetricRegistry, MetricSet}
 
 import org.apache.spark._
-import org.apache.spark.executor.{DataReadMethod, ShuffleWriteMetrics}
+import org.apache.spark.executor.DataReadMethod
 import org.apache.spark.internal.{config, Logging}
 import org.apache.spark.memory.{MemoryManager, MemoryMode}
 import org.apache.spark.metrics.source.Source
@@ -43,12 +43,13 @@ import org.apache.spark.network._
 import org.apache.spark.network.buffer.ManagedBuffer
 import org.apache.spark.network.client.StreamCallbackWithID
 import org.apache.spark.network.netty.SparkTransportConf
-import org.apache.spark.network.shuffle.{ExternalShuffleClient, TempFileManager}
+import org.apache.spark.network.shuffle._
 import org.apache.spark.network.shuffle.protocol.ExecutorShuffleInfo
+import org.apache.spark.network.util.TransportConf
 import org.apache.spark.rpc.RpcEnv
 import org.apache.spark.scheduler.ExecutorCacheTaskLocation
 import org.apache.spark.serializer.{SerializerInstance, SerializerManager}
-import org.apache.spark.shuffle.ShuffleManager
+import org.apache.spark.shuffle.{ShuffleManager, ShuffleWriteMetricsReporter}
 import org.apache.spark.storage.memory._
 import org.apache.spark.unsafe.Platform
 import org.apache.spark.util._
@@ -131,8 +132,6 @@ private[spark] class BlockManager(
 
   private[spark] val externalShuffleServiceEnabled =
     conf.get(config.SHUFFLE_SERVICE_ENABLED)
-  private val chunkSize =
-    conf.getSizeAsBytes("spark.storage.memoryMapLimitForTests", Int.MaxValue.toString).toInt
   private val remoteReadNioBufferConversion =
     conf.getBoolean("spark.network.remoteReadNioBufferConversion", false)
 
@@ -213,11 +212,11 @@ private[spark] class BlockManager(
 
   private var blockReplicationPolicy: BlockReplicationPolicy = _
 
-  // A TempFileManager used to track all the files of remote blocks which above the
+  // A DownloadFileManager used to track all the files of remote blocks which are above the
   // specified memory threshold. Files will be deleted automatically based on weak reference.
   // Exposed for test
   private[storage] val remoteBlockTempFileManager =
-    new BlockManager.RemoteBlockTempFileManager(this)
+    new BlockManager.RemoteBlockDownloadFileManager(this)
   private val maxRemoteBlockToMem = conf.get(config.MAX_REMOTE_BLOCK_SIZE_FETCH_TO_MEM)
 
   /**
@@ -237,7 +236,7 @@ private[spark] class BlockManager(
       val priorityClass = conf.get(
         "spark.storage.replication.policy", classOf[RandomBlockReplicationPolicy].getName)
       val clazz = Utils.classForName(priorityClass)
-      val ret = clazz.newInstance.asInstanceOf[BlockReplicationPolicy]
+      val ret = clazz.getConstructor().newInstance().asInstanceOf[BlockReplicationPolicy]
       logInfo(s"Using $priorityClass for block replication policy")
       ret
     }
@@ -436,10 +435,8 @@ private[spark] class BlockManager(
         // stream.
         channel.close()
         // TODO SPARK-25035 Even if we're only going to write the data to disk after this, we end up
-        // using a lot of memory here.  With encryption, we'll read the whole file into a regular
-        // byte buffer and OOM.  Without encryption, we'll memory map the file and won't get a jvm
-        // OOM, but might get killed by the OS / cluster manager.  We could at least read the tmp
-        // file as a stream in both cases.
+        // using a lot of memory here. We'll read the whole file into a regular
+        // byte buffer and OOM.  We could at least read the tmp file as a stream.
         val buffer = securityManager.getIOEncryptionKey() match {
           case Some(key) =>
             // we need to pass in the size of the unencrypted block
@@ -451,7 +448,7 @@ private[spark] class BlockManager(
             new EncryptedBlockData(tmpFile, blockSize, conf, key).toChunkedByteBuffer(allocator)
 
           case None =>
-            ChunkedByteBuffer.map(tmpFile, conf.get(config.MEMORY_MAP_LIMIT_FOR_TESTS).toInt)
+            ChunkedByteBuffer.fromFile(tmpFile)
         }
         putBytes(blockId, buffer, level)(classTag)
         tmpFile.delete()
@@ -695,9 +692,9 @@ private[spark] class BlockManager(
    */
   private def getRemoteValues[T: ClassTag](blockId: BlockId): Option[BlockResult] = {
     val ct = implicitly[ClassTag[T]]
-    getRemoteBytes(blockId).map { data =>
+    getRemoteManagedBuffer(blockId).map { data =>
       val values =
-        serializerManager.dataDeserializeStream(blockId, data.toInputStream(dispose = true))(ct)
+        serializerManager.dataDeserializeStream(blockId, data.createInputStream())(ct)
       new BlockResult(values, DataReadMethod.Network, data.size)
     }
   }
@@ -720,14 +717,9 @@ private[spark] class BlockManager(
   }
 
   /**
-   * Get block from remote block managers as serialized bytes.
+   * Get block from remote block managers as a ManagedBuffer.
    */
-  def getRemoteBytes(blockId: BlockId): Option[ChunkedByteBuffer] = {
-    // TODO if we change this method to return the ManagedBuffer, then getRemoteValues
-    // could just use the inputStream on the temp file, rather than memory-mapping the file.
-    // Until then, replication can cause the process to use too much memory and get killed
-    // by the OS / cluster manager (not a java OOM, since it's a memory-mapped file) even though
-    // we've read the data to disk.
+  private def getRemoteManagedBuffer(blockId: BlockId): Option[ManagedBuffer] = {
     logDebug(s"Getting remote block $blockId")
     require(blockId != null, "BlockId is null")
     var runningFailureCount = 0
@@ -792,14 +784,13 @@ private[spark] class BlockManager(
       }
 
       if (data != null) {
-        // SPARK-24307 undocumented "escape-hatch" in case there are any issues in converting to
-        // ChunkedByteBuffer, to go back to old code-path.  Can be removed post Spark 2.4 if
-        // new path is stable.
-        if (remoteReadNioBufferConversion) {
-          return Some(new ChunkedByteBuffer(data.nioByteBuffer()))
-        } else {
-          return Some(ChunkedByteBuffer.fromManagedBuffer(data, chunkSize))
-        }
+        // If the ManagedBuffer is a BlockManagerManagedBuffer, the disposal of the
+        // byte buffers backing it may need to be handled after reading the bytes.
+        // In this case, since we just fetched the bytes remotely, we do not have
+        // a BlockManagerManagedBuffer. The assert here is to ensure that this holds
+        // true (or the disposal is handled).
+        assert(!data.isInstanceOf[BlockManagerManagedBuffer])
+        return Some(data)
       }
       logDebug(s"The value of block $blockId is null")
     }
@@ -807,6 +798,22 @@ private[spark] class BlockManager(
     None
   }
 
+  /**
+   * Get block from remote block managers as serialized bytes.
+   */
+  def getRemoteBytes(blockId: BlockId): Option[ChunkedByteBuffer] = {
+    getRemoteManagedBuffer(blockId).map { data =>
+      // SPARK-24307 undocumented "escape-hatch" in case there are any issues in converting to
+      // ChunkedByteBuffer, to go back to old code-path.  Can be removed post Spark 2.4 if
+      // new path is stable.
+      if (remoteReadNioBufferConversion) {
+        new ChunkedByteBuffer(data.nioByteBuffer())
+      } else {
+        ChunkedByteBuffer.fromManagedBuffer(data)
+      }
+    }
+  }
+
   /**
    * Get a block from the block manager (either local or remote).
    *
@@ -935,7 +942,7 @@ private[spark] class BlockManager(
       file: File,
       serializerInstance: SerializerInstance,
       bufferSize: Int,
-      writeMetrics: ShuffleWriteMetrics): DiskBlockObjectWriter = {
+      writeMetrics: ShuffleWriteMetricsReporter): DiskBlockObjectWriter = {
     val syncWrites = conf.getBoolean("spark.shuffle.sync", false)
     new DiskBlockObjectWriter(file, serializerManager, serializerInstance, bufferSize,
       syncWrites, writeMetrics, blockId)
@@ -1664,23 +1671,28 @@ private[spark] object BlockManager {
     metricRegistry.registerAll(metricSet)
   }
 
-  class RemoteBlockTempFileManager(blockManager: BlockManager)
-      extends TempFileManager with Logging {
+  class RemoteBlockDownloadFileManager(blockManager: BlockManager)
+      extends DownloadFileManager with Logging {
+    // lazy because SparkEnv is set after this
+    lazy val encryptionKey = SparkEnv.get.securityManager.getIOEncryptionKey()
 
-    private class ReferenceWithCleanup(file: File, referenceQueue: JReferenceQueue[File])
-        extends WeakReference[File](file, referenceQueue) {
-      private val filePath = file.getAbsolutePath
+    private class ReferenceWithCleanup(
+        file: DownloadFile,
+        referenceQueue: JReferenceQueue[DownloadFile]
+        ) extends WeakReference[DownloadFile](file, referenceQueue) {
+
+      val filePath = file.path()
 
       def cleanUp(): Unit = {
         logDebug(s"Clean up file $filePath")
 
-        if (!new File(filePath).delete()) {
+        if (!file.delete()) {
           logDebug(s"Fail to delete file $filePath")
         }
       }
     }
 
-    private val referenceQueue = new JReferenceQueue[File]
+    private val referenceQueue = new JReferenceQueue[DownloadFile]
     private val referenceBuffer = Collections.newSetFromMap[ReferenceWithCleanup](
       new ConcurrentHashMap)
 
@@ -1692,11 +1704,21 @@ private[spark] object BlockManager {
     cleaningThread.setName("RemoteBlock-temp-file-clean-thread")
     cleaningThread.start()
 
-    override def createTempFile(): File = {
-      blockManager.diskBlockManager.createTempLocalBlock()._2
+    override def createTempFile(transportConf: TransportConf): DownloadFile = {
+      val file = blockManager.diskBlockManager.createTempLocalBlock()._2
+      encryptionKey match {
+        case Some(key) =>
+          // encryption is enabled, so when we read the decrypted data off the network, we need to
+          // encrypt it when writing to disk.  Note that the data may have been encrypted when it
+          // was cached on disk on the remote side, but it was already decrypted by now (see
+          // EncryptedBlockData).
+          new EncryptedDownloadFile(file, key)
+        case None =>
+          new SimpleDownloadFile(file, transportConf)
+      }
     }
 
-    override def registerTempFileToClean(file: File): Boolean = {
+    override def registerTempFileToClean(file: DownloadFile): Boolean = {
       referenceBuffer.add(new ReferenceWithCleanup(file, referenceQueue))
     }
 
@@ -1724,4 +1746,39 @@ private[spark] object BlockManager {
       }
     }
   }
+
+  /**
+   * A DownloadFile that encrypts data when it is written, and decrypts when it's read.
+   */
+  private class EncryptedDownloadFile(
+      file: File,
+      key: Array[Byte]) extends DownloadFile {
+
+    private val env = SparkEnv.get
+
+    override def delete(): Boolean = file.delete()
+
+    override def openForWriting(): DownloadFileWritableChannel = {
+      new EncryptedDownloadWritableChannel()
+    }
+
+    override def path(): String = file.getAbsolutePath
+
+    private class EncryptedDownloadWritableChannel extends DownloadFileWritableChannel {
+      private val countingOutput: CountingWritableChannel = new CountingWritableChannel(
+        Channels.newChannel(env.serializerManager.wrapForEncryption(new FileOutputStream(file))))
+
+      override def closeAndRead(): ManagedBuffer = {
+        countingOutput.close()
+        val size = countingOutput.getCount
+        new EncryptedManagedBuffer(new EncryptedBlockData(file, size, env.conf, key))
+      }
+
+      override def write(src: ByteBuffer): Int = countingOutput.write(src)
+
+      override def isOpen: Boolean = countingOutput.isOpen()
+
+      override def close(): Unit = countingOutput.close()
+    }
+  }
 }
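
RemoteBlockDownloadFileManager registers a WeakReference per temp file and deletes the file once the reference is enqueued, so files leaked by readers are still reclaimed eventually. The generic shape of that weak-reference-plus-ReferenceQueue cleanup, independent of Spark; the single drain call and the System.gc() hint are illustrative only:

    import java.io.File
    import java.lang.ref.{ReferenceQueue, WeakReference}
    import java.nio.file.Files
    import java.util.Collections
    import java.util.concurrent.ConcurrentHashMap

    object WeakCleanupSketch {
      // Remember the path eagerly: once the referent is collected, get() returns null.
      private class FileRef(file: File, queue: ReferenceQueue[File])
          extends WeakReference[File](file, queue) {
        val path: String = file.getAbsolutePath
        def cleanUp(): Unit = new File(path).delete()
      }

      private val queue = new ReferenceQueue[File]
      private val refs = Collections.newSetFromMap(new ConcurrentHashMap[FileRef, java.lang.Boolean])

      def register(file: File): Unit = refs.add(new FileRef(file, queue))

      /** Drain references whose files are no longer strongly reachable and delete them. */
      def drainOnce(): Unit = {
        var ref = queue.poll()
        while (ref != null) {
          val fileRef = ref.asInstanceOf[FileRef]
          refs.remove(fileRef)
          fileRef.cleanUp()
          ref = queue.poll()
        }
      }

      def main(args: Array[String]): Unit = {
        register(Files.createTempFile("weak-cleanup", ".tmp").toFile)
        System.gc() // a hint only; the real manager runs a thread that blocks on remove()
        drainOnce()
      }
    }
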
diff --git a/core/src/main/scala/org/apache/spark/storage/DiskBlockObjectWriter.scala b/core/src/main/scala/org/apache/spark/storage/DiskBlockObjectWriter.scala
index a024c83d8d8b7..17390f9c60e79 100644
--- a/core/src/main/scala/org/apache/spark/storage/DiskBlockObjectWriter.scala
+++ b/core/src/main/scala/org/apache/spark/storage/DiskBlockObjectWriter.scala
@@ -20,9 +20,9 @@ package org.apache.spark.storage
 import java.io.{BufferedOutputStream, File, FileOutputStream, OutputStream}
 import java.nio.channels.FileChannel
 
-import org.apache.spark.executor.ShuffleWriteMetrics
 import org.apache.spark.internal.Logging
 import org.apache.spark.serializer.{SerializationStream, SerializerInstance, SerializerManager}
+import org.apache.spark.shuffle.ShuffleWriteMetricsReporter
 import org.apache.spark.util.Utils
 
 /**
@@ -43,7 +43,7 @@ private[spark] class DiskBlockObjectWriter(
     syncWrites: Boolean,
     // These write metrics concurrently shared with other active DiskBlockObjectWriters who
     // are themselves performing writes. All updates must be relative.
-    writeMetrics: ShuffleWriteMetrics,
+    writeMetrics: ShuffleWriteMetricsReporter,
     val blockId: BlockId = null)
   extends OutputStream
   with Logging {
diff --git a/core/src/main/scala/org/apache/spark/storage/DiskStore.scala b/core/src/main/scala/org/apache/spark/storage/DiskStore.scala
index a820bc70b33b2..29963a95cb074 100644
--- a/core/src/main/scala/org/apache/spark/storage/DiskStore.scala
+++ b/core/src/main/scala/org/apache/spark/storage/DiskStore.scala
@@ -30,8 +30,10 @@ import io.netty.channel.DefaultFileRegion
 
 import org.apache.spark.{SecurityManager, SparkConf}
 import org.apache.spark.internal.{config, Logging}
+import org.apache.spark.network.buffer.ManagedBuffer
 import org.apache.spark.network.util.{AbstractFileRegion, JavaUtils}
 import org.apache.spark.security.CryptoStreamUtils
+import org.apache.spark.unsafe.array.ByteArrayMethods
 import org.apache.spark.util.Utils
 import org.apache.spark.util.io.ChunkedByteBuffer
 
@@ -200,7 +202,7 @@ private class DiskBlockData(
   private def open() = new FileInputStream(file).getChannel
 }
 
-private class EncryptedBlockData(
+private[spark] class EncryptedBlockData(
     file: File,
     blockSize: Long,
     conf: SparkConf,
@@ -216,7 +218,7 @@ private class EncryptedBlockData(
       var remaining = blockSize
       val chunks = new ListBuffer[ByteBuffer]()
       while (remaining > 0) {
-        val chunkSize = math.min(remaining, Int.MaxValue)
+        val chunkSize = math.min(remaining, ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH)
         val chunk = allocator(chunkSize.toInt)
         remaining -= chunkSize
         JavaUtils.readFully(source, chunk)
@@ -234,7 +236,8 @@ private class EncryptedBlockData(
     // This is used by the block transfer service to replicate blocks. The upload code reads
     // all bytes into memory to send the block to the remote executor, so it's ok to do this
     // as long as the block fits in a Java array.
-    assert(blockSize <= Int.MaxValue, "Block is too large to be wrapped in a byte buffer.")
+    assert(blockSize <= ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH,
+      "Block is too large to be wrapped in a byte buffer.")
     val dst = ByteBuffer.allocate(blockSize.toInt)
     val in = open()
     try {
@@ -260,7 +263,23 @@ private class EncryptedBlockData(
         throw e
     }
   }
+}
+
+private[spark] class EncryptedManagedBuffer(
+    val blockData: EncryptedBlockData) extends ManagedBuffer {
+
+  // This is the size of the decrypted data
+  override def size(): Long = blockData.size
+
+  override def nioByteBuffer(): ByteBuffer = blockData.toByteBuffer()
+
+  override def convertToNetty(): AnyRef = blockData.toNetty()
+
+  override def createInputStream(): InputStream = blockData.toInputStream()
+
+  override def retain(): ManagedBuffer = this
 
+  override def release(): ManagedBuffer = this
 }
 
 private class ReadableChannelFileRegion(source: ReadableByteChannel, blockSize: Long)
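
EncryptedBlockData reads the decrypted stream into a sequence of ByteBuffers, each capped at a JVM-safe array length, rather than assuming one buffer can hold the whole block. A standalone sketch of that chunked-read loop; the headroom constant mirrors the idea of MAX_ROUNDED_ARRAY_LENGTH but is an assumption here:

    import java.io.{ByteArrayInputStream, EOFException}
    import java.nio.ByteBuffer
    import java.nio.channels.{Channels, ReadableByteChannel}

    import scala.collection.mutable.ListBuffer

    object ChunkedReadSketch {
      // Stay a bit under Int.MaxValue; some JVMs refuse arrays right at the limit.
      private val MaxChunk: Long = Int.MaxValue - 15

      /** Read exactly `total` bytes from `source` into buffers of at most MaxChunk bytes each. */
      def readChunks(source: ReadableByteChannel, total: Long): Seq[ByteBuffer] = {
        val chunks = new ListBuffer[ByteBuffer]()
        var remaining = total
        while (remaining > 0) {
          val chunkSize = math.min(remaining, MaxChunk).toInt
          val chunk = ByteBuffer.allocate(chunkSize)
          while (chunk.hasRemaining) {
            if (source.read(chunk) < 0) {
              throw new EOFException(s"stream ended with $remaining bytes still expected")
            }
          }
          chunk.flip()
          chunks += chunk
          remaining -= chunkSize
        }
        chunks.toList
      }

      def main(args: Array[String]): Unit = {
        val data = Array.fill[Byte](1024)(1)
        val channel = Channels.newChannel(new ByteArrayInputStream(data))
        println(readChunks(channel, data.length).map(_.remaining()).sum) // 1024
      }
    }
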
diff --git a/core/src/main/scala/org/apache/spark/storage/RDDInfo.scala b/core/src/main/scala/org/apache/spark/storage/RDDInfo.scala
index 9ccc8f9cc585b..917cfab1c699a 100644
--- a/core/src/main/scala/org/apache/spark/storage/RDDInfo.scala
+++ b/core/src/main/scala/org/apache/spark/storage/RDDInfo.scala
@@ -55,14 +55,17 @@ class RDDInfo(
 }
 
 private[spark] object RDDInfo {
-  private val callsiteForm = SparkEnv.get.conf.get(EVENT_LOG_CALLSITE_FORM)
-
   def fromRdd(rdd: RDD[_]): RDDInfo = {
     val rddName = Option(rdd.name).getOrElse(Utils.getFormattedClassName(rdd))
     val parentIds = rdd.dependencies.map(_.rdd.id)
-    val callSite = callsiteForm match {
-      case "short" => rdd.creationSite.shortForm
-      case "long" => rdd.creationSite.longForm
+    val callsiteLongForm = Option(SparkEnv.get)
+      .map(_.conf.get(EVENT_LOG_CALLSITE_LONG_FORM))
+      .getOrElse(false)
+
+    val callSite = if (callsiteLongForm) {
+      rdd.creationSite.longForm
+    } else {
+      rdd.creationSite.shortForm
     }
     new RDDInfo(rdd.id, rddName, rdd.partitions.length,
       rdd.getStorageLevel, parentIds, callSite, rdd.scope)
diff --git a/core/src/main/scala/org/apache/spark/storage/ShuffleBlockFetcherIterator.scala b/core/src/main/scala/org/apache/spark/storage/ShuffleBlockFetcherIterator.scala
index 00d01dd28afb5..86f7c08eddcb5 100644
--- a/core/src/main/scala/org/apache/spark/storage/ShuffleBlockFetcherIterator.scala
+++ b/core/src/main/scala/org/apache/spark/storage/ShuffleBlockFetcherIterator.scala
@@ -17,7 +17,7 @@
 
 package org.apache.spark.storage
 
-import java.io.{File, InputStream, IOException}
+import java.io.{InputStream, IOException}
 import java.nio.ByteBuffer
 import java.util.concurrent.LinkedBlockingQueue
 import javax.annotation.concurrent.GuardedBy
@@ -28,8 +28,9 @@ import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet, Queue}
 import org.apache.spark.{SparkException, TaskContext}
 import org.apache.spark.internal.Logging
 import org.apache.spark.network.buffer.{FileSegmentManagedBuffer, ManagedBuffer}
-import org.apache.spark.network.shuffle.{BlockFetchingListener, ShuffleClient, TempFileManager}
-import org.apache.spark.shuffle.FetchFailedException
+import org.apache.spark.network.shuffle._
+import org.apache.spark.network.util.TransportConf
+import org.apache.spark.shuffle.{FetchFailedException, ShuffleReadMetricsReporter}
 import org.apache.spark.util.Utils
 import org.apache.spark.util.io.ChunkedByteBufferOutputStream
 
@@ -50,7 +51,7 @@ import org.apache.spark.util.io.ChunkedByteBufferOutputStream
  *                        For each block we also require the size (in bytes as a long field) in
  *                        order to throttle the memory usage. Note that zero-sized blocks are
  *                        already excluded, which happened in
- *                        [[MapOutputTracker.convertMapStatuses]].
+ *                        [[org.apache.spark.MapOutputTracker.convertMapStatuses]].
  * @param streamWrapper A function to wrap the returned input stream.
  * @param maxBytesInFlight max size (in bytes) of remote blocks to fetch at any given point.
  * @param maxReqsInFlight max number of remote requests to fetch blocks at any given point.
@@ -58,6 +59,7 @@ import org.apache.spark.util.io.ChunkedByteBufferOutputStream
  *                                    for a given remote host:port.
  * @param maxReqSizeShuffleToMem max size (in bytes) of a request that can be shuffled to memory.
  * @param detectCorrupt whether to detect any corruption in fetched blocks.
+ * @param shuffleMetrics used to report shuffle metrics.
  */
 private[spark]
 final class ShuffleBlockFetcherIterator(
@@ -70,8 +72,9 @@ final class ShuffleBlockFetcherIterator(
     maxReqsInFlight: Int,
     maxBlocksInFlightPerAddress: Int,
     maxReqSizeShuffleToMem: Long,
-    detectCorrupt: Boolean)
-  extends Iterator[(BlockId, InputStream)] with TempFileManager with Logging {
+    detectCorrupt: Boolean,
+    shuffleMetrics: ShuffleReadMetricsReporter)
+  extends Iterator[(BlockId, InputStream)] with DownloadFileManager with Logging {
 
   import ShuffleBlockFetcherIterator._
 
@@ -136,8 +139,6 @@ final class ShuffleBlockFetcherIterator(
    */
   private[this] val corruptedBlocks = mutable.HashSet[BlockId]()
 
-  private[this] val shuffleMetrics = context.taskMetrics().createTempShuffleReadMetrics()
-
   /**
    * Whether the iterator is still active. If isZombie is true, the callback interface will no
    * longer place fetched blocks into [[results]].
@@ -150,7 +151,7 @@ final class ShuffleBlockFetcherIterator(
    * deleted when cleanup. This is a layer of defensiveness against disk file leaks.
    */
   @GuardedBy("this")
-  private[this] val shuffleFilesSet = mutable.HashSet[File]()
+  private[this] val shuffleFilesSet = mutable.HashSet[DownloadFile]()
 
   initialize()
 
@@ -164,11 +165,15 @@ final class ShuffleBlockFetcherIterator(
     currentResult = null
   }
 
-  override def createTempFile(): File = {
-    blockManager.diskBlockManager.createTempLocalBlock()._2
+  override def createTempFile(transportConf: TransportConf): DownloadFile = {
+    // we never need to do any encryption or decryption here, regardless of configs, because that
+    // is handled at another layer in the code.  When encryption is enabled, shuffle data is written
+    // to disk encrypted in the first place, and sent over the network still encrypted.
+    new SimpleDownloadFile(
+      blockManager.diskBlockManager.createTempLocalBlock()._2, transportConf)
   }
 
-  override def registerTempFileToClean(file: File): Boolean = synchronized {
+  override def registerTempFileToClean(file: DownloadFile): Boolean = synchronized {
     if (isZombie) {
       false
     } else {
@@ -204,7 +209,7 @@ final class ShuffleBlockFetcherIterator(
     }
     shuffleFilesSet.foreach { file =>
       if (!file.delete()) {
-        logWarning("Failed to cleanup shuffle fetch temp file " + file.getAbsolutePath())
+        logWarning("Failed to cleanup shuffle fetch temp file " + file.path())
       }
     }
   }
@@ -443,35 +448,35 @@ final class ShuffleBlockFetcherIterator(
               buf.release()
               throwFetchFailedException(blockId, address, e)
           }
-
-          input = streamWrapper(blockId, in)
-          // Only copy the stream if it's wrapped by compression or encryption, also the size of
-          // block is small (the decompressed block is smaller than maxBytesInFlight)
-          if (detectCorrupt && !input.eq(in) && size < maxBytesInFlight / 3) {
-            val originalInput = input
-            val out = new ChunkedByteBufferOutputStream(64 * 1024, ByteBuffer.allocate)
-            try {
+          var isStreamCopied: Boolean = false
+          try {
+            input = streamWrapper(blockId, in)
+            // Only copy the stream if it's wrapped by compression or encryption, and the size of
+            // the block is small (the decompressed block is smaller than maxBytesInFlight)
+            if (detectCorrupt && !input.eq(in) && size < maxBytesInFlight / 3) {
+              isStreamCopied = true
+              val out = new ChunkedByteBufferOutputStream(64 * 1024, ByteBuffer.allocate)
               // Decompress the whole block at once to detect any corruption, which could increase
               // the memory usage and potentially increase the chance of OOM.
               // TODO: manage the memory used here, and spill it into disk in case of OOM.
-              Utils.copyStream(input, out)
-              out.close()
+              Utils.copyStream(input, out, closeStreams = true)
               input = out.toChunkedByteBuffer.toInputStream(dispose = true)
-            } catch {
-              case e: IOException =>
-                buf.release()
-                if (buf.isInstanceOf[FileSegmentManagedBuffer]
-                  || corruptedBlocks.contains(blockId)) {
-                  throwFetchFailedException(blockId, address, e)
-                } else {
-                  logWarning(s"got an corrupted block $blockId from $address, fetch again", e)
-                  corruptedBlocks += blockId
-                  fetchRequests += FetchRequest(address, Array((blockId, size)))
-                  result = null
-                }
-            } finally {
-              // TODO: release the buf here to free memory earlier
-              originalInput.close()
+            }
+          } catch {
+            case e: IOException =>
+              buf.release()
+              if (buf.isInstanceOf[FileSegmentManagedBuffer]
+                || corruptedBlocks.contains(blockId)) {
+                throwFetchFailedException(blockId, address, e)
+              } else {
+                logWarning(s"got a corrupted block $blockId from $address, fetch again", e)
+                corruptedBlocks += blockId
+                fetchRequests += FetchRequest(address, Array((blockId, size)))
+                result = null
+              }
+          } finally {
+            // TODO: release the buf here to free memory earlier
+            if (isStreamCopied) {
               in.close()
             }
           }
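
The restructured fetch path wraps the stream and, for small compressed or encrypted blocks, copies it fully up front so corruption surfaces as an IOException at fetch time, where the request can be retried, instead of mid-iteration. A hedged sketch of that eager-validation idea in plain java.io, with GZIP standing in for Spark's codec wrapper and the threshold and retry handling simplified:

    import java.io.{ByteArrayInputStream, ByteArrayOutputStream, IOException, InputStream}
    import java.util.zip.{GZIPInputStream, GZIPOutputStream}

    object EagerValidationSketch {
      /** Fully decode small wrapped streams now; hand large ones back untouched. */
      def openValidated(raw: Array[Byte], sizeThreshold: Int): Either[IOException, InputStream] = {
        try {
          val wrapped: InputStream = new GZIPInputStream(new ByteArrayInputStream(raw))
          if (raw.length < sizeThreshold) {
            // Small block: decompress everything so a corrupt payload fails right here.
            val out = new ByteArrayOutputStream()
            val buf = new Array[Byte](8192)
            var n = wrapped.read(buf)
            while (n != -1) {
              out.write(buf, 0, n)
              n = wrapped.read(buf)
            }
            wrapped.close()
            Right(new ByteArrayInputStream(out.toByteArray))
          } else {
            Right(wrapped) // large block: let the consumer stream it lazily
          }
        } catch {
          case e: IOException => Left(e) // the caller can re-enqueue the fetch request
        }
      }

      def main(args: Array[String]): Unit = {
        val bytes = new ByteArrayOutputStream()
        val gz = new GZIPOutputStream(bytes)
        gz.write("hello shuffle block".getBytes("UTF-8"))
        gz.close()
        val good = bytes.toByteArray
        val corrupt = good.clone()
        corrupt(good.length / 2) = (corrupt(good.length / 2) ^ 0xff).toByte

        println(openValidated(good, sizeThreshold = 1024).isRight)   // true
        println(openValidated(corrupt, sizeThreshold = 1024).isLeft) // true in practice: bad data or CRC
      }
    }
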
diff --git a/core/src/main/scala/org/apache/spark/storage/StorageUtils.scala b/core/src/main/scala/org/apache/spark/storage/StorageUtils.scala
index adc406bb1c441..1c9ea1dba97d7 100644
--- a/core/src/main/scala/org/apache/spark/storage/StorageUtils.scala
+++ b/core/src/main/scala/org/apache/spark/storage/StorageUtils.scala
@@ -22,9 +22,12 @@ import java.nio.{ByteBuffer, MappedByteBuffer}
 import scala.collection.Map
 import scala.collection.mutable
 
+import org.apache.commons.lang3.{JavaVersion, SystemUtils}
+import sun.misc.Unsafe
 import sun.nio.ch.DirectBuffer
 
 import org.apache.spark.internal.Logging
+import org.apache.spark.util.Utils
 
 /**
  * Storage information for each BlockManager.
@@ -193,6 +196,31 @@ private[spark] class StorageStatus(
 
 /** Helper methods for storage-related objects. */
 private[spark] object StorageUtils extends Logging {
+
+  // In Java 8, the type of DirectBuffer.cleaner() was sun.misc.Cleaner, and it was possible
+  // to access the method sun.misc.Cleaner.clean() to invoke it. The type changed to
+  // jdk.internal.ref.Cleaner in later JDKs, and the .clean() method is not accessible even with
+  // reflection. However sun.misc.Unsafe added a invokeCleaner() method in JDK 9+ and this is
+  // still accessible with reflection.
+  private val bufferCleaner: DirectBuffer => Unit =
+    if (SystemUtils.isJavaVersionAtLeast(JavaVersion.JAVA_9)) {
+      val cleanerMethod =
+        Utils.classForName("sun.misc.Unsafe").getMethod("invokeCleaner", classOf[ByteBuffer])
+      val unsafeField = classOf[Unsafe].getDeclaredField("theUnsafe")
+      unsafeField.setAccessible(true)
+      val unsafe = unsafeField.get(null).asInstanceOf[Unsafe]
+      buffer: DirectBuffer => cleanerMethod.invoke(unsafe, buffer)
+    } else {
+      val cleanerMethod = Utils.classForName("sun.misc.Cleaner").getMethod("clean")
+      buffer: DirectBuffer => {
+        // Careful to avoid the return type of .cleaner(), which changes with JDK
+        val cleaner: AnyRef = buffer.cleaner()
+        if (cleaner != null) {
+          cleanerMethod.invoke(cleaner)
+        }
+      }
+    }
+
   /**
    * Attempt to clean up a ByteBuffer if it is direct or memory-mapped. This uses an *unsafe* Sun
    * API that will cause errors if one attempts to read from the disposed buffer. However, neither
@@ -204,14 +232,8 @@ private[spark] object StorageUtils extends Logging {
   def dispose(buffer: ByteBuffer): Unit = {
     if (buffer != null && buffer.isInstanceOf[MappedByteBuffer]) {
       logTrace(s"Disposing of $buffer")
-      cleanDirectBuffer(buffer.asInstanceOf[DirectBuffer])
+      bufferCleaner(buffer.asInstanceOf[DirectBuffer])
     }
   }
 
-  private def cleanDirectBuffer(buffer: DirectBuffer) = {
-    val cleaner = buffer.cleaner()
-    if (cleaner != null) {
-      cleaner.clean()
-    }
-  }
 }
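
The bufferCleaner above selects between sun.misc.Cleaner on JDK 8 and Unsafe.invokeCleaner on JDK 9+ via reflection. Below is a standalone sketch of the JDK 9+ branch only; it leans on internal JDK APIs, so treat it as illustrative rather than portable:

    import java.nio.ByteBuffer

    object DirectBufferCleanerSketch {
      // JDK 9+ path: sun.misc.Unsafe.invokeCleaner(ByteBuffer) frees a direct buffer eagerly.
      private val cleaner: ByteBuffer => Unit = {
        val unsafeClass = Class.forName("sun.misc.Unsafe")
        val invokeCleaner = unsafeClass.getMethod("invokeCleaner", classOf[ByteBuffer])
        val theUnsafeField = unsafeClass.getDeclaredField("theUnsafe")
        theUnsafeField.setAccessible(true)
        val unsafe = theUnsafeField.get(null)
        buffer => invokeCleaner.invoke(unsafe, buffer)
      }

      def main(args: Array[String]): Unit = {
        val buffer = ByteBuffer.allocateDirect(1 << 20)
        cleaner(buffer) // native memory is released now rather than at some later GC
        // Touching `buffer` after this point is undefined; do not read or write it.
      }
    }
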
diff --git a/core/src/main/scala/org/apache/spark/storage/memory/MemoryStore.scala b/core/src/main/scala/org/apache/spark/storage/memory/MemoryStore.scala
index 06fd56e54d9c8..8513359934bec 100644
--- a/core/src/main/scala/org/apache/spark/storage/memory/MemoryStore.scala
+++ b/core/src/main/scala/org/apache/spark/storage/memory/MemoryStore.scala
@@ -34,6 +34,7 @@ import org.apache.spark.memory.{MemoryManager, MemoryMode}
 import org.apache.spark.serializer.{SerializationStream, SerializerManager}
 import org.apache.spark.storage._
 import org.apache.spark.unsafe.Platform
+import org.apache.spark.unsafe.array.ByteArrayMethods
 import org.apache.spark.util.{SizeEstimator, Utils}
 import org.apache.spark.util.collection.SizeTrackingVector
 import org.apache.spark.util.io.{ChunkedByteBuffer, ChunkedByteBufferOutputStream}
@@ -333,11 +334,11 @@ private[spark] class MemoryStore(
 
     // Initial per-task memory to request for unrolling blocks (bytes).
     val initialMemoryThreshold = unrollMemoryThreshold
-    val chunkSize = if (initialMemoryThreshold > Int.MaxValue) {
+    val chunkSize = if (initialMemoryThreshold > ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH) {
       logWarning(s"Initial memory threshold of ${Utils.bytesToString(initialMemoryThreshold)} " +
         s"is too large to be set as chunk size. Chunk size has been capped to " +
-        s"${Utils.bytesToString(Int.MaxValue)}")
-      Int.MaxValue
+        s"${Utils.bytesToString(ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH)}")
+      ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH
     } else {
       initialMemoryThreshold.toInt
     }
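
The MemoryStore change clamps the configured unroll chunk size to a value that can safely back a single JVM array. A tiny sketch of that clamp; the exact limit constant is assumed here:

    object ChunkSizeCapSketch {
      // Mirrors the idea of ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH; the value is assumed here.
      private val MaxSafeArrayLength: Long = Int.MaxValue - 15

      /** Clamp a configured byte count to something that can back a single JVM array. */
      def toChunkSize(configured: Long): Int =
        if (configured > MaxSafeArrayLength) {
          System.err.println(
            s"Requested chunk size $configured is too large; capping to $MaxSafeArrayLength")
          MaxSafeArrayLength.toInt
        } else {
          configured.toInt
        }

      def main(args: Array[String]): Unit = {
        println(toChunkSize(64 * 1024))               // small values pass through unchanged
        println(toChunkSize(8L * 1024 * 1024 * 1024)) // an 8 GiB request gets capped
      }
    }
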
diff --git a/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala b/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala
index 52a955111231a..316af9b79d286 100644
--- a/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala
+++ b/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala
@@ -356,13 +356,15 @@ private[spark] object JettyUtils extends Logging {
 
         (connector, connector.getLocalPort())
       }
+      val httpConfig = new HttpConfiguration()
+      httpConfig.setRequestHeaderSize(conf.get(UI_REQUEST_HEADER_SIZE).toInt)
 
       // If SSL is configured, create the secure connector first.
       val securePort = sslOptions.createJettySslContextFactory().map { factory =>
         val securePort = sslOptions.port.getOrElse(if (port > 0) Utils.userPort(port, 400) else 0)
         val secureServerName = if (serverName.nonEmpty) s"$serverName (HTTPS)" else serverName
         val connectionFactories = AbstractConnectionFactory.getFactories(factory,
-          new HttpConnectionFactory())
+          new HttpConnectionFactory(httpConfig))
 
         def sslConnect(currentPort: Int): (ServerConnector, Int) = {
           newConnector(connectionFactories, currentPort)
@@ -377,7 +379,7 @@ private[spark] object JettyUtils extends Logging {
 
       // Bind the HTTP port.
       def httpConnect(currentPort: Int): (ServerConnector, Int) = {
-        newConnector(Array(new HttpConnectionFactory()), currentPort)
+        newConnector(Array(new HttpConnectionFactory(httpConfig)), currentPort)
       }
 
       val (httpConnector, httpPort) = Utils.startServiceOnPort[ServerConnector](port, httpConnect,
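
The JettyUtils change threads one shared HttpConfiguration, with a configurable request header size, into both the plain HTTP and the HTTPS connection factories. A minimal embedded-Jetty sketch of that wiring; the port and header size are arbitrary and error handling is omitted:

    import org.eclipse.jetty.server.{HttpConfiguration, HttpConnectionFactory, Server, ServerConnector}

    object JettyHeaderSizeSketch {
      def main(args: Array[String]): Unit = {
        val server = new Server()

        // One HttpConfiguration shared by every connector, so oversized request headers
        // (long redirect URLs, big cookies) are accepted consistently on HTTP and HTTPS.
        val httpConfig = new HttpConfiguration()
        httpConfig.setRequestHeaderSize(16 * 1024) // 16 KiB instead of Jetty's 8 KiB default

        val connector = new ServerConnector(server, new HttpConnectionFactory(httpConfig))
        connector.setPort(0) // pick any free port
        server.addConnector(connector)

        server.start()
        println(s"Listening on port ${connector.getLocalPort}")
        server.stop()
      }
    }
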
diff --git a/core/src/main/scala/org/apache/spark/ui/PagedTable.scala b/core/src/main/scala/org/apache/spark/ui/PagedTable.scala
index 65fa38387b9ee..6c2c1f6827948 100644
--- a/core/src/main/scala/org/apache/spark/ui/PagedTable.scala
+++ b/core/src/main/scala/org/apache/spark/ui/PagedTable.scala
@@ -31,11 +31,7 @@ import org.apache.spark.util.Utils
  *
  * @param pageSize the number of rows in a page
  */
-private[ui] abstract class PagedDataSource[T](val pageSize: Int) {
-
-  if (pageSize <= 0) {
-    throw new IllegalArgumentException("Page size must be positive")
-  }
+private[spark] abstract class PagedDataSource[T](val pageSize: Int) {
 
   /**
    * Return the size of all data.
@@ -51,13 +47,24 @@ private[ui] abstract class PagedDataSource[T](val pageSize: Int) {
    * Slice the data for this page
    */
   def pageData(page: Int): PageData[T] = {
-    val totalPages = (dataSize + pageSize - 1) / pageSize
-    if (page <= 0 || page > totalPages) {
-      throw new IndexOutOfBoundsException(
-        s"Page $page is out of range. Please select a page number between 1 and $totalPages.")
+    // Display all the data in one page if the pageSize is less than or equal to zero.
+    val pageTableSize = if (pageSize <= 0) {
+      dataSize
+    } else {
+      pageSize
+    }
+    val totalPages = (dataSize + pageTableSize - 1) / pageTableSize
+
+    val pageToShow = if (page <= 0) {
+      1
+    } else if (page > totalPages) {
+      totalPages
+    } else {
+      page
     }
-    val from = (page - 1) * pageSize
-    val to = dataSize.min(page * pageSize)
+
+    val (from, to) = ((pageToShow - 1) * pageSize, dataSize.min(pageToShow * pageTableSize))
+
     PageData(totalPages, sliceData(from, to))
   }
 
@@ -72,7 +79,7 @@ private[ui] case class PageData[T](totalPage: Int, data: Seq[T])
 /**
  * A paged table that will generate a HTML table for a specified page and also the page navigation.
  */
-private[ui] trait PagedTable[T] {
+private[spark] trait PagedTable[T] {
 
   def tableId: String
 
@@ -80,8 +87,6 @@ private[ui] trait PagedTable[T] {
 
   def pageSizeFormField: String
 
-  def prevPageSizeFormField: String
-
   def pageNumberFormField: String
 
   def dataSource: PagedDataSource[T]
@@ -94,7 +99,23 @@ private[ui] trait PagedTable[T] {
     val _dataSource = dataSource
     try {
       val PageData(totalPages, data) = _dataSource.pageData(page)
-      val pageNavi = pageNavigation(page, _dataSource.pageSize, totalPages)
+
+      val pageToShow = if (page <= 0) {
+        1
+      } else if (page > totalPages) {
+        totalPages
+      } else {
+        page
+      }
+      // Display all the data in one page if the pageSize is less than or equal to zero.
+      val pageSize = if (_dataSource.pageSize <= 0) {
+        data.size
+      } else {
+        _dataSource.pageSize
+      }
+
+      val pageNavi = pageNavigation(pageToShow, pageSize, totalPages)
+
       <div>
         {pageNavi}
         <table class={tableCssClass} id={tableId}>
@@ -122,13 +143,9 @@ private[ui] trait PagedTable[T] {
 
   /**
    * Return a page navigation.
-   * <ul>
-   *   <li>If the totalPages is 1, the page navigation will be empty</li>
-   *   <li>
-   *     If the totalPages is more than 1, it will create a page navigation including a group of
-   *     page numbers and a form to submit the page number.
-   *   </li>
-   * </ul>
+   *
+   * It will create a page navigation including a group of page numbers and a form
+   * to submit the page number.
    *
    * Here are some examples of the page navigation:
    * {{{
@@ -154,120 +171,112 @@ private[ui] trait PagedTable[T] {
    * }}}
    */
   private[ui] def pageNavigation(page: Int, pageSize: Int, totalPages: Int): Seq[Node] = {
-    if (totalPages == 1) {
-      Nil
-    } else {
-      // A group includes all page numbers will be shown in the page navigation.
-      // The size of group is 10 means there are 10 page numbers will be shown.
-      // The first group is 1 to 10, the second is 2 to 20, and so on
-      val groupSize = 10
-      val firstGroup = 0
-      val lastGroup = (totalPages - 1) / groupSize
-      val currentGroup = (page - 1) / groupSize
-      val startPage = currentGroup * groupSize + 1
-      val endPage = totalPages.min(startPage + groupSize - 1)
-      val pageTags = (startPage to endPage).map { p =>
-        if (p == page) {
-          // The current page should be disabled so that it cannot be clicked.
-          <li class="disabled"><a href="#">{p}</a></li>
-        } else {
-          <li><a href={Unparsed(pageLink(p))}>{p}</a></li>
-        }
+    // A group includes all page numbers that will be shown in the page navigation.
+    // A group size of 10 means 10 page numbers will be shown at a time.
+    // The first group is pages 1 to 10, the second is 11 to 20, and so on.
+    val groupSize = 10
+    val firstGroup = 0
+    val lastGroup = (totalPages - 1) / groupSize
+    val currentGroup = (page - 1) / groupSize
+    val startPage = currentGroup * groupSize + 1
+    val endPage = totalPages.min(startPage + groupSize - 1)
+    val pageTags = (startPage to endPage).map { p =>
+      if (p == page) {
+        // The current page should be disabled so that it cannot be clicked.
+        <li class="disabled"><a href="#">{p}</a></li>
+      } else {
+        <li><a href={Unparsed(pageLink(p))}>{p}</a></li>
       }
+    }
 
-      val hiddenFormFields = {
-        if (goButtonFormPath.contains('?')) {
-          val queryString = goButtonFormPath.split("\\?", 2)(1)
-          val search = queryString.split("#")(0)
-          Splitter
-            .on('&')
-            .trimResults()
-            .omitEmptyStrings()
-            .withKeyValueSeparator("=")
-            .split(search)
-            .asScala
-            .filterKeys(_ != pageSizeFormField)
-            .filterKeys(_ != prevPageSizeFormField)
-            .filterKeys(_ != pageNumberFormField)
-            .mapValues(URLDecoder.decode(_, "UTF-8"))
-            .map { case (k, v) =>
-              <input type="hidden" name={k} value={v} />
-            }
-        } else {
-          Seq.empty
-        }
+    val hiddenFormFields = {
+      if (goButtonFormPath.contains('?')) {
+        val queryString = goButtonFormPath.split("\\?", 2)(1)
+        val search = queryString.split("#")(0)
+        Splitter
+          .on('&')
+          .trimResults()
+          .omitEmptyStrings()
+          .withKeyValueSeparator("=")
+          .split(search)
+          .asScala
+          .filterKeys(_ != pageSizeFormField)
+          .filterKeys(_ != pageNumberFormField)
+          .mapValues(URLDecoder.decode(_, "UTF-8"))
+          .map { case (k, v) =>
+            <input type="hidden" name={k} value={v} />
+          }
+      } else {
+        Seq.empty
       }
+    }
 
+    <div>
       <div>
-        <div>
-          <form id={s"form-$tableId-page"}
-                method="get"
-                action={Unparsed(goButtonFormPath)}
-                class="form-inline pull-right"
-                style="margin-bottom: 0px;">
-            <input type="hidden"
-                   name={prevPageSizeFormField}
-                   value={pageSize.toString} />
-            {hiddenFormFields}
-            <label>{totalPages} Pages. Jump to</label>
-            <input type="text"
-                   name={pageNumberFormField}
-                   id={s"form-$tableId-page-no"}
-                   value={page.toString} class="span1" />
-
-            <label>. Show </label>
-            <input type="text"
-                   id={s"form-$tableId-page-size"}
-                   name={pageSizeFormField}
-                   value={pageSize.toString}
-                   class="span1" />
-            <label>items in a page.</label>
-
-            <button type="submit" class="btn">Go</button>
-          </form>
-        </div>
-        <div class="pagination" style="margin-bottom: 0px;">
-          <span style="float: left; padding-top: 4px; padding-right: 4px;">Page: </span>
-          <ul>
-            {if (currentGroup > firstGroup) {
-            <li>
-              <a href={Unparsed(pageLink(startPage - groupSize))} aria-label="Previous Group">
-                <span aria-hidden="true">
-                  &lt;&lt;
-                </span>
-              </a>
-            </li>
-            }}
-            {if (page > 1) {
-            <li>
-            <a href={Unparsed(pageLink(page - 1))} aria-label="Previous">
+        <form id={s"form-$tableId-page"}
+              method="get"
+              action={Unparsed(goButtonFormPath)}
+              class="form-inline pull-right"
+              style="margin-bottom: 0px;">
+          {hiddenFormFields}
+          <label>{totalPages} Pages. Jump to</label>
+          <input type="text"
+                 name={pageNumberFormField}
+                 id={s"form-$tableId-page-no"}
+                 value={page.toString} class="span1" />
+
+          <label>. Show </label>
+          <input type="text"
+                 id={s"form-$tableId-page-size"}
+                 name={pageSizeFormField}
+                 value={pageSize.toString}
+                 class="span1" />
+          <label>items in a page.</label>
+
+          <button type="submit" class="btn">Go</button>
+        </form>
+      </div>
+      <div class="pagination" style="margin-bottom: 0px;">
+        <span style="float: left; padding-top: 4px; padding-right: 4px;">Page: </span>
+        <ul>
+          {if (currentGroup > firstGroup) {
+          <li>
+            <a href={Unparsed(pageLink(startPage - groupSize))} aria-label="Previous Group">
               <span aria-hidden="true">
-                &lt;
+                &lt;&lt;
               </span>
             </a>
-            </li>
-            }}
-            {pageTags}
-            {if (page < totalPages) {
-            <li>
-              <a href={Unparsed(pageLink(page + 1))} aria-label="Next">
-                <span aria-hidden="true">&gt;</span>
-              </a>
-            </li>
-            }}
-            {if (currentGroup < lastGroup) {
-            <li>
-              <a href={Unparsed(pageLink(startPage + groupSize))} aria-label="Next Group">
-                <span aria-hidden="true">
-                  &gt;&gt;
-                </span>
-              </a>
-            </li>
+          </li>
           }}
-          </ul>
-        </div>
+          {if (page > 1) {
+          <li>
+          <a href={Unparsed(pageLink(page - 1))} aria-label="Previous">
+            <span aria-hidden="true">
+              &lt;
+            </span>
+          </a>
+          </li>
+          }}
+          {pageTags}
+          {if (page < totalPages) {
+          <li>
+            <a href={Unparsed(pageLink(page + 1))} aria-label="Next">
+              <span aria-hidden="true">&gt;</span>
+            </a>
+          </li>
+          }}
+          {if (currentGroup < lastGroup) {
+          <li>
+            <a href={Unparsed(pageLink(startPage + groupSize))} aria-label="Next Group">
+              <span aria-hidden="true">
+                &gt;&gt;
+              </span>
+            </a>
+          </li>
+          }}
+        </ul>
       </div>
-    }
+    </div>
   }
 
   /**
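
The PagedTable/PagedDataSource rework above stops throwing for out-of-range page parameters and clamps them instead: a non-positive page size means everything goes on a single page, and a page number outside [1, totalPages] is pulled back into range. A standalone sketch of just that clamping arithmetic (the names are illustrative, not Spark's):

    object PageClampSketch {
      /** Return (totalPages, from, to) for a data set of dataSize rows. */
      def slice(dataSize: Int, pageSize: Int, page: Int): (Int, Int, Int) = {
        // A non-positive page size collapses everything onto a single page.
        val effectiveSize = if (pageSize <= 0) dataSize else pageSize
        val totalPages = (dataSize + effectiveSize - 1) / effectiveSize
        // Clamp the requested page into the valid range instead of throwing.
        val pageToShow = math.min(math.max(page, 1), totalPages)
        val from = (pageToShow - 1) * effectiveSize
        val to = math.min(pageToShow * effectiveSize, dataSize)
        (totalPages, from, to)
      }

      def main(args: Array[String]): Unit = {
        println(slice(dataSize = 95, pageSize = 10, page = 3))   // (10,20,30)
        println(slice(dataSize = 95, pageSize = 10, page = 42))  // clamped to the last page
        println(slice(dataSize = 95, pageSize = 0, page = 1))    // one page holding all 95 rows
      }
    }
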
diff --git a/core/src/main/scala/org/apache/spark/ui/UIUtils.scala b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala
index 732b7528f499e..60a929375baae 100644
--- a/core/src/main/scala/org/apache/spark/ui/UIUtils.scala
+++ b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala
@@ -204,6 +204,8 @@ private[spark] object UIUtils extends Logging {
           href={prependBaseUri(request, "/static/dataTables.bootstrap.css")} type="text/css"/>
     <link rel="stylesheet"
           href={prependBaseUri(request, "/static/jsonFormatter.min.css")} type="text/css"/>
+    <link rel="stylesheet"
+          href={prependBaseUri(request, "/static/webui-dataTables.css")} type="text/css"/>
     <script src={prependBaseUri(request, "/static/jquery.dataTables.1.10.4.min.js")}></script>
     <script src={prependBaseUri(request, "/static/jquery.cookies.2.2.0.min.js")}></script>
     <script src={prependBaseUri(request, "/static/jquery.blockUI.min.js")}></script>
@@ -218,7 +220,6 @@ private[spark] object UIUtils extends Logging {
       title: String,
       content: => Seq[Node],
       activeTab: SparkUITab,
-      refreshInterval: Option[Int] = None,
       helpText: Option[String] = None,
       showVisualization: Boolean = false,
       useDataTables: Boolean = false): Seq[Node] = {
diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala
index 90e9a7a3630cf..2c22e0555fcb8 100644
--- a/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala
+++ b/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala
@@ -220,7 +220,6 @@ private[ui] class AllJobsPage(parent: JobsTab, store: AppStatusStore) extends We
     val parameterJobSortColumn = UIUtils.stripXSS(request.getParameter(jobTag + ".sort"))
     val parameterJobSortDesc = UIUtils.stripXSS(request.getParameter(jobTag + ".desc"))
     val parameterJobPageSize = UIUtils.stripXSS(request.getParameter(jobTag + ".pageSize"))
-    val parameterJobPrevPageSize = UIUtils.stripXSS(request.getParameter(jobTag + ".prevPageSize"))
 
     val jobPage = Option(parameterJobPage).map(_.toInt).getOrElse(1)
     val jobSortColumn = Option(parameterJobSortColumn).map { sortColumn =>
@@ -231,17 +230,7 @@ private[ui] class AllJobsPage(parent: JobsTab, store: AppStatusStore) extends We
       jobSortColumn == jobIdTitle
     )
     val jobPageSize = Option(parameterJobPageSize).map(_.toInt).getOrElse(100)
-    val jobPrevPageSize = Option(parameterJobPrevPageSize).map(_.toInt).getOrElse(jobPageSize)
-
-    val page: Int = {
-      // If the user has changed to a larger page size, then go to page 1 in order to avoid
-      // IndexOutOfBoundsException.
-      if (jobPageSize <= jobPrevPageSize) {
-        jobPage
-      } else {
-        1
-      }
-    }
+
     val currentTime = System.currentTimeMillis()
 
     try {
@@ -259,7 +248,7 @@ private[ui] class AllJobsPage(parent: JobsTab, store: AppStatusStore) extends We
         pageSize = jobPageSize,
         sortColumn = jobSortColumn,
         desc = jobSortDesc
-      ).table(page)
+      ).table(jobPage)
     } catch {
       case e @ (_ : IllegalArgumentException | _ : IndexOutOfBoundsException) =>
         <div class="alert alert-error">
@@ -526,8 +515,6 @@ private[ui] class JobPagedTable(
 
   override def pageSizeFormField: String = jobTag + ".pageSize"
 
-  override def prevPageSizeFormField: String = jobTag + ".prevPageSize"
-
   override def pageNumberFormField: String = jobTag + ".page"
 
   override val dataSource = new JobDataSource(
diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/ExecutorTable.scala b/core/src/main/scala/org/apache/spark/ui/jobs/ExecutorTable.scala
deleted file mode 100644
index 0ff64f053f371..0000000000000
--- a/core/src/main/scala/org/apache/spark/ui/jobs/ExecutorTable.scala
+++ /dev/null
@@ -1,152 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.ui.jobs
-
-import scala.xml.{Node, Unparsed}
-
-import org.apache.spark.status.AppStatusStore
-import org.apache.spark.status.api.v1.StageData
-import org.apache.spark.ui.{ToolTips, UIUtils}
-import org.apache.spark.util.Utils
-
-/** Stage summary grouped by executors. */
-private[ui] class ExecutorTable(stage: StageData, store: AppStatusStore) {
-
-  import ApiHelper._
-
-  def toNodeSeq: Seq[Node] = {
-    <table class={UIUtils.TABLE_CLASS_STRIPED_SORTABLE}>
-      <thead>
-        <th id="executorid">Executor ID</th>
-        <th>Address</th>
-        <th>Task Time</th>
-        <th>Total Tasks</th>
-        <th>Failed Tasks</th>
-        <th>Killed Tasks</th>
-        <th>Succeeded Tasks</th>
-        {if (hasInput(stage)) {
-          <th>
-            <span data-toggle="tooltip" title={ToolTips.INPUT}>Input Size / Records</span>
-          </th>
-        }}
-        {if (hasOutput(stage)) {
-          <th>
-            <span data-toggle="tooltip" title={ToolTips.OUTPUT}>Output Size / Records</span>
-          </th>
-        }}
-        {if (hasShuffleRead(stage)) {
-          <th>
-            <span data-toggle="tooltip" title={ToolTips.SHUFFLE_READ}>
-            Shuffle Read Size / Records</span>
-          </th>
-        }}
-        {if (hasShuffleWrite(stage)) {
-          <th>
-            <span data-toggle="tooltip" title={ToolTips.SHUFFLE_WRITE}>
-            Shuffle Write Size / Records</span>
-          </th>
-        }}
-        {if (hasBytesSpilled(stage)) {
-          <th>Shuffle Spill (Memory)</th>
-          <th>Shuffle Spill (Disk)</th>
-        }}
-        <th>
-          <span data-toggle="tooltip" title={ToolTips.BLACKLISTED}>
-          Blacklisted
-          </span>
-        </th>
-      </thead>
-      <tbody>
-        {createExecutorTable(stage)}
-      </tbody>
-    </table>
-    <script>
-      {Unparsed {
-        """
-          |      window.onload = function() {
-          |        sorttable.innerSortFunction.apply(document.getElementById('executorid'), [])
-          |      };
-        """.stripMargin
-      }}
-    </script>
-  }
-
-  private def createExecutorTable(stage: StageData) : Seq[Node] = {
-    val executorSummary = store.executorSummary(stage.stageId, stage.attemptId)
-
-    executorSummary.toSeq.sortBy(_._1).map { case (k, v) =>
-      val executor = store.asOption(store.executorSummary(k))
-      <tr>
-        <td>
-          <div style="float: left">{k}</div>
-          <div style="float: right">
-          {
-            executor.map(_.executorLogs).getOrElse(Map.empty).map {
-              case (logName, logUrl) => <div><a href={logUrl}>{logName}</a></div>
-            }
-          }
-          </div>
-        </td>
-        <td>{executor.map { e => e.hostPort }.getOrElse("CANNOT FIND ADDRESS")}</td>
-        <td sorttable_customkey={v.taskTime.toString}>{UIUtils.formatDuration(v.taskTime)}</td>
-        <td>{v.failedTasks + v.succeededTasks + v.killedTasks}</td>
-        <td>{v.failedTasks}</td>
-        <td>{v.killedTasks}</td>
-        <td>{v.succeededTasks}</td>
-        {if (hasInput(stage)) {
-          <td sorttable_customkey={v.inputBytes.toString}>
-            {s"${Utils.bytesToString(v.inputBytes)} / ${v.inputRecords}"}
-          </td>
-        }}
-        {if (hasOutput(stage)) {
-          <td sorttable_customkey={v.outputBytes.toString}>
-            {s"${Utils.bytesToString(v.outputBytes)} / ${v.outputRecords}"}
-          </td>
-        }}
-        {if (hasShuffleRead(stage)) {
-          <td sorttable_customkey={v.shuffleRead.toString}>
-            {s"${Utils.bytesToString(v.shuffleRead)} / ${v.shuffleReadRecords}"}
-          </td>
-        }}
-        {if (hasShuffleWrite(stage)) {
-          <td sorttable_customkey={v.shuffleWrite.toString}>
-            {s"${Utils.bytesToString(v.shuffleWrite)} / ${v.shuffleWriteRecords}"}
-          </td>
-        }}
-        {if (hasBytesSpilled(stage)) {
-          <td sorttable_customkey={v.memoryBytesSpilled.toString}>
-            {Utils.bytesToString(v.memoryBytesSpilled)}
-          </td>
-          <td sorttable_customkey={v.diskBytesSpilled.toString}>
-            {Utils.bytesToString(v.diskBytesSpilled)}
-          </td>
-        }}
-        {
-          if (executor.map(_.isBlacklisted).getOrElse(false)) {
-            <td>for application</td>
-          } else if (v.isBlacklistedForStage) {
-            <td>for stage</td>
-          } else {
-            <td>false</td>
-          }
-        }
-      </tr>
-    }
-  }
-
-}
diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala
index 55eb989962668..a213b764abea7 100644
--- a/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala
+++ b/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala
@@ -91,7 +91,14 @@ private[ui] class StagePage(parent: StagesTab, store: AppStatusStore) extends We
     val parameterTaskSortColumn = UIUtils.stripXSS(request.getParameter("task.sort"))
     val parameterTaskSortDesc = UIUtils.stripXSS(request.getParameter("task.desc"))
     val parameterTaskPageSize = UIUtils.stripXSS(request.getParameter("task.pageSize"))
-    val parameterTaskPrevPageSize = UIUtils.stripXSS(request.getParameter("task.prevPageSize"))
+
+    val eventTimelineParameterTaskPage = UIUtils.stripXSS(
+      request.getParameter("task.eventTimelinePageNumber"))
+    val eventTimelineParameterTaskPageSize = UIUtils.stripXSS(
+      request.getParameter("task.eventTimelinePageSize"))
+    var eventTimelineTaskPage = Option(eventTimelineParameterTaskPage).map(_.toInt).getOrElse(1)
+    var eventTimelineTaskPageSize = Option(
+      eventTimelineParameterTaskPageSize).map(_.toInt).getOrElse(100)
 
     val taskPage = Option(parameterTaskPage).map(_.toInt).getOrElse(1)
     val taskSortColumn = Option(parameterTaskSortColumn).map { sortColumn =>
@@ -99,13 +106,11 @@ private[ui] class StagePage(parent: StagesTab, store: AppStatusStore) extends We
     }.getOrElse("Index")
     val taskSortDesc = Option(parameterTaskSortDesc).map(_.toBoolean).getOrElse(false)
     val taskPageSize = Option(parameterTaskPageSize).map(_.toInt).getOrElse(100)
-    val taskPrevPageSize = Option(parameterTaskPrevPageSize).map(_.toInt).getOrElse(taskPageSize)
-
     val stageId = parameterId.toInt
     val stageAttemptId = parameterAttempt.toInt
 
     val stageHeader = s"Details for Stage $stageId (Attempt $stageAttemptId)"
-    val stageData = parent.store
+    val (stageData, stageJobIds) = parent.store
       .asOption(parent.store.stageAttempt(stageId, stageAttemptId, details = false))
       .getOrElse {
         val content =
@@ -117,7 +122,8 @@ private[ui] class StagePage(parent: StagesTab, store: AppStatusStore) extends We
 
     val localitySummary = store.localitySummary(stageData.stageId, stageData.attemptId)
 
-    val totalTasks = taskCount(stageData)
+    val totalTasks = stageData.numActiveTasks + stageData.numCompleteTasks +
+      stageData.numFailedTasks + stageData.numKilledTasks
     if (totalTasks == 0) {
       val content =
         <div>
@@ -132,7 +138,15 @@ private[ui] class StagePage(parent: StagesTab, store: AppStatusStore) extends We
     val totalTasksNumStr = if (totalTasks == storedTasks) {
       s"$totalTasks"
     } else {
-      s"$storedTasks, showing ${totalTasks}"
+      s"$totalTasks, showing $storedTasks"
+    }
+    if (eventTimelineTaskPageSize < 1 || eventTimelineTaskPageSize > totalTasks) {
+      eventTimelineTaskPageSize = totalTasks
+    }
+    val eventTimelineTotalPages =
+      (totalTasks + eventTimelineTaskPageSize - 1) / eventTimelineTaskPageSize
+    if (eventTimelineTaskPage < 1 || eventTimelineTaskPage > eventTimelineTotalPages) {
+      eventTimelineTaskPage = 1
     }
 
     val summary =
@@ -154,20 +168,20 @@ private[ui] class StagePage(parent: StagesTab, store: AppStatusStore) extends We
           }}
           {if (hasOutput(stageData)) {
             <li>
-              <strong>Output: </strong>
+              <strong>Output Size / Records: </strong>
               {s"${Utils.bytesToString(stageData.outputBytes)} / ${stageData.outputRecords}"}
             </li>
           }}
           {if (hasShuffleRead(stageData)) {
             <li>
-              <strong>Shuffle Read: </strong>
+              <strong>Shuffle Read Size / Records: </strong>
               {s"${Utils.bytesToString(stageData.shuffleReadBytes)} / " +
                s"${stageData.shuffleReadRecords}"}
             </li>
           }}
           {if (hasShuffleWrite(stageData)) {
             <li>
-              <strong>Shuffle Write: </strong>
+              <strong>Shuffle Write Size / Records: </strong>
                {s"${Utils.bytesToString(stageData.shuffleWriteBytes)} / " +
                s"${stageData.shuffleWriteRecords}"}
             </li>
@@ -182,74 +196,17 @@ private[ui] class StagePage(parent: StagesTab, store: AppStatusStore) extends We
               {Utils.bytesToString(stageData.diskBytesSpilled)}
             </li>
           }}
-        </ul>
-      </div>
-
-    val showAdditionalMetrics =
-      <div>
-        <span class="expand-additional-metrics">
-          <span class="expand-additional-metrics-arrow arrow-closed"></span>
-          <a>Show Additional Metrics</a>
-        </span>
-        <div class="additional-metrics collapsed">
-          <ul>
-            <li>
-                <input type="checkbox" id="select-all-metrics"/>
-                <span class="additional-metric-title"><em>(De)select All</em></span>
-            </li>
+          {if (!stageJobIds.isEmpty) {
             <li>
-              <span data-toggle="tooltip"
-                    title={ToolTips.SCHEDULER_DELAY} data-placement="right">
-                <input type="checkbox" name={TaskDetailsClassNames.SCHEDULER_DELAY}/>
-                <span class="additional-metric-title">Scheduler Delay</span>
-              </span>
+              <strong>Associated Job Ids: </strong>
+              {stageJobIds.sorted.map { jobId =>
+                val jobURL = "%s/jobs/job/?id=%s"
+                  .format(UIUtils.prependBaseUri(request, parent.basePath), jobId)
+                <a href={jobURL}>{jobId.toString}</a><span>&nbsp;</span>
+              }}
             </li>
-            <li>
-              <span data-toggle="tooltip"
-                    title={ToolTips.TASK_DESERIALIZATION_TIME} data-placement="right">
-                <input type="checkbox" name={TaskDetailsClassNames.TASK_DESERIALIZATION_TIME}/>
-                <span class="additional-metric-title">Task Deserialization Time</span>
-              </span>
-            </li>
-            {if (stageData.shuffleReadBytes > 0) {
-              <li>
-                <span data-toggle="tooltip"
-                      title={ToolTips.SHUFFLE_READ_BLOCKED_TIME} data-placement="right">
-                  <input type="checkbox" name={TaskDetailsClassNames.SHUFFLE_READ_BLOCKED_TIME}/>
-                  <span class="additional-metric-title">Shuffle Read Blocked Time</span>
-                </span>
-              </li>
-              <li>
-                <span data-toggle="tooltip"
-                      title={ToolTips.SHUFFLE_READ_REMOTE_SIZE} data-placement="right">
-                  <input type="checkbox" name={TaskDetailsClassNames.SHUFFLE_READ_REMOTE_SIZE}/>
-                  <span class="additional-metric-title">Shuffle Remote Reads</span>
-                </span>
-              </li>
-            }}
-            <li>
-              <span data-toggle="tooltip"
-                    title={ToolTips.RESULT_SERIALIZATION_TIME} data-placement="right">
-                <input type="checkbox" name={TaskDetailsClassNames.RESULT_SERIALIZATION_TIME}/>
-                <span class="additional-metric-title">Result Serialization Time</span>
-              </span>
-            </li>
-            <li>
-              <span data-toggle="tooltip"
-                    title={ToolTips.GETTING_RESULT_TIME} data-placement="right">
-                <input type="checkbox" name={TaskDetailsClassNames.GETTING_RESULT_TIME}/>
-                <span class="additional-metric-title">Getting Result Time</span>
-              </span>
-            </li>
-            <li>
-              <span data-toggle="tooltip"
-                    title={ToolTips.PEAK_EXECUTION_MEMORY} data-placement="right">
-                <input type="checkbox" name={TaskDetailsClassNames.PEAK_EXECUTION_MEMORY}/>
-                <span class="additional-metric-title">Peak Execution Memory</span>
-              </span>
-            </li>
-          </ul>
-        </div>
+          }}
+        </ul>
       </div>
 
     val stageGraph = parent.store.asOption(parent.store.operationGraphForStage(stageId))
@@ -268,17 +225,8 @@ private[ui] class StagePage(parent: StagesTab, store: AppStatusStore) extends We
       accumulableRow,
       stageData.accumulatorUpdates.toSeq)
 
-    val page: Int = {
-      // If the user has changed to a larger page size, then go to page 1 in order to avoid
-      // IndexOutOfBoundsException.
-      if (taskPageSize <= taskPrevPageSize) {
-        taskPage
-      } else {
-        1
-      }
-    }
     val currentTime = System.currentTimeMillis()
-    val (taskTable, taskTableHTML) = try {
+    val taskTable = try {
       val _taskTable = new TaskPagedTable(
         stageData,
         UIUtils.prependBaseUri(request, parent.basePath) +
@@ -289,17 +237,10 @@ private[ui] class StagePage(parent: StagesTab, store: AppStatusStore) extends We
         desc = taskSortDesc,
         store = parent.store
       )
-      (_taskTable, _taskTable.table(page))
+      _taskTable
     } catch {
       case e @ (_ : IllegalArgumentException | _ : IndexOutOfBoundsException) =>
-        val errorMessage =
-          <div class="alert alert-error">
-            <p>Error while rendering stage table:</p>
-            <pre>
-              {Utils.exceptionString(e)}
-            </pre>
-          </div>
-        (null, errorMessage)
+        null
     }
 
     val jsForScrollingDownToTaskTable =
@@ -317,190 +258,36 @@ private[ui] class StagePage(parent: StagesTab, store: AppStatusStore) extends We
         }
       </script>
 
-    val metricsSummary = store.taskSummary(stageData.stageId, stageData.attemptId,
-      Array(0, 0.25, 0.5, 0.75, 1.0))
-
-    val summaryTable = metricsSummary.map { metrics =>
-      def timeQuantiles(data: IndexedSeq[Double]): Seq[Node] = {
-        data.map { millis =>
-          <td>{UIUtils.formatDuration(millis.toLong)}</td>
-        }
-      }
-
-      def sizeQuantiles(data: IndexedSeq[Double]): Seq[Node] = {
-        data.map { size =>
-          <td>{Utils.bytesToString(size.toLong)}</td>
-        }
-      }
-
-      def sizeQuantilesWithRecords(
-          data: IndexedSeq[Double],
-          records: IndexedSeq[Double]) : Seq[Node] = {
-        data.zip(records).map { case (d, r) =>
-          <td>{s"${Utils.bytesToString(d.toLong)} / ${r.toLong}"}</td>
-        }
-      }
-
-      def titleCell(title: String, tooltip: String): Seq[Node] = {
-        <td>
-          <span data-toggle="tooltip" title={tooltip} data-placement="right">
-            {title}
-          </span>
-        </td>
-      }
-
-      def simpleTitleCell(title: String): Seq[Node] = <td>{title}</td>
-
-      val deserializationQuantiles = titleCell("Task Deserialization Time",
-        ToolTips.TASK_DESERIALIZATION_TIME) ++ timeQuantiles(metrics.executorDeserializeTime)
-
-      val serviceQuantiles = simpleTitleCell("Duration") ++ timeQuantiles(metrics.executorRunTime)
-
-      val gcQuantiles = titleCell("GC Time", ToolTips.GC_TIME) ++ timeQuantiles(metrics.jvmGcTime)
-
-      val serializationQuantiles = titleCell("Result Serialization Time",
-        ToolTips.RESULT_SERIALIZATION_TIME) ++ timeQuantiles(metrics.resultSerializationTime)
-
-      val gettingResultQuantiles = titleCell("Getting Result Time", ToolTips.GETTING_RESULT_TIME) ++
-        timeQuantiles(metrics.gettingResultTime)
-
-      val peakExecutionMemoryQuantiles = titleCell("Peak Execution Memory",
-        ToolTips.PEAK_EXECUTION_MEMORY) ++ sizeQuantiles(metrics.peakExecutionMemory)
-
-      // The scheduler delay includes the network delay to send the task to the worker
-      // machine and to send back the result (but not the time to fetch the task result,
-      // if it needed to be fetched from the block manager on the worker).
-      val schedulerDelayQuantiles = titleCell("Scheduler Delay", ToolTips.SCHEDULER_DELAY) ++
-        timeQuantiles(metrics.schedulerDelay)
-
-      def inputQuantiles: Seq[Node] = {
-        simpleTitleCell("Input Size / Records") ++
-          sizeQuantilesWithRecords(metrics.inputMetrics.bytesRead, metrics.inputMetrics.recordsRead)
-      }
-
-      def outputQuantiles: Seq[Node] = {
-        simpleTitleCell("Output Size / Records") ++
-          sizeQuantilesWithRecords(metrics.outputMetrics.bytesWritten,
-            metrics.outputMetrics.recordsWritten)
-      }
-
-      def shuffleReadBlockedQuantiles: Seq[Node] = {
-        titleCell("Shuffle Read Blocked Time", ToolTips.SHUFFLE_READ_BLOCKED_TIME) ++
-          timeQuantiles(metrics.shuffleReadMetrics.fetchWaitTime)
-      }
-
-      def shuffleReadTotalQuantiles: Seq[Node] = {
-        titleCell("Shuffle Read Size / Records", ToolTips.SHUFFLE_READ) ++
-          sizeQuantilesWithRecords(metrics.shuffleReadMetrics.readBytes,
-            metrics.shuffleReadMetrics.readRecords)
-      }
-
-      def shuffleReadRemoteQuantiles: Seq[Node] = {
-        titleCell("Shuffle Remote Reads", ToolTips.SHUFFLE_READ_REMOTE_SIZE) ++
-          sizeQuantiles(metrics.shuffleReadMetrics.remoteBytesRead)
-      }
-
-      def shuffleWriteQuantiles: Seq[Node] = {
-        simpleTitleCell("Shuffle Write Size / Records") ++
-          sizeQuantilesWithRecords(metrics.shuffleWriteMetrics.writeBytes,
-            metrics.shuffleWriteMetrics.writeRecords)
-      }
-
-      def memoryBytesSpilledQuantiles: Seq[Node] = {
-        simpleTitleCell("Shuffle spill (memory)") ++ sizeQuantiles(metrics.memoryBytesSpilled)
-      }
-
-      def diskBytesSpilledQuantiles: Seq[Node] = {
-        simpleTitleCell("Shuffle spill (disk)") ++ sizeQuantiles(metrics.diskBytesSpilled)
-      }
-
-      val listings: Seq[Seq[Node]] = Seq(
-        <tr>{serviceQuantiles}</tr>,
-        <tr class={TaskDetailsClassNames.SCHEDULER_DELAY}>{schedulerDelayQuantiles}</tr>,
-        <tr class={TaskDetailsClassNames.TASK_DESERIALIZATION_TIME}>
-          {deserializationQuantiles}
-        </tr>
-        <tr>{gcQuantiles}</tr>,
-        <tr class={TaskDetailsClassNames.RESULT_SERIALIZATION_TIME}>
-          {serializationQuantiles}
-        </tr>,
-        <tr class={TaskDetailsClassNames.GETTING_RESULT_TIME}>{gettingResultQuantiles}</tr>,
-        <tr class={TaskDetailsClassNames.PEAK_EXECUTION_MEMORY}>
-          {peakExecutionMemoryQuantiles}
-        </tr>,
-        if (hasInput(stageData)) <tr>{inputQuantiles}</tr> else Nil,
-        if (hasOutput(stageData)) <tr>{outputQuantiles}</tr> else Nil,
-        if (hasShuffleRead(stageData)) {
-          <tr class={TaskDetailsClassNames.SHUFFLE_READ_BLOCKED_TIME}>
-            {shuffleReadBlockedQuantiles}
-          </tr>
-          <tr>{shuffleReadTotalQuantiles}</tr>
-          <tr class={TaskDetailsClassNames.SHUFFLE_READ_REMOTE_SIZE}>
-            {shuffleReadRemoteQuantiles}
-          </tr>
-        } else {
-          Nil
-        },
-        if (hasShuffleWrite(stageData)) <tr>{shuffleWriteQuantiles}</tr> else Nil,
-        if (hasBytesSpilled(stageData)) <tr>{memoryBytesSpilledQuantiles}</tr> else Nil,
-        if (hasBytesSpilled(stageData)) <tr>{diskBytesSpilledQuantiles}</tr> else Nil)
-
-      val quantileHeaders = Seq("Metric", "Min", "25th percentile", "Median", "75th percentile",
-        "Max")
-      // The summary table does not use CSS to stripe rows, which doesn't work with hidden
-      // rows (instead, JavaScript in table.js is used to stripe the non-hidden rows).
-      UIUtils.listingTable(
-        quantileHeaders,
-        identity[Seq[Node]],
-        listings,
-        fixedWidth = true,
-        id = Some("task-summary-table"),
-        stripeRowsWithCss = false)
-    }
-
-    val executorTable = new ExecutorTable(stageData, parent.store)
-
-    val maybeAccumulableTable: Seq[Node] =
-      if (hasAccumulators(stageData)) { <h4>Accumulators</h4> ++ accumulableTable } else Seq()
-
-    val aggMetrics =
-      <span class="collapse-aggregated-metrics collapse-table"
-            onClick="collapseTable('collapse-aggregated-metrics','aggregated-metrics')">
-        <h4>
-          <span class="collapse-table-arrow arrow-open"></span>
-          <a>Aggregated Metrics by Executor</a>
-        </h4>
-      </span>
-      <div class="aggregated-metrics collapsible-table">
-        {executorTable.toNodeSeq}
-      </div>
-
     val content =
       summary ++
-      dagViz ++
-      showAdditionalMetrics ++
+      dagViz ++ <div id="showAdditionalMetrics"></div> ++
       makeTimeline(
         // Only show the tasks in the table
-        Option(taskTable).map(_.dataSource.tasks).getOrElse(Nil),
-        currentTime) ++
-      <h4>Summary Metrics for <a href="#tasks-section">{numCompleted} Completed Tasks</a></h4> ++
-      <div>{summaryTable.getOrElse("No tasks have reported metrics yet.")}</div> ++
-      aggMetrics ++
-      maybeAccumulableTable ++
-      <span id="tasks-section" class="collapse-aggregated-tasks collapse-table"
-          onClick="collapseTable('collapse-aggregated-tasks','aggregated-tasks')">
-        <h4>
-          <span class="collapse-table-arrow arrow-open"></span>
-          <a>Tasks ({totalTasksNumStr})</a>
-        </h4>
-      </span> ++
-      <div class="aggregated-tasks collapsible-table">
-        {taskTableHTML ++ jsForScrollingDownToTaskTable}
-      </div>
-    UIUtils.headerSparkPage(request, stageHeader, content, parent, showVisualization = true)
+        Option(taskTable).map({ taskPagedTable =>
+          val from = (eventTimelineTaskPage - 1) * eventTimelineTaskPageSize
+          val to = taskPagedTable.dataSource.dataSize.min(
+            eventTimelineTaskPage * eventTimelineTaskPageSize)
+          taskPagedTable.dataSource.sliceData(from, to)}).getOrElse(Nil), currentTime,
+        eventTimelineTaskPage, eventTimelineTaskPageSize, eventTimelineTotalPages, stageId,
+        stageAttemptId, totalTasks) ++
+        <div id="parent-container">
+          <script src={UIUtils.prependBaseUri(request, "/static/utils.js")}></script>
+          <script src={UIUtils.prependBaseUri(request, "/static/stagepage.js")}></script>
+        </div>
+        UIUtils.headerSparkPage(request, stageHeader, content, parent, showVisualization = true,
+          useDataTables = true)
+
   }
 
-  def makeTimeline(tasks: Seq[TaskData], currentTime: Long): Seq[Node] = {
+  def makeTimeline(
+      tasks: Seq[TaskData],
+      currentTime: Long,
+      page: Int,
+      pageSize: Int,
+      totalPages: Int,
+      stageId: Int,
+      stageAttemptId: Int,
+      totalTasks: Int): Seq[Node] = {
     val executorsSet = new HashSet[(String, String)]
     var minLaunchTime = Long.MaxValue
     var maxFinishTime = Long.MinValue
@@ -659,6 +446,31 @@ private[ui] class StagePage(parent: StagesTab, store: AppStatusStore) extends We
           <input type="checkbox"></input>
           <span>Enable zooming</span>
         </div>
+        <div>
+          <form id={s"form-event-timeline-page"}
+                method="get"
+                action=""
+                class="form-inline pull-right"
+                style="margin-bottom: 0px;">
+            <label>Tasks: {totalTasks}. {totalPages} Pages. Jump to</label>
+            <input type="hidden" name="id" value={stageId.toString} />
+            <input type="hidden" name="attempt" value={stageAttemptId.toString} />
+            <input type="text"
+                   name="task.eventTimelinePageNumber"
+                   id={s"form-event-timeline-page-no"}
+                   value={page.toString} class="span1" />
+
+            <label>. Show </label>
+            <input type="text"
+                   id={s"form-event-timeline-page-size"}
+                   name="task.eventTimelinePageSize"
+                   value={pageSize.toString}
+                   class="span1" />
+            <label>items in a page.</label>
+
+            <button type="submit" class="btn">Go</button>
+          </form>
+        </div>
       </div>
       {TIMELINE_LEGEND}
     </div> ++
@@ -685,7 +497,7 @@ private[ui] class TaskDataSource(
 
   private var _tasksToShow: Seq[TaskData] = null
 
-  override def dataSize: Int = taskCount(stage)
+  override def dataSize: Int = store.taskCount(stage.stageId, stage.attemptId).toInt
 
   override def sliceData(from: Int, to: Int): Seq[TaskData] = {
     if (_tasksToShow == null) {
@@ -722,8 +534,6 @@ private[ui] class TaskPagedTable(
 
   override def pageSizeFormField: String = "task.pageSize"
 
-  override def prevPageSizeFormField: String = "task.prevPageSize"
-
   override def pageNumberFormField: String = "task.page"
 
   override val dataSource: TaskDataSource = new TaskDataSource(
@@ -847,7 +657,7 @@ private[ui] class TaskPagedTable(
         </div>
       </td>
       <td>{UIUtils.formatDate(task.launchTime)}</td>
-      <td>{formatDuration(task.duration)}</td>
+      <td>{formatDuration(task.taskMetrics.map(_.executorRunTime))}</td>
       <td class={TaskDetailsClassNames.SCHEDULER_DELAY}>
         {UIUtils.formatDuration(AppStatusUtils.schedulerDelay(task))}
       </td>
@@ -962,7 +772,7 @@ private[ui] class TaskPagedTable(
   }
 }
 
-private[ui] object ApiHelper {
+private[spark] object ApiHelper {
 
   val HEADER_ID = "ID"
   val HEADER_TASK_INDEX = "Index"
@@ -1000,7 +810,9 @@ private[ui] object ApiHelper {
     HEADER_EXECUTOR -> TaskIndexNames.EXECUTOR,
     HEADER_HOST -> TaskIndexNames.HOST,
     HEADER_LAUNCH_TIME -> TaskIndexNames.LAUNCH_TIME,
-    HEADER_DURATION -> TaskIndexNames.DURATION,
+    // SPARK-26109: Duration of task as executorRunTime to make it consistent with the
+    // aggregated tasks summary metrics table and the previous versions of Spark.
+    HEADER_DURATION -> TaskIndexNames.EXEC_RUN_TIME,
     HEADER_SCHEDULER_DELAY -> TaskIndexNames.SCHEDULER_DELAY,
     HEADER_DESER_TIME -> TaskIndexNames.DESER_TIME,
     HEADER_GC_TIME -> TaskIndexNames.GC_TIME,
@@ -1047,13 +859,8 @@ private[ui] object ApiHelper {
   }
 
   def lastStageNameAndDescription(store: AppStatusStore, job: JobData): (String, String) = {
-    val stage = store.asOption(store.stageAttempt(job.stageIds.max, 0))
+    val stage = store.asOption(store.stageAttempt(job.stageIds.max, 0)._1)
     (stage.map(_.name).getOrElse(""), stage.flatMap(_.description).getOrElse(job.name))
   }
 
-  def taskCount(stageData: StageData): Int = {
-    stageData.numActiveTasks + stageData.numCompleteTasks + stageData.numFailedTasks +
-      stageData.numKilledTasks
-  }
-
 }
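
The StagePage hunks above introduce a second page/pageSize pair (task.eventTimelinePageNumber and task.eventTimelinePageSize) so the event timeline can be paged independently of the task table, normalising both values before slicing the task list. A standalone sketch of that normalisation, with hard-coded values standing in for the HTTP request parameters:

    object TimelinePagingSketch {
      def main(args: Array[String]): Unit = {
        val totalTasks = 1234
        var page = 7        // would come from task.eventTimelinePageNumber
        var pageSize = 100  // would come from task.eventTimelinePageSize

        // Fall back to "everything on one page" for nonsensical page sizes.
        if (pageSize < 1 || pageSize > totalTasks) pageSize = totalTasks
        val totalPages = (totalTasks + pageSize - 1) / pageSize
        // Out-of-range page numbers reset to the first page.
        if (page < 1 || page > totalPages) page = 1

        // Only the tasks of the selected page are handed to the timeline.
        val from = (page - 1) * pageSize
        val to = math.min(page * pageSize, totalTasks)
        println(s"$totalPages pages; timeline shows tasks [$from, $to) of $totalTasks")
      }
    }
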
diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/StageTable.scala b/core/src/main/scala/org/apache/spark/ui/jobs/StageTable.scala
index d01acdae59c9f..766efc15e26ba 100644
--- a/core/src/main/scala/org/apache/spark/ui/jobs/StageTable.scala
+++ b/core/src/main/scala/org/apache/spark/ui/jobs/StageTable.scala
@@ -53,8 +53,6 @@ private[ui] class StageTableBase(
   val parameterStageSortColumn = UIUtils.stripXSS(request.getParameter(stageTag + ".sort"))
   val parameterStageSortDesc = UIUtils.stripXSS(request.getParameter(stageTag + ".desc"))
   val parameterStagePageSize = UIUtils.stripXSS(request.getParameter(stageTag + ".pageSize"))
-  val parameterStagePrevPageSize =
-    UIUtils.stripXSS(request.getParameter(stageTag + ".prevPageSize"))
 
   val stagePage = Option(parameterStagePage).map(_.toInt).getOrElse(1)
   val stageSortColumn = Option(parameterStageSortColumn).map { sortColumn =>
@@ -65,18 +63,7 @@ private[ui] class StageTableBase(
     stageSortColumn == "Stage Id"
   )
   val stagePageSize = Option(parameterStagePageSize).map(_.toInt).getOrElse(100)
-  val stagePrevPageSize = Option(parameterStagePrevPageSize).map(_.toInt)
-    .getOrElse(stagePageSize)
-
-  val page: Int = {
-    // If the user has changed to a larger page size, then go to page 1 in order to avoid
-    // IndexOutOfBoundsException.
-    if (stagePageSize <= stagePrevPageSize) {
-      stagePage
-    } else {
-      1
-    }
-  }
+
   val currentTime = System.currentTimeMillis()
 
   val toNodeSeq = try {
@@ -96,7 +83,7 @@ private[ui] class StageTableBase(
       isFailedStage,
       parameterOtherTable,
       request
-    ).table(page)
+    ).table(stagePage)
   } catch {
     case e @ (_ : IllegalArgumentException | _ : IndexOutOfBoundsException) =>
       <div class="alert alert-error">
@@ -161,8 +148,6 @@ private[ui] class StagePagedTable(
 
   override def pageSizeFormField: String = stageTag + ".pageSize"
 
-  override def prevPageSizeFormField: String = stageTag + ".prevPageSize"
-
   override def pageNumberFormField: String = stageTag + ".page"
 
   val parameterPath = UIUtils.prependBaseUri(request, basePath) + s"/$subPath/?" +
@@ -383,7 +368,7 @@ private[ui] class StagePagedTable(
         {if (cachedRddInfos.nonEmpty) {
           Text("RDD: ") ++
           cachedRddInfos.map { i =>
-            <a href={s"$basePathUri/storage/rdd?id=${i.id}"}>{i.name}</a>
+            <a href={s"$basePathUri/storage/rdd/?id=${i.id}"}>{i.name}</a>
           }
         }}
         <pre>{s.details}</pre>
diff --git a/core/src/main/scala/org/apache/spark/ui/storage/RDDPage.scala b/core/src/main/scala/org/apache/spark/ui/storage/RDDPage.scala
index 238cd31433660..87da290c83057 100644
--- a/core/src/main/scala/org/apache/spark/ui/storage/RDDPage.scala
+++ b/core/src/main/scala/org/apache/spark/ui/storage/RDDPage.scala
@@ -39,13 +39,11 @@ private[ui] class RDDPage(parent: SparkUITab, store: AppStatusStore) extends Web
     val parameterBlockSortColumn = UIUtils.stripXSS(request.getParameter("block.sort"))
     val parameterBlockSortDesc = UIUtils.stripXSS(request.getParameter("block.desc"))
     val parameterBlockPageSize = UIUtils.stripXSS(request.getParameter("block.pageSize"))
-    val parameterBlockPrevPageSize = UIUtils.stripXSS(request.getParameter("block.prevPageSize"))
 
     val blockPage = Option(parameterBlockPage).map(_.toInt).getOrElse(1)
     val blockSortColumn = Option(parameterBlockSortColumn).getOrElse("Block Name")
     val blockSortDesc = Option(parameterBlockSortDesc).map(_.toBoolean).getOrElse(false)
     val blockPageSize = Option(parameterBlockPageSize).map(_.toInt).getOrElse(100)
-    val blockPrevPageSize = Option(parameterBlockPrevPageSize).map(_.toInt).getOrElse(blockPageSize)
 
     val rddId = parameterId.toInt
     val rddStorageInfo = try {
@@ -60,16 +58,6 @@ private[ui] class RDDPage(parent: SparkUITab, store: AppStatusStore) extends Web
     val workerTable = UIUtils.listingTable(workerHeader, workerRow,
       rddStorageInfo.dataDistribution.get, id = Some("rdd-storage-by-worker-table"))
 
-    // Block table
-    val page: Int = {
-      // If the user has changed to a larger page size, then go to page 1 in order to avoid
-      // IndexOutOfBoundsException.
-      if (blockPageSize <= blockPrevPageSize) {
-        blockPage
-      } else {
-        1
-      }
-    }
     val blockTableHTML = try {
       val _blockTable = new BlockPagedTable(
         UIUtils.prependBaseUri(request, parent.basePath) + s"/storage/rdd/?id=${rddId}",
@@ -78,7 +66,7 @@ private[ui] class RDDPage(parent: SparkUITab, store: AppStatusStore) extends Web
         blockSortColumn,
         blockSortDesc,
         store.executorList(true))
-      _blockTable.table(page)
+      _blockTable.table(blockPage)
     } catch {
       case e @ (_ : IllegalArgumentException | _ : IndexOutOfBoundsException) =>
         <div class="alert alert-error">{e.getMessage}</div>
@@ -242,8 +230,6 @@ private[ui] class BlockPagedTable(
 
   override def pageSizeFormField: String = "block.pageSize"
 
-  override def prevPageSizeFormField: String = "block.prevPageSize"
-
   override def pageNumberFormField: String = "block.page"
 
   override val dataSource: BlockDataSource = new BlockDataSource(
diff --git a/core/src/main/scala/org/apache/spark/ui/storage/StoragePage.scala b/core/src/main/scala/org/apache/spark/ui/storage/StoragePage.scala
index 3eb546e336e99..2488197814ffd 100644
--- a/core/src/main/scala/org/apache/spark/ui/storage/StoragePage.scala
+++ b/core/src/main/scala/org/apache/spark/ui/storage/StoragePage.scala
@@ -78,7 +78,7 @@ private[ui] class StoragePage(parent: SparkUITab, store: AppStatusStore) extends
     <tr>
       <td>{rdd.id}</td>
       <td>
-        <a href={"%s/storage/rdd?id=%s".format(
+        <a href={"%s/storage/rdd/?id=%s".format(
           UIUtils.prependBaseUri(request, parent.basePath), rdd.id)}>
           {rdd.name}
         </a>
diff --git a/core/src/main/scala/org/apache/spark/util/AccumulatorV2.scala b/core/src/main/scala/org/apache/spark/util/AccumulatorV2.scala
index bf618b4afbce0..d5b3ce36e742a 100644
--- a/core/src/main/scala/org/apache/spark/util/AccumulatorV2.scala
+++ b/core/src/main/scala/org/apache/spark/util/AccumulatorV2.scala
@@ -485,34 +485,3 @@ class CollectionAccumulator[T] extends AccumulatorV2[T, java.util.List[T]] {
     _list.addAll(newValue)
   }
 }
-
-
-class LegacyAccumulatorWrapper[R, T](
-    initialValue: R,
-    param: org.apache.spark.AccumulableParam[R, T]) extends AccumulatorV2[T, R] {
-  private[spark] var _value = initialValue  // Current value on driver
-
-  @transient private lazy val _zero = param.zero(initialValue)
-
-  override def isZero: Boolean = _value.asInstanceOf[AnyRef].eq(_zero.asInstanceOf[AnyRef])
-
-  override def copy(): LegacyAccumulatorWrapper[R, T] = {
-    val acc = new LegacyAccumulatorWrapper(initialValue, param)
-    acc._value = _value
-    acc
-  }
-
-  override def reset(): Unit = {
-    _value = _zero
-  }
-
-  override def add(v: T): Unit = _value = param.addAccumulator(_value, v)
-
-  override def merge(other: AccumulatorV2[T, R]): Unit = other match {
-    case o: LegacyAccumulatorWrapper[R, T] => _value = param.addInPlace(_value, o.value)
-    case _ => throw new UnsupportedOperationException(
-      s"Cannot merge ${this.getClass.getName} with ${other.getClass.getName}")
-  }
-
-  override def value: R = _value
-}
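
LegacyAccumulatorWrapper, the bridge from the old AccumulableParam API, is deleted above, so code still on the legacy path has to implement AccumulatorV2 directly. A minimal sketch of such an accumulator, assuming spark-core is on the classpath; it tracks a running maximum and would be registered with sc.register(acc, "max") before use:

    import org.apache.spark.util.AccumulatorV2

    // A long-max accumulator written directly against AccumulatorV2.
    class MaxAccumulator extends AccumulatorV2[Long, Long] {
      private var _max: Long = Long.MinValue

      override def isZero: Boolean = _max == Long.MinValue
      override def copy(): MaxAccumulator = {
        val acc = new MaxAccumulator
        acc._max = _max
        acc
      }
      override def reset(): Unit = _max = Long.MinValue
      override def add(v: Long): Unit = _max = math.max(_max, v)
      override def merge(other: AccumulatorV2[Long, Long]): Unit = other match {
        case o: MaxAccumulator => _max = math.max(_max, o._max)
        case _ => throw new UnsupportedOperationException(
          s"Cannot merge ${getClass.getName} with ${other.getClass.getName}")
      }
      override def value: Long = _max
    }
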
diff --git a/core/src/main/scala/org/apache/spark/util/ClosureCleaner.scala b/core/src/main/scala/org/apache/spark/util/ClosureCleaner.scala
index b6c300c4778b1..1b3e525644f00 100644
--- a/core/src/main/scala/org/apache/spark/util/ClosureCleaner.scala
+++ b/core/src/main/scala/org/apache/spark/util/ClosureCleaner.scala
@@ -23,8 +23,8 @@ import java.lang.invoke.SerializedLambda
 import scala.collection.mutable.{Map, Set, Stack}
 import scala.language.existentials
 
-import org.apache.xbean.asm6.{ClassReader, ClassVisitor, MethodVisitor, Type}
-import org.apache.xbean.asm6.Opcodes._
+import org.apache.xbean.asm7.{ClassReader, ClassVisitor, MethodVisitor, Type}
+import org.apache.xbean.asm7.Opcodes._
 
 import org.apache.spark.{SparkEnv, SparkException}
 import org.apache.spark.internal.Logging
@@ -175,7 +175,7 @@ private[spark] object ClosureCleaner extends Logging {
       closure.getClass.isSynthetic &&
         closure
           .getClass
-          .getInterfaces.exists(_.getName.equals("scala.Serializable"))
+          .getInterfaces.exists(_.getName == "scala.Serializable")
 
     if (isClosureCandidate) {
       try {
@@ -285,8 +285,6 @@ private[spark] object ClosureCleaner extends Logging {
         innerClasses.foreach { c => logDebug(s"     ${c.getName}") }
         logDebug(s" + outer classes: ${outerClasses.size}" )
         outerClasses.foreach { c => logDebug(s"     ${c.getName}") }
-        logDebug(s" + outer objects: ${outerObjects.size}")
-        outerObjects.foreach { o => logDebug(s"     $o") }
       }
 
       // Fail fast if we detect return statements in closures
@@ -318,19 +316,20 @@ private[spark] object ClosureCleaner extends Logging {
       if (outerPairs.nonEmpty) {
         val (outermostClass, outermostObject) = outerPairs.head
         if (isClosure(outermostClass)) {
-          logDebug(s" + outermost object is a closure, so we clone it: ${outerPairs.head}")
+          logDebug(s" + outermost object is a closure, so we clone it: ${outermostClass}")
         } else if (outermostClass.getName.startsWith("$line")) {
           // SPARK-14558: if the outermost object is a REPL line object, we should clone
           // and clean it as it may carray a lot of unnecessary information,
           // e.g. hadoop conf, spark conf, etc.
-          logDebug(s" + outermost object is a REPL line object, so we clone it: ${outerPairs.head}")
+          logDebug(s" + outermost object is a REPL line object, so we clone it:" +
+            s" ${outermostClass}")
         } else {
           // The closure is ultimately nested inside a class; keep the object of that
           // class without cloning it since we don't want to clone the user's objects.
           // Note that we still need to keep around the outermost object itself because
           // we need it to clone its child closure later (see below).
-          logDebug(" + outermost object is not a closure or REPL line object," +
-            "so do not clone it: " +  outerPairs.head)
+          logDebug(s" + outermost object is not a closure or REPL line object," +
+            s" so do not clone it: ${outermostClass}")
           parent = outermostObject // e.g. SparkContext
           outerPairs = outerPairs.tail
         }
@@ -341,7 +340,7 @@ private[spark] object ClosureCleaner extends Logging {
       // Clone the closure objects themselves, nulling out any fields that are not
       // used in the closure we're working on or any of its inner closures.
       for ((cls, obj) <- outerPairs) {
-        logDebug(s" + cloning the object $obj of class ${cls.getName}")
+        logDebug(s" + cloning instance of class ${cls.getName}")
         // We null out these unused references by cloning each object and then filling in all
         // required fields from the original object. We need the parent here because the Java
         // language specification requires the first constructor parameter of any closure to be
@@ -351,7 +350,7 @@ private[spark] object ClosureCleaner extends Logging {
         // If transitive cleaning is enabled, we recursively clean any enclosing closure using
         // the already populated accessed fields map of the starting closure
         if (cleanTransitively && isClosure(clone.getClass)) {
-          logDebug(s" + cleaning cloned closure $clone recursively (${cls.getName})")
+          logDebug(s" + cleaning cloned closure recursively (${cls.getName})")
           // No need to check serializable here for the outer closures because we're
           // only interested in the serializability of the starting closure
           clean(clone, checkSerializable = false, cleanTransitively, accessedFields)
@@ -425,7 +424,7 @@ private[spark] class ReturnStatementInClosureException
   extends SparkException("Return statements aren't allowed in Spark closures")
 
 private class ReturnStatementFinder(targetMethodName: Option[String] = None)
-  extends ClassVisitor(ASM6) {
+  extends ClassVisitor(ASM7) {
   override def visitMethod(access: Int, name: String, desc: String,
       sig: String, exceptions: Array[String]): MethodVisitor = {
 
@@ -439,7 +438,7 @@ private class ReturnStatementFinder(targetMethodName: Option[String] = None)
       val isTargetMethod = targetMethodName.isEmpty ||
         name == targetMethodName.get || name == targetMethodName.get.stripSuffix("$adapted")
 
-      new MethodVisitor(ASM6) {
+      new MethodVisitor(ASM7) {
         override def visitTypeInsn(op: Int, tp: String) {
           if (op == NEW && tp.contains("scala/runtime/NonLocalReturnControl") && isTargetMethod) {
             throw new ReturnStatementInClosureException
@@ -447,7 +446,7 @@ private class ReturnStatementFinder(targetMethodName: Option[String] = None)
         }
       }
     } else {
-      new MethodVisitor(ASM6) {}
+      new MethodVisitor(ASM7) {}
     }
   }
 }
@@ -471,7 +470,7 @@ private[util] class FieldAccessFinder(
     findTransitively: Boolean,
     specificMethod: Option[MethodIdentifier[_]] = None,
     visitedMethods: Set[MethodIdentifier[_]] = Set.empty)
-  extends ClassVisitor(ASM6) {
+  extends ClassVisitor(ASM7) {
 
   override def visitMethod(
       access: Int,
@@ -486,7 +485,7 @@ private[util] class FieldAccessFinder(
       return null
     }
 
-    new MethodVisitor(ASM6) {
+    new MethodVisitor(ASM7) {
       override def visitFieldInsn(op: Int, owner: String, name: String, desc: String) {
         if (op == GETFIELD) {
           for (cl <- fields.keys if cl.getName == owner.replace('/', '.')) {
@@ -526,7 +525,7 @@ private[util] class FieldAccessFinder(
   }
 }
 
-private class InnerClosureFinder(output: Set[Class[_]]) extends ClassVisitor(ASM6) {
+private class InnerClosureFinder(output: Set[Class[_]]) extends ClassVisitor(ASM7) {
   var myName: String = null
 
   // TODO: Recursively find inner closures that we indirectly reference, e.g.
@@ -541,7 +540,7 @@ private class InnerClosureFinder(output: Set[Class[_]]) extends ClassVisitor(ASM
 
   override def visitMethod(access: Int, name: String, desc: String,
       sig: String, exceptions: Array[String]): MethodVisitor = {
-    new MethodVisitor(ASM6) {
+    new MethodVisitor(ASM7) {
       override def visitMethodInsn(
           op: Int, owner: String, name: String, desc: String, itf: Boolean) {
         val argTypes = Type.getArgumentTypes(desc)
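
The ClosureCleaner change above is mostly the shaded asm6 to asm7 package bump, with every visitor now constructed at the ASM7 api level, plus quieter debug logging. For reference, a minimal visitor in the same ASM7 style, written against the plain org.ow2.asm artifact (an assumption; Spark itself uses the xbean-shaded copy), which prints the method descriptors of java.lang.String:

    import org.objectweb.asm.{ClassReader, ClassVisitor, MethodVisitor, Opcodes}

    object Asm7Sketch {
      def main(args: Array[String]): Unit = {
        val reader = new ClassReader("java.lang.String")
        // Every visitor takes the ASM7 api level, mirroring the asm6 -> asm7 bump above.
        reader.accept(new ClassVisitor(Opcodes.ASM7) {
          override def visitMethod(access: Int, name: String, desc: String,
              sig: String, exceptions: Array[String]): MethodVisitor = {
            println(s"method: $name$desc")
            new MethodVisitor(Opcodes.ASM7) {}
          }
        }, 0)
      }
    }
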
diff --git a/core/src/main/scala/org/apache/spark/util/CompletionIterator.scala b/core/src/main/scala/org/apache/spark/util/CompletionIterator.scala
index 21acaa95c5645..f4d6c7a28d2e4 100644
--- a/core/src/main/scala/org/apache/spark/util/CompletionIterator.scala
+++ b/core/src/main/scala/org/apache/spark/util/CompletionIterator.scala
@@ -25,11 +25,14 @@ private[spark]
 abstract class CompletionIterator[ +A, +I <: Iterator[A]](sub: I) extends Iterator[A] {
 
   private[this] var completed = false
-  def next(): A = sub.next()
+  private[this] var iter = sub
+  def next(): A = iter.next()
   def hasNext: Boolean = {
-    val r = sub.hasNext
+    val r = iter.hasNext
     if (!r && !completed) {
       completed = true
+      // reassign to release the resources of highly resource-consuming iterators early
+      iter = Iterator.empty.asInstanceOf[I]
       completion()
     }
     r
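
The change above replaces the fixed reference to sub with a reassignable iter, so a resource-heavy underlying iterator can be dropped and garbage collected as soon as it is exhausted, before the completion callback runs. A self-contained sketch of the same pattern, with hypothetical names and without Spark's I <: Iterator[A] bound:

    // Wraps an iterator and runs completion() exactly once, after exhaustion.
    abstract class SimpleCompletionIterator[A](sub: Iterator[A]) extends Iterator[A] {
      private[this] var completed = false
      private[this] var iter = sub

      def next(): A = iter.next()

      def hasNext: Boolean = {
        val r = iter.hasNext
        if (!r && !completed) {
          completed = true
          iter = Iterator.empty   // release the underlying iterator early
          completion()
        }
        r
      }

      def completion(): Unit
    }

    object SimpleCompletionIteratorDemo {
      def main(args: Array[String]): Unit = {
        val it = new SimpleCompletionIterator[Int](Iterator(1, 2, 3)) {
          def completion(): Unit = println("done")
        }
        it.foreach(println)       // prints 1, 2, 3, then "done"
      }
    }
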
diff --git a/core/src/main/scala/org/apache/spark/util/JsonProtocol.scala b/core/src/main/scala/org/apache/spark/util/JsonProtocol.scala
index 50c6461373dee..0cd8612b8fd1c 100644
--- a/core/src/main/scala/org/apache/spark/util/JsonProtocol.scala
+++ b/core/src/main/scala/org/apache/spark/util/JsonProtocol.scala
@@ -31,6 +31,7 @@ import org.json4s.jackson.JsonMethods._
 
 import org.apache.spark._
 import org.apache.spark.executor._
+import org.apache.spark.metrics.ExecutorMetricType
 import org.apache.spark.rdd.RDDOperationScope
 import org.apache.spark.scheduler._
 import org.apache.spark.scheduler.cluster.ExecutorInfo
@@ -98,6 +99,8 @@ private[spark] object JsonProtocol {
         logStartToJson(logStart)
       case metricsUpdate: SparkListenerExecutorMetricsUpdate =>
         executorMetricsUpdateToJson(metricsUpdate)
+      case stageExecutorMetrics: SparkListenerStageExecutorMetrics =>
+        stageExecutorMetricsToJson(stageExecutorMetrics)
       case blockUpdate: SparkListenerBlockUpdated =>
         blockUpdateToJson(blockUpdate)
       case _ => parse(mapper.writeValueAsString(event))
@@ -236,6 +239,7 @@ private[spark] object JsonProtocol {
   def executorMetricsUpdateToJson(metricsUpdate: SparkListenerExecutorMetricsUpdate): JValue = {
     val execId = metricsUpdate.execId
     val accumUpdates = metricsUpdate.accumUpdates
+    val executorMetrics = metricsUpdate.executorUpdates.map(executorMetricsToJson(_))
     ("Event" -> SPARK_LISTENER_EVENT_FORMATTED_CLASS_NAMES.metricsUpdate) ~
     ("Executor ID" -> execId) ~
     ("Metrics Updated" -> accumUpdates.map { case (taskId, stageId, stageAttemptId, updates) =>
@@ -243,7 +247,16 @@ private[spark] object JsonProtocol {
       ("Stage ID" -> stageId) ~
       ("Stage Attempt ID" -> stageAttemptId) ~
       ("Accumulator Updates" -> JArray(updates.map(accumulableInfoToJson).toList))
-    })
+    }) ~
+    ("Executor Metrics Updated" -> executorMetrics)
+  }
+
+  def stageExecutorMetricsToJson(metrics: SparkListenerStageExecutorMetrics): JValue = {
+    ("Event" -> SPARK_LISTENER_EVENT_FORMATTED_CLASS_NAMES.stageExecutorMetrics) ~
+    ("Executor ID" -> metrics.execId) ~
+    ("Stage ID" -> metrics.stageId) ~
+    ("Stage Attempt ID" -> metrics.stageAttemptId) ~
+    ("Executor Metrics" -> executorMetricsToJson(metrics.executorMetrics))
   }
 
   def blockUpdateToJson(blockUpdate: SparkListenerBlockUpdated): JValue = {
@@ -379,6 +392,14 @@ private[spark] object JsonProtocol {
     ("Updated Blocks" -> updatedBlocks)
   }
 
+  /** Convert executor metrics to JSON. */
+  def executorMetricsToJson(executorMetrics: ExecutorMetrics): JValue = {
+    val metrics = ExecutorMetricType.values.map{ metricType =>
+      JField(metricType.name, executorMetrics.getMetricValue(metricType))
+     }
+    JObject(metrics: _*)
+  }
+
   def taskEndReasonToJson(taskEndReason: TaskEndReason): JValue = {
     val reason = Utils.getFormattedClassName(taskEndReason)
     val json: JObject = taskEndReason match {
@@ -531,6 +552,7 @@ private[spark] object JsonProtocol {
     val executorRemoved = Utils.getFormattedClassName(SparkListenerExecutorRemoved)
     val logStart = Utils.getFormattedClassName(SparkListenerLogStart)
     val metricsUpdate = Utils.getFormattedClassName(SparkListenerExecutorMetricsUpdate)
+    val stageExecutorMetrics = Utils.getFormattedClassName(SparkListenerStageExecutorMetrics)
     val blockUpdate = Utils.getFormattedClassName(SparkListenerBlockUpdated)
   }
 
@@ -555,6 +577,7 @@ private[spark] object JsonProtocol {
       case `executorRemoved` => executorRemovedFromJson(json)
       case `logStart` => logStartFromJson(json)
       case `metricsUpdate` => executorMetricsUpdateFromJson(json)
+      case `stageExecutorMetrics` => stageExecutorMetricsFromJson(json)
       case `blockUpdate` => blockUpdateFromJson(json)
       case other => mapper.readValue(compact(render(json)), Utils.classForName(other))
         .asInstanceOf[SparkListenerEvent]
@@ -585,6 +608,15 @@ private[spark] object JsonProtocol {
     SparkListenerTaskGettingResult(taskInfo)
   }
 
+  /** Extract the executor metrics from JSON. */
+  def executorMetricsFromJson(json: JValue): ExecutorMetrics = {
+    val metrics =
+      ExecutorMetricType.values.map { metric =>
+        metric.name -> jsonOption(json \ metric.name).map(_.extract[Long]).getOrElse(0L)
+      }.toMap
+    new ExecutorMetrics(metrics)
+  }
+
   def taskEndFromJson(json: JValue): SparkListenerTaskEnd = {
     val stageId = (json \ "Stage ID").extract[Int]
     val stageAttemptId =
@@ -691,7 +723,18 @@ private[spark] object JsonProtocol {
         (json \ "Accumulator Updates").extract[List[JValue]].map(accumulableInfoFromJson)
       (taskId, stageId, stageAttemptId, updates)
     }
-    SparkListenerExecutorMetricsUpdate(execInfo, accumUpdates)
+    val executorUpdates = jsonOption(json \ "Executor Metrics Updated").map {
+      executorUpdate => executorMetricsFromJson(executorUpdate)
+    }
+    SparkListenerExecutorMetricsUpdate(execInfo, accumUpdates, executorUpdates)
+  }
+
+  def stageExecutorMetricsFromJson(json: JValue): SparkListenerStageExecutorMetrics = {
+    val execId = (json \ "Executor ID").extract[String]
+    val stageId = (json \ "Stage ID").extract[Int]
+    val stageAttemptId = (json \ "Stage Attempt ID").extract[Int]
+    val executorMetrics = executorMetricsFromJson(json \ "Executor Metrics")
+    SparkListenerStageExecutorMetrics(execId, stageId, stageAttemptId, executorMetrics)
   }
 
   def blockUpdateFromJson(json: JValue): SparkListenerBlockUpdated = {
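
The new executorMetricsToJson/executorMetricsFromJson pair writes one JSON field per known metric and, on the way back in, defaults any missing metric to 0 so older event logs still parse. A standalone json4s sketch of that round trip; the metric names and the demo object are invented for illustration (Spark iterates ExecutorMetricType.values instead):

    import org.json4s._
    import org.json4s.JsonDSL._
    import org.json4s.jackson.JsonMethods._

    object ExecutorMetricsJsonDemo {
      implicit val formats: Formats = DefaultFormats

      // Hypothetical metric names standing in for ExecutorMetricType.values.
      val metricNames: List[String] = List("JVMHeapMemory", "JVMOffHeapMemory")

      // Serialize a metric map to a JSON object, one field per known metric.
      def toJson(metrics: Map[String, Long]): JValue =
        JObject(metricNames.map(name => JField(name, metrics.getOrElse(name, 0L))))

      // Read the metrics back; absent fields fall back to 0, mirroring the patch.
      def fromJson(json: JValue): Map[String, Long] =
        metricNames.map(name => name -> (json \ name).extractOpt[Long].getOrElse(0L)).toMap

      def main(args: Array[String]): Unit = {
        val json = toJson(Map("JVMHeapMemory" -> 1024L))
        println(compact(render(json)))   // {"JVMHeapMemory":1024,"JVMOffHeapMemory":0}
        println(fromJson(json))          // missing metrics come back as 0
      }
    }
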
diff --git a/core/src/main/scala/org/apache/spark/util/ListenerBus.scala b/core/src/main/scala/org/apache/spark/util/ListenerBus.scala
index a8f10684d5a2c..2e517707ff774 100644
--- a/core/src/main/scala/org/apache/spark/util/ListenerBus.scala
+++ b/core/src/main/scala/org/apache/spark/util/ListenerBus.scala
@@ -60,6 +60,14 @@ private[spark] trait ListenerBus[L <: AnyRef, E] extends Logging {
     }
   }
 
+  /**
+   * Remove all listeners so that they no longer receive any events. This method is thread-safe
+   * and can be called from any thread.
+   */
+  final def removeAllListeners(): Unit = {
+    listenersPlusTimers.clear()
+  }
+
   /**
    * This can be overridden by subclasses if there is any extra cleanup to do when removing a
    * listener.  In particular AsyncEventQueues can clean up queues in the LiveListenerBus.
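
removeAllListeners() above just clears the listener collection. A minimal sketch of why that is safe from any thread, assuming the CopyOnWriteArrayList-backed design ListenerBus uses (all names here are hypothetical):

    import java.util.concurrent.CopyOnWriteArrayList
    import scala.collection.JavaConverters._

    // Listeners live in a CopyOnWriteArrayList: postToAll iterates a snapshot,
    // so clear() can run concurrently without ConcurrentModificationException.
    class SimpleListenerBus[L, E](doPost: (L, E) => Unit) {
      private val listeners = new CopyOnWriteArrayList[L]()

      def addListener(listener: L): Unit = listeners.add(listener)

      def postToAll(event: E): Unit = listeners.asScala.foreach(doPost(_, event))

      // Counterpart of the new removeAllListeners(): afterwards no listener
      // receives further events.
      def removeAllListeners(): Unit = listeners.clear()
    }

    object SimpleListenerBusDemo {
      def main(args: Array[String]): Unit = {
        val bus = new SimpleListenerBus[String, String]((l, e) => println(s"$l got $e"))
        bus.addListener("listener-1")
        bus.postToAll("event-A")     // delivered
        bus.removeAllListeners()
        bus.postToAll("event-B")     // dropped: no listeners remain
      }
    }
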
diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala
index 15c958d3f511e..227c9e734f0af 100644
--- a/core/src/main/scala/org/apache/spark/util/Utils.scala
+++ b/core/src/main/scala/org/apache/spark/util/Utils.scala
@@ -19,7 +19,6 @@ package org.apache.spark.util
 
 import java.io._
 import java.lang.{Byte => JByte}
-import java.lang.InternalError
 import java.lang.management.{LockInfo, ManagementFactory, MonitorInfo, ThreadInfo}
 import java.lang.reflect.InvocationTargetException
 import java.math.{MathContext, RoundingMode}
@@ -32,7 +31,6 @@ import java.security.SecureRandom
 import java.util.{Locale, Properties, Random, UUID}
 import java.util.concurrent._
 import java.util.concurrent.TimeUnit.NANOSECONDS
-import java.util.concurrent.atomic.AtomicBoolean
 import java.util.zip.GZIPInputStream
 
 import scala.annotation.tailrec
@@ -94,53 +92,6 @@ private[spark] object Utils extends Logging {
   private val MAX_DIR_CREATION_ATTEMPTS: Int = 10
   @volatile private var localRootDirs: Array[String] = null
 
-  /**
-   * The performance overhead of creating and logging strings for wide schemas can be large. To
-   * limit the impact, we bound the number of fields to include by default. This can be overridden
-   * by setting the 'spark.debug.maxToStringFields' conf in SparkEnv.
-   */
-  val DEFAULT_MAX_TO_STRING_FIELDS = 25
-
-  private[spark] def maxNumToStringFields = {
-    if (SparkEnv.get != null) {
-      SparkEnv.get.conf.getInt("spark.debug.maxToStringFields", DEFAULT_MAX_TO_STRING_FIELDS)
-    } else {
-      DEFAULT_MAX_TO_STRING_FIELDS
-    }
-  }
-
-  /** Whether we have warned about plan string truncation yet. */
-  private val truncationWarningPrinted = new AtomicBoolean(false)
-
-  /**
-   * Format a sequence with semantics similar to calling .mkString(). Any elements beyond
-   * maxNumToStringFields will be dropped and replaced by a "... N more fields" placeholder.
-   *
-   * @return the trimmed and formatted string.
-   */
-  def truncatedString[T](
-      seq: Seq[T],
-      start: String,
-      sep: String,
-      end: String,
-      maxNumFields: Int = maxNumToStringFields): String = {
-    if (seq.length > maxNumFields) {
-      if (truncationWarningPrinted.compareAndSet(false, true)) {
-        logWarning(
-          "Truncated the string representation of a plan since it was too large. This " +
-          "behavior can be adjusted by setting 'spark.debug.maxToStringFields' in SparkEnv.conf.")
-      }
-      val numFields = math.max(0, maxNumFields - 1)
-      seq.take(numFields).mkString(
-        start, sep, sep + "... " + (seq.length - numFields) + " more fields" + end)
-    } else {
-      seq.mkString(start, sep, end)
-    }
-  }
-
-  /** Shorthand for calling truncatedString() without start or end strings. */
-  def truncatedString[T](seq: Seq[T], sep: String): String = truncatedString(seq, "", sep, "")
-
   /** Serialize an object using Java serialization */
   def serialize[T](o: T): Array[Byte] = {
     val bos = new ByteArrayOutputStream()
@@ -240,6 +191,19 @@ private[spark] object Utils extends Logging {
     // scalastyle:on classforname
   }
 
+  /**
+   * Run a segment of code using a different context class loader in the current thread
+   */
+  def withContextClassLoader[T](ctxClassLoader: ClassLoader)(fn: => T): T = {
+    val oldClassLoader = Thread.currentThread().getContextClassLoader()
+    try {
+      Thread.currentThread().setContextClassLoader(ctxClassLoader)
+      fn
+    } finally {
+      Thread.currentThread().setContextClassLoader(oldClassLoader)
+    }
+  }
+
   /**
    * Primitive often used when writing [[java.nio.ByteBuffer]] to [[java.io.DataOutput]]
    */
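
A usage sketch for the new withContextClassLoader helper: the block runs with the supplied loader installed as the current thread's context class loader, and the previous loader is restored even when the block throws. The helper body is copied from the hunk above; the demo object and the empty URLClassLoader are only illustrative:

    import java.net.{URL, URLClassLoader}

    object ContextClassLoaderDemo {
      def withContextClassLoader[T](ctxClassLoader: ClassLoader)(fn: => T): T = {
        val oldClassLoader = Thread.currentThread().getContextClassLoader()
        try {
          Thread.currentThread().setContextClassLoader(ctxClassLoader)
          fn
        } finally {
          Thread.currentThread().setContextClassLoader(oldClassLoader)
        }
      }

      def main(args: Array[String]): Unit = {
        val pluginLoader = new URLClassLoader(Array.empty[URL], getClass.getClassLoader)
        val name = withContextClassLoader(pluginLoader) {
          // Inside the block the thread sees the plugin loader.
          Thread.currentThread().getContextClassLoader.getClass.getName
        }
        println(name)   // java.net.URLClassLoader
      }
    }
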
@@ -2052,6 +2016,30 @@ private[spark] object Utils extends Logging {
     }
   }
 
+  /**
+   * Implements the same logic as JDK `java.lang.String#trim` by removing leading and trailing
+   * non-printable characters less than or equal to '\u0020' (SPACE), but preserves the natural
+   * line delimiters allowed by the [[java.util.Properties]] load method. Natural line delimiters
+   * are removed by the JDK during load, so any that remain were explicitly provided and escaped
+   * by the user and must not be ignored.
+   *
+   * @param str the property value to trim
+   * @return the trimmed value of str
+   */
+  private[util] def trimExceptCRLF(str: String): String = {
+    val nonSpaceOrNaturalLineDelimiter: Char => Boolean = { ch =>
+      ch > ' ' || ch == '\r' || ch == '\n'
+    }
+
+    val firstPos = str.indexWhere(nonSpaceOrNaturalLineDelimiter)
+    val lastPos = str.lastIndexWhere(nonSpaceOrNaturalLineDelimiter)
+    if (firstPos >= 0 && lastPos >= 0) {
+      str.substring(firstPos, lastPos + 1)
+    } else {
+      ""
+    }
+  }
+
   /** Load properties present in the given file. */
   def getPropertiesFromFile(filename: String): Map[String, String] = {
     val file = new File(filename)
@@ -2062,8 +2050,10 @@ private[spark] object Utils extends Logging {
     try {
       val properties = new Properties()
       properties.load(inReader)
-      properties.stringPropertyNames().asScala.map(
-        k => (k, properties.getProperty(k).trim)).toMap
+      properties.stringPropertyNames().asScala
+        .map { k => (k, trimExceptCRLF(properties.getProperty(k))) }
+        .toMap
+
     } catch {
       case e: IOException =>
         throw new SparkException(s"Failed when loading Spark properties from $filename", e)
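
The two hunks above add trimExceptCRLF and make getPropertiesFromFile use it instead of String.trim, so a property value whose user-escaped newline survived Properties.load keeps that newline while ordinary surrounding whitespace is still stripped. A standalone copy of the helper with a few illustrative inputs:

    object TrimExceptCRLFDemo {
      // Copied from the patch: trim characters <= '\u0020' except \r and \n.
      def trimExceptCRLF(str: String): String = {
        val nonSpaceOrNaturalLineDelimiter: Char => Boolean = { ch =>
          ch > ' ' || ch == '\r' || ch == '\n'
        }
        val firstPos = str.indexWhere(nonSpaceOrNaturalLineDelimiter)
        val lastPos = str.lastIndexWhere(nonSpaceOrNaturalLineDelimiter)
        if (firstPos >= 0 && lastPos >= 0) str.substring(firstPos, lastPos + 1) else ""
      }

      def main(args: Array[String]): Unit = {
        println(trimExceptCRLF(" \t value \t "))   // "value": plain whitespace is trimmed
        println(trimExceptCRLF(" dir1\ndir2\n"))   // "dir1\ndir2\n": newlines are preserved
        println(trimExceptCRLF("   "))             // "": nothing but whitespace
      }
    }
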
@@ -2290,7 +2280,12 @@ private[spark] object Utils extends Logging {
    * configure a new log4j level
    */
   def setLogLevel(l: org.apache.log4j.Level) {
-    org.apache.log4j.Logger.getRootLogger().setLevel(l)
+    val rootLogger = org.apache.log4j.Logger.getRootLogger()
+    rootLogger.setLevel(l)
+    rootLogger.getAllAppenders().asScala.foreach {
+      case ca: org.apache.log4j.ConsoleAppender => ca.setThreshold(l)
+      case _ => // no-op
+    }
   }
 
   /**
@@ -2392,7 +2387,8 @@ private[spark] object Utils extends Logging {
       "org.apache.spark.security.ShellBasedGroupsMappingProvider")
     if (groupProviderClassName != "") {
       try {
-        val groupMappingServiceProvider = classForName(groupProviderClassName).newInstance.
+        val groupMappingServiceProvider = classForName(groupProviderClassName).
+          getConstructor().newInstance().
           asInstanceOf[org.apache.spark.security.GroupMappingServiceProvider]
         val currentUserGroups = groupMappingServiceProvider.getGroups(username)
         return currentUserGroups
@@ -2698,7 +2694,7 @@ private[spark] object Utils extends Logging {
     }
 
     val masterScheme = new URI(masterWithoutK8sPrefix).getScheme
-    val resolvedURL = masterScheme.toLowerCase match {
+    val resolvedURL = masterScheme.toLowerCase(Locale.ROOT) match {
       case "https" =>
         masterWithoutK8sPrefix
       case "http" =>
@@ -2795,6 +2791,44 @@ private[spark] object Utils extends Logging {
       }
     }
   }
+
+  /**
+   * Regular expression matching full width characters.
+   *
+   * Derived by inspecting all characters in the Unicode range 0x0000-0xFFFF, displaying them
+   * under Xshell to identify the full width ones, and building this character class from them.
+   */
+  private val fullWidthRegex = ("""[""" +
+    // scalastyle:off nonascii
+    """\u1100-\u115F""" +
+    """\u2E80-\uA4CF""" +
+    """\uAC00-\uD7A3""" +
+    """\uF900-\uFAFF""" +
+    """\uFE10-\uFE19""" +
+    """\uFE30-\uFE6F""" +
+    """\uFF00-\uFF60""" +
+    """\uFFE0-\uFFE6""" +
+    // scalastyle:on nonascii
+    """]""").r
+
+  /**
+   * Return the number of half widths in a given string. Note that a full width character
+   * occupies two half widths.
+   *
+   * For a string consisting of 1 million characters, the execution of this method requires
+   * about 50ms.
+   */
+  def stringHalfWidth(str: String): Int = {
+    if (str == null) 0 else str.length + fullWidthRegex.findAllIn(str).size
+  }
+
+  def sanitizeDirName(str: String): String = {
+    str.replaceAll("[ :/]", "-").replaceAll("[.${}'\"]", "_").toLowerCase(Locale.ROOT)
+  }
+
+  def isClientMode(conf: SparkConf): Boolean = {
+    "client".equals(conf.get(SparkLauncher.DEPLOY_MODE, "client"))
+  }
 }
 
 private[util] object CallerContext extends Logging {
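
The last hunk adds fullWidthRegex and stringHalfWidth, which count a full width character (for example a CJK ideograph) as two half widths so fixed-width console output can be padded correctly. A standalone copy with a few sample inputs; the demo object is made up:

    object StringHalfWidthDemo {
      // Same character ranges as the patch above.
      private val fullWidthRegex =
        ("[" +
          "\u1100-\u115F" +
          "\u2E80-\uA4CF" +
          "\uAC00-\uD7A3" +
          "\uF900-\uFAFF" +
          "\uFE10-\uFE19" +
          "\uFE30-\uFE6F" +
          "\uFF00-\uFF60" +
          "\uFFE0-\uFFE6" +
          "]").r

      // Each full width match adds one extra half width on top of str.length.
      def stringHalfWidth(str: String): Int =
        if (str == null) 0 else str.length + fullWidthRegex.findAllIn(str).size

      def main(args: Array[String]): Unit = {
        println(stringHalfWidth("hello"))   // 5: ASCII counts one half width each
        println(stringHalfWidth("你好"))    // 4: two full width characters
        println(stringHalfWidth(null))      // 0
      }
    }
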
diff --git a/core/src/main/scala/org/apache/spark/util/VersionUtils.scala b/core/src/main/scala/org/apache/spark/util/VersionUtils.scala
index 828153b868420..c0f8866dd58dc 100644
--- a/core/src/main/scala/org/apache/spark/util/VersionUtils.scala
+++ b/core/src/main/scala/org/apache/spark/util/VersionUtils.scala
@@ -23,6 +23,7 @@ package org.apache.spark.util
 private[spark] object VersionUtils {
 
   private val majorMinorRegex = """^(\d+)\.(\d+)(\..*)?$""".r
+  private val shortVersionRegex = """^(\d+\.\d+\.\d+)(.*)?$""".r
 
   /**
    * Given a Spark version string, return the major version number.
@@ -36,6 +37,19 @@ private[spark] object VersionUtils {
    */
   def minorVersion(sparkVersion: String): Int = majorMinorVersion(sparkVersion)._2
 
+  /**
+   * Given a Spark version string, return the short version string.
+   * E.g., for 3.0.0-SNAPSHOT, return '3.0.0'.
+   */
+  def shortVersion(sparkVersion: String): String = {
+    shortVersionRegex.findFirstMatchIn(sparkVersion) match {
+      case Some(m) => m.group(1)
+      case None =>
+        throw new IllegalArgumentException(s"Spark tried to parse '$sparkVersion' as a Spark" +
+          s" version string, but it could not find the major/minor/maintenance version numbers.")
+    }
+  }
+
   /**
    * Given a Spark version string, return the (major version number, minor version number).
    * E.g., for 2.0.1-SNAPSHOT, return (2, 0).
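
The new shortVersion helper strips any qualifier after the major.minor.maintenance triple. A standalone copy with sample inputs (the error message is lightly reworded here):

    object ShortVersionDemo {
      private val shortVersionRegex = """^(\d+\.\d+\.\d+)(.*)?$""".r

      // Return "x.y.z" from a version string such as "3.0.0-SNAPSHOT".
      def shortVersion(sparkVersion: String): String =
        shortVersionRegex.findFirstMatchIn(sparkVersion) match {
          case Some(m) => m.group(1)
          case None =>
            throw new IllegalArgumentException(
              s"Could not parse '$sparkVersion' as a major.minor.maintenance version")
        }

      def main(args: Array[String]): Unit = {
        println(shortVersion("3.0.0-SNAPSHOT"))   // 3.0.0
        println(shortVersion("2.4.1"))            // 2.4.1
      }
    }
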
diff --git a/core/src/main/scala/org/apache/spark/util/collection/ExternalSorter.scala b/core/src/main/scala/org/apache/spark/util/collection/ExternalSorter.scala
index b159200d79222..46279e79d78db 100644
--- a/core/src/main/scala/org/apache/spark/util/collection/ExternalSorter.scala
+++ b/core/src/main/scala/org/apache/spark/util/collection/ExternalSorter.scala
@@ -727,9 +727,10 @@ private[spark] class ExternalSorter[K, V, C](
     spills.clear()
     forceSpillFiles.foreach(s => s.file.delete())

  (This diff was longer than 20,000 lines, and has been truncated...)


 
