You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by li...@apache.org on 2018/02/28 16:44:58 UTC
spark git commit: [SPARK-23514] Use SessionState.newHadoopConf() to
propagate hadoop configs set in SQLConf.
Repository: spark
Updated Branches:
refs/heads/master fab563b9b -> 476a7f026
[SPARK-23514] Use SessionState.newHadoopConf() to propagate hadoop configs set in SQLConf.
## What changes were proposed in this pull request?
A few places in `spark-sql` were using `sc.hadoopConfiguration` directly. They should be using `sessionState.newHadoopConf()` to blend in configs that were set through `SQLConf`.
Also, for better UX, for these configs blended in from `SQLConf`, we should consider removing the `spark.hadoop` prefix, so that the settings are recognized whether or not they were specified by the user.
## How was this patch tested?
Tested that AlterTableRecoverPartitions now correctly recognizes settings that are passed in to the FileSystem through SQLConf.
Author: Juliusz Sompolski <ju...@databricks.com>
Closes #20679 from juliuszsompolski/SPARK-23514.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/476a7f02
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/476a7f02
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/476a7f02
Branch: refs/heads/master
Commit: 476a7f026bc45462067ebd39cd269147e84cd641
Parents: fab563b
Author: Juliusz Sompolski <ju...@databricks.com>
Authored: Wed Feb 28 08:44:53 2018 -0800
Committer: gatorsmile <ga...@gmail.com>
Committed: Wed Feb 28 08:44:53 2018 -0800
----------------------------------------------------------------------
.../scala/org/apache/spark/sql/execution/command/ddl.scala | 6 +++---
.../main/scala/org/apache/spark/sql/hive/test/TestHive.scala | 5 +++--
2 files changed, 6 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/476a7f02/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala
index 0142f17..964cbca 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala
@@ -610,10 +610,10 @@ case class AlterTableRecoverPartitionsCommand(
val root = new Path(table.location)
logInfo(s"Recover all the partitions in $root")
- val fs = root.getFileSystem(spark.sparkContext.hadoopConfiguration)
+ val hadoopConf = spark.sessionState.newHadoopConf()
+ val fs = root.getFileSystem(hadoopConf)
val threshold = spark.conf.get("spark.rdd.parallelListingThreshold", "10").toInt
- val hadoopConf = spark.sparkContext.hadoopConfiguration
val pathFilter = getPathFilter(hadoopConf)
val evalPool = ThreadUtils.newForkJoinPool("AlterTableRecoverPartitionsCommand", 8)
@@ -697,7 +697,7 @@ case class AlterTableRecoverPartitionsCommand(
pathFilter: PathFilter,
threshold: Int): GenMap[String, PartitionStatistics] = {
if (partitionSpecsAndLocs.length > threshold) {
- val hadoopConf = spark.sparkContext.hadoopConfiguration
+ val hadoopConf = spark.sessionState.newHadoopConf()
val serializableConfiguration = new SerializableConfiguration(hadoopConf)
val serializedPaths = partitionSpecsAndLocs.map(_._2.toString).toArray
http://git-wip-us.apache.org/repos/asf/spark/blob/476a7f02/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
index 1902893..fcf2025 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
@@ -518,8 +518,9 @@ private[hive] class TestHiveSparkSession(
// an HDFS scratch dir: ${hive.exec.scratchdir}/<username> is created, with
// ${hive.scratch.dir.permission}. To resolve the permission issue, the simplest way is to
// delete it. Later, it will be re-created with the right permission.
- val location = new Path(sc.hadoopConfiguration.get(ConfVars.SCRATCHDIR.varname))
- val fs = location.getFileSystem(sc.hadoopConfiguration)
+ val hadoopConf = sessionState.newHadoopConf()
+ val location = new Path(hadoopConf.get(ConfVars.SCRATCHDIR.varname))
+ val fs = location.getFileSystem(hadoopConf)
fs.delete(location, true)
// Some tests corrupt this value on purpose, which breaks the RESET call below.
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org