You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by zs...@apache.org on 2017/11/17 23:35:28 UTC

spark git commit: [SPARK-22544][SS] FileStreamSource should use its own hadoop conf to call globPathIfNecessary

Repository: spark
Updated Branches:
  refs/heads/master fccb337f9 -> bf0c0ae2d


[SPARK-22544][SS] FileStreamSource should use its own hadoop conf to call globPathIfNecessary

## What changes were proposed in this pull request?

Pass the FileSystem created using the correct Hadoop conf into `globPathIfNecessary` so that globbing can pick up the user's Hadoop configurations, such as credentials.

## How was this patch tested?

Jenkins

Author: Shixiong Zhu <zs...@gmail.com>

Closes #19771 from zsxwing/fix-file-stream-conf.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/bf0c0ae2
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/bf0c0ae2
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/bf0c0ae2

Branch: refs/heads/master
Commit: bf0c0ae2dcc7fd1ce92cd0fb4809bb3d65b2e309
Parents: fccb337
Author: Shixiong Zhu <zs...@gmail.com>
Authored: Fri Nov 17 15:35:24 2017 -0800
Committer: Shixiong Zhu <zs...@gmail.com>
Committed: Fri Nov 17 15:35:24 2017 -0800

----------------------------------------------------------------------
 .../apache/spark/sql/execution/streaming/FileStreamSource.scala | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/bf0c0ae2/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSource.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSource.scala
index f174173..0debd7d 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSource.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSource.scala
@@ -47,8 +47,9 @@ class FileStreamSource(
 
   private val hadoopConf = sparkSession.sessionState.newHadoopConf()
 
+  @transient private val fs = new Path(path).getFileSystem(hadoopConf)
+
   private val qualifiedBasePath: Path = {
-    val fs = new Path(path).getFileSystem(hadoopConf)
     fs.makeQualified(new Path(path))  // can contains glob patterns
   }
 
@@ -187,7 +188,7 @@ class FileStreamSource(
     if (SparkHadoopUtil.get.isGlobPath(new Path(path))) Some(false) else None
 
   private def allFilesUsingInMemoryFileIndex() = {
-    val globbedPaths = SparkHadoopUtil.get.globPathIfNecessary(qualifiedBasePath)
+    val globbedPaths = SparkHadoopUtil.get.globPathIfNecessary(fs, qualifiedBasePath)
     val fileIndex = new InMemoryFileIndex(sparkSession, globbedPaths, options, Some(new StructType))
     fileIndex.allFiles()
   }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org