You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ma...@apache.org on 2015/04/12 03:44:58 UTC
spark git commit: [SQL] Handle special characters in the authority of
a Path's URI.
Repository: spark
Updated Branches:
refs/heads/master 352a5da42 -> d2383fb5f
[SQL] Handle special characters in the authority of a Path's URI.
Author: Yin Huai <yh...@databricks.com>
Closes #5381 from yhuai/parquetPath2 and squashes the following commits:
fe296b4 [Yin Huai] Create new Path to take care of special characters in the authority of a Path's URI.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d2383fb5
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d2383fb5
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d2383fb5
Branch: refs/heads/master
Commit: d2383fb5ffafd6b3a56b1ee6e0e035594473e2c8
Parents: 352a5da
Author: Yin Huai <yh...@databricks.com>
Authored: Sat Apr 11 18:44:54 2015 -0700
Committer: Michael Armbrust <mi...@databricks.com>
Committed: Sat Apr 11 18:44:54 2015 -0700
----------------------------------------------------------------------
.../apache/spark/sql/parquet/newParquet.scala | 30 ++++++++++++++++++--
1 file changed, 27 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/d2383fb5/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala
index 0dce362..20fdf5e 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala
@@ -432,7 +432,10 @@ private[sql] case class ParquetRelation2(
// FileInputFormat cannot handle empty lists.
if (selectedFiles.nonEmpty) {
- FileInputFormat.setInputPaths(job, selectedFiles.map(_.getPath): _*)
+ // In order to encode the authority of a Path containing special characters such as /,
+ // we need to use the string returned by the URI of the path to create a new Path.
+ val selectedPaths = selectedFiles.map(status => new Path(status.getPath.toUri.toString))
+ FileInputFormat.setInputPaths(job, selectedPaths: _*)
}
// Try to push down filters when filter push-down is enabled.
@@ -484,10 +487,31 @@ private[sql] case class ParquetRelation2(
val cacheMetadata = useCache
@transient
- val cachedStatus = selectedFiles
+ val cachedStatus = selectedFiles.map { st =>
+ // In order to encode the authority of a Path containing special characters such as /,
+ // we need to use the string returned by the URI of the path to create a new Path.
+ val newPath = new Path(st.getPath.toUri.toString)
+
+ new FileStatus(
+ st.getLen,
+ st.isDir,
+ st.getReplication,
+ st.getBlockSize,
+ st.getModificationTime,
+ st.getAccessTime,
+ st.getPermission,
+ st.getOwner,
+ st.getGroup,
+ newPath)
+ }
@transient
- val cachedFooters = selectedFooters
+ val cachedFooters = selectedFooters.map { f =>
+ // In order to encode the authority of a Path containing special characters such as /,
+ // we need to use the string returned by the URI of the path to create a new Path.
+ new Footer(new Path(f.getFile.toUri.toString), f.getParquetMetadata)
+ }
+
// Overridden so we can inject our own cached files statuses.
override def getPartitions: Array[SparkPartition] = {
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org