You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ma...@apache.org on 2015/04/12 03:44:58 UTC

spark git commit: [SQL] Handle special characters in the authority of a Path's URI.

Repository: spark
Updated Branches:
  refs/heads/master 352a5da42 -> d2383fb5f


[SQL] Handle special characters in the authority of a Path's URI.

Author: Yin Huai <yh...@databricks.com>

Closes #5381 from yhuai/parquetPath2 and squashes the following commits:

fe296b4 [Yin Huai] Create new Path to take care special characters in the authority of a Path's URI.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d2383fb5
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d2383fb5
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d2383fb5

Branch: refs/heads/master
Commit: d2383fb5ffafd6b3a56b1ee6e0e035594473e2c8
Parents: 352a5da
Author: Yin Huai <yh...@databricks.com>
Authored: Sat Apr 11 18:44:54 2015 -0700
Committer: Michael Armbrust <mi...@databricks.com>
Committed: Sat Apr 11 18:44:54 2015 -0700

----------------------------------------------------------------------
 .../apache/spark/sql/parquet/newParquet.scala   | 30 ++++++++++++++++++--
 1 file changed, 27 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/d2383fb5/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala
index 0dce362..20fdf5e 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala
@@ -432,7 +432,10 @@ private[sql] case class ParquetRelation2(
 
     // FileInputFormat cannot handle empty lists.
     if (selectedFiles.nonEmpty) {
-      FileInputFormat.setInputPaths(job, selectedFiles.map(_.getPath): _*)
+      // In order to encode the authority of a Path containning special characters such as /,
+      // we need to use the string retruned by the URI of the path to create a new Path.
+      val selectedPaths = selectedFiles.map(status => new Path(status.getPath.toUri.toString))
+      FileInputFormat.setInputPaths(job, selectedPaths: _*)
     }
 
     // Try to push down filters when filter push-down is enabled.
@@ -484,10 +487,31 @@ private[sql] case class ParquetRelation2(
         val cacheMetadata = useCache
 
         @transient
-        val cachedStatus = selectedFiles
+        val cachedStatus = selectedFiles.map { st =>
+          // In order to encode the authority of a Path containning special characters such as /,
+          // we need to use the string retruned by the URI of the path to create a new Path.
+          val newPath = new Path(st.getPath.toUri.toString)
+
+          new FileStatus(
+            st.getLen,
+            st.isDir,
+            st.getReplication,
+            st.getBlockSize,
+            st.getModificationTime,
+            st.getAccessTime,
+            st.getPermission,
+            st.getOwner,
+            st.getGroup,
+            newPath)
+        }
 
         @transient
-        val cachedFooters = selectedFooters
+        val cachedFooters = selectedFooters.map { f =>
+          // In order to encode the authority of a Path containning special characters such as /,
+          // we need to use the string retruned by the URI of the path to create a new Path.
+          new Footer(new Path(f.getFile.toUri.toString), f.getParquetMetadata)
+        }
+
 
         // Overridden so we can inject our own cached files statuses.
         override def getPartitions: Array[SparkPartition] = {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org