You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ma...@apache.org on 2014/12/12 07:54:25 UTC
spark git commit: [SPARK-4742][SQL] The name of Parquet File
generated by AppendingParquetOutputFormat should be zero padded
Repository: spark
Updated Branches:
refs/heads/master 0abbff286 -> 8091dd62e
[SPARK-4742][SQL] The name of Parquet File generated by AppendingParquetOutputFormat should be zero padded
When I use a Parquet file as an output file via ParquetOutputFormat#getDefaultWorkFile, the file name is not zero-padded, whereas RDD#saveAsTextFile does zero-pad its output file names.
Author: Sasaki Toru <sa...@nttdata.co.jp>
Closes #3602 from sasakitoa/parquet-zeroPadding and squashes the following commits:
6b0e58f [Sasaki Toru] Merge branch 'master' of git://github.com/apache/spark into parquet-zeroPadding
20dc79d [Sasaki Toru] Fixed the name of Parquet File generated by AppendingParquetOutputFormat
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8091dd62
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8091dd62
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8091dd62
Branch: refs/heads/master
Commit: 8091dd62eaff28196dfb9742a4f39182704b1024
Parents: 0abbff2
Author: Sasaki Toru <sa...@nttdata.co.jp>
Authored: Thu Dec 11 22:54:21 2014 -0800
Committer: Michael Armbrust <mi...@databricks.com>
Committed: Thu Dec 11 22:54:21 2014 -0800
----------------------------------------------------------------------
.../org/apache/spark/sql/parquet/ParquetTableOperations.scala | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/8091dd62/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableOperations.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableOperations.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableOperations.scala
index 232ef90..5a49384 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableOperations.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableOperations.scala
@@ -20,6 +20,7 @@ package org.apache.spark.sql.parquet
import java.io.IOException
import java.lang.{Long => JLong}
import java.text.SimpleDateFormat
+import java.text.NumberFormat
import java.util.concurrent.{Callable, TimeUnit}
import java.util.{ArrayList, Collections, Date, List => JList}
@@ -338,9 +339,13 @@ private[parquet] class AppendingParquetOutputFormat(offset: Int)
// override to choose output filename so not overwrite existing ones
override def getDefaultWorkFile(context: TaskAttemptContext, extension: String): Path = {
+ val numfmt = NumberFormat.getInstance()
+ numfmt.setMinimumIntegerDigits(5)
+ numfmt.setGroupingUsed(false)
+
val taskId: TaskID = getTaskAttemptID(context).getTaskID
val partition: Int = taskId.getId
- val filename = s"part-r-${partition + offset}.parquet"
+ val filename = "part-r-" + numfmt.format(partition + offset) + ".parquet"
val committer: FileOutputCommitter =
getOutputCommitter(context).asInstanceOf[FileOutputCommitter]
new Path(committer.getWorkPath, filename)
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org