You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by rx...@apache.org on 2016/12/22 07:46:35 UTC
spark git commit: [DOC] bucketing is applicable to all file-based
data sources
Repository: spark
Updated Branches:
refs/heads/master 7c5b7b3a2 -> 2e861df96
[DOC] bucketing is applicable to all file-based data sources
## What changes were proposed in this pull request?
Starting with Spark 2.1.0, the bucketing feature is available for all file-based data sources. This patch fixes some function docs that haven't yet been updated to reflect that.
## How was this patch tested?
N/A
Author: Reynold Xin <rx...@databricks.com>
Closes #16349 from rxin/ds-doc.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2e861df9
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2e861df9
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2e861df9
Branch: refs/heads/master
Commit: 2e861df96eacd821edbbd9883121bff67611074f
Parents: 7c5b7b3
Author: Reynold Xin <rx...@databricks.com>
Authored: Wed Dec 21 23:46:33 2016 -0800
Committer: Reynold Xin <rx...@databricks.com>
Committed: Wed Dec 21 23:46:33 2016 -0800
----------------------------------------------------------------------
.../src/main/scala/org/apache/spark/sql/DataFrameWriter.scala | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/2e861df9/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
index d33f7da..9c5660a 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
@@ -150,7 +150,7 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) {
* predicates on the partitioned columns. In order for partitioning to work well, the number
* of distinct values in each column should typically be less than tens of thousands.
*
- * This was initially applicable for Parquet but in 1.5+ covers JSON, text, ORC and avro as well.
+ * This is applicable for all file-based data sources (e.g. Parquet, JSON) starting Spark 2.1.0.
*
* @since 1.4.0
*/
@@ -164,7 +164,7 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) {
* Buckets the output by the given columns. If specified, the output is laid out on the file
* system similar to Hive's bucketing scheme.
*
- * This is applicable for Parquet, JSON and ORC.
+ * This is applicable for all file-based data sources (e.g. Parquet, JSON) starting Spark 2.1.0.
*
* @since 2.0
*/
@@ -178,7 +178,7 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) {
/**
* Sorts the output in each bucket by the given columns.
*
- * This is applicable for Parquet, JSON and ORC.
+ * This is applicable for all file-based data sources (e.g. Parquet, JSON) starting Spark 2.1.0.
*
* @since 2.0
*/
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org