You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by rx...@apache.org on 2016/12/22 07:46:35 UTC

spark git commit: [DOC] bucketing is applicable to all file-based data sources

Repository: spark
Updated Branches:
  refs/heads/master 7c5b7b3a2 -> 2e861df96


[DOC] bucketing is applicable to all file-based data sources

## What changes were proposed in this pull request?
Starting with Spark 2.1.0, the bucketing feature is available for all file-based data sources. This patch fixes some function docs that haven't yet been updated to reflect that.

## How was this patch tested?
N/A

Author: Reynold Xin <rx...@databricks.com>

Closes #16349 from rxin/ds-doc.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2e861df9
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2e861df9
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2e861df9

Branch: refs/heads/master
Commit: 2e861df96eacd821edbbd9883121bff67611074f
Parents: 7c5b7b3
Author: Reynold Xin <rx...@databricks.com>
Authored: Wed Dec 21 23:46:33 2016 -0800
Committer: Reynold Xin <rx...@databricks.com>
Committed: Wed Dec 21 23:46:33 2016 -0800

----------------------------------------------------------------------
 .../src/main/scala/org/apache/spark/sql/DataFrameWriter.scala  | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/2e861df9/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
index d33f7da..9c5660a 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
@@ -150,7 +150,7 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) {
    * predicates on the partitioned columns. In order for partitioning to work well, the number
    * of distinct values in each column should typically be less than tens of thousands.
    *
-   * This was initially applicable for Parquet but in 1.5+ covers JSON, text, ORC and avro as well.
+   * This is applicable for all file-based data sources (e.g. Parquet, JSON) since Spark 2.1.0.
    *
    * @since 1.4.0
    */
@@ -164,7 +164,7 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) {
    * Buckets the output by the given columns. If specified, the output is laid out on the file
    * system similar to Hive's bucketing scheme.
    *
-   * This is applicable for Parquet, JSON and ORC.
+   * This is applicable for all file-based data sources (e.g. Parquet, JSON) since Spark 2.1.0.
    *
    * @since 2.0
    */
@@ -178,7 +178,7 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) {
   /**
    * Sorts the output in each bucket by the given columns.
    *
-   * This is applicable for Parquet, JSON and ORC.
+   * This is applicable for all file-based data sources (e.g. Parquet, JSON) since Spark 2.1.0.
    *
    * @since 2.0
    */


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org