You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by rx...@apache.org on 2017/11/01 17:39:19 UTC
spark git commit: [MINOR] Data source v2 docs update.
Repository: spark
Updated Branches:
refs/heads/master 1ffe03d9e -> d43e1f06b
[MINOR] Data source v2 docs update.
## What changes were proposed in this pull request?
This patch includes some doc updates for data source API v2. I was reading the code and noticed some minor issues.
## How was this patch tested?
This is a doc only change.
Author: Reynold Xin <rx...@databricks.com>
Closes #19626 from rxin/dsv2-update.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d43e1f06
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d43e1f06
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d43e1f06
Branch: refs/heads/master
Commit: d43e1f06bd545d00bfcaf1efb388b469effd5d64
Parents: 1ffe03d
Author: Reynold Xin <rx...@databricks.com>
Authored: Wed Nov 1 18:39:15 2017 +0100
Committer: Reynold Xin <rx...@databricks.com>
Committed: Wed Nov 1 18:39:15 2017 +0100
----------------------------------------------------------------------
.../org/apache/spark/sql/sources/v2/DataSourceV2.java | 9 ++++-----
.../org/apache/spark/sql/sources/v2/WriteSupport.java | 4 ++--
.../spark/sql/sources/v2/reader/DataSourceV2Reader.java | 10 +++++-----
.../v2/reader/SupportsPushDownCatalystFilters.java | 2 --
.../sql/sources/v2/reader/SupportsScanUnsafeRow.java | 2 --
.../spark/sql/sources/v2/writer/DataSourceV2Writer.java | 11 +++--------
.../apache/spark/sql/sources/v2/writer/DataWriter.java | 10 +++++-----
7 files changed, 19 insertions(+), 29 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/d43e1f06/sql/core/src/main/java/org/apache/spark/sql/sources/v2/DataSourceV2.java
----------------------------------------------------------------------
diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/DataSourceV2.java b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/DataSourceV2.java
index dbcbe32..6234071 100644
--- a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/DataSourceV2.java
+++ b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/DataSourceV2.java
@@ -20,12 +20,11 @@ package org.apache.spark.sql.sources.v2;
import org.apache.spark.annotation.InterfaceStability;
/**
- * The base interface for data source v2. Implementations must have a public, no arguments
- * constructor.
+ * The base interface for data source v2. Implementations must have a public, 0-arg constructor.
*
- * Note that this is an empty interface, data source implementations should mix-in at least one of
- * the plug-in interfaces like {@link ReadSupport}. Otherwise it's just a dummy data source which is
- * un-readable/writable.
+ * Note that this is an empty interface. Data source implementations should mix-in at least one of
+ * the plug-in interfaces like {@link ReadSupport} and {@link WriteSupport}. Otherwise it's just
+ * a dummy data source which is neither readable nor writable.
*/
@InterfaceStability.Evolving
public interface DataSourceV2 {}
http://git-wip-us.apache.org/repos/asf/spark/blob/d43e1f06/sql/core/src/main/java/org/apache/spark/sql/sources/v2/WriteSupport.java
----------------------------------------------------------------------
diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/WriteSupport.java b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/WriteSupport.java
index a8a9615..8fdfdfd 100644
--- a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/WriteSupport.java
+++ b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/WriteSupport.java
@@ -36,8 +36,8 @@ public interface WriteSupport {
* sources can return None if there is no writing needed to be done according to the save mode.
*
* @param jobId A unique string for the writing job. It's possible that there are many writing
- * jobs running at the same time, and the returned {@link DataSourceV2Writer} should
- * use this job id to distinguish itself with writers of other jobs.
+ * jobs running at the same time, and the returned {@link DataSourceV2Writer} can
+ * use this job id to distinguish itself from other jobs.
* @param schema the schema of the data to be written.
* @param mode the save mode which determines what to do when the data are already in this data
* source, please refer to {@link SaveMode} for more details.
http://git-wip-us.apache.org/repos/asf/spark/blob/d43e1f06/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/DataSourceV2Reader.java
----------------------------------------------------------------------
diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/DataSourceV2Reader.java b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/DataSourceV2Reader.java
index 5989a4a..88c3219 100644
--- a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/DataSourceV2Reader.java
+++ b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/DataSourceV2Reader.java
@@ -34,11 +34,11 @@ import org.apache.spark.sql.types.StructType;
*
* There are mainly 3 kinds of query optimizations:
* 1. Operators push-down. E.g., filter push-down, required columns push-down(aka column
- * pruning), etc. These push-down interfaces are named like `SupportsPushDownXXX`.
- * 2. Information Reporting. E.g., statistics reporting, ordering reporting, etc. These
- * reporting interfaces are named like `SupportsReportingXXX`.
- * 3. Special scans. E.g, columnar scan, unsafe row scan, etc. These scan interfaces are named
- * like `SupportsScanXXX`.
+ * pruning), etc. Names of these interfaces start with `SupportsPushDown`.
+ * 2. Information Reporting. E.g., statistics reporting, ordering reporting, etc.
+ * Names of these interfaces start with `SupportsReporting`.
+ * 3. Special scans. E.g., columnar scan, unsafe row scan, etc.
+ * Names of these interfaces start with `SupportsScan`.
*
* Spark first applies all operator push-down optimizations that this data source supports. Then
* Spark collects information this data source reported for further optimizations. Finally Spark
http://git-wip-us.apache.org/repos/asf/spark/blob/d43e1f06/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/SupportsPushDownCatalystFilters.java
----------------------------------------------------------------------
diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/SupportsPushDownCatalystFilters.java b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/SupportsPushDownCatalystFilters.java
index d609177..efc4224 100644
--- a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/SupportsPushDownCatalystFilters.java
+++ b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/SupportsPushDownCatalystFilters.java
@@ -31,8 +31,6 @@ import org.apache.spark.sql.catalyst.expressions.Expression;
* {@link SupportsPushDownFilters}, Spark will ignore {@link SupportsPushDownFilters} and only
* process this interface.
*/
-@InterfaceStability.Evolving
-@Experimental
@InterfaceStability.Unstable
public interface SupportsPushDownCatalystFilters {
http://git-wip-us.apache.org/repos/asf/spark/blob/d43e1f06/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/SupportsScanUnsafeRow.java
----------------------------------------------------------------------
diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/SupportsScanUnsafeRow.java b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/SupportsScanUnsafeRow.java
index d5eada8..6008fb5 100644
--- a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/SupportsScanUnsafeRow.java
+++ b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/SupportsScanUnsafeRow.java
@@ -30,8 +30,6 @@ import org.apache.spark.sql.catalyst.expressions.UnsafeRow;
* This is an experimental and unstable interface, as {@link UnsafeRow} is not public and may get
* changed in the future Spark versions.
*/
-@InterfaceStability.Evolving
-@Experimental
@InterfaceStability.Unstable
public interface SupportsScanUnsafeRow extends DataSourceV2Reader {
http://git-wip-us.apache.org/repos/asf/spark/blob/d43e1f06/sql/core/src/main/java/org/apache/spark/sql/sources/v2/writer/DataSourceV2Writer.java
----------------------------------------------------------------------
diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/writer/DataSourceV2Writer.java b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/writer/DataSourceV2Writer.java
index 8d8e336..37bb15f 100644
--- a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/writer/DataSourceV2Writer.java
+++ b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/writer/DataSourceV2Writer.java
@@ -40,15 +40,10 @@ import org.apache.spark.sql.types.StructType;
* some writers are aborted, or the job failed with an unknown reason, call
* {@link #abort(WriterCommitMessage[])}.
*
- * Spark won't retry failed writing jobs, users should do it manually in their Spark applications if
- * they want to retry.
+ * While Spark will retry failed writing tasks, Spark won't retry failed writing jobs. Users should
+ * do it manually in their Spark applications if they want to retry.
*
- * Please refer to the document of commit/abort methods for detailed specifications.
- *
- * Note that, this interface provides a protocol between Spark and data sources for transactional
- * data writing, but the transaction here is Spark-level transaction, which may not be the
- * underlying storage transaction. For example, Spark successfully writes data to a Cassandra data
- * source, but Cassandra may need some more time to reach consistency at storage level.
+ * Please refer to the documentation of commit/abort methods for detailed specifications.
*/
@InterfaceStability.Evolving
public interface DataSourceV2Writer {
http://git-wip-us.apache.org/repos/asf/spark/blob/d43e1f06/sql/core/src/main/java/org/apache/spark/sql/sources/v2/writer/DataWriter.java
----------------------------------------------------------------------
diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/writer/DataWriter.java b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/writer/DataWriter.java
index d84afba..dc1aab3 100644
--- a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/writer/DataWriter.java
+++ b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/writer/DataWriter.java
@@ -57,8 +57,8 @@ public interface DataWriter<T> {
/**
* Writes one record.
*
- * If this method fails(throw exception), {@link #abort()} will be called and this data writer is
- * considered to be failed.
+ * If this method fails (by throwing an exception), {@link #abort()} will be called and this
+ * data writer is considered to have failed.
*/
void write(T record);
@@ -70,10 +70,10 @@ public interface DataWriter<T> {
* The written data should only be visible to data source readers after
* {@link DataSourceV2Writer#commit(WriterCommitMessage[])} succeeds, which means this method
* should still "hide" the written data and ask the {@link DataSourceV2Writer} at driver side to
- * do the final commitment via {@link WriterCommitMessage}.
+ * do the final commit via {@link WriterCommitMessage}.
*
- * If this method fails(throw exception), {@link #abort()} will be called and this data writer is
- * considered to be failed.
+ * If this method fails (by throwing an exception), {@link #abort()} will be called and this
+ * data writer is considered to have failed.
*/
WriterCommitMessage commit();
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org