You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2018/05/16 16:40:46 UTC
spark git commit: [SPARK-24275][SQL] Revise doc comments in InputPartition
Repository: spark
Updated Branches:
refs/heads/master 943493b16 -> 6fb7d6c4f
[SPARK-24275][SQL] Revise doc comments in InputPartition
## What changes were proposed in this pull request?
In #21145, DataReaderFactory is renamed to InputPartition.
This PR revises the wording of the comments to make them clearer.
## How was this patch tested?
None
Author: Gengliang Wang <ge...@databricks.com>
Closes #21326 from gengliangwang/revise_reader_comments.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6fb7d6c4
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6fb7d6c4
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6fb7d6c4
Branch: refs/heads/master
Commit: 6fb7d6c4f71be0007942f7d1fc3099f1bcf8c52b
Parents: 943493b
Author: Gengliang Wang <ge...@databricks.com>
Authored: Thu May 17 00:40:39 2018 +0800
Committer: Wenchen Fan <we...@databricks.com>
Committed: Thu May 17 00:40:39 2018 +0800
----------------------------------------------------------------------
.../apache/spark/sql/sources/v2/ReadSupport.java | 2 +-
.../sql/sources/v2/ReadSupportWithSchema.java | 2 +-
.../apache/spark/sql/sources/v2/WriteSupport.java | 2 +-
.../sql/sources/v2/reader/DataSourceReader.java | 16 ++++++++--------
.../sql/sources/v2/reader/InputPartition.java | 17 +++++++++--------
.../sql/sources/v2/writer/DataSourceWriter.java | 6 +++---
.../sql/sources/v2/writer/DataWriterFactory.java | 2 +-
7 files changed, 24 insertions(+), 23 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/6fb7d6c4/sql/core/src/main/java/org/apache/spark/sql/sources/v2/ReadSupport.java
----------------------------------------------------------------------
diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/ReadSupport.java b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/ReadSupport.java
index 0ea4dc6..b2526de 100644
--- a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/ReadSupport.java
+++ b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/ReadSupport.java
@@ -30,7 +30,7 @@ public interface ReadSupport extends DataSourceV2 {
/**
* Creates a {@link DataSourceReader} to scan the data from this data source.
*
- * If this method fails (by throwing an exception), the action would fail and no Spark job was
+ * If this method fails (by throwing an exception), the action will fail and no Spark job will be
* submitted.
*
* @param options the options for the returned data source reader, which is an immutable
http://git-wip-us.apache.org/repos/asf/spark/blob/6fb7d6c4/sql/core/src/main/java/org/apache/spark/sql/sources/v2/ReadSupportWithSchema.java
----------------------------------------------------------------------
diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/ReadSupportWithSchema.java b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/ReadSupportWithSchema.java
index 3801402..f316599 100644
--- a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/ReadSupportWithSchema.java
+++ b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/ReadSupportWithSchema.java
@@ -35,7 +35,7 @@ public interface ReadSupportWithSchema extends DataSourceV2 {
/**
* Create a {@link DataSourceReader} to scan the data from this data source.
*
- * If this method fails (by throwing an exception), the action would fail and no Spark job was
+ * If this method fails (by throwing an exception), the action will fail and no Spark job will be
* submitted.
*
* @param schema the full schema of this data source reader. Full schema usually maps to the
http://git-wip-us.apache.org/repos/asf/spark/blob/6fb7d6c4/sql/core/src/main/java/org/apache/spark/sql/sources/v2/WriteSupport.java
----------------------------------------------------------------------
diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/WriteSupport.java b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/WriteSupport.java
index cab5645..83aeec0 100644
--- a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/WriteSupport.java
+++ b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/WriteSupport.java
@@ -35,7 +35,7 @@ public interface WriteSupport extends DataSourceV2 {
* Creates an optional {@link DataSourceWriter} to save the data to this data source. Data
* sources can return None if there is no writing needed to be done according to the save mode.
*
- * If this method fails (by throwing an exception), the action would fail and no Spark job was
+ * If this method fails (by throwing an exception), the action will fail and no Spark job will be
* submitted.
*
* @param jobId A unique string for the writing job. It's possible that there are many writing
http://git-wip-us.apache.org/repos/asf/spark/blob/6fb7d6c4/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/DataSourceReader.java
----------------------------------------------------------------------
diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/DataSourceReader.java b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/DataSourceReader.java
index f898c29..36a3e54 100644
--- a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/DataSourceReader.java
+++ b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/DataSourceReader.java
@@ -31,7 +31,7 @@ import org.apache.spark.sql.types.StructType;
* {@link ReadSupport#createReader(DataSourceOptions)} or
* {@link ReadSupportWithSchema#createReader(StructType, DataSourceOptions)}.
* It can mix in various query optimization interfaces to speed up the data scan. The actual scan
- * logic is delegated to {@link InputPartition}s that are returned by
+ * logic is delegated to {@link InputPartition}s, which are returned by
* {@link #planInputPartitions()}.
*
* There are mainly 3 kinds of query optimizations:
@@ -45,8 +45,8 @@ import org.apache.spark.sql.types.StructType;
* only one of them would be respected, according to the priority list from high to low:
* {@link SupportsScanColumnarBatch}, {@link SupportsScanUnsafeRow}.
*
- * If an exception was throw when applying any of these query optimizations, the action would fail
- * and no Spark job was submitted.
+ * If an exception is thrown when applying any of these query optimizations, the action will fail
+ * and no Spark job will be submitted.
*
* Spark first applies all operator push-down optimizations that this data source supports. Then
* Spark collects information this data source reported for further optimizations. Finally Spark
@@ -59,21 +59,21 @@ public interface DataSourceReader {
* Returns the actual schema of this data source reader, which may be different from the physical
* schema of the underlying storage, as column pruning or other optimizations may happen.
*
- * If this method fails (by throwing an exception), the action would fail and no Spark job was
+ * If this method fails (by throwing an exception), the action will fail and no Spark job will be
* submitted.
*/
StructType readSchema();
/**
- * Returns a list of read tasks. Each task is responsible for creating a data reader to
- * output data for one RDD partition. That means the number of tasks returned here is same as
- * the number of RDD partitions this scan outputs.
+ * Returns a list of {@link InputPartition}s. Each {@link InputPartition} is responsible for
+ * creating a data reader to output data of one RDD partition. The number of input partitions
+ * returned here is the same as the number of RDD partitions this scan outputs.
*
* Note that, this may not be a full scan if the data source reader mixes in other optimization
* interfaces like column pruning, filter push-down, etc. These optimizations are applied before
* Spark issues the scan request.
*
- * If this method fails (by throwing an exception), the action would fail and no Spark job was
+ * If this method fails (by throwing an exception), the action will fail and no Spark job will be
* submitted.
*/
List<InputPartition<Row>> planInputPartitions();
http://git-wip-us.apache.org/repos/asf/spark/blob/6fb7d6c4/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/InputPartition.java
----------------------------------------------------------------------
diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/InputPartition.java b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/InputPartition.java
index c581e3b..3524481 100644
--- a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/InputPartition.java
+++ b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/InputPartition.java
@@ -23,13 +23,14 @@ import org.apache.spark.annotation.InterfaceStability;
/**
* An input partition returned by {@link DataSourceReader#planInputPartitions()} and is
- * responsible for creating the actual data reader. The relationship between
- * {@link InputPartition} and {@link InputPartitionReader}
+ * responsible for creating the actual data reader of one RDD partition.
+ * The relationship between {@link InputPartition} and {@link InputPartitionReader}
* is similar to the relationship between {@link Iterable} and {@link java.util.Iterator}.
*
- * Note that input partitions will be serialized and sent to executors, then the partition reader
- * will be created on executors and do the actual reading. So {@link InputPartition} must be
- * serializable and {@link InputPartitionReader} doesn't need to be.
+ * Note that {@link InputPartition}s will be serialized and sent to executors, then
+ * {@link InputPartitionReader}s will be created on executors to do the actual reading. So
+ * {@link InputPartition} must be serializable while {@link InputPartitionReader} doesn't need to
+ * be.
*/
@InterfaceStability.Evolving
public interface InputPartition<T> extends Serializable {
@@ -41,10 +42,10 @@ public interface InputPartition<T> extends Serializable {
* The location is a string representing the host name.
*
* Note that if a host name cannot be recognized by Spark, it will be ignored as if it were not in
- * the returned locations. By default this method returns empty string array, which means this
- * task has no location preference.
+ * the returned locations. The default return value is an empty string array, which means this
+ * input partition's reader has no location preference.
*
- * If this method fails (by throwing an exception), the action would fail and no Spark job was
+ * If this method fails (by throwing an exception), the action will fail and no Spark job will be
* submitted.
*/
default String[] preferredLocations() {
http://git-wip-us.apache.org/repos/asf/spark/blob/6fb7d6c4/sql/core/src/main/java/org/apache/spark/sql/sources/v2/writer/DataSourceWriter.java
----------------------------------------------------------------------
diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/writer/DataSourceWriter.java b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/writer/DataSourceWriter.java
index 0a0fd8d..0030a9f 100644
--- a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/writer/DataSourceWriter.java
+++ b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/writer/DataSourceWriter.java
@@ -34,8 +34,8 @@ import org.apache.spark.sql.types.StructType;
* It can mix in various writing optimization interfaces to speed up the data saving. The actual
* writing logic is delegated to {@link DataWriter}.
*
- * If an exception was throw when applying any of these writing optimizations, the action would fail
- * and no Spark job was submitted.
+ * If an exception is thrown when applying any of these writing optimizations, the action will fail
+ * and no Spark job will be submitted.
*
* The writing procedure is:
* 1. Create a writer factory by {@link #createWriterFactory()}, serialize and send it to all the
@@ -58,7 +58,7 @@ public interface DataSourceWriter {
/**
* Creates a writer factory which will be serialized and sent to executors.
*
- * If this method fails (by throwing an exception), the action would fail and no Spark job was
+ * If this method fails (by throwing an exception), the action will fail and no Spark job will be
* submitted.
*/
DataWriterFactory<Row> createWriterFactory();
http://git-wip-us.apache.org/repos/asf/spark/blob/6fb7d6c4/sql/core/src/main/java/org/apache/spark/sql/sources/v2/writer/DataWriterFactory.java
----------------------------------------------------------------------
diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/writer/DataWriterFactory.java b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/writer/DataWriterFactory.java
index c2c2ab7..7527bcc 100644
--- a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/writer/DataWriterFactory.java
+++ b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/writer/DataWriterFactory.java
@@ -35,7 +35,7 @@ public interface DataWriterFactory<T> extends Serializable {
/**
* Returns a data writer to do the actual writing work.
*
- * If this method fails (by throwing an exception), the action would fail and no Spark job was
+ * If this method fails (by throwing an exception), the action will fail and no Spark job will be
* submitted.
*
* @param partitionId A unique id of the RDD partition that the returned writer will process.
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org