You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2018/12/13 15:03:45 UTC

[spark] branch master updated: [SPARK-26313][SQL] move `newScanBuilder` from Table to read related mix-in traits

This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 6c1f7ba  [SPARK-26313][SQL] move `newScanBuilder` from Table to read related mix-in traits
6c1f7ba is described below

commit 6c1f7ba8f627a69cac74f11400066dd9871d9102
Author: Wenchen Fan <we...@databricks.com>
AuthorDate: Thu Dec 13 23:03:26 2018 +0800

    [SPARK-26313][SQL] move `newScanBuilder` from Table to read related mix-in traits
    
    ## What changes were proposed in this pull request?
    
    As discussed in https://github.com/apache/spark/pull/23208/files#r239684490 , we should put `newScanBuilder` in read-related mix-in traits like `SupportsBatchRead`, to support write-only tables.
    
    In the `Append` operator, we should skip schema validation if not necessary. In the future we will introduce a capability API, so that a data source can tell Spark that it doesn't want to do validation.
    
    ## How was this patch tested?
    
    existing tests.
    
    Closes #23266 from cloud-fan/ds-read.
    
    Authored-by: Wenchen Fan <we...@databricks.com>
    Signed-off-by: Hyukjin Kwon <gu...@apache.org>
---
 .../spark/sql/sources/v2/SupportsBatchRead.java      |  8 ++++----
 .../v2/{SupportsBatchRead.java => SupportsRead.java} | 20 +++++++++++---------
 .../java/org/apache/spark/sql/sources/v2/Table.java  | 15 ++-------------
 3 files changed, 17 insertions(+), 26 deletions(-)

diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/SupportsBatchRead.java b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/SupportsBatchRead.java
index 0df89db..6c5a95d 100644
--- a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/SupportsBatchRead.java
+++ b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/SupportsBatchRead.java
@@ -24,10 +24,10 @@ import org.apache.spark.sql.sources.v2.reader.ScanBuilder;
 /**
  * An empty mix-in interface for {@link Table}, to indicate this table supports batch scan.
  * <p>
- * If a {@link Table} implements this interface, its {@link Table#newScanBuilder(DataSourceOptions)}
- * must return a {@link ScanBuilder} that builds {@link Scan} with {@link Scan#toBatch()}
- * implemented.
+ * If a {@link Table} implements this interface, the
+ * {@link SupportsRead#newScanBuilder(DataSourceOptions)} must return a {@link ScanBuilder} that
+ * builds {@link Scan} with {@link Scan#toBatch()} implemented.
  * </p>
  */
 @Evolving
-public interface SupportsBatchRead extends Table { }
+public interface SupportsBatchRead extends SupportsRead { }
diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/SupportsBatchRead.java b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/SupportsRead.java
similarity index 67%
copy from sql/core/src/main/java/org/apache/spark/sql/sources/v2/SupportsBatchRead.java
copy to sql/core/src/main/java/org/apache/spark/sql/sources/v2/SupportsRead.java
index 0df89db..e22738d 100644
--- a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/SupportsBatchRead.java
+++ b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/SupportsRead.java
@@ -17,17 +17,19 @@
 
 package org.apache.spark.sql.sources.v2;
 
-import org.apache.spark.annotation.Evolving;
 import org.apache.spark.sql.sources.v2.reader.Scan;
 import org.apache.spark.sql.sources.v2.reader.ScanBuilder;
 
 /**
- * An empty mix-in interface for {@link Table}, to indicate this table supports batch scan.
- * <p>
- * If a {@link Table} implements this interface, its {@link Table#newScanBuilder(DataSourceOptions)}
- * must return a {@link ScanBuilder} that builds {@link Scan} with {@link Scan#toBatch()}
- * implemented.
- * </p>
+ * An internal base interface of mix-in interfaces for readable {@link Table}. This adds
+ * {@link #newScanBuilder(DataSourceOptions)} that is used to create a scan for batch, micro-batch,
+ * or continuous processing.
  */
-@Evolving
-public interface SupportsBatchRead extends Table { }
+interface SupportsRead extends Table {
+
+  /**
+   * Returns a {@link ScanBuilder} which can be used to build a {@link Scan}. Spark will call this
+   * method to configure each scan.
+   */
+  ScanBuilder newScanBuilder(DataSourceOptions options);
+}
diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/Table.java b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/Table.java
index 0c65fe0..0866485 100644
--- a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/Table.java
+++ b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/Table.java
@@ -18,8 +18,6 @@
 package org.apache.spark.sql.sources.v2;
 
 import org.apache.spark.annotation.Evolving;
-import org.apache.spark.sql.sources.v2.reader.Scan;
-import org.apache.spark.sql.sources.v2.reader.ScanBuilder;
 import org.apache.spark.sql.types.StructType;
 
 /**
@@ -43,17 +41,8 @@ public interface Table {
   String name();
 
   /**
-   * Returns the schema of this table.
+   * Returns the schema of this table. If the table is not readable and doesn't have a schema, an
+   * empty schema can be returned here.
    */
   StructType schema();
-
-  /**
-   * Returns a {@link ScanBuilder} which can be used to build a {@link Scan} later. Spark will call
-   * this method for each data scanning query.
-   * <p>
-   * The builder can take some query specific information to do operators pushdown, and keep these
-   * information in the created {@link Scan}.
-   * </p>
-   */
-  ScanBuilder newScanBuilder(DataSourceOptions options);
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org