You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2018/12/13 15:03:45 UTC
[spark] branch master updated: [SPARK-26313][SQL] move
`newScanBuilder` from Table to read related mix-in traits
This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 6c1f7ba [SPARK-26313][SQL] move `newScanBuilder` from Table to read related mix-in traits
6c1f7ba is described below
commit 6c1f7ba8f627a69cac74f11400066dd9871d9102
Author: Wenchen Fan <we...@databricks.com>
AuthorDate: Thu Dec 13 23:03:26 2018 +0800
[SPARK-26313][SQL] move `newScanBuilder` from Table to read related mix-in traits
## What changes were proposed in this pull request?
As discussed in https://github.com/apache/spark/pull/23208/files#r239684490 , we should put `newScanBuilder` in read-related mix-in traits like `SupportsBatchRead`, to support write-only tables.
In the `Append` operator, we should skip schema validation if it is not necessary. In the future we will introduce a capability API, so that a data source can tell Spark that it doesn't want to do validation.
## How was this patch tested?
Existing tests cover this change; no new tests were added.
Closes #23266 from cloud-fan/ds-read.
Authored-by: Wenchen Fan <we...@databricks.com>
Signed-off-by: Hyukjin Kwon <gu...@apache.org>
---
.../spark/sql/sources/v2/SupportsBatchRead.java | 8 ++++----
.../v2/{SupportsBatchRead.java => SupportsRead.java} | 20 +++++++++++---------
.../java/org/apache/spark/sql/sources/v2/Table.java | 15 ++-------------
3 files changed, 17 insertions(+), 26 deletions(-)
diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/SupportsBatchRead.java b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/SupportsBatchRead.java
index 0df89db..6c5a95d 100644
--- a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/SupportsBatchRead.java
+++ b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/SupportsBatchRead.java
@@ -24,10 +24,10 @@ import org.apache.spark.sql.sources.v2.reader.ScanBuilder;
/**
* An empty mix-in interface for {@link Table}, to indicate this table supports batch scan.
* <p>
- * If a {@link Table} implements this interface, its {@link Table#newScanBuilder(DataSourceOptions)}
- * must return a {@link ScanBuilder} that builds {@link Scan} with {@link Scan#toBatch()}
- * implemented.
+ * If a {@link Table} implements this interface, the
+ * {@link SupportsRead#newScanBuilder(DataSourceOptions)} must return a {@link ScanBuilder} that
+ * builds {@link Scan} with {@link Scan#toBatch()} implemented.
* </p>
*/
@Evolving
-public interface SupportsBatchRead extends Table { }
+public interface SupportsBatchRead extends SupportsRead { }
diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/SupportsBatchRead.java b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/SupportsRead.java
similarity index 67%
copy from sql/core/src/main/java/org/apache/spark/sql/sources/v2/SupportsBatchRead.java
copy to sql/core/src/main/java/org/apache/spark/sql/sources/v2/SupportsRead.java
index 0df89db..e22738d 100644
--- a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/SupportsBatchRead.java
+++ b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/SupportsRead.java
@@ -17,17 +17,19 @@
package org.apache.spark.sql.sources.v2;
-import org.apache.spark.annotation.Evolving;
import org.apache.spark.sql.sources.v2.reader.Scan;
import org.apache.spark.sql.sources.v2.reader.ScanBuilder;
/**
- * An empty mix-in interface for {@link Table}, to indicate this table supports batch scan.
- * <p>
- * If a {@link Table} implements this interface, its {@link Table#newScanBuilder(DataSourceOptions)}
- * must return a {@link ScanBuilder} that builds {@link Scan} with {@link Scan#toBatch()}
- * implemented.
- * </p>
+ * An internal base interface of mix-in interfaces for readable {@link Table}. This adds
+ * {@link #newScanBuilder(DataSourceOptions)} that is used to create a scan for batch, micro-batch,
+ * or continuous processing.
*/
-@Evolving
-public interface SupportsBatchRead extends Table { }
+interface SupportsRead extends Table {
+
+ /**
+ * Returns a {@link ScanBuilder} which can be used to build a {@link Scan}. Spark will call this
+ * method to configure each scan.
+ */
+ ScanBuilder newScanBuilder(DataSourceOptions options);
+}
diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/Table.java b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/Table.java
index 0c65fe0..0866485 100644
--- a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/Table.java
+++ b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/Table.java
@@ -18,8 +18,6 @@
package org.apache.spark.sql.sources.v2;
import org.apache.spark.annotation.Evolving;
-import org.apache.spark.sql.sources.v2.reader.Scan;
-import org.apache.spark.sql.sources.v2.reader.ScanBuilder;
import org.apache.spark.sql.types.StructType;
/**
@@ -43,17 +41,8 @@ public interface Table {
String name();
/**
- * Returns the schema of this table.
+ * Returns the schema of this table. If the table is not readable and doesn't have a schema, an
+ * empty schema can be returned here.
*/
StructType schema();
-
- /**
- * Returns a {@link ScanBuilder} which can be used to build a {@link Scan} later. Spark will call
- * this method for each data scanning query.
- * <p>
- * The builder can take some query specific information to do operators pushdown, and keep these
- * information in the created {@link Scan}.
- * </p>
- */
- ScanBuilder newScanBuilder(DataSourceOptions options);
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org