You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2018/02/21 07:10:17 UTC
spark git commit: [SPARK-23418][SQL] Fail DataSourceV2 reads when
user schema is passed, but not supported.
Repository: spark
Updated Branches:
refs/heads/master 95e25ed1a -> c8c4441df
[SPARK-23418][SQL] Fail DataSourceV2 reads when user schema is passed, but not supported.
## What changes were proposed in this pull request?
DataSourceV2 initially allowed user-supplied schemas when a source doesn't implement `ReadSupportWithSchema`, as long as the schema was identical to the source's schema. This is confusing behavior because changes to an underlying table can cause a previously working job to fail with an exception stating that user-supplied schemas are not allowed.
This reverts commit adcb25a0624, which was added to #20387 so that it could be removed in a separate JIRA issue and PR.
## How was this patch tested?
Existing tests.
Author: Ryan Blue <bl...@apache.org>
Closes #20603 from rdblue/SPARK-23418-revert-adcb25a0624.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c8c4441d
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c8c4441d
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c8c4441d
Branch: refs/heads/master
Commit: c8c4441dfdfeda22f8d92e25aee1b6a6269752f9
Parents: 95e25ed
Author: Ryan Blue <bl...@apache.org>
Authored: Wed Feb 21 15:10:08 2018 +0800
Committer: Wenchen Fan <we...@databricks.com>
Committed: Wed Feb 21 15:10:08 2018 +0800
----------------------------------------------------------------------
.../datasources/v2/DataSourceV2Relation.scala | 13 +------------
1 file changed, 1 insertion(+), 12 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/c8c4441d/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Relation.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Relation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Relation.scala
index a98dd48..cc6cb63 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Relation.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Relation.scala
@@ -174,13 +174,6 @@ object DataSourceV2Relation {
v2Options: DataSourceOptions,
userSchema: Option[StructType]): StructType = {
val reader = userSchema match {
- // TODO: remove this case because it is confusing for users
- case Some(s) if !source.isInstanceOf[ReadSupportWithSchema] =>
- val reader = source.asReadSupport.createReader(v2Options)
- if (reader.readSchema() != s) {
- throw new AnalysisException(s"${source.name} does not allow user-specified schemas.")
- }
- reader
case Some(s) =>
source.asReadSupportWithSchema.createReader(s, v2Options)
case _ =>
@@ -195,11 +188,7 @@ object DataSourceV2Relation {
filters: Option[Seq[Expression]] = None,
userSpecifiedSchema: Option[StructType] = None): DataSourceV2Relation = {
val projection = schema(source, makeV2Options(options), userSpecifiedSchema).toAttributes
- DataSourceV2Relation(source, options, projection, filters,
- // if the source does not implement ReadSupportWithSchema, then the userSpecifiedSchema must
- // be equal to the reader's schema. the schema method enforces this. because the user schema
- // and the reader's schema are identical, drop the user schema.
- if (source.isInstanceOf[ReadSupportWithSchema]) userSpecifiedSchema else None)
+ DataSourceV2Relation(source, options, projection, filters, userSpecifiedSchema)
}
private def pushRequiredColumns(reader: DataSourceReader, struct: StructType): Unit = {
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org