Posted to commits@spark.apache.org by we...@apache.org on 2018/02/21 07:10:17 UTC

spark git commit: [SPARK-23418][SQL] Fail DataSourceV2 reads when user schema is passed, but not supported.

Repository: spark
Updated Branches:
  refs/heads/master 95e25ed1a -> c8c4441df


[SPARK-23418][SQL] Fail DataSourceV2 reads when user schema is passed, but not supported.

## What changes were proposed in this pull request?

DataSourceV2 initially allowed user-supplied schemas even when a source does not implement `ReadSupportWithSchema`, as long as the supplied schema was identical to the source's own schema. This behavior is confusing because a change to the underlying table can cause a previously working job to fail with an exception stating that user-supplied schemas are not allowed.
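
As a rough illustration of the failure mode (the format name and column names below are hypothetical), a job like this could keep working while the supplied schema happened to match the source's schema, then start failing once the underlying table changed:

    import org.apache.spark.sql.SparkSession
    import org.apache.spark.sql.types.{LongType, StringType, StructType}

    val spark = SparkSession.builder().appName("user-schema-example").getOrCreate()

    // Hypothetical schema the job pins down.
    val userSchema = new StructType()
      .add("id", LongType)
      .add("name", StringType)

    // "com.example.v2source" stands in for a DataSourceV2 source that does NOT
    // implement ReadSupportWithSchema. Before this change, the read succeeded as
    // long as userSchema exactly matched the source's schema, and failed with
    // "does not allow user-specified schemas" only after the table evolved.
    // After this change, it fails up front instead.
    val df = spark.read
      .format("com.example.v2source")
      .schema(userSchema)
      .load()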

This reverts commit adcb25a0624, which was added in #20387 with the intent that it be removed later under a separate JIRA issue and PR.

## How was this patch tested?

Existing tests.

Author: Ryan Blue <bl...@apache.org>

Closes #20603 from rdblue/SPARK-23418-revert-adcb25a0624.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c8c4441d
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c8c4441d
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c8c4441d

Branch: refs/heads/master
Commit: c8c4441dfdfeda22f8d92e25aee1b6a6269752f9
Parents: 95e25ed
Author: Ryan Blue <bl...@apache.org>
Authored: Wed Feb 21 15:10:08 2018 +0800
Committer: Wenchen Fan <we...@databricks.com>
Committed: Wed Feb 21 15:10:08 2018 +0800

----------------------------------------------------------------------
 .../datasources/v2/DataSourceV2Relation.scala          | 13 +------------
 1 file changed, 1 insertion(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/c8c4441d/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Relation.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Relation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Relation.scala
index a98dd48..cc6cb63 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Relation.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Relation.scala
@@ -174,13 +174,6 @@ object DataSourceV2Relation {
       v2Options: DataSourceOptions,
       userSchema: Option[StructType]): StructType = {
     val reader = userSchema match {
-      // TODO: remove this case because it is confusing for users
-      case Some(s) if !source.isInstanceOf[ReadSupportWithSchema] =>
-        val reader = source.asReadSupport.createReader(v2Options)
-        if (reader.readSchema() != s) {
-          throw new AnalysisException(s"${source.name} does not allow user-specified schemas.")
-        }
-        reader
       case Some(s) =>
         source.asReadSupportWithSchema.createReader(s, v2Options)
       case _ =>
@@ -195,11 +188,7 @@ object DataSourceV2Relation {
       filters: Option[Seq[Expression]] = None,
       userSpecifiedSchema: Option[StructType] = None): DataSourceV2Relation = {
     val projection = schema(source, makeV2Options(options), userSpecifiedSchema).toAttributes
-    DataSourceV2Relation(source, options, projection, filters,
-      // if the source does not implement ReadSupportWithSchema, then the userSpecifiedSchema must
-      // be equal to the reader's schema. the schema method enforces this. because the user schema
-      // and the reader's schema are identical, drop the user schema.
-      if (source.isInstanceOf[ReadSupportWithSchema]) userSpecifiedSchema else None)
+    DataSourceV2Relation(source, options, projection, filters, userSpecifiedSchema)
   }
 
   private def pushRequiredColumns(reader: DataSourceReader, struct: StructType): Unit = {
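
For source implementers, the upshot of this change is that a user-specified schema is honored only if the source implements ReadSupportWithSchema. A minimal sketch against the DataSourceV2 reader API as of this commit (the class name is hypothetical and the reader returns no data):

    import java.util.{Collections, List => JList}

    import org.apache.spark.sql.Row
    import org.apache.spark.sql.sources.v2.{DataSourceOptions, DataSourceV2, ReadSupportWithSchema}
    import org.apache.spark.sql.sources.v2.reader.{DataReaderFactory, DataSourceReader}
    import org.apache.spark.sql.types.StructType

    class ExampleSchemaSource extends DataSourceV2 with ReadSupportWithSchema {
      // Invoked when the user supplies a schema via DataFrameReader.schema(...).
      override def createReader(schema: StructType, options: DataSourceOptions): DataSourceReader =
        new DataSourceReader {
          // Report the user-supplied schema back to Spark.
          override def readSchema(): StructType = schema

          // A real source would return factories producing rows that match the
          // schema; this sketch returns none.
          override def createDataReaderFactories(): JList[DataReaderFactory[Row]] =
            Collections.emptyList()
        }
    }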

