You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by be...@apache.org on 2023/10/30 13:34:47 UTC
(spark) branch master updated: [SPARK-43380][SQL][FOLLOWUP] Deprecate toSqlType(avroSchema: Schema, …useStableIdForUnionType: Boolean): SchemaType
This is an automated email from the ASF dual-hosted git repository.
beliefer pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new dc02991fa662 [SPARK-43380][SQL][FOLLOWUP] Deprecate toSqlType(avroSchema: Schema, …useStableIdForUnionType: Boolean): SchemaType
dc02991fa662 is described below
commit dc02991fa662c2f760315f190893cf09545e1b83
Author: Jiaan Geng <be...@163.com>
AuthorDate: Mon Oct 30 21:34:30 2023 +0800
[SPARK-43380][SQL][FOLLOWUP] Deprecate toSqlType(avroSchema: Schema, …useStableIdForUnionType: Boolean): SchemaType
### What changes were proposed in this pull request?
https://github.com/apache/spark/pull/43530 provides a new method:
```
/**
* Converts an Avro schema to a corresponding Spark SQL schema.
*
* since 4.0.0
*/
def toSqlType(avroSchema: Schema, useStableIdForUnionType: Boolean): SchemaType = {
toSqlTypeHelper(avroSchema, Set.empty, useStableIdForUnionType)
}
```
Because taking `AvroOptions` as a parameter causes a performance regression, the old `toSqlType` is of little use.
This PR also improves some callers of `toSqlType` by passing `useStableIdForUnionType` directly.
### Why are the changes needed?
Deprecate toSqlType(avroSchema: Schema, …useStableIdForUnionType: Boolean): SchemaType
### Does this PR introduce _any_ user-facing change?
'No'.
### How was this patch tested?
Existing test cases.
### Was this patch authored or co-authored using generative AI tooling?
'No'.
Closes #43557 from beliefer/SPARK-43380_followup.
Authored-by: Jiaan Geng <be...@163.com>
Signed-off-by: Jiaan Geng <be...@163.com>
---
.../src/main/scala/org/apache/spark/sql/avro/AvroDataToCatalyst.scala | 3 ++-
.../avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala | 2 +-
.../src/main/scala/org/apache/spark/sql/avro/SchemaConverters.scala | 2 ++
.../src/test/scala/org/apache/spark/sql/avro/AvroFunctionsSuite.scala | 2 +-
4 files changed, 6 insertions(+), 3 deletions(-)
diff --git a/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroDataToCatalyst.scala b/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroDataToCatalyst.scala
index 2c2a45fc3f14..06388409284a 100644
--- a/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroDataToCatalyst.scala
+++ b/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroDataToCatalyst.scala
@@ -39,7 +39,8 @@ private[sql] case class AvroDataToCatalyst(
override def inputTypes: Seq[AbstractDataType] = Seq(BinaryType)
override lazy val dataType: DataType = {
- val dt = SchemaConverters.toSqlType(expectedSchema, options).dataType
+ val dt = SchemaConverters.toSqlType(
+ expectedSchema, avroOptions.useStableIdForUnionType).dataType
parseMode match {
// With PermissiveMode, the output Catalyst row might contain columns of null values for
// corrupt records, even if some of the columns are not nullable in the user-provided schema.
diff --git a/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala b/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala
index e738f541ca79..0e27e4a604c4 100644
--- a/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala
+++ b/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala
@@ -61,7 +61,7 @@ private[sql] object AvroUtils extends Logging {
new FileSourceOptions(CaseInsensitiveMap(options)).ignoreCorruptFiles)
}
- SchemaConverters.toSqlType(avroSchema, options).dataType match {
+ SchemaConverters.toSqlType(avroSchema, parsedOptions.useStableIdForUnionType).dataType match {
case t: StructType => Some(t)
case _ => throw new RuntimeException(
s"""Avro schema cannot be converted to a Spark SQL StructType:
diff --git a/connector/avro/src/main/scala/org/apache/spark/sql/avro/SchemaConverters.scala b/connector/avro/src/main/scala/org/apache/spark/sql/avro/SchemaConverters.scala
index ba01a18d76f7..00fb32794e3a 100644
--- a/connector/avro/src/main/scala/org/apache/spark/sql/avro/SchemaConverters.scala
+++ b/connector/avro/src/main/scala/org/apache/spark/sql/avro/SchemaConverters.scala
@@ -62,6 +62,8 @@ object SchemaConverters {
def toSqlType(avroSchema: Schema): SchemaType = {
toSqlType(avroSchema, false)
}
+
+ @deprecated("using toSqlType(..., useStableIdForUnionType: Boolean) instead", "4.0.0")
def toSqlType(avroSchema: Schema, options: Map[String, String]): SchemaType = {
toSqlTypeHelper(avroSchema, Set.empty, AvroOptions(options).useStableIdForUnionType)
}
diff --git a/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroFunctionsSuite.scala b/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroFunctionsSuite.scala
index 07865787d287..9095f1c0831a 100644
--- a/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroFunctionsSuite.scala
+++ b/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroFunctionsSuite.scala
@@ -264,7 +264,7 @@ class AvroFunctionsSuite extends QueryTest with SharedSparkSession {
val avroOptions = AvroOptions(options)
val avroSchema = avroOptions.schema.get
val sparkSchema = SchemaConverters
- .toSqlType(avroSchema, options)
+ .toSqlType(avroSchema, avroOptions.useStableIdForUnionType)
.dataType
.asInstanceOf[StructType]
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org