You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2023/03/23 06:29:22 UTC
[spark] branch branch-3.4 updated: [SPARK-42878][CONNECT] The table API in DataFrameReader could also accept options
This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch branch-3.4
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.4 by this push:
new a25c0eab119 [SPARK-42878][CONNECT] The table API in DataFrameReader could also accept options
a25c0eab119 is described below
commit a25c0eab119efa0516ad0199945cb49586ba9729
Author: Rui Wang <ru...@databricks.com>
AuthorDate: Thu Mar 23 15:27:52 2023 +0900
[SPARK-42878][CONNECT] The table API in DataFrameReader could also accept options
It turns out that `spark.read.option(...).table(...)` is a valid call chain, and the `table` API does accept options when opening a table.
The existing Spark Connect implementation does not take these options into account.
Why the change is needed: feature parity.
User-facing change: NO
How it was tested: UT (unit tests)
Closes #40498 from amaliujia/name_table_support_options.
Authored-by: Rui Wang <ru...@databricks.com>
Signed-off-by: Hyukjin Kwon <gu...@apache.org>
(cherry picked from commit 0e29c8d5eda77ef085269f86b08c0a27420ac1f2)
Signed-off-by: Hyukjin Kwon <gu...@apache.org>
---
.../org/apache/spark/sql/DataFrameReader.scala | 4 +-
.../apache/spark/sql/PlanGenerationTestSuite.scala | 5 +
.../main/protobuf/spark/connect/relations.proto | 3 +
.../explain-results/table_API_with_options.explain | 2 +
.../queries/table_API_with_options.json | 14 ++
.../queries/table_API_with_options.proto.bin | Bin 0 -> 44 bytes
.../sql/connect/planner/SparkConnectPlanner.scala | 5 +-
.../connect/planner/SparkConnectPlannerSuite.scala | 19 ++
python/pyspark/sql/connect/plan.py | 5 +-
python/pyspark/sql/connect/proto/relations_pb2.py | 227 +++++++++++----------
python/pyspark/sql/connect/proto/relations_pb2.pyi | 28 ++-
python/pyspark/sql/connect/readwriter.py | 2 +-
12 files changed, 203 insertions(+), 111 deletions(-)
diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/DataFrameReader.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
index 193eb4faaab..40f9ac1df2b 100644
--- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
+++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
@@ -458,7 +458,9 @@ class DataFrameReader private[sql] (sparkSession: SparkSession) extends Logging
*/
def table(tableName: String): DataFrame = {
sparkSession.newDataFrame { builder =>
- builder.getReadBuilder.getNamedTableBuilder.setUnparsedIdentifier(tableName)
+ builder.getReadBuilder.getNamedTableBuilder
+ .setUnparsedIdentifier(tableName)
+ .putAllOptions(extraOptions.toMap.asJava)
}
}
diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala
index 3c7e1fdeee6..a57c6b39012 100644
--- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala
+++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala
@@ -2123,4 +2123,9 @@ class PlanGenerationTestSuite
test("replace") {
simple.na.replace[Long]("id", Map(1L -> 8L))
}
+
+ /* Reader API */
+ test("table API with options") {
+ session.read.options(Map("p1" -> "v1", "p2" -> "v2")).table("tempdb.myTable")
+ }
}
diff --git a/connector/connect/common/src/main/protobuf/spark/connect/relations.proto b/connector/connect/common/src/main/protobuf/spark/connect/relations.proto
index aba965082ea..1867d4d1169 100644
--- a/connector/connect/common/src/main/protobuf/spark/connect/relations.proto
+++ b/connector/connect/common/src/main/protobuf/spark/connect/relations.proto
@@ -122,6 +122,9 @@ message Read {
message NamedTable {
// (Required) Unparsed identifier for the table.
string unparsed_identifier = 1;
+
+ // Options for the named table. The map key is case insensitive.
+ map<string, string> options = 2;
}
message DataSource {
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/table_API_with_options.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/table_API_with_options.explain
new file mode 100644
index 00000000000..11a96567dbc
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/table_API_with_options.explain
@@ -0,0 +1,2 @@
+SubqueryAlias primary.tempdb.myTable
++- RelationV2[id#0L] primary.tempdb.myTable primary.tempdb.myTable
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/table_API_with_options.json b/connector/connect/common/src/test/resources/query-tests/queries/table_API_with_options.json
new file mode 100644
index 00000000000..acf04923588
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/queries/table_API_with_options.json
@@ -0,0 +1,14 @@
+{
+ "common": {
+ "planId": "0"
+ },
+ "read": {
+ "namedTable": {
+ "unparsedIdentifier": "tempdb.myTable",
+ "options": {
+ "p1": "v1",
+ "p2": "v2"
+ }
+ }
+ }
+}
\ No newline at end of file
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/table_API_with_options.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/table_API_with_options.proto.bin
new file mode 100644
index 00000000000..95e044984b4
Binary files /dev/null and b/connector/connect/common/src/test/resources/query-tests/queries/table_API_with_options.proto.bin differ
diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala
index 41a867d3b9d..9ebaed44820 100644
--- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala
+++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala
@@ -54,6 +54,7 @@ import org.apache.spark.sql.execution.datasources.jdbc.{JDBCOptions, JDBCPartiti
import org.apache.spark.sql.execution.python.UserDefinedPythonFunction
import org.apache.spark.sql.internal.CatalogImpl
import org.apache.spark.sql.types._
+import org.apache.spark.sql.util.CaseInsensitiveStringMap
import org.apache.spark.util.Utils
final case class InvalidCommandInput(
@@ -734,7 +735,9 @@ class SparkConnectPlanner(val session: SparkSession) {
case proto.Read.ReadTypeCase.NAMED_TABLE =>
val multipartIdentifier =
CatalystSqlParser.parseMultipartIdentifier(rel.getNamedTable.getUnparsedIdentifier)
- UnresolvedRelation(multipartIdentifier)
+ UnresolvedRelation(
+ multipartIdentifier,
+ new CaseInsensitiveStringMap(rel.getNamedTable.getOptionsMap))
case proto.Read.ReadTypeCase.DATA_SOURCE =>
val localMap = CaseInsensitiveMap[String](rel.getDataSource.getOptionsMap.asScala.toMap)
diff --git a/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectPlannerSuite.scala b/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectPlannerSuite.scala
index b6b214c839d..ec2362d5a56 100644
--- a/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectPlannerSuite.scala
+++ b/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectPlannerSuite.scala
@@ -28,6 +28,7 @@ import org.apache.spark.connect.proto.ExecutePlanResponse
import org.apache.spark.connect.proto.Expression.{Alias, ExpressionString, UnresolvedStar}
import org.apache.spark.sql.{AnalysisException, Dataset, Row}
import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation
import org.apache.spark.sql.catalyst.expressions.{AttributeReference, UnsafeProjection}
import org.apache.spark.sql.catalyst.plans.logical
import org.apache.spark.sql.connect.common.InvalidPlanInput
@@ -136,6 +137,24 @@ class SparkConnectPlannerSuite extends SparkFunSuite with SparkConnectPlanTest {
assert(res.nodeName == "UnresolvedRelation")
}
+ test("Simple Table with options") {
+ val read = proto.Read.newBuilder().build()
+ // Invalid read without Table name.
+ intercept[InvalidPlanInput](transform(proto.Relation.newBuilder.setRead(read).build()))
+ val readWithTable = read.toBuilder
+ .setNamedTable(
+ proto.Read.NamedTable.newBuilder
+ .setUnparsedIdentifier("name")
+ .putOptions("p1", "v1")
+ .build())
+ .build()
+ val res = transform(proto.Relation.newBuilder.setRead(readWithTable).build())
+ res match {
+ case e: UnresolvedRelation => assert(e.options.get("p1") == "v1")
+ case _ => assert(false, "Do not have expected options")
+ }
+ }
+
test("Simple Project") {
val readWithTable = proto.Read
.newBuilder()
diff --git a/python/pyspark/sql/connect/plan.py b/python/pyspark/sql/connect/plan.py
index dbfcfea7678..91ddfb25d48 100644
--- a/python/pyspark/sql/connect/plan.py
+++ b/python/pyspark/sql/connect/plan.py
@@ -302,13 +302,16 @@ class DataSource(LogicalPlan):
class Read(LogicalPlan):
- def __init__(self, table_name: str) -> None:
+ def __init__(self, table_name: str, options: Optional[Dict[str, str]] = None) -> None:
super().__init__(None)
self.table_name = table_name
+ self.options = options or {}
def plan(self, session: "SparkConnectClient") -> proto.Relation:
plan = self._create_proto_relation()
plan.read.named_table.unparsed_identifier = self.table_name
+ for k, v in self.options.items():
+ plan.read.named_table.options[k] = v
return plan
def print(self, indent: int = 0) -> str:
diff --git a/python/pyspark/sql/connect/proto/relations_pb2.py b/python/pyspark/sql/connect/proto/relations_pb2.py
index aa6d39cd4f0..079a753f6cb 100644
--- a/python/pyspark/sql/connect/proto/relations_pb2.py
+++ b/python/pyspark/sql/connect/proto/relations_pb2.py
@@ -36,7 +36,7 @@ from pyspark.sql.connect.proto import catalog_pb2 as spark_dot_connect_dot_catal
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(
- b'\n\x1dspark/connect/relations.proto\x12\rspark.connect\x1a\x19google/protobuf/any.proto\x1a\x1fspark/connect/expressions.proto\x1a\x19spark/connect/types.proto\x1a\x1bspark/connect/catalog.proto"\xf0\x13\n\x08Relation\x12\x35\n\x06\x63ommon\x18\x01 \x01(\x0b\x32\x1d.spark.connect.RelationCommonR\x06\x63ommon\x12)\n\x04read\x18\x02 \x01(\x0b\x32\x13.spark.connect.ReadH\x00R\x04read\x12\x32\n\x07project\x18\x03 \x01(\x0b\x32\x16.spark.connect.ProjectH\x00R\x07project\x12/\n\x06\x66il [...]
+ b'\n\x1dspark/connect/relations.proto\x12\rspark.connect\x1a\x19google/protobuf/any.proto\x1a\x1fspark/connect/expressions.proto\x1a\x19spark/connect/types.proto\x1a\x1bspark/connect/catalog.proto"\xf0\x13\n\x08Relation\x12\x35\n\x06\x63ommon\x18\x01 \x01(\x0b\x32\x1d.spark.connect.RelationCommonR\x06\x63ommon\x12)\n\x04read\x18\x02 \x01(\x0b\x32\x13.spark.connect.ReadH\x00R\x04read\x12\x32\n\x07project\x18\x03 \x01(\x0b\x32\x16.spark.connect.ProjectH\x00R\x07project\x12/\n\x06\x66il [...]
)
@@ -47,6 +47,7 @@ _SQL = DESCRIPTOR.message_types_by_name["SQL"]
_SQL_ARGSENTRY = _SQL.nested_types_by_name["ArgsEntry"]
_READ = DESCRIPTOR.message_types_by_name["Read"]
_READ_NAMEDTABLE = _READ.nested_types_by_name["NamedTable"]
+_READ_NAMEDTABLE_OPTIONSENTRY = _READ_NAMEDTABLE.nested_types_by_name["OptionsEntry"]
_READ_DATASOURCE = _READ.nested_types_by_name["DataSource"]
_READ_DATASOURCE_OPTIONSENTRY = _READ_DATASOURCE.nested_types_by_name["OptionsEntry"]
_PROJECT = DESCRIPTOR.message_types_by_name["Project"]
@@ -162,6 +163,15 @@ Read = _reflection.GeneratedProtocolMessageType(
"NamedTable",
(_message.Message,),
{
+ "OptionsEntry": _reflection.GeneratedProtocolMessageType(
+ "OptionsEntry",
+ (_message.Message,),
+ {
+ "DESCRIPTOR": _READ_NAMEDTABLE_OPTIONSENTRY,
+ "__module__": "spark.connect.relations_pb2"
+ # @@protoc_insertion_point(class_scope:spark.connect.Read.NamedTable.OptionsEntry)
+ },
+ ),
"DESCRIPTOR": _READ_NAMEDTABLE,
"__module__": "spark.connect.relations_pb2"
# @@protoc_insertion_point(class_scope:spark.connect.Read.NamedTable)
@@ -192,6 +202,7 @@ Read = _reflection.GeneratedProtocolMessageType(
)
_sym_db.RegisterMessage(Read)
_sym_db.RegisterMessage(Read.NamedTable)
+_sym_db.RegisterMessage(Read.NamedTable.OptionsEntry)
_sym_db.RegisterMessage(Read.DataSource)
_sym_db.RegisterMessage(Read.DataSource.OptionsEntry)
@@ -690,6 +701,8 @@ if _descriptor._USE_C_DESCRIPTORS == False:
DESCRIPTOR._serialized_options = b"\n\036org.apache.spark.connect.protoP\001"
_SQL_ARGSENTRY._options = None
_SQL_ARGSENTRY._serialized_options = b"8\001"
+ _READ_NAMEDTABLE_OPTIONSENTRY._options = None
+ _READ_NAMEDTABLE_OPTIONSENTRY._serialized_options = b"8\001"
_READ_DATASOURCE_OPTIONSENTRY._options = None
_READ_DATASOURCE_OPTIONSENTRY._serialized_options = b"8\001"
_WITHCOLUMNSRENAMED_RENAMECOLUMNSMAPENTRY._options = None
@@ -707,109 +720,111 @@ if _descriptor._USE_C_DESCRIPTORS == False:
_SQL_ARGSENTRY._serialized_start = 2895
_SQL_ARGSENTRY._serialized_end = 2950
_READ._serialized_start = 2953
- _READ._serialized_end = 3449
- _READ_NAMEDTABLE._serialized_start = 3095
- _READ_NAMEDTABLE._serialized_end = 3156
- _READ_DATASOURCE._serialized_start = 3159
- _READ_DATASOURCE._serialized_end = 3436
- _READ_DATASOURCE_OPTIONSENTRY._serialized_start = 3356
- _READ_DATASOURCE_OPTIONSENTRY._serialized_end = 3414
- _PROJECT._serialized_start = 3451
- _PROJECT._serialized_end = 3568
- _FILTER._serialized_start = 3570
- _FILTER._serialized_end = 3682
- _JOIN._serialized_start = 3685
- _JOIN._serialized_end = 4156
- _JOIN_JOINTYPE._serialized_start = 3948
- _JOIN_JOINTYPE._serialized_end = 4156
- _SETOPERATION._serialized_start = 4159
- _SETOPERATION._serialized_end = 4638
- _SETOPERATION_SETOPTYPE._serialized_start = 4475
- _SETOPERATION_SETOPTYPE._serialized_end = 4589
- _LIMIT._serialized_start = 4640
- _LIMIT._serialized_end = 4716
- _OFFSET._serialized_start = 4718
- _OFFSET._serialized_end = 4797
- _TAIL._serialized_start = 4799
- _TAIL._serialized_end = 4874
- _AGGREGATE._serialized_start = 4877
- _AGGREGATE._serialized_end = 5459
- _AGGREGATE_PIVOT._serialized_start = 5216
- _AGGREGATE_PIVOT._serialized_end = 5327
- _AGGREGATE_GROUPTYPE._serialized_start = 5330
- _AGGREGATE_GROUPTYPE._serialized_end = 5459
- _SORT._serialized_start = 5462
- _SORT._serialized_end = 5622
- _DROP._serialized_start = 5625
- _DROP._serialized_end = 5766
- _DEDUPLICATE._serialized_start = 5769
- _DEDUPLICATE._serialized_end = 5940
- _LOCALRELATION._serialized_start = 5942
- _LOCALRELATION._serialized_end = 6031
- _SAMPLE._serialized_start = 6034
- _SAMPLE._serialized_end = 6307
- _RANGE._serialized_start = 6310
- _RANGE._serialized_end = 6455
- _SUBQUERYALIAS._serialized_start = 6457
- _SUBQUERYALIAS._serialized_end = 6571
- _REPARTITION._serialized_start = 6574
- _REPARTITION._serialized_end = 6716
- _SHOWSTRING._serialized_start = 6719
- _SHOWSTRING._serialized_end = 6861
- _STATSUMMARY._serialized_start = 6863
- _STATSUMMARY._serialized_end = 6955
- _STATDESCRIBE._serialized_start = 6957
- _STATDESCRIBE._serialized_end = 7038
- _STATCROSSTAB._serialized_start = 7040
- _STATCROSSTAB._serialized_end = 7141
- _STATCOV._serialized_start = 7143
- _STATCOV._serialized_end = 7239
- _STATCORR._serialized_start = 7242
- _STATCORR._serialized_end = 7379
- _STATAPPROXQUANTILE._serialized_start = 7382
- _STATAPPROXQUANTILE._serialized_end = 7546
- _STATFREQITEMS._serialized_start = 7548
- _STATFREQITEMS._serialized_end = 7673
- _STATSAMPLEBY._serialized_start = 7676
- _STATSAMPLEBY._serialized_end = 7985
- _STATSAMPLEBY_FRACTION._serialized_start = 7877
- _STATSAMPLEBY_FRACTION._serialized_end = 7976
- _NAFILL._serialized_start = 7988
- _NAFILL._serialized_end = 8122
- _NADROP._serialized_start = 8125
- _NADROP._serialized_end = 8259
- _NAREPLACE._serialized_start = 8262
- _NAREPLACE._serialized_end = 8558
- _NAREPLACE_REPLACEMENT._serialized_start = 8417
- _NAREPLACE_REPLACEMENT._serialized_end = 8558
- _TODF._serialized_start = 8560
- _TODF._serialized_end = 8648
- _WITHCOLUMNSRENAMED._serialized_start = 8651
- _WITHCOLUMNSRENAMED._serialized_end = 8890
- _WITHCOLUMNSRENAMED_RENAMECOLUMNSMAPENTRY._serialized_start = 8823
- _WITHCOLUMNSRENAMED_RENAMECOLUMNSMAPENTRY._serialized_end = 8890
- _WITHCOLUMNS._serialized_start = 8892
- _WITHCOLUMNS._serialized_end = 9011
- _HINT._serialized_start = 9014
- _HINT._serialized_end = 9146
- _UNPIVOT._serialized_start = 9149
- _UNPIVOT._serialized_end = 9476
- _UNPIVOT_VALUES._serialized_start = 9406
- _UNPIVOT_VALUES._serialized_end = 9465
- _TOSCHEMA._serialized_start = 9478
- _TOSCHEMA._serialized_end = 9584
- _REPARTITIONBYEXPRESSION._serialized_start = 9587
- _REPARTITIONBYEXPRESSION._serialized_end = 9790
- _MAPPARTITIONS._serialized_start = 9793
- _MAPPARTITIONS._serialized_end = 9923
- _GROUPMAP._serialized_start = 9926
- _GROUPMAP._serialized_end = 10129
- _COLLECTMETRICS._serialized_start = 10132
- _COLLECTMETRICS._serialized_end = 10268
- _PARSE._serialized_start = 10271
- _PARSE._serialized_end = 10659
- _PARSE_OPTIONSENTRY._serialized_start = 3356
- _PARSE_OPTIONSENTRY._serialized_end = 3414
- _PARSE_PARSEFORMAT._serialized_start = 10560
- _PARSE_PARSEFORMAT._serialized_end = 10648
+ _READ._serialized_end = 3581
+ _READ_NAMEDTABLE._serialized_start = 3096
+ _READ_NAMEDTABLE._serialized_end = 3288
+ _READ_NAMEDTABLE_OPTIONSENTRY._serialized_start = 3230
+ _READ_NAMEDTABLE_OPTIONSENTRY._serialized_end = 3288
+ _READ_DATASOURCE._serialized_start = 3291
+ _READ_DATASOURCE._serialized_end = 3568
+ _READ_DATASOURCE_OPTIONSENTRY._serialized_start = 3230
+ _READ_DATASOURCE_OPTIONSENTRY._serialized_end = 3288
+ _PROJECT._serialized_start = 3583
+ _PROJECT._serialized_end = 3700
+ _FILTER._serialized_start = 3702
+ _FILTER._serialized_end = 3814
+ _JOIN._serialized_start = 3817
+ _JOIN._serialized_end = 4288
+ _JOIN_JOINTYPE._serialized_start = 4080
+ _JOIN_JOINTYPE._serialized_end = 4288
+ _SETOPERATION._serialized_start = 4291
+ _SETOPERATION._serialized_end = 4770
+ _SETOPERATION_SETOPTYPE._serialized_start = 4607
+ _SETOPERATION_SETOPTYPE._serialized_end = 4721
+ _LIMIT._serialized_start = 4772
+ _LIMIT._serialized_end = 4848
+ _OFFSET._serialized_start = 4850
+ _OFFSET._serialized_end = 4929
+ _TAIL._serialized_start = 4931
+ _TAIL._serialized_end = 5006
+ _AGGREGATE._serialized_start = 5009
+ _AGGREGATE._serialized_end = 5591
+ _AGGREGATE_PIVOT._serialized_start = 5348
+ _AGGREGATE_PIVOT._serialized_end = 5459
+ _AGGREGATE_GROUPTYPE._serialized_start = 5462
+ _AGGREGATE_GROUPTYPE._serialized_end = 5591
+ _SORT._serialized_start = 5594
+ _SORT._serialized_end = 5754
+ _DROP._serialized_start = 5757
+ _DROP._serialized_end = 5898
+ _DEDUPLICATE._serialized_start = 5901
+ _DEDUPLICATE._serialized_end = 6072
+ _LOCALRELATION._serialized_start = 6074
+ _LOCALRELATION._serialized_end = 6163
+ _SAMPLE._serialized_start = 6166
+ _SAMPLE._serialized_end = 6439
+ _RANGE._serialized_start = 6442
+ _RANGE._serialized_end = 6587
+ _SUBQUERYALIAS._serialized_start = 6589
+ _SUBQUERYALIAS._serialized_end = 6703
+ _REPARTITION._serialized_start = 6706
+ _REPARTITION._serialized_end = 6848
+ _SHOWSTRING._serialized_start = 6851
+ _SHOWSTRING._serialized_end = 6993
+ _STATSUMMARY._serialized_start = 6995
+ _STATSUMMARY._serialized_end = 7087
+ _STATDESCRIBE._serialized_start = 7089
+ _STATDESCRIBE._serialized_end = 7170
+ _STATCROSSTAB._serialized_start = 7172
+ _STATCROSSTAB._serialized_end = 7273
+ _STATCOV._serialized_start = 7275
+ _STATCOV._serialized_end = 7371
+ _STATCORR._serialized_start = 7374
+ _STATCORR._serialized_end = 7511
+ _STATAPPROXQUANTILE._serialized_start = 7514
+ _STATAPPROXQUANTILE._serialized_end = 7678
+ _STATFREQITEMS._serialized_start = 7680
+ _STATFREQITEMS._serialized_end = 7805
+ _STATSAMPLEBY._serialized_start = 7808
+ _STATSAMPLEBY._serialized_end = 8117
+ _STATSAMPLEBY_FRACTION._serialized_start = 8009
+ _STATSAMPLEBY_FRACTION._serialized_end = 8108
+ _NAFILL._serialized_start = 8120
+ _NAFILL._serialized_end = 8254
+ _NADROP._serialized_start = 8257
+ _NADROP._serialized_end = 8391
+ _NAREPLACE._serialized_start = 8394
+ _NAREPLACE._serialized_end = 8690
+ _NAREPLACE_REPLACEMENT._serialized_start = 8549
+ _NAREPLACE_REPLACEMENT._serialized_end = 8690
+ _TODF._serialized_start = 8692
+ _TODF._serialized_end = 8780
+ _WITHCOLUMNSRENAMED._serialized_start = 8783
+ _WITHCOLUMNSRENAMED._serialized_end = 9022
+ _WITHCOLUMNSRENAMED_RENAMECOLUMNSMAPENTRY._serialized_start = 8955
+ _WITHCOLUMNSRENAMED_RENAMECOLUMNSMAPENTRY._serialized_end = 9022
+ _WITHCOLUMNS._serialized_start = 9024
+ _WITHCOLUMNS._serialized_end = 9143
+ _HINT._serialized_start = 9146
+ _HINT._serialized_end = 9278
+ _UNPIVOT._serialized_start = 9281
+ _UNPIVOT._serialized_end = 9608
+ _UNPIVOT_VALUES._serialized_start = 9538
+ _UNPIVOT_VALUES._serialized_end = 9597
+ _TOSCHEMA._serialized_start = 9610
+ _TOSCHEMA._serialized_end = 9716
+ _REPARTITIONBYEXPRESSION._serialized_start = 9719
+ _REPARTITIONBYEXPRESSION._serialized_end = 9922
+ _MAPPARTITIONS._serialized_start = 9925
+ _MAPPARTITIONS._serialized_end = 10055
+ _GROUPMAP._serialized_start = 10058
+ _GROUPMAP._serialized_end = 10261
+ _COLLECTMETRICS._serialized_start = 10264
+ _COLLECTMETRICS._serialized_end = 10400
+ _PARSE._serialized_start = 10403
+ _PARSE._serialized_end = 10791
+ _PARSE_OPTIONSENTRY._serialized_start = 3230
+ _PARSE_OPTIONSENTRY._serialized_end = 3288
+ _PARSE_PARSEFORMAT._serialized_start = 10692
+ _PARSE_PARSEFORMAT._serialized_end = 10780
# @@protoc_insertion_point(module_scope)
diff --git a/python/pyspark/sql/connect/proto/relations_pb2.pyi b/python/pyspark/sql/connect/proto/relations_pb2.pyi
index 6ae4a323f6f..fdc08625297 100644
--- a/python/pyspark/sql/connect/proto/relations_pb2.pyi
+++ b/python/pyspark/sql/connect/proto/relations_pb2.pyi
@@ -591,17 +591,43 @@ class Read(google.protobuf.message.Message):
class NamedTable(google.protobuf.message.Message):
DESCRIPTOR: google.protobuf.descriptor.Descriptor
+ class OptionsEntry(google.protobuf.message.Message):
+ DESCRIPTOR: google.protobuf.descriptor.Descriptor
+
+ KEY_FIELD_NUMBER: builtins.int
+ VALUE_FIELD_NUMBER: builtins.int
+ key: builtins.str
+ value: builtins.str
+ def __init__(
+ self,
+ *,
+ key: builtins.str = ...,
+ value: builtins.str = ...,
+ ) -> None: ...
+ def ClearField(
+ self, field_name: typing_extensions.Literal["key", b"key", "value", b"value"]
+ ) -> None: ...
+
UNPARSED_IDENTIFIER_FIELD_NUMBER: builtins.int
+ OPTIONS_FIELD_NUMBER: builtins.int
unparsed_identifier: builtins.str
"""(Required) Unparsed identifier for the table."""
+ @property
+ def options(
+ self,
+ ) -> google.protobuf.internal.containers.ScalarMap[builtins.str, builtins.str]:
+ """Options for the named table. The map key is case insensitive."""
def __init__(
self,
*,
unparsed_identifier: builtins.str = ...,
+ options: collections.abc.Mapping[builtins.str, builtins.str] | None = ...,
) -> None: ...
def ClearField(
self,
- field_name: typing_extensions.Literal["unparsed_identifier", b"unparsed_identifier"],
+ field_name: typing_extensions.Literal[
+ "options", b"options", "unparsed_identifier", b"unparsed_identifier"
+ ],
) -> None: ...
class DataSource(google.protobuf.message.Message):
diff --git a/python/pyspark/sql/connect/readwriter.py b/python/pyspark/sql/connect/readwriter.py
index 192ec68b92a..0c38600ec8a 100644
--- a/python/pyspark/sql/connect/readwriter.py
+++ b/python/pyspark/sql/connect/readwriter.py
@@ -131,7 +131,7 @@ class DataFrameReader(OptionUtils):
return DataFrame.withPlan(plan, self._client)
def table(self, tableName: str) -> "DataFrame":
- return self._df(Read(tableName))
+ return self._df(Read(tableName, self._options))
table.__doc__ = PySparkDataFrameReader.table.__doc__
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org