You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2022/11/17 11:01:08 UTC
[spark] branch master updated: [SPARK-41164][CONNECT] Update relations.proto to follow Connect proto development guide

This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new f01a8db4bcf [SPARK-41164][CONNECT] Update relations.proto to follow Connect proto development guide
f01a8db4bcf is described below

commit f01a8db4bcf09c4975029e85722053ff82f8a355
Author: Rui Wang <ru...@databricks.com>
AuthorDate: Thu Nov 17 20:00:52 2022 +0900

    [SPARK-41164][CONNECT] Update relations.proto to follow Connect proto development guide
    
    ### What changes were proposed in this pull request?
    
    As we have guidance for Connect proto ([adding proto messages](https://github.com/apache/spark/blob/master/connector/connect/docs/adding-proto-messages.md)), this PR updates `relations.proto` to follow the development guide.
    
    This PR also adds some missing documentation for the proto.
    
    ### Why are the changes needed?
    
    1. Follow development guide.
    2. Improve proto Documentation.
    
    ### Does this PR introduce _any_ user-facing change?
    
    NO
    
    ### How was this patch tested?
    
    Existing UT
    
    Closes #38678 from amaliujia/improve_relation_proto_to_follow_proto_rules.
    
    Authored-by: Rui Wang <ru...@databricks.com>
    Signed-off-by: Hyukjin Kwon <gu...@apache.org>
---
 .../main/protobuf/spark/connect/relations.proto    | 128 +++++++--
 python/pyspark/sql/connect/proto/relations_pb2.py  | 114 ++++----
 python/pyspark/sql/connect/proto/relations_pb2.pyi | 302 +++++++++++++++------
 3 files changed, 372 insertions(+), 172 deletions(-)

diff --git a/connector/connect/src/main/protobuf/spark/connect/relations.proto b/connector/connect/src/main/protobuf/spark/connect/relations.proto
index 8fa10c4e093..52ff780d093 100644
--- a/connector/connect/src/main/protobuf/spark/connect/relations.proto
+++ b/connector/connect/src/main/protobuf/spark/connect/relations.proto
@@ -67,11 +67,13 @@ message Unknown {}
 
 // Common metadata of all relations.
 message RelationCommon {
+  // (Required) Shared relation metadata.
   string source_info = 1;
 }
 
 // Relation that uses a SQL query to generate the output.
 message SQL {
+  // (Required) The SQL query.
   string query = 1;
 }
 
@@ -84,15 +86,20 @@ message Read {
   }
 
   message NamedTable {
+    // (Required) Unparsed identifier for the table.
     string unparsed_identifier = 1;
   }
 
   message DataSource {
-    // Required. Supported formats include: parquet, orc, text, json, parquet, csv, avro.
+    // (Required) Supported formats include: parquet, orc, text, json, parquet, csv, avro.
     string format = 1;
-    // Optional. If not set, Spark will infer the schema.
-    string schema = 2;
-    // The key is case insensitive.
+
+    // (Optional) If not set, Spark will infer the schema.
+    optional string schema = 2;
+
+    // Options for the data source. The context of this map varies based on the
+    // data source format. The options may be empty for a valid data source format.
+    // The map key is case insensitive.
     map<string, string> options = 3;
   }
 }
@@ -106,13 +113,18 @@ message Project {
   //
   // For example, `SELECT ABS(-1)` is valid plan without an input plan.
   Relation input = 1;
+
+  // (Required) A Project requires at least one expression.
   repeated Expression expressions = 3;
 }
 
 // Relation that applies a boolean expression `condition` on each row of `input` to produce
 // the output result.
 message Filter {
+  // (Required) Input relation for a Filter.
   Relation input = 1;
+
+  // (Required) A Filter must have a condition expression.
   Expression condition = 2;
 }
 
@@ -120,10 +132,20 @@ message Filter {
 //
 // `left` and `right` must be present.
 message Join {
+  // (Required) Left input relation for a Join.
   Relation left = 1;
+
+  // (Required) Right input relation for a Join.
   Relation right = 2;
+
+  // (Optional) The join condition. Could be unset when `using_columns` is utilized.
+  //
+  // This field does not co-exist with using_columns.
   Expression join_condition = 3;
+
+  // (Required) The join type.
   JoinType join_type = 4;
+
   // Optional. using_columns provides a list of columns that should present on both sides of
   // the join inputs that this Join will join on. For example A JOIN B USING col_name is
   // equivalent to A JOIN B on A.col_name = B.col_name.
@@ -144,11 +166,25 @@ message Join {
 
 // Relation of type [[SetOperation]]
 message SetOperation {
+  // (Required) Left input relation for a Set operation.
   Relation left_input = 1;
+
+  // (Required) Right input relation for a Set operation.
   Relation right_input = 2;
+
+  // (Required) The Set operation type.
   SetOpType set_op_type = 3;
-  bool is_all = 4;
-  bool by_name = 5;
+
+  // (Optional) Whether to keep duplicate rows.
+  //
+  // True to preserve all results.
+  // False to remove duplicate rows.
+  optional bool is_all = 4;
+
+  // (Optional) Whether to perform the Set operation based on name resolution.
+  //
+  // Only UNION supports this option.
+  optional bool by_name = 5;
 
   enum SetOpType {
     SET_OP_TYPE_UNSPECIFIED = 0;
@@ -160,29 +196,42 @@ message SetOperation {
 
 // Relation of type [[Limit]] that is used to `limit` rows from the input relation.
 message Limit {
+  // (Required) Input relation for a Limit.
   Relation input = 1;
+
+  // (Required) the limit.
   int32 limit = 2;
 }
 
 // Relation of type [[Offset]] that is used to read rows staring from the `offset` on
 // the input relation.
 message Offset {
+  // (Required) Input relation for an Offset.
   Relation input = 1;
+
+  // (Required) the offset.
   int32 offset = 2;
 }
 
 // Relation of type [[Aggregate]].
 message Aggregate {
+  // (Required) Input relation for an Aggregate.
   Relation input = 1;
+
   repeated Expression grouping_expressions = 2;
   repeated Expression result_expressions = 3;
 }
 
 // Relation of type [[Sort]].
 message Sort {
+  // (Required) Input relation for a Sort.
   Relation input = 1;
+
+  // (Required) Sort fields.
   repeated SortField sort_fields = 2;
-  bool is_global = 3;
+
+  // (Optional) if this is a global sort.
+  optional bool is_global = 3;
 
   message SortField {
     Expression expression = 1;
@@ -206,33 +255,56 @@ message Sort {
 // Relation of type [[Deduplicate]] which have duplicate rows removed, could consider either only
 // the subset of columns or all the columns.
 message Deduplicate {
+  // (Required) Input relation for a Deduplicate.
   Relation input = 1;
+
+  // (Optional) Deduplicate based on a list of column names.
+  //
+  // This field does not co-use with `all_columns_as_keys`.
   repeated string column_names = 2;
-  bool all_columns_as_keys = 3;
+
+  // (Optional) Deduplicate based on all the columns of the input relation.
+  //
+  // This field does not co-use with `column_names`.
+  optional bool all_columns_as_keys = 3;
 }
 
+// A relation that does not need to be qualified by name.
 message LocalRelation {
+  // (Optional) A list of qualified attributes.
   repeated Expression.QualifiedAttribute attributes = 1;
   // TODO: support local data.
 }
 
 // Relation of type [[Sample]] that samples a fraction of the dataset.
 message Sample {
+  // (Required) Input relation for a Sample.
   Relation input = 1;
+
+  // (Required) lower bound.
   double lower_bound = 2;
+
+  // (Required) upper bound.
   double upper_bound = 3;
-  bool with_replacement = 4;
+
+  // (Optional) Whether to sample with replacement.
+  optional bool with_replacement = 4;
+
+  // (Optional) The random seed.
   optional int64 seed = 5;
 }
 
 // Relation of type [[Range]] that generates a sequence of integers.
 message Range {
-  // Optional. Default value = 0
-  int64 start = 1;
-  // Required.
+  // (Optional) Default value = 0
+  optional int64 start = 1;
+
+  // (Required)
   int64 end = 2;
-  // Required.
+
+  // (Required)
   int64 step = 3;
+
   // Optional. Default value is assigned by 1) SQL conf "spark.sql.leafNodeDefaultParallelism" if
   // it is set, or 2) spark default parallelism.
   optional int32 num_partitions = 4;
@@ -240,24 +312,26 @@ message Range {
 
 // Relation alias.
 message SubqueryAlias {
-  // Required. The input relation.
+  // (Required) The input relation of SubqueryAlias.
   Relation input = 1;
-  // Required. The alias.
+
+  // (Required) The alias.
   string alias = 2;
-  // Optional. Qualifier of the alias.
+
+  // (Optional) Qualifier of the alias.
   repeated string qualifier = 3;
 }
 
 // Relation repartition.
 message Repartition {
-  // Required. The input relation.
+  // (Required) The input relation of Repartition.
   Relation input = 1;
 
-  // Required. Must be positive.
+  // (Required) Must be positive.
   int32 num_partitions = 2;
 
-  // Optional. Default value is false.
-  bool shuffle = 3;
+  // (Optional) Default value is false.
+  optional bool shuffle = 3;
 }
 
 // Compose the string representing rows for output.
@@ -267,14 +341,14 @@ message ShowString {
   Relation input = 1;
 
   // (Required) Number of rows to show.
-  optional int32 numRows = 2;
+  int32 numRows = 2;
 
   // (Required) If set to more than 0, truncates strings to
   // `truncate` characters and all cells will be aligned right.
-  optional int32 truncate = 3;
+  int32 truncate = 3;
 
   // (Required) If set to true, prints output rows vertically (one line per column value).
-  optional bool vertical = 4;
+  bool vertical = 4;
 }
 
 // Computes specified statistics for numeric and string columns.
@@ -344,10 +418,10 @@ message NAFill {
 
 // Rename columns on the input relation by the same length of names.
 message RenameColumnsBySameLengthNames {
-  // Required. The input relation.
+  // (Required) The input relation of RenameColumnsBySameLengthNames.
   Relation input = 1;
 
-  // Required.
+  // (Required)
   //
   // The number of columns of the input relation must be equal to the length
   // of this field. If this is not true, an exception will be returned.
@@ -357,11 +431,11 @@ message RenameColumnsBySameLengthNames {
 
 // Rename columns on the input relation by a map with name to name mapping.
 message RenameColumnsByNameToNameMap {
-  // Required. The input relation.
+  // (Required) The input relation.
   Relation input = 1;
 
 
-  // Required.
+  // (Required)
   //
   // Renaming column names of input relation from A to B where A is the map key
   // and B is the map value. This is a no-op if schema doesn't contain any A. It
diff --git a/python/pyspark/sql/connect/proto/relations_pb2.py b/python/pyspark/sql/connect/proto/relations_pb2.py
index 7e77d56eaa6..9bc5e75ea64 100644
--- a/python/pyspark/sql/connect/proto/relations_pb2.py
+++ b/python/pyspark/sql/connect/proto/relations_pb2.py
@@ -32,7 +32,7 @@ from pyspark.sql.connect.proto import expressions_pb2 as spark_dot_connect_dot_e
 
 
 DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(
-    b'\n\x1dspark/connect/relations.proto\x12\rspark.connect\x1a\x1fspark/connect/expressions.proto"\xa6\x0b\n\x08Relation\x12\x35\n\x06\x63ommon\x18\x01 \x01(\x0b\x32\x1d.spark.connect.RelationCommonR\x06\x63ommon\x12)\n\x04read\x18\x02 \x01(\x0b\x32\x13.spark.connect.ReadH\x00R\x04read\x12\x32\n\x07project\x18\x03 \x01(\x0b\x32\x16.spark.connect.ProjectH\x00R\x07project\x12/\n\x06\x66ilter\x18\x04 \x01(\x0b\x32\x15.spark.connect.FilterH\x00R\x06\x66ilter\x12)\n\x04join\x18\x05 \x01(\x0 [...]
+    b'\n\x1dspark/connect/relations.proto\x12\rspark.connect\x1a\x1fspark/connect/expressions.proto"\xa6\x0b\n\x08Relation\x12\x35\n\x06\x63ommon\x18\x01 \x01(\x0b\x32\x1d.spark.connect.RelationCommonR\x06\x63ommon\x12)\n\x04read\x18\x02 \x01(\x0b\x32\x13.spark.connect.ReadH\x00R\x04read\x12\x32\n\x07project\x18\x03 \x01(\x0b\x32\x16.spark.connect.ProjectH\x00R\x07project\x12/\n\x06\x66ilter\x18\x04 \x01(\x0b\x32\x15.spark.connect.FilterH\x00R\x06\x66ilter\x12)\n\x04join\x18\x05 \x01(\x0 [...]
 )
 
 _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals())
@@ -54,63 +54,63 @@ if _descriptor._USE_C_DESCRIPTORS == False:
     _SQL._serialized_start = 1592
     _SQL._serialized_end = 1619
     _READ._serialized_start = 1622
-    _READ._serialized_end = 2032
+    _READ._serialized_end = 2048
     _READ_NAMEDTABLE._serialized_start = 1764
     _READ_NAMEDTABLE._serialized_end = 1825
     _READ_DATASOURCE._serialized_start = 1828
-    _READ_DATASOURCE._serialized_end = 2019
-    _READ_DATASOURCE_OPTIONSENTRY._serialized_start = 1961
-    _READ_DATASOURCE_OPTIONSENTRY._serialized_end = 2019
-    _PROJECT._serialized_start = 2034
-    _PROJECT._serialized_end = 2151
-    _FILTER._serialized_start = 2153
-    _FILTER._serialized_end = 2265
-    _JOIN._serialized_start = 2268
-    _JOIN._serialized_end = 2718
-    _JOIN_JOINTYPE._serialized_start = 2531
-    _JOIN_JOINTYPE._serialized_end = 2718
-    _SETOPERATION._serialized_start = 2721
-    _SETOPERATION._serialized_end = 3084
-    _SETOPERATION_SETOPTYPE._serialized_start = 2970
-    _SETOPERATION_SETOPTYPE._serialized_end = 3084
-    _LIMIT._serialized_start = 3086
-    _LIMIT._serialized_end = 3162
-    _OFFSET._serialized_start = 3164
-    _OFFSET._serialized_end = 3243
-    _AGGREGATE._serialized_start = 3246
-    _AGGREGATE._serialized_end = 3456
-    _SORT._serialized_start = 3459
-    _SORT._serialized_end = 3990
-    _SORT_SORTFIELD._serialized_start = 3608
-    _SORT_SORTFIELD._serialized_end = 3796
-    _SORT_SORTDIRECTION._serialized_start = 3798
-    _SORT_SORTDIRECTION._serialized_end = 3906
-    _SORT_SORTNULLS._serialized_start = 3908
-    _SORT_SORTNULLS._serialized_end = 3990
-    _DEDUPLICATE._serialized_start = 3993
-    _DEDUPLICATE._serialized_end = 4135
-    _LOCALRELATION._serialized_start = 4137
-    _LOCALRELATION._serialized_end = 4230
-    _SAMPLE._serialized_start = 4233
-    _SAMPLE._serialized_end = 4431
-    _RANGE._serialized_start = 4434
-    _RANGE._serialized_end = 4564
-    _SUBQUERYALIAS._serialized_start = 4566
-    _SUBQUERYALIAS._serialized_end = 4680
-    _REPARTITION._serialized_start = 4682
-    _REPARTITION._serialized_end = 4807
-    _SHOWSTRING._serialized_start = 4810
-    _SHOWSTRING._serialized_end = 5004
-    _STATSUMMARY._serialized_start = 5006
-    _STATSUMMARY._serialized_end = 5098
-    _STATCROSSTAB._serialized_start = 5100
-    _STATCROSSTAB._serialized_end = 5201
-    _NAFILL._serialized_start = 5204
-    _NAFILL._serialized_end = 5338
-    _RENAMECOLUMNSBYSAMELENGTHNAMES._serialized_start = 5340
-    _RENAMECOLUMNSBYSAMELENGTHNAMES._serialized_end = 5454
-    _RENAMECOLUMNSBYNAMETONAMEMAP._serialized_start = 5457
-    _RENAMECOLUMNSBYNAMETONAMEMAP._serialized_end = 5716
-    _RENAMECOLUMNSBYNAMETONAMEMAP_RENAMECOLUMNSMAPENTRY._serialized_start = 5649
-    _RENAMECOLUMNSBYNAMETONAMEMAP_RENAMECOLUMNSMAPENTRY._serialized_end = 5716
+    _READ_DATASOURCE._serialized_end = 2035
+    _READ_DATASOURCE_OPTIONSENTRY._serialized_start = 1966
+    _READ_DATASOURCE_OPTIONSENTRY._serialized_end = 2024
+    _PROJECT._serialized_start = 2050
+    _PROJECT._serialized_end = 2167
+    _FILTER._serialized_start = 2169
+    _FILTER._serialized_end = 2281
+    _JOIN._serialized_start = 2284
+    _JOIN._serialized_end = 2734
+    _JOIN_JOINTYPE._serialized_start = 2547
+    _JOIN_JOINTYPE._serialized_end = 2734
+    _SETOPERATION._serialized_start = 2737
+    _SETOPERATION._serialized_end = 3133
+    _SETOPERATION_SETOPTYPE._serialized_start = 2996
+    _SETOPERATION_SETOPTYPE._serialized_end = 3110
+    _LIMIT._serialized_start = 3135
+    _LIMIT._serialized_end = 3211
+    _OFFSET._serialized_start = 3213
+    _OFFSET._serialized_end = 3292
+    _AGGREGATE._serialized_start = 3295
+    _AGGREGATE._serialized_end = 3505
+    _SORT._serialized_start = 3508
+    _SORT._serialized_end = 4058
+    _SORT_SORTFIELD._serialized_start = 3662
+    _SORT_SORTFIELD._serialized_end = 3850
+    _SORT_SORTDIRECTION._serialized_start = 3852
+    _SORT_SORTDIRECTION._serialized_end = 3960
+    _SORT_SORTNULLS._serialized_start = 3962
+    _SORT_SORTNULLS._serialized_end = 4044
+    _DEDUPLICATE._serialized_start = 4061
+    _DEDUPLICATE._serialized_end = 4232
+    _LOCALRELATION._serialized_start = 4234
+    _LOCALRELATION._serialized_end = 4327
+    _SAMPLE._serialized_start = 4330
+    _SAMPLE._serialized_end = 4554
+    _RANGE._serialized_start = 4557
+    _RANGE._serialized_end = 4702
+    _SUBQUERYALIAS._serialized_start = 4704
+    _SUBQUERYALIAS._serialized_end = 4818
+    _REPARTITION._serialized_start = 4821
+    _REPARTITION._serialized_end = 4963
+    _SHOWSTRING._serialized_start = 4966
+    _SHOWSTRING._serialized_end = 5107
+    _STATSUMMARY._serialized_start = 5109
+    _STATSUMMARY._serialized_end = 5201
+    _STATCROSSTAB._serialized_start = 5203
+    _STATCROSSTAB._serialized_end = 5304
+    _NAFILL._serialized_start = 5307
+    _NAFILL._serialized_end = 5441
+    _RENAMECOLUMNSBYSAMELENGTHNAMES._serialized_start = 5443
+    _RENAMECOLUMNSBYSAMELENGTHNAMES._serialized_end = 5557
+    _RENAMECOLUMNSBYNAMETONAMEMAP._serialized_start = 5560
+    _RENAMECOLUMNSBYNAMETONAMEMAP._serialized_end = 5819
+    _RENAMECOLUMNSBYNAMETONAMEMAP_RENAMECOLUMNSMAPENTRY._serialized_start = 5752
+    _RENAMECOLUMNSBYNAMETONAMEMAP_RENAMECOLUMNSMAPENTRY._serialized_end = 5819
 # @@protoc_insertion_point(module_scope)
diff --git a/python/pyspark/sql/connect/proto/relations_pb2.pyi b/python/pyspark/sql/connect/proto/relations_pb2.pyi
index 762ccd8c0e8..27c6db4e748 100644
--- a/python/pyspark/sql/connect/proto/relations_pb2.pyi
+++ b/python/pyspark/sql/connect/proto/relations_pb2.pyi
@@ -319,6 +319,7 @@ class RelationCommon(google.protobuf.message.Message):
 
     SOURCE_INFO_FIELD_NUMBER: builtins.int
     source_info: builtins.str
+    """(Required) Shared relation metadata."""
     def __init__(
         self,
         *,
@@ -337,6 +338,7 @@ class SQL(google.protobuf.message.Message):
 
     QUERY_FIELD_NUMBER: builtins.int
     query: builtins.str
+    """(Required) The SQL query."""
     def __init__(
         self,
         *,
@@ -358,6 +360,7 @@ class Read(google.protobuf.message.Message):
 
         UNPARSED_IDENTIFIER_FIELD_NUMBER: builtins.int
         unparsed_identifier: builtins.str
+        """(Required) Unparsed identifier for the table."""
         def __init__(
             self,
             *,
@@ -392,27 +395,43 @@ class Read(google.protobuf.message.Message):
         SCHEMA_FIELD_NUMBER: builtins.int
         OPTIONS_FIELD_NUMBER: builtins.int
         format: builtins.str
-        """Required. Supported formats include: parquet, orc, text, json, parquet, csv, avro."""
+        """(Required) Supported formats include: parquet, orc, text, json, parquet, csv, avro."""
         schema: builtins.str
-        """Optional. If not set, Spark will infer the schema."""
+        """(Optional) If not set, Spark will infer the schema."""
         @property
         def options(
             self,
         ) -> google.protobuf.internal.containers.ScalarMap[builtins.str, builtins.str]:
-            """The key is case insensitive."""
+            """Options for the data source. The context of this map varies based on the
+            data source format. The options may be empty for a valid data source format.
+            The map key is case insensitive.
+            """
         def __init__(
             self,
             *,
             format: builtins.str = ...,
-            schema: builtins.str = ...,
+            schema: builtins.str | None = ...,
             options: collections.abc.Mapping[builtins.str, builtins.str] | None = ...,
         ) -> None: ...
+        def HasField(
+            self, field_name: typing_extensions.Literal["_schema", b"_schema", "schema", b"schema"]
+        ) -> builtins.bool: ...
         def ClearField(
             self,
             field_name: typing_extensions.Literal[
-                "format", b"format", "options", b"options", "schema", b"schema"
+                "_schema",
+                b"_schema",
+                "format",
+                b"format",
+                "options",
+                b"options",
+                "schema",
+                b"schema",
             ],
         ) -> None: ...
+        def WhichOneof(
+            self, oneof_group: typing_extensions.Literal["_schema", b"_schema"]
+        ) -> typing_extensions.Literal["schema"] | None: ...
 
     NAMED_TABLE_FIELD_NUMBER: builtins.int
     DATA_SOURCE_FIELD_NUMBER: builtins.int
@@ -466,7 +485,8 @@ class Project(google.protobuf.message.Message):
         self,
     ) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[
         pyspark.sql.connect.proto.expressions_pb2.Expression
-    ]: ...
+    ]:
+        """(Required) A Project requires at least one expression."""
     def __init__(
         self,
         *,
@@ -494,9 +514,11 @@ class Filter(google.protobuf.message.Message):
     INPUT_FIELD_NUMBER: builtins.int
     CONDITION_FIELD_NUMBER: builtins.int
     @property
-    def input(self) -> global___Relation: ...
+    def input(self) -> global___Relation:
+        """(Required) Input relation for a Filter."""
     @property
-    def condition(self) -> pyspark.sql.connect.proto.expressions_pb2.Expression: ...
+    def condition(self) -> pyspark.sql.connect.proto.expressions_pb2.Expression:
+        """(Required) A Filter must have a condition expression."""
     def __init__(
         self,
         *,
@@ -552,12 +574,19 @@ class Join(google.protobuf.message.Message):
     JOIN_TYPE_FIELD_NUMBER: builtins.int
     USING_COLUMNS_FIELD_NUMBER: builtins.int
     @property
-    def left(self) -> global___Relation: ...
+    def left(self) -> global___Relation:
+        """(Required) Left input relation for a Join."""
     @property
-    def right(self) -> global___Relation: ...
+    def right(self) -> global___Relation:
+        """(Required) Right input relation for a Join."""
     @property
-    def join_condition(self) -> pyspark.sql.connect.proto.expressions_pb2.Expression: ...
+    def join_condition(self) -> pyspark.sql.connect.proto.expressions_pb2.Expression:
+        """(Optional) The join condition. Could be unset when `using_columns` is utilized.
+
+        This field does not co-exist with using_columns.
+        """
     join_type: global___Join.JoinType.ValueType
+    """(Required) The join type."""
     @property
     def using_columns(
         self,
@@ -634,30 +663,57 @@ class SetOperation(google.protobuf.message.Message):
     IS_ALL_FIELD_NUMBER: builtins.int
     BY_NAME_FIELD_NUMBER: builtins.int
     @property
-    def left_input(self) -> global___Relation: ...
+    def left_input(self) -> global___Relation:
+        """(Required) Left input relation for a Set operation."""
     @property
-    def right_input(self) -> global___Relation: ...
+    def right_input(self) -> global___Relation:
+        """(Required) Right input relation for a Set operation."""
     set_op_type: global___SetOperation.SetOpType.ValueType
+    """(Required) The Set operation type."""
     is_all: builtins.bool
+    """(Optional) Whether to keep duplicate rows.
+
+    True to preserve all results.
+    False to remove duplicate rows.
+    """
     by_name: builtins.bool
+    """(Optional) Whether to perform the Set operation based on name resolution.
+
+    Only UNION supports this option.
+    """
     def __init__(
         self,
         *,
         left_input: global___Relation | None = ...,
         right_input: global___Relation | None = ...,
         set_op_type: global___SetOperation.SetOpType.ValueType = ...,
-        is_all: builtins.bool = ...,
-        by_name: builtins.bool = ...,
+        is_all: builtins.bool | None = ...,
+        by_name: builtins.bool | None = ...,
     ) -> None: ...
     def HasField(
         self,
         field_name: typing_extensions.Literal[
-            "left_input", b"left_input", "right_input", b"right_input"
+            "_by_name",
+            b"_by_name",
+            "_is_all",
+            b"_is_all",
+            "by_name",
+            b"by_name",
+            "is_all",
+            b"is_all",
+            "left_input",
+            b"left_input",
+            "right_input",
+            b"right_input",
         ],
     ) -> builtins.bool: ...
     def ClearField(
         self,
         field_name: typing_extensions.Literal[
+            "_by_name",
+            b"_by_name",
+            "_is_all",
+            b"_is_all",
             "by_name",
             b"by_name",
             "is_all",
@@ -670,6 +726,14 @@ class SetOperation(google.protobuf.message.Message):
             b"set_op_type",
         ],
     ) -> None: ...
+    @typing.overload
+    def WhichOneof(
+        self, oneof_group: typing_extensions.Literal["_by_name", b"_by_name"]
+    ) -> typing_extensions.Literal["by_name"] | None: ...
+    @typing.overload
+    def WhichOneof(
+        self, oneof_group: typing_extensions.Literal["_is_all", b"_is_all"]
+    ) -> typing_extensions.Literal["is_all"] | None: ...
 
 global___SetOperation = SetOperation
 
@@ -681,8 +745,10 @@ class Limit(google.protobuf.message.Message):
     INPUT_FIELD_NUMBER: builtins.int
     LIMIT_FIELD_NUMBER: builtins.int
     @property
-    def input(self) -> global___Relation: ...
+    def input(self) -> global___Relation:
+        """(Required) Input relation for a Limit."""
     limit: builtins.int
+    """(Required) the limit."""
     def __init__(
         self,
         *,
@@ -708,8 +774,10 @@ class Offset(google.protobuf.message.Message):
     INPUT_FIELD_NUMBER: builtins.int
     OFFSET_FIELD_NUMBER: builtins.int
     @property
-    def input(self) -> global___Relation: ...
+    def input(self) -> global___Relation:
+        """(Required) Input relation for an Offset."""
     offset: builtins.int
+    """(Required) the offset."""
     def __init__(
         self,
         *,
@@ -734,7 +802,8 @@ class Aggregate(google.protobuf.message.Message):
     GROUPING_EXPRESSIONS_FIELD_NUMBER: builtins.int
     RESULT_EXPRESSIONS_FIELD_NUMBER: builtins.int
     @property
-    def input(self) -> global___Relation: ...
+    def input(self) -> global___Relation:
+        """(Required) Input relation for an Aggregate."""
     @property
     def grouping_expressions(
         self,
@@ -849,30 +918,46 @@ class Sort(google.protobuf.message.Message):
     SORT_FIELDS_FIELD_NUMBER: builtins.int
     IS_GLOBAL_FIELD_NUMBER: builtins.int
     @property
-    def input(self) -> global___Relation: ...
+    def input(self) -> global___Relation:
+        """(Required) Input relation for a Sort."""
     @property
     def sort_fields(
         self,
     ) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[
         global___Sort.SortField
-    ]: ...
+    ]:
+        """(Required) Sort fields."""
     is_global: builtins.bool
+    """(Optional) if this is a global sort."""
     def __init__(
         self,
         *,
         input: global___Relation | None = ...,
         sort_fields: collections.abc.Iterable[global___Sort.SortField] | None = ...,
-        is_global: builtins.bool = ...,
+        is_global: builtins.bool | None = ...,
     ) -> None: ...
     def HasField(
-        self, field_name: typing_extensions.Literal["input", b"input"]
+        self,
+        field_name: typing_extensions.Literal[
+            "_is_global", b"_is_global", "input", b"input", "is_global", b"is_global"
+        ],
     ) -> builtins.bool: ...
     def ClearField(
         self,
         field_name: typing_extensions.Literal[
-            "input", b"input", "is_global", b"is_global", "sort_fields", b"sort_fields"
+            "_is_global",
+            b"_is_global",
+            "input",
+            b"input",
+            "is_global",
+            b"is_global",
+            "sort_fields",
+            b"sort_fields",
         ],
     ) -> None: ...
+    def WhichOneof(
+        self, oneof_group: typing_extensions.Literal["_is_global", b"_is_global"]
+    ) -> typing_extensions.Literal["is_global"] | None: ...
 
 global___Sort = Sort
 
@@ -887,25 +972,44 @@ class Deduplicate(google.protobuf.message.Message):
     COLUMN_NAMES_FIELD_NUMBER: builtins.int
     ALL_COLUMNS_AS_KEYS_FIELD_NUMBER: builtins.int
     @property
-    def input(self) -> global___Relation: ...
+    def input(self) -> global___Relation:
+        """(Required) Input relation for a Deduplicate."""
     @property
     def column_names(
         self,
-    ) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.str]: ...
+    ) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.str]:
+        """(Optional) Deduplicate based on a list of column names.
+
+        This field does not co-use with `all_columns_as_keys`.
+        """
     all_columns_as_keys: builtins.bool
+    """(Optional) Deduplicate based on all the columns of the input relation.
+
+    This field does not co-use with `column_names`.
+    """
     def __init__(
         self,
         *,
         input: global___Relation | None = ...,
         column_names: collections.abc.Iterable[builtins.str] | None = ...,
-        all_columns_as_keys: builtins.bool = ...,
+        all_columns_as_keys: builtins.bool | None = ...,
     ) -> None: ...
     def HasField(
-        self, field_name: typing_extensions.Literal["input", b"input"]
+        self,
+        field_name: typing_extensions.Literal[
+            "_all_columns_as_keys",
+            b"_all_columns_as_keys",
+            "all_columns_as_keys",
+            b"all_columns_as_keys",
+            "input",
+            b"input",
+        ],
     ) -> builtins.bool: ...
     def ClearField(
         self,
         field_name: typing_extensions.Literal[
+            "_all_columns_as_keys",
+            b"_all_columns_as_keys",
             "all_columns_as_keys",
             b"all_columns_as_keys",
             "column_names",
@@ -914,10 +1018,16 @@ class Deduplicate(google.protobuf.message.Message):
             b"input",
         ],
     ) -> None: ...
+    def WhichOneof(
+        self,
+        oneof_group: typing_extensions.Literal["_all_columns_as_keys", b"_all_columns_as_keys"],
+    ) -> typing_extensions.Literal["all_columns_as_keys"] | None: ...
 
 global___Deduplicate = Deduplicate
 
 class LocalRelation(google.protobuf.message.Message):
+    """A relation that does not need to be qualified by name."""
+
     DESCRIPTOR: google.protobuf.descriptor.Descriptor
 
     ATTRIBUTES_FIELD_NUMBER: builtins.int
@@ -927,7 +1037,9 @@ class LocalRelation(google.protobuf.message.Message):
     ) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[
         pyspark.sql.connect.proto.expressions_pb2.Expression.QualifiedAttribute
     ]:
-        """TODO: support local data."""
+        """(Optional) A list of qualified attributes.
+        TODO: support local data.
+        """
     def __init__(
         self,
         *,
@@ -953,24 +1065,38 @@ class Sample(google.protobuf.message.Message):
     WITH_REPLACEMENT_FIELD_NUMBER: builtins.int
     SEED_FIELD_NUMBER: builtins.int
     @property
-    def input(self) -> global___Relation: ...
+    def input(self) -> global___Relation:
+        """(Required) Input relation for a Sample."""
     lower_bound: builtins.float
+    """(Required) lower bound."""
     upper_bound: builtins.float
+    """(Required) upper bound."""
     with_replacement: builtins.bool
+    """(Optional) Whether to sample with replacement."""
     seed: builtins.int
+    """(Optional) The random seed."""
     def __init__(
         self,
         *,
         input: global___Relation | None = ...,
         lower_bound: builtins.float = ...,
         upper_bound: builtins.float = ...,
-        with_replacement: builtins.bool = ...,
+        with_replacement: builtins.bool | None = ...,
         seed: builtins.int | None = ...,
     ) -> None: ...
     def HasField(
         self,
         field_name: typing_extensions.Literal[
-            "_seed", b"_seed", "input", b"input", "seed", b"seed"
+            "_seed",
+            b"_seed",
+            "_with_replacement",
+            b"_with_replacement",
+            "input",
+            b"input",
+            "seed",
+            b"seed",
+            "with_replacement",
+            b"with_replacement",
         ],
     ) -> builtins.bool: ...
     def ClearField(
@@ -978,6 +1104,8 @@ class Sample(google.protobuf.message.Message):
         field_name: typing_extensions.Literal[
             "_seed",
             b"_seed",
+            "_with_replacement",
+            b"_with_replacement",
             "input",
             b"input",
             "lower_bound",
@@ -990,9 +1118,14 @@ class Sample(google.protobuf.message.Message):
             b"with_replacement",
         ],
     ) -> None: ...
+    @typing.overload
     def WhichOneof(
         self, oneof_group: typing_extensions.Literal["_seed", b"_seed"]
     ) -> typing_extensions.Literal["seed"] | None: ...
+    @typing.overload
+    def WhichOneof(
+        self, oneof_group: typing_extensions.Literal["_with_replacement", b"_with_replacement"]
+    ) -> typing_extensions.Literal["with_replacement"] | None: ...
 
 global___Sample = Sample
 
@@ -1006,11 +1139,11 @@ class Range(google.protobuf.message.Message):
     STEP_FIELD_NUMBER: builtins.int
     NUM_PARTITIONS_FIELD_NUMBER: builtins.int
     start: builtins.int
-    """Optional. Default value = 0"""
+    """(Optional) Default value = 0"""
     end: builtins.int
-    """Required."""
+    """(Required)"""
     step: builtins.int
-    """Required."""
+    """(Required)"""
     num_partitions: builtins.int
     """Optional. Default value is assigned by 1) SQL conf "spark.sql.leafNodeDefaultParallelism" if
     it is set, or 2) spark default parallelism.
@@ -1018,7 +1151,7 @@ class Range(google.protobuf.message.Message):
     def __init__(
         self,
         *,
-        start: builtins.int = ...,
+        start: builtins.int | None = ...,
         end: builtins.int = ...,
         step: builtins.int = ...,
         num_partitions: builtins.int | None = ...,
@@ -1026,7 +1159,14 @@ class Range(google.protobuf.message.Message):
     def HasField(
         self,
         field_name: typing_extensions.Literal[
-            "_num_partitions", b"_num_partitions", "num_partitions", b"num_partitions"
+            "_num_partitions",
+            b"_num_partitions",
+            "_start",
+            b"_start",
+            "num_partitions",
+            b"num_partitions",
+            "start",
+            b"start",
         ],
     ) -> builtins.bool: ...
     def ClearField(
@@ -1034,6 +1174,8 @@ class Range(google.protobuf.message.Message):
         field_name: typing_extensions.Literal[
             "_num_partitions",
             b"_num_partitions",
+            "_start",
+            b"_start",
             "end",
             b"end",
             "num_partitions",
@@ -1044,9 +1186,14 @@ class Range(google.protobuf.message.Message):
             b"step",
         ],
     ) -> None: ...
+    @typing.overload
     def WhichOneof(
         self, oneof_group: typing_extensions.Literal["_num_partitions", b"_num_partitions"]
     ) -> typing_extensions.Literal["num_partitions"] | None: ...
+    @typing.overload
+    def WhichOneof(
+        self, oneof_group: typing_extensions.Literal["_start", b"_start"]
+    ) -> typing_extensions.Literal["start"] | None: ...
 
 global___Range = Range
 
@@ -1060,14 +1207,14 @@ class SubqueryAlias(google.protobuf.message.Message):
     QUALIFIER_FIELD_NUMBER: builtins.int
     @property
     def input(self) -> global___Relation:
-        """Required. The input relation."""
+        """(Required) The input relation of SubqueryAlias."""
     alias: builtins.str
-    """Required. The alias."""
+    """(Required) The alias."""
     @property
     def qualifier(
         self,
     ) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.str]:
-        """Optional. Qualifier of the alias."""
+        """(Optional) Qualifier of the alias."""
     def __init__(
         self,
         *,
@@ -1097,27 +1244,40 @@ class Repartition(google.protobuf.message.Message):
     SHUFFLE_FIELD_NUMBER: builtins.int
     @property
     def input(self) -> global___Relation:
-        """Required. The input relation."""
+        """(Required) The input relation of Repartition."""
     num_partitions: builtins.int
-    """Required. Must be positive."""
+    """(Required) Must be positive."""
     shuffle: builtins.bool
-    """Optional. Default value is false."""
+    """(Optional) Default value is false."""
     def __init__(
         self,
         *,
         input: global___Relation | None = ...,
         num_partitions: builtins.int = ...,
-        shuffle: builtins.bool = ...,
+        shuffle: builtins.bool | None = ...,
     ) -> None: ...
     def HasField(
-        self, field_name: typing_extensions.Literal["input", b"input"]
+        self,
+        field_name: typing_extensions.Literal[
+            "_shuffle", b"_shuffle", "input", b"input", "shuffle", b"shuffle"
+        ],
     ) -> builtins.bool: ...
     def ClearField(
         self,
         field_name: typing_extensions.Literal[
-            "input", b"input", "num_partitions", b"num_partitions", "shuffle", b"shuffle"
+            "_shuffle",
+            b"_shuffle",
+            "input",
+            b"input",
+            "num_partitions",
+            b"num_partitions",
+            "shuffle",
+            b"shuffle",
         ],
     ) -> None: ...
+    def WhichOneof(
+        self, oneof_group: typing_extensions.Literal["_shuffle", b"_shuffle"]
+    ) -> typing_extensions.Literal["shuffle"] | None: ...
 
 global___Repartition = Repartition
 
@@ -1147,38 +1307,16 @@ class ShowString(google.protobuf.message.Message):
         self,
         *,
         input: global___Relation | None = ...,
-        numRows: builtins.int | None = ...,
-        truncate: builtins.int | None = ...,
-        vertical: builtins.bool | None = ...,
+        numRows: builtins.int = ...,
+        truncate: builtins.int = ...,
+        vertical: builtins.bool = ...,
     ) -> None: ...
     def HasField(
-        self,
-        field_name: typing_extensions.Literal[
-            "_numRows",
-            b"_numRows",
-            "_truncate",
-            b"_truncate",
-            "_vertical",
-            b"_vertical",
-            "input",
-            b"input",
-            "numRows",
-            b"numRows",
-            "truncate",
-            b"truncate",
-            "vertical",
-            b"vertical",
-        ],
+        self, field_name: typing_extensions.Literal["input", b"input"]
     ) -> builtins.bool: ...
     def ClearField(
         self,
         field_name: typing_extensions.Literal[
-            "_numRows",
-            b"_numRows",
-            "_truncate",
-            b"_truncate",
-            "_vertical",
-            b"_vertical",
             "input",
             b"input",
             "numRows",
@@ -1189,18 +1327,6 @@ class ShowString(google.protobuf.message.Message):
             b"vertical",
         ],
     ) -> None: ...
-    @typing.overload
-    def WhichOneof(
-        self, oneof_group: typing_extensions.Literal["_numRows", b"_numRows"]
-    ) -> typing_extensions.Literal["numRows"] | None: ...
-    @typing.overload
-    def WhichOneof(
-        self, oneof_group: typing_extensions.Literal["_truncate", b"_truncate"]
-    ) -> typing_extensions.Literal["truncate"] | None: ...
-    @typing.overload
-    def WhichOneof(
-        self, oneof_group: typing_extensions.Literal["_vertical", b"_vertical"]
-    ) -> typing_extensions.Literal["vertical"] | None: ...
 
 global___ShowString = ShowString
 
@@ -1359,12 +1485,12 @@ class RenameColumnsBySameLengthNames(google.protobuf.message.Message):
     COLUMN_NAMES_FIELD_NUMBER: builtins.int
     @property
     def input(self) -> global___Relation:
-        """Required. The input relation."""
+        """(Required) The input relation of RenameColumnsBySameLengthNames."""
     @property
     def column_names(
         self,
     ) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.str]:
-        """Required.
+        """(Required)
 
         The number of columns of the input relation must be equal to the length
         of this field. If this is not true, an exception will be returned.
@@ -1411,12 +1537,12 @@ class RenameColumnsByNameToNameMap(google.protobuf.message.Message):
     RENAME_COLUMNS_MAP_FIELD_NUMBER: builtins.int
     @property
     def input(self) -> global___Relation:
-        """Required. The input relation."""
+        """(Required) The input relation."""
     @property
     def rename_columns_map(
         self,
     ) -> google.protobuf.internal.containers.ScalarMap[builtins.str, builtins.str]:
-        """Required.
+        """(Required)
 
         Renaming column names of input relation from A to B where A is the map key
         and B is the map value. This is a no-op if schema doesn't contain any A. It


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org