You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2022/11/14 04:15:39 UTC

[spark] branch master updated: [SPARK-41116][CONNECT] Input relation can be optional for Project in Connect proto

This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new e8965a12beb [SPARK-41116][CONNECT] Input relation can be optional for Project in Connect proto
e8965a12beb is described below

commit e8965a12beb067e25c02baf08987616987608545
Author: Rui Wang <ru...@databricks.com>
AuthorDate: Mon Nov 14 12:15:17 2022 +0800

    [SPARK-41116][CONNECT] Input relation can be optional for Project in Connect proto
    
    ### What changes were proposed in this pull request?
    
    I was writing test cases to test expressions and realized that we can allow `Project` without an input plan. For example, `SELECT 1` is a valid query. For SQL, a `OneRowRelation` is generated to make up the input plan, but Connect users shouldn't need to bother appending that relation themselves. Instead, they can just submit a `Project` with expressions.
    
    Per our design, the proto is also an API layer, and anyone can draft a proto plan without using the built-in clients. This PR improves the proto usability for `Project`.
    
    ### Why are the changes needed?
    
    1. Improve usability.
    2. Help write test cases for expressions.
    
    ### Does this PR introduce _any_ user-facing change?
    
    NO
    
    ### How was this patch tested?
    
    UT
    
    Closes #38632 from amaliujia/SPARK-41116.
    
    Authored-by: Rui Wang <ru...@databricks.com>
    Signed-off-by: Wenchen Fan <we...@databricks.com>
---
 .../connect/src/main/protobuf/spark/connect/relations.proto   |  3 +++
 .../main/scala/org/apache/spark/sql/connect/dsl/package.scala | 11 +++++++++++
 .../spark/sql/connect/planner/SparkConnectPlanner.scala       |  6 +++++-
 .../spark/sql/connect/planner/SparkConnectProtoSuite.scala    |  4 ++++
 python/pyspark/sql/connect/proto/relations_pb2.pyi            |  6 +++++-
 5 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/connector/connect/src/main/protobuf/spark/connect/relations.proto b/connector/connect/src/main/protobuf/spark/connect/relations.proto
index 4f30b5bfbde..759e9c04e63 100644
--- a/connector/connect/src/main/protobuf/spark/connect/relations.proto
+++ b/connector/connect/src/main/protobuf/spark/connect/relations.proto
@@ -98,6 +98,9 @@ message Read {
 // The input relation must be specified.
 // The projected expression can be an arbitrary expression.
 message Project {
+  // (Optional) Input relation is optional for Project.
+  //
+  // For example, `SELECT ABS(-1)` is valid plan without an input plan.
   Relation input = 1;
   repeated Expression expressions = 3;
 }
diff --git a/connector/connect/src/main/scala/org/apache/spark/sql/connect/dsl/package.scala b/connector/connect/src/main/scala/org/apache/spark/sql/connect/dsl/package.scala
index f55ed835d23..eeffd054c7b 100644
--- a/connector/connect/src/main/scala/org/apache/spark/sql/connect/dsl/package.scala
+++ b/connector/connect/src/main/scala/org/apache/spark/sql/connect/dsl/package.scala
@@ -241,6 +241,17 @@ package object dsl {
       }
     }
 
+    def select(exprs: Expression*): Relation = {
+      Relation
+        .newBuilder()
+        .setProject(
+          Project
+            .newBuilder()
+            .addAllExpressions(exprs.toIterable.asJava)
+            .build())
+        .build()
+    }
+
     implicit class DslLogicalPlan(val logicalPlan: Relation) {
       def select(exprs: Expression*): Relation = {
         Relation
diff --git a/connector/connect/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala b/connector/connect/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala
index f8ccc7b62e7..98660c32c4c 100644
--- a/connector/connect/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala
+++ b/connector/connect/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala
@@ -235,7 +235,11 @@ class SparkConnectPlanner(session: SparkSession) {
   }
 
   private def transformProject(rel: proto.Project): LogicalPlan = {
-    val baseRel = transformRelation(rel.getInput)
+    val baseRel = if (rel.hasInput) {
+      transformRelation(rel.getInput)
+    } else {
+      logical.OneRowRelation()
+    }
     // TODO: support the target field for *.
     val projection =
       if (rel.getExpressionsCount == 1 && rel.getExpressions(0).hasUnresolvedStar) {
diff --git a/connector/connect/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectProtoSuite.scala b/connector/connect/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectProtoSuite.scala
index 53ea1988809..111c6386c52 100644
--- a/connector/connect/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectProtoSuite.scala
+++ b/connector/connect/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectProtoSuite.scala
@@ -429,6 +429,10 @@ class SparkConnectProtoSuite extends PlanTest with SparkConnectPlanTest {
     }
   }
 
+  test("Project does not require an input") {
+    comparePlans(select(1), spark.sql("SELECT 1"))
+  }
+
   private def createLocalRelationProtoByQualifiedAttributes(
       attrs: Seq[proto.Expression.QualifiedAttribute]): proto.Relation = {
     val localRelationBuilder = proto.LocalRelation.newBuilder()
diff --git a/python/pyspark/sql/connect/proto/relations_pb2.pyi b/python/pyspark/sql/connect/proto/relations_pb2.pyi
index e706fa3e11d..ea7ef02249e 100644
--- a/python/pyspark/sql/connect/proto/relations_pb2.pyi
+++ b/python/pyspark/sql/connect/proto/relations_pb2.pyi
@@ -437,7 +437,11 @@ class Project(google.protobuf.message.Message):
     INPUT_FIELD_NUMBER: builtins.int
     EXPRESSIONS_FIELD_NUMBER: builtins.int
     @property
-    def input(self) -> global___Relation: ...
+    def input(self) -> global___Relation:
+        """(Optional) Input relation is optional for Project.
+
+        For example, `SELECT ABS(-1)` is valid plan without an input plan.
+        """
     @property
     def expressions(
         self,


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org