You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2022/11/14 04:15:39 UTC
[spark] branch master updated: [SPARK-41116][CONNECT] Input relation can be optional for Project in Connect proto
This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new e8965a12beb [SPARK-41116][CONNECT] Input relation can be optional for Project in Connect proto
e8965a12beb is described below
commit e8965a12beb067e25c02baf08987616987608545
Author: Rui Wang <ru...@databricks.com>
AuthorDate: Mon Nov 14 12:15:17 2022 +0800
[SPARK-41116][CONNECT] Input relation can be optional for Project in Connect proto
### What changes were proposed in this pull request?
I was writing test cases to test expressions and realized that we can allow `Project` without an input plan. For example, `SELECT 1` is a valid query. For SQL it will generate `OneRowRelation` to make up the input plan, but Connect users shouldn't need to bother appending that relation. Instead, they can just submit a Project with expressions.
Per our design, Proto is also an API layer, and anyone can draft a proto plan without using the built-in clients. This PR will improve the proto usability for `Project`.
### Why are the changes needed?
1. Improve usability.
2. Help write test cases for expressions.
### Does this PR introduce _any_ user-facing change?
NO
### How was this patch tested?
UT
Closes #38632 from amaliujia/SPARK-41116.
Authored-by: Rui Wang <ru...@databricks.com>
Signed-off-by: Wenchen Fan <we...@databricks.com>
---
.../connect/src/main/protobuf/spark/connect/relations.proto | 3 +++
.../main/scala/org/apache/spark/sql/connect/dsl/package.scala | 11 +++++++++++
.../spark/sql/connect/planner/SparkConnectPlanner.scala | 6 +++++-
.../spark/sql/connect/planner/SparkConnectProtoSuite.scala | 4 ++++
python/pyspark/sql/connect/proto/relations_pb2.pyi | 6 +++++-
5 files changed, 28 insertions(+), 2 deletions(-)
diff --git a/connector/connect/src/main/protobuf/spark/connect/relations.proto b/connector/connect/src/main/protobuf/spark/connect/relations.proto
index 4f30b5bfbde..759e9c04e63 100644
--- a/connector/connect/src/main/protobuf/spark/connect/relations.proto
+++ b/connector/connect/src/main/protobuf/spark/connect/relations.proto
@@ -98,6 +98,9 @@ message Read {
// The input relation must be specified.
// The projected expression can be an arbitrary expression.
message Project {
+ // (Optional) Input relation is optional for Project.
+ //
+ // For example, `SELECT ABS(-1)` is valid plan without an input plan.
Relation input = 1;
repeated Expression expressions = 3;
}
diff --git a/connector/connect/src/main/scala/org/apache/spark/sql/connect/dsl/package.scala b/connector/connect/src/main/scala/org/apache/spark/sql/connect/dsl/package.scala
index f55ed835d23..eeffd054c7b 100644
--- a/connector/connect/src/main/scala/org/apache/spark/sql/connect/dsl/package.scala
+++ b/connector/connect/src/main/scala/org/apache/spark/sql/connect/dsl/package.scala
@@ -241,6 +241,17 @@ package object dsl {
}
}
+ def select(exprs: Expression*): Relation = {
+ Relation
+ .newBuilder()
+ .setProject(
+ Project
+ .newBuilder()
+ .addAllExpressions(exprs.toIterable.asJava)
+ .build())
+ .build()
+ }
+
implicit class DslLogicalPlan(val logicalPlan: Relation) {
def select(exprs: Expression*): Relation = {
Relation
diff --git a/connector/connect/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala b/connector/connect/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala
index f8ccc7b62e7..98660c32c4c 100644
--- a/connector/connect/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala
+++ b/connector/connect/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala
@@ -235,7 +235,11 @@ class SparkConnectPlanner(session: SparkSession) {
}
private def transformProject(rel: proto.Project): LogicalPlan = {
- val baseRel = transformRelation(rel.getInput)
+ val baseRel = if (rel.hasInput) {
+ transformRelation(rel.getInput)
+ } else {
+ logical.OneRowRelation()
+ }
// TODO: support the target field for *.
val projection =
if (rel.getExpressionsCount == 1 && rel.getExpressions(0).hasUnresolvedStar) {
diff --git a/connector/connect/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectProtoSuite.scala b/connector/connect/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectProtoSuite.scala
index 53ea1988809..111c6386c52 100644
--- a/connector/connect/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectProtoSuite.scala
+++ b/connector/connect/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectProtoSuite.scala
@@ -429,6 +429,10 @@ class SparkConnectProtoSuite extends PlanTest with SparkConnectPlanTest {
}
}
+ test("Project does not require an input") {
+ comparePlans(select(1), spark.sql("SELECT 1"))
+ }
+
private def createLocalRelationProtoByQualifiedAttributes(
attrs: Seq[proto.Expression.QualifiedAttribute]): proto.Relation = {
val localRelationBuilder = proto.LocalRelation.newBuilder()
diff --git a/python/pyspark/sql/connect/proto/relations_pb2.pyi b/python/pyspark/sql/connect/proto/relations_pb2.pyi
index e706fa3e11d..ea7ef02249e 100644
--- a/python/pyspark/sql/connect/proto/relations_pb2.pyi
+++ b/python/pyspark/sql/connect/proto/relations_pb2.pyi
@@ -437,7 +437,11 @@ class Project(google.protobuf.message.Message):
INPUT_FIELD_NUMBER: builtins.int
EXPRESSIONS_FIELD_NUMBER: builtins.int
@property
- def input(self) -> global___Relation: ...
+ def input(self) -> global___Relation:
+ """(Optional) Input relation is optional for Project.
+
+ For example, `SELECT ABS(-1)` is valid plan without an input plan.
+ """
@property
def expressions(
self,
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org