You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by li...@apache.org on 2017/10/13 01:47:20 UTC
spark git commit: [SPARK-22263][SQL] Refactor deterministic as lazy
value
Repository: spark
Updated Branches:
refs/heads/master 9104add4c -> 3ff766f61
[SPARK-22263][SQL] Refactor deterministic as lazy value
## What changes were proposed in this pull request?
The method `deterministic` is frequently called in optimizer.
Refactor `deterministic` as lazy value, in order to avoid redundant computations.
## How was this patch tested?
Simple benchmark test over TPC-DS queries, run time from query string to optimized plan(continuous 20 runs, and get the average of last 5 results):
Before changes: 12601 ms
After changes: 11993ms
This is 4.8% performance improvement.
Also run test with Unit test.
Author: Wang Gengliang <lt...@gmail.com>
Closes #19478 from gengliangwang/deterministicAsLazyVal.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3ff766f6
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3ff766f6
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3ff766f6
Branch: refs/heads/master
Commit: 3ff766f61afbd09dcc7a73eae02e68a39114ce3f
Parents: 9104add
Author: Wang Gengliang <lt...@gmail.com>
Authored: Thu Oct 12 18:47:16 2017 -0700
Committer: gatorsmile <ga...@gmail.com>
Committed: Thu Oct 12 18:47:16 2017 -0700
----------------------------------------------------------------------
.../spark/sql/catalyst/expressions/CallMethodViaReflection.scala | 2 +-
.../org/apache/spark/sql/catalyst/expressions/Expression.scala | 4 ++--
.../org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala | 2 +-
.../apache/spark/sql/catalyst/expressions/aggregate/First.scala | 2 +-
.../apache/spark/sql/catalyst/expressions/aggregate/Last.scala | 2 +-
.../spark/sql/catalyst/expressions/aggregate/collect.scala | 2 +-
.../scala/org/apache/spark/sql/catalyst/expressions/misc.scala | 2 +-
.../spark/sql/execution/aggregate/TypedAggregateExpression.scala | 4 ++--
.../scala/org/apache/spark/sql/execution/aggregate/udaf.scala | 2 +-
.../org/apache/spark/sql/TypedImperativeAggregateSuite.scala | 2 +-
sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala | 4 ++--
11 files changed, 14 insertions(+), 14 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/3ff766f6/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/CallMethodViaReflection.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/CallMethodViaReflection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/CallMethodViaReflection.scala
index cd97304..65bb9a8 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/CallMethodViaReflection.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/CallMethodViaReflection.scala
@@ -76,7 +76,7 @@ case class CallMethodViaReflection(children: Seq[Expression])
}
}
- override def deterministic: Boolean = false
+ override lazy val deterministic: Boolean = false
override def nullable: Boolean = true
override val dataType: DataType = StringType
http://git-wip-us.apache.org/repos/asf/spark/blob/3ff766f6/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
index c058425..0e75ac8 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
@@ -79,7 +79,7 @@ abstract class Expression extends TreeNode[Expression] {
* An example would be `SparkPartitionID` that relies on the partition id returned by TaskContext.
* By default leaf expressions are deterministic as Nil.forall(_.deterministic) returns true.
*/
- def deterministic: Boolean = children.forall(_.deterministic)
+ lazy val deterministic: Boolean = children.forall(_.deterministic)
def nullable: Boolean
@@ -265,7 +265,7 @@ trait NonSQLExpression extends Expression {
* An expression that is nondeterministic.
*/
trait Nondeterministic extends Expression {
- final override def deterministic: Boolean = false
+ final override lazy val deterministic: Boolean = false
final override def foldable: Boolean = false
@transient
http://git-wip-us.apache.org/repos/asf/spark/blob/3ff766f6/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala
index 527f167..1798530 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala
@@ -49,7 +49,7 @@ case class ScalaUDF(
udfDeterministic: Boolean = true)
extends Expression with ImplicitCastInputTypes with NonSQLExpression with UserDefinedExpression {
- override def deterministic: Boolean = udfDeterministic && children.forall(_.deterministic)
+ override lazy val deterministic: Boolean = udfDeterministic && children.forall(_.deterministic)
override def toString: String =
s"${udfName.map(name => s"UDF:$name").getOrElse("UDF")}(${children.mkString(", ")})"
http://git-wip-us.apache.org/repos/asf/spark/blob/3ff766f6/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/First.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/First.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/First.scala
index bfc58c2..4e671e1 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/First.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/First.scala
@@ -44,7 +44,7 @@ case class First(child: Expression, ignoreNullsExpr: Expression)
override def nullable: Boolean = true
// First is not a deterministic function.
- override def deterministic: Boolean = false
+ override lazy val deterministic: Boolean = false
// Return data type.
override def dataType: DataType = child.dataType
http://git-wip-us.apache.org/repos/asf/spark/blob/3ff766f6/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Last.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Last.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Last.scala
index 96a6ec0..0ccabb9 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Last.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Last.scala
@@ -44,7 +44,7 @@ case class Last(child: Expression, ignoreNullsExpr: Expression)
override def nullable: Boolean = true
// Last is not a deterministic function.
- override def deterministic: Boolean = false
+ override lazy val deterministic: Boolean = false
// Return data type.
override def dataType: DataType = child.dataType
http://git-wip-us.apache.org/repos/asf/spark/blob/3ff766f6/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/collect.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/collect.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/collect.scala
index 405c206..be972f0 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/collect.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/collect.scala
@@ -44,7 +44,7 @@ abstract class Collect[T <: Growable[Any] with Iterable[Any]] extends TypedImper
// Both `CollectList` and `CollectSet` are non-deterministic since their results depend on the
// actual order of input rows.
- override def deterministic: Boolean = false
+ override lazy val deterministic: Boolean = false
override def update(buffer: T, input: InternalRow): T = {
val value = child.eval(input)
http://git-wip-us.apache.org/repos/asf/spark/blob/3ff766f6/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala
index ef293ff..b86e271 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala
@@ -119,7 +119,7 @@ case class CurrentDatabase() extends LeafExpression with Unevaluable {
// scalastyle:on line.size.limit
case class Uuid() extends LeafExpression {
- override def deterministic: Boolean = false
+ override lazy val deterministic: Boolean = false
override def nullable: Boolean = false
http://git-wip-us.apache.org/repos/asf/spark/blob/3ff766f6/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/TypedAggregateExpression.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/TypedAggregateExpression.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/TypedAggregateExpression.scala
index 717758f..aab8cc5 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/TypedAggregateExpression.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/TypedAggregateExpression.scala
@@ -127,7 +127,7 @@ case class SimpleTypedAggregateExpression(
nullable: Boolean)
extends DeclarativeAggregate with TypedAggregateExpression with NonSQLExpression {
- override def deterministic: Boolean = true
+ override lazy val deterministic: Boolean = true
override def children: Seq[Expression] = inputDeserializer.toSeq :+ bufferDeserializer
@@ -221,7 +221,7 @@ case class ComplexTypedAggregateExpression(
inputAggBufferOffset: Int = 0)
extends TypedImperativeAggregate[Any] with TypedAggregateExpression with NonSQLExpression {
- override def deterministic: Boolean = true
+ override lazy val deterministic: Boolean = true
override def children: Seq[Expression] = inputDeserializer.toSeq
http://git-wip-us.apache.org/repos/asf/spark/blob/3ff766f6/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/udaf.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/udaf.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/udaf.scala
index fec1add..72aa4ad 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/udaf.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/udaf.scala
@@ -340,7 +340,7 @@ case class ScalaUDAF(
override def dataType: DataType = udaf.dataType
- override def deterministic: Boolean = udaf.deterministic
+ override lazy val deterministic: Boolean = udaf.deterministic
override val inputTypes: Seq[DataType] = udaf.inputSchema.map(_.dataType)
http://git-wip-us.apache.org/repos/asf/spark/blob/3ff766f6/sql/core/src/test/scala/org/apache/spark/sql/TypedImperativeAggregateSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/TypedImperativeAggregateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/TypedImperativeAggregateSuite.scala
index b76f168..c5fb173 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/TypedImperativeAggregateSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/TypedImperativeAggregateSuite.scala
@@ -268,7 +268,7 @@ object TypedImperativeAggregateSuite {
}
}
- override def deterministic: Boolean = true
+ override lazy val deterministic: Boolean = true
override def children: Seq[Expression] = Seq(child)
http://git-wip-us.apache.org/repos/asf/spark/blob/3ff766f6/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala
index e9bdcf0..68af99e 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala
@@ -48,7 +48,7 @@ private[hive] case class HiveSimpleUDF(
with Logging
with UserDefinedExpression {
- override def deterministic: Boolean = isUDFDeterministic && children.forall(_.deterministic)
+ override lazy val deterministic: Boolean = isUDFDeterministic && children.forall(_.deterministic)
override def nullable: Boolean = true
@@ -131,7 +131,7 @@ private[hive] case class HiveGenericUDF(
override def nullable: Boolean = true
- override def deterministic: Boolean = isUDFDeterministic && children.forall(_.deterministic)
+ override lazy val deterministic: Boolean = isUDFDeterministic && children.forall(_.deterministic)
override def foldable: Boolean =
isUDFDeterministic && returnInspector.isInstanceOf[ConstantObjectInspector]
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org