You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by rx...@apache.org on 2014/05/26 09:17:23 UTC

git commit: [SPARK-1914] [SQL] Simplify CountFunction not to traverse to evaluate all child expressions.

Repository: spark
Updated Branches:
  refs/heads/master b6d22af04 -> d6395d86f


[SPARK-1914] [SQL] Simplify CountFunction not to traverse to evaluate all child expressions.

`CountFunction` should count up only if the child's evaluated value is not null.

Because it traverses to evaluate all child expressions, even if the child is null, it counts up if one of the all children is not null.

Author: Takuya UESHIN <ue...@happy-camper.st>

Closes #861 from ueshin/issues/SPARK-1914 and squashes the following commits:

3b37315 [Takuya UESHIN] Merge branch 'master' into issues/SPARK-1914
2afa238 [Takuya UESHIN] Simplify CountFunction not to traverse to evaluate all child expressions.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d6395d86
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d6395d86
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d6395d86

Branch: refs/heads/master
Commit: d6395d86f90d1c47c5b6ad17c618b56e00b7fc85
Parents: b6d22af
Author: Takuya UESHIN <ue...@happy-camper.st>
Authored: Mon May 26 00:17:20 2014 -0700
Committer: Reynold Xin <rx...@apache.org>
Committed: Mon May 26 00:17:20 2014 -0700

----------------------------------------------------------------------
 .../org/apache/spark/sql/catalyst/expressions/aggregates.scala  | 4 ++--
 .../src/test/scala/org/apache/spark/sql/DslQuerySuite.scala     | 5 +++++
 2 files changed, 7 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/d6395d86/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregates.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregates.scala
index 1bcd4e2..79937b1 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregates.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregates.scala
@@ -298,8 +298,8 @@ case class CountFunction(expr: Expression, base: AggregateExpression) extends Ag
   var count: Long = _
 
   override def update(input: Row): Unit = {
-    val evaluatedExpr = expr.map(_.eval(input))
-    if (evaluatedExpr.map(_ != null).reduceLeft(_ || _)) {
+    val evaluatedExpr = expr.eval(input)
+    if (evaluatedExpr != null) {
       count += 1L
     }
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/d6395d86/sql/core/src/test/scala/org/apache/spark/sql/DslQuerySuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DslQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DslQuerySuite.scala
index 692569a..8197e8a 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DslQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DslQuerySuite.scala
@@ -126,6 +126,11 @@ class DslQuerySuite extends QueryTest {
     )
 
     checkAnswer(
+      testData3.groupBy('a)('a, Count('a + 'b)),
+      Seq((1,0), (2, 1))
+    )
+
+    checkAnswer(
       testData3.groupBy()(Count('a), Count('b), Count(1), CountDistinct('a :: Nil), CountDistinct('b :: Nil)),
       (2, 1, 2, 2, 1) :: Nil
     )