You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by yh...@apache.org on 2016/08/11 18:02:15 UTC
spark git commit: [SPARK-17021][SQL] simplify the constructor parameters of QuantileSummaries

Repository: spark
Updated Branches:
  refs/heads/master 0f72e4f04 -> acaf2a81a


[SPARK-17021][SQL] simplify the constructor parameters of QuantileSummaries

## What changes were proposed in this pull request?

1. `sampled` doesn't need to be an `ArrayBuffer`: we never update it in place, we only assign a new value to it.
2. `count` doesn't need to be a `var`: we never mutate it.
3. `headSampled` doesn't need to be a constructor parameter: we never pass a non-empty `headSampled` to the constructor.

## How was this patch tested?

existing tests.

Author: Wenchen Fan <we...@databricks.com>

Closes #14603 from cloud-fan/simply.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/acaf2a81
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/acaf2a81
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/acaf2a81

Branch: refs/heads/master
Commit: acaf2a81ad5238fd1bc81e7be2c328f40c07e755
Parents: 0f72e4f
Author: Wenchen Fan <we...@databricks.com>
Authored: Thu Aug 11 11:02:11 2016 -0700
Committer: Yin Huai <yh...@databricks.com>
Committed: Thu Aug 11 11:02:11 2016 -0700

----------------------------------------------------------------------
 .../sql/execution/stat/StatFunctions.scala      | 21 ++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/acaf2a81/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala
index 50eecb4..7c58c48 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala
@@ -114,14 +114,15 @@ object StatFunctions extends Logging {
    *   See the G-K article for more details.
    * @param count the count of all the elements *inserted in the sampled buffer*
    *              (excluding the head buffer)
-   * @param headSampled a buffer of latest samples seen so far
    */
   class QuantileSummaries(
       val compressThreshold: Int,
       val relativeError: Double,
-      val sampled: ArrayBuffer[Stats] = ArrayBuffer.empty,
-      private[stat] var count: Long = 0L,
-      val headSampled: ArrayBuffer[Double] = ArrayBuffer.empty) extends Serializable {
+      val sampled: Array[Stats] = Array.empty,
+      val count: Long = 0L) extends Serializable {
+
+    // a buffer of latest samples seen so far
+    private val headSampled: ArrayBuffer[Double] = ArrayBuffer.empty
 
     import QuantileSummaries._
 
@@ -186,7 +187,7 @@ object StatFunctions extends Logging {
         newSamples.append(sampled(sampleIdx))
         sampleIdx += 1
       }
-      new QuantileSummaries(compressThreshold, relativeError, newSamples, currentCount)
+      new QuantileSummaries(compressThreshold, relativeError, newSamples.toArray, currentCount)
     }
 
     /**
@@ -207,7 +208,7 @@ object StatFunctions extends Logging {
     }
 
     private def shallowCopy: QuantileSummaries = {
-      new QuantileSummaries(compressThreshold, relativeError, sampled, count, headSampled)
+      new QuantileSummaries(compressThreshold, relativeError, sampled, count)
     }
 
     /**
@@ -305,11 +306,11 @@ object StatFunctions extends Logging {
 
     private def compressImmut(
         currentSamples: IndexedSeq[Stats],
-        mergeThreshold: Double): ArrayBuffer[Stats] = {
-      val res: ArrayBuffer[Stats] = ArrayBuffer.empty
+        mergeThreshold: Double): Array[Stats] = {
       if (currentSamples.isEmpty) {
-        return res
+        return Array.empty[Stats]
       }
+      val res: ArrayBuffer[Stats] = ArrayBuffer.empty
       // Start for the last element, which is always part of the set.
       // The head contains the current new head, that may be merged with the current element.
       var head = currentSamples.last
@@ -332,7 +333,7 @@ object StatFunctions extends Logging {
       res.prepend(head)
       // If necessary, add the minimum element:
       res.prepend(currentSamples.head)
-      res
+      res.toArray
     }
   }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org