You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by yh...@apache.org on 2016/08/11 18:02:15 UTC
spark git commit: [SPARK-17021][SQL] simplify the constructor
parameters of QuantileSummaries
Repository: spark
Updated Branches:
refs/heads/master 0f72e4f04 -> acaf2a81a
[SPARK-17021][SQL] simplify the constructor parameters of QuantileSummaries
## What changes were proposed in this pull request?
1. `sampled` doesn't need to be an `ArrayBuffer`; we never mutate it in place, we only assign a new value to it.
2. `count` doesn't need to be a `var`; we never mutate it.
3. `headSampled` doesn't need to be a constructor parameter; we never pass a non-empty `headSampled` to the constructor.
## How was this patch tested?
Existing tests.
Author: Wenchen Fan <we...@databricks.com>
Closes #14603 from cloud-fan/simply.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/acaf2a81
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/acaf2a81
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/acaf2a81
Branch: refs/heads/master
Commit: acaf2a81ad5238fd1bc81e7be2c328f40c07e755
Parents: 0f72e4f
Author: Wenchen Fan <we...@databricks.com>
Authored: Thu Aug 11 11:02:11 2016 -0700
Committer: Yin Huai <yh...@databricks.com>
Committed: Thu Aug 11 11:02:11 2016 -0700
----------------------------------------------------------------------
.../sql/execution/stat/StatFunctions.scala | 21 ++++++++++----------
1 file changed, 11 insertions(+), 10 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/acaf2a81/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala
index 50eecb4..7c58c48 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala
@@ -114,14 +114,15 @@ object StatFunctions extends Logging {
* See the G-K article for more details.
* @param count the count of all the elements *inserted in the sampled buffer*
* (excluding the head buffer)
- * @param headSampled a buffer of latest samples seen so far
*/
class QuantileSummaries(
val compressThreshold: Int,
val relativeError: Double,
- val sampled: ArrayBuffer[Stats] = ArrayBuffer.empty,
- private[stat] var count: Long = 0L,
- val headSampled: ArrayBuffer[Double] = ArrayBuffer.empty) extends Serializable {
+ val sampled: Array[Stats] = Array.empty,
+ val count: Long = 0L) extends Serializable {
+
+ // a buffer of latest samples seen so far
+ private val headSampled: ArrayBuffer[Double] = ArrayBuffer.empty
import QuantileSummaries._
@@ -186,7 +187,7 @@ object StatFunctions extends Logging {
newSamples.append(sampled(sampleIdx))
sampleIdx += 1
}
- new QuantileSummaries(compressThreshold, relativeError, newSamples, currentCount)
+ new QuantileSummaries(compressThreshold, relativeError, newSamples.toArray, currentCount)
}
/**
@@ -207,7 +208,7 @@ object StatFunctions extends Logging {
}
private def shallowCopy: QuantileSummaries = {
- new QuantileSummaries(compressThreshold, relativeError, sampled, count, headSampled)
+ new QuantileSummaries(compressThreshold, relativeError, sampled, count)
}
/**
@@ -305,11 +306,11 @@ object StatFunctions extends Logging {
private def compressImmut(
currentSamples: IndexedSeq[Stats],
- mergeThreshold: Double): ArrayBuffer[Stats] = {
- val res: ArrayBuffer[Stats] = ArrayBuffer.empty
+ mergeThreshold: Double): Array[Stats] = {
if (currentSamples.isEmpty) {
- return res
+ return Array.empty[Stats]
}
+ val res: ArrayBuffer[Stats] = ArrayBuffer.empty
// Start for the last element, which is always part of the set.
// The head contains the current new head, that may be merged with the current element.
var head = currentSamples.last
@@ -332,7 +333,7 @@ object StatFunctions extends Logging {
res.prepend(head)
// If necessary, add the minimum element:
res.prepend(currentSamples.head)
- res
+ res.toArray
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org