You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by jo...@apache.org on 2016/09/01 19:15:43 UTC
spark git commit: [SPARK-17331][CORE][MLLIB] Avoid allocating 0-length arrays
Repository: spark
Updated Branches:
refs/heads/master 2be5f8d7e -> 3893e8c57
[SPARK-17331][CORE][MLLIB] Avoid allocating 0-length arrays
## What changes were proposed in this pull request?
Avoid allocating some 0-length arrays, especially in UTF8String, and prefer Array.empty[T] over Array[T]() in Scala code.
## How was this patch tested?
Jenkins
Author: Sean Owen <so...@cloudera.com>
Closes #14895 from srowen/SPARK-17331.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3893e8c5
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3893e8c5
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3893e8c5
Branch: refs/heads/master
Commit: 3893e8c576cf1a6decc18701267ce7cd8caaf521
Parents: 2be5f8d
Author: Sean Owen <so...@cloudera.com>
Authored: Thu Sep 1 12:13:07 2016 -0700
Committer: Josh Rosen <jo...@databricks.com>
Committed: Thu Sep 1 12:13:07 2016 -0700
----------------------------------------------------------------------
.../main/java/org/apache/spark/unsafe/types/UTF8String.java | 8 ++++----
core/src/main/scala/org/apache/spark/MapOutputTracker.scala | 4 ++--
.../main/scala/org/apache/spark/rdd/ZippedWithIndexRDD.scala | 2 +-
docs/streaming-kafka-0-8-integration.md | 2 +-
.../src/main/scala/org/apache/spark/ml/linalg/Matrices.scala | 6 +++---
.../ml/classification/MultinomialLogisticRegression.scala | 2 +-
.../scala/org/apache/spark/ml/feature/OneHotEncoder.scala | 4 ++--
.../main/scala/org/apache/spark/mllib/linalg/Matrices.scala | 6 +++---
.../scala/org/apache/spark/mllib/stat/test/ChiSqTest.scala | 2 +-
.../src/main/java/org/apache/spark/sql/types/DataTypes.java | 2 +-
.../spark/sql/execution/datasources/jdbc/JdbcUtils.scala | 2 +-
11 files changed, 20 insertions(+), 20 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/3893e8c5/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
----------------------------------------------------------------------
diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
index 54a5456..dc03d89 100644
--- a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
+++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
@@ -470,7 +470,7 @@ public final class UTF8String implements Comparable<UTF8String>, Externalizable,
while (e >= 0 && getByte(e) <= 0x20 && getByte(e) >= 0x00) e--;
if (s > e) {
// empty string
- return UTF8String.fromBytes(new byte[0]);
+ return EMPTY_UTF8;
} else {
return copyUTF8String(s, e);
}
@@ -482,7 +482,7 @@ public final class UTF8String implements Comparable<UTF8String>, Externalizable,
while (s < this.numBytes && getByte(s) <= 0x20 && getByte(s) >= 0x00) s++;
if (s == this.numBytes) {
// empty string
- return UTF8String.fromBytes(new byte[0]);
+ return EMPTY_UTF8;
} else {
return copyUTF8String(s, this.numBytes - 1);
}
@@ -495,7 +495,7 @@ public final class UTF8String implements Comparable<UTF8String>, Externalizable,
if (e < 0) {
// empty string
- return UTF8String.fromBytes(new byte[0]);
+ return EMPTY_UTF8;
} else {
return copyUTF8String(0, e);
}
@@ -761,7 +761,7 @@ public final class UTF8String implements Comparable<UTF8String>, Externalizable,
if (numInputs == 0) {
// Return an empty string if there is no input, or all the inputs are null.
- return fromBytes(new byte[0]);
+ return EMPTY_UTF8;
}
// Allocate a new byte array, and copy the inputs one by one into it.
http://git-wip-us.apache.org/repos/asf/spark/blob/3893e8c5/core/src/main/scala/org/apache/spark/MapOutputTracker.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/MapOutputTracker.scala b/core/src/main/scala/org/apache/spark/MapOutputTracker.scala
index 486d535..7f8f0f5 100644
--- a/core/src/main/scala/org/apache/spark/MapOutputTracker.scala
+++ b/core/src/main/scala/org/apache/spark/MapOutputTracker.scala
@@ -383,7 +383,7 @@ private[spark] class MapOutputTrackerMaster(conf: SparkConf,
/** Register multiple map output information for the given shuffle */
def registerMapOutputs(shuffleId: Int, statuses: Array[MapStatus], changeEpoch: Boolean = false) {
- mapStatuses.put(shuffleId, Array[MapStatus]() ++ statuses)
+ mapStatuses.put(shuffleId, statuses.clone())
if (changeEpoch) {
incrementEpoch()
}
@@ -535,7 +535,7 @@ private[spark] class MapOutputTrackerMaster(conf: SparkConf,
true
case None =>
logDebug("cached status not found for : " + shuffleId)
- statuses = mapStatuses.getOrElse(shuffleId, Array[MapStatus]())
+ statuses = mapStatuses.getOrElse(shuffleId, Array.empty[MapStatus])
epochGotten = epoch
false
}
http://git-wip-us.apache.org/repos/asf/spark/blob/3893e8c5/core/src/main/scala/org/apache/spark/rdd/ZippedWithIndexRDD.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/rdd/ZippedWithIndexRDD.scala b/core/src/main/scala/org/apache/spark/rdd/ZippedWithIndexRDD.scala
index 32931d5..b5738b9 100644
--- a/core/src/main/scala/org/apache/spark/rdd/ZippedWithIndexRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/ZippedWithIndexRDD.scala
@@ -43,7 +43,7 @@ class ZippedWithIndexRDD[T: ClassTag](prev: RDD[T]) extends RDD[(T, Long)](prev)
@transient private val startIndices: Array[Long] = {
val n = prev.partitions.length
if (n == 0) {
- Array[Long]()
+ Array.empty
} else if (n == 1) {
Array(0L)
} else {
http://git-wip-us.apache.org/repos/asf/spark/blob/3893e8c5/docs/streaming-kafka-0-8-integration.md
----------------------------------------------------------------------
diff --git a/docs/streaming-kafka-0-8-integration.md b/docs/streaming-kafka-0-8-integration.md
index d3fc9ad..58b17aa 100644
--- a/docs/streaming-kafka-0-8-integration.md
+++ b/docs/streaming-kafka-0-8-integration.md
@@ -139,7 +139,7 @@ Next, we discuss how to use this approach in your streaming application.
<div class="codetabs">
<div data-lang="scala" markdown="1">
// Hold a reference to the current offset ranges, so it can be used downstream
- var offsetRanges = Array[OffsetRange]()
+ var offsetRanges = Array.empty[OffsetRange]
directKafkaStream.transform { rdd =>
offsetRanges = rdd.asInstanceOf[HasOffsetRanges].offsetRanges
http://git-wip-us.apache.org/repos/asf/spark/blob/3893e8c5/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Matrices.scala
----------------------------------------------------------------------
diff --git a/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Matrices.scala b/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Matrices.scala
index f1ecc65..98080bb 100644
--- a/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Matrices.scala
+++ b/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Matrices.scala
@@ -713,7 +713,7 @@ object SparseMatrix {
"The expected number of nonzeros cannot be greater than Int.MaxValue.")
val nnz = math.ceil(expected).toInt
if (density == 0.0) {
- new SparseMatrix(numRows, numCols, new Array[Int](numCols + 1), Array[Int](), Array[Double]())
+ new SparseMatrix(numRows, numCols, new Array[Int](numCols + 1), Array.empty, Array.empty)
} else if (density == 1.0) {
val colPtrs = Array.tabulate(numCols + 1)(j => j * numRows)
val rowIndices = Array.tabulate(size.toInt)(idx => idx % numRows)
@@ -961,7 +961,7 @@ object Matrices {
@Since("2.0.0")
def horzcat(matrices: Array[Matrix]): Matrix = {
if (matrices.isEmpty) {
- return new DenseMatrix(0, 0, Array[Double]())
+ return new DenseMatrix(0, 0, Array.empty)
} else if (matrices.length == 1) {
return matrices(0)
}
@@ -1020,7 +1020,7 @@ object Matrices {
@Since("2.0.0")
def vertcat(matrices: Array[Matrix]): Matrix = {
if (matrices.isEmpty) {
- return new DenseMatrix(0, 0, Array[Double]())
+ return new DenseMatrix(0, 0, Array.empty)
} else if (matrices.length == 1) {
return matrices(0)
}
http://git-wip-us.apache.org/repos/asf/spark/blob/3893e8c5/mllib/src/main/scala/org/apache/spark/ml/classification/MultinomialLogisticRegression.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/MultinomialLogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/MultinomialLogisticRegression.scala
index f85ac76..006f57c 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/MultinomialLogisticRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/MultinomialLogisticRegression.scala
@@ -363,7 +363,7 @@ class MultinomialLogisticRegression @Since("2.1.0") (
rawCoefficients(coefIndex)
}
} else {
- Array[Double]()
+ Array.empty
}
val coefficientArray: Array[Double] = Array.tabulate(numClasses * numFeatures) { i =>
http://git-wip-us.apache.org/repos/asf/spark/blob/3893e8c5/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala
index 8b04b5d..e8e28ba 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala
@@ -164,8 +164,8 @@ class OneHotEncoder @Since("1.4.0") (@Since("1.4.0") override val uid: String) e
// data transformation
val size = outputAttrGroup.size
val oneValue = Array(1.0)
- val emptyValues = Array[Double]()
- val emptyIndices = Array[Int]()
+ val emptyValues = Array.empty[Double]
+ val emptyIndices = Array.empty[Int]
val encode = udf { label: Double =>
if (label < size) {
Vectors.sparse(size, Array(label.toInt), oneValue)
http://git-wip-us.apache.org/repos/asf/spark/blob/3893e8c5/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala
index 4c39cf1..ad882c9 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala
@@ -842,7 +842,7 @@ object SparseMatrix {
"The expected number of nonzeros cannot be greater than Int.MaxValue.")
val nnz = math.ceil(expected).toInt
if (density == 0.0) {
- new SparseMatrix(numRows, numCols, new Array[Int](numCols + 1), Array[Int](), Array[Double]())
+ new SparseMatrix(numRows, numCols, new Array[Int](numCols + 1), Array.empty, Array.empty)
} else if (density == 1.0) {
val colPtrs = Array.tabulate(numCols + 1)(j => j * numRows)
val rowIndices = Array.tabulate(size.toInt)(idx => idx % numRows)
@@ -1098,7 +1098,7 @@ object Matrices {
@Since("1.3.0")
def horzcat(matrices: Array[Matrix]): Matrix = {
if (matrices.isEmpty) {
- return new DenseMatrix(0, 0, Array[Double]())
+ return new DenseMatrix(0, 0, Array.empty)
} else if (matrices.length == 1) {
return matrices(0)
}
@@ -1157,7 +1157,7 @@ object Matrices {
@Since("1.3.0")
def vertcat(matrices: Array[Matrix]): Matrix = {
if (matrices.isEmpty) {
- return new DenseMatrix(0, 0, Array[Double]())
+ return new DenseMatrix(0, 0, Array.empty[Double])
} else if (matrices.length == 1) {
return matrices(0)
}
http://git-wip-us.apache.org/repos/asf/spark/blob/3893e8c5/mllib/src/main/scala/org/apache/spark/mllib/stat/test/ChiSqTest.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/test/ChiSqTest.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/test/ChiSqTest.scala
index da5df9b..9a63b8a 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/stat/test/ChiSqTest.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/test/ChiSqTest.scala
@@ -146,7 +146,7 @@ private[stat] object ChiSqTest extends Logging {
* Uniform distribution is assumed when `expected` is not passed in.
*/
def chiSquared(observed: Vector,
- expected: Vector = Vectors.dense(Array[Double]()),
+ expected: Vector = Vectors.dense(Array.empty[Double]),
methodName: String = PEARSON.name): ChiSqTestResult = {
// Validate input arguments
http://git-wip-us.apache.org/repos/asf/spark/blob/3893e8c5/sql/catalyst/src/main/java/org/apache/spark/sql/types/DataTypes.java
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/types/DataTypes.java b/sql/catalyst/src/main/java/org/apache/spark/sql/types/DataTypes.java
index 24adead..747ab18 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/types/DataTypes.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/types/DataTypes.java
@@ -191,7 +191,7 @@ public class DataTypes {
* Creates a StructType with the given list of StructFields ({@code fields}).
*/
public static StructType createStructType(List<StructField> fields) {
- return createStructType(fields.toArray(new StructField[0]));
+ return createStructType(fields.toArray(new StructField[fields.size()]));
}
/**
http://git-wip-us.apache.org/repos/asf/spark/blob/3893e8c5/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala
index cbd5046..37153e5 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala
@@ -322,6 +322,7 @@ object JdbcUtils extends Logging {
conn.commit()
}
committed = true
+ Iterator.empty
} catch {
case e: SQLException =>
val cause = e.getNextException
@@ -351,7 +352,6 @@ object JdbcUtils extends Logging {
}
}
}
- Array[Byte]().iterator
}
/**
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org