Posted to commits@spark.apache.org by sr...@apache.org on 2016/03/03 10:54:20 UTC
[2/3] spark git commit: [SPARK-13423][WIP][CORE][SQL][STREAMING] Static analysis fixes for 2.x
http://git-wip-us.apache.org/repos/asf/spark/blob/e97fc7f1/examples/src/main/scala/org/apache/spark/examples/streaming/clickstream/PageViewStream.scala
----------------------------------------------------------------------
diff --git a/examples/src/main/scala/org/apache/spark/examples/streaming/clickstream/PageViewStream.scala b/examples/src/main/scala/org/apache/spark/examples/streaming/clickstream/PageViewStream.scala
index 4b43550..773a2e5 100644
--- a/examples/src/main/scala/org/apache/spark/examples/streaming/clickstream/PageViewStream.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/streaming/clickstream/PageViewStream.scala
@@ -69,7 +69,7 @@ object PageViewStream {
.groupByKey()
val errorRatePerZipCode = statusesPerZipCode.map{
case(zip, statuses) =>
- val normalCount = statuses.filter(_ == 200).size
+ val normalCount = statuses.count(_ == 200)
val errorCount = statuses.size - normalCount
val errorRatio = errorCount.toFloat / statuses.size
if (errorRatio > 0.05) {
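[Editor's note: the hunk above replaces filter-then-size with count, which avoids building an intermediate collection just to measure it. A standalone sketch of the same pattern, with made-up values rather than the Spark stream data:]

object CountSketch {
  def main(args: Array[String]): Unit = {
    val statuses = Seq(200, 200, 404, 500, 200)

    // Builds an intermediate Seq only to take its size.
    val normalCountOld = statuses.filter(_ == 200).size

    // Counts matching elements directly, no intermediate collection.
    val normalCountNew = statuses.count(_ == 200)

    assert(normalCountOld == normalCountNew)
  }
}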
http://git-wip-us.apache.org/repos/asf/spark/blob/e97fc7f1/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaRDD.scala
----------------------------------------------------------------------
diff --git a/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaRDD.scala b/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaRDD.scala
index 4eb1556..475167a 100644
--- a/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaRDD.scala
+++ b/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaRDD.scala
@@ -79,7 +79,7 @@ class KafkaRDD[
.map(_.asInstanceOf[KafkaRDDPartition])
.filter(_.count > 0)
- if (num < 1 || nonEmptyPartitions.size < 1) {
+ if (num < 1 || nonEmptyPartitions.isEmpty) {
return new Array[R](0)
}
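[Editor's note: comparing a size against 1 is replaced by isEmpty, which states the intent and skips computing the size. A minimal sketch with a hypothetical stand-in for the partition list:]

object EmptinessSketch {
  def main(args: Array[String]): Unit = {
    val nonEmptyPartitions = Seq.empty[Int] // hypothetical stand-in for the RDD partitions

    // Equivalent checks; isEmpty reads the intent directly.
    val before = nonEmptyPartitions.size < 1
    val after  = nonEmptyPartitions.isEmpty

    assert(before == after)
  }
}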
http://git-wip-us.apache.org/repos/asf/spark/blob/e97fc7f1/graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala
----------------------------------------------------------------------
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala b/graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala
index 3e8c385..87f3bc3 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala
@@ -284,7 +284,7 @@ class GraphOps[VD: ClassTag, ED: ClassTag](graph: Graph[VD, ED]) extends Seriali
if (selectedVertices.count > 1) {
found = true
val collectedVertices = selectedVertices.collect()
- retVal = collectedVertices(Random.nextInt(collectedVertices.size))
+ retVal = collectedVertices(Random.nextInt(collectedVertices.length))
}
}
retVal
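[Editor's note: on an Array (and likewise on a String), .size goes through the implicit ArrayOps/StringOps wrapper, while .length is the underlying length field itself; the result is identical, so this and the many similar .size -> .length changes below are about removing the extra indirection flagged by static analysis. A minimal illustration with made-up values:]

object ArrayLengthSketch {
  def main(args: Array[String]): Unit = {
    val collectedVertices = Array(1L, 2L, 3L)

    // .size is provided by an implicit conversion to ArrayOps;
    // .length reads the array's length field directly.
    assert(collectedVertices.size == collectedVertices.length)

    val idx = scala.util.Random.nextInt(collectedVertices.length)
    println(collectedVertices(idx))
  }
}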
http://git-wip-us.apache.org/repos/asf/spark/blob/e97fc7f1/graphx/src/main/scala/org/apache/spark/graphx/VertexRDD.scala
----------------------------------------------------------------------
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/VertexRDD.scala b/graphx/src/main/scala/org/apache/spark/graphx/VertexRDD.scala
index 53a9f92..5a0c479 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/VertexRDD.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/VertexRDD.scala
@@ -276,7 +276,7 @@ object VertexRDD {
def apply[VD: ClassTag](vertices: RDD[(VertexId, VD)]): VertexRDD[VD] = {
val vPartitioned: RDD[(VertexId, VD)] = vertices.partitioner match {
case Some(p) => vertices
- case None => vertices.partitionBy(new HashPartitioner(vertices.partitions.size))
+ case None => vertices.partitionBy(new HashPartitioner(vertices.partitions.length))
}
val vertexPartitions = vPartitioned.mapPartitions(
iter => Iterator(ShippableVertexPartition(iter)),
@@ -317,7 +317,7 @@ object VertexRDD {
): VertexRDD[VD] = {
val vPartitioned: RDD[(VertexId, VD)] = vertices.partitioner match {
case Some(p) => vertices
- case None => vertices.partitionBy(new HashPartitioner(vertices.partitions.size))
+ case None => vertices.partitionBy(new HashPartitioner(vertices.partitions.length))
}
val routingTables = createRoutingTables(edges, vPartitioned.partitioner.get)
val vertexPartitions = vPartitioned.zipPartitions(routingTables, preservesPartitioning = true) {
@@ -358,7 +358,7 @@ object VertexRDD {
Function.tupled(RoutingTablePartition.edgePartitionToMsgs)))
.setName("VertexRDD.createRoutingTables - vid2pid (aggregation)")
- val numEdgePartitions = edges.partitions.size
+ val numEdgePartitions = edges.partitions.length
vid2pid.partitionBy(vertexPartitioner).mapPartitions(
iter => Iterator(RoutingTablePartition.fromMsgs(numEdgePartitions, iter)),
preservesPartitioning = true)
http://git-wip-us.apache.org/repos/asf/spark/blob/e97fc7f1/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgePartition.scala
----------------------------------------------------------------------
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgePartition.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgePartition.scala
index ab021a2..b1da781 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgePartition.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgePartition.scala
@@ -151,9 +151,9 @@ class EdgePartition[
* applied to each edge
*/
def map[ED2: ClassTag](f: Edge[ED] => ED2): EdgePartition[ED2, VD] = {
- val newData = new Array[ED2](data.size)
+ val newData = new Array[ED2](data.length)
val edge = new Edge[ED]()
- val size = data.size
+ val size = data.length
var i = 0
while (i < size) {
edge.srcId = srcIds(i)
@@ -179,13 +179,13 @@ class EdgePartition[
*/
def map[ED2: ClassTag](iter: Iterator[ED2]): EdgePartition[ED2, VD] = {
// Faster than iter.toArray, because the expected size is known.
- val newData = new Array[ED2](data.size)
+ val newData = new Array[ED2](data.length)
var i = 0
while (iter.hasNext) {
newData(i) = iter.next()
i += 1
}
- assert(newData.size == i)
+ assert(newData.length == i)
this.withData(newData)
}
@@ -311,7 +311,7 @@ class EdgePartition[
*
* @return size of the partition
*/
- val size: Int = localSrcIds.size
+ val size: Int = localSrcIds.length
/** The number of unique source vertices in the partition. */
def indexSize: Int = index.size
http://git-wip-us.apache.org/repos/asf/spark/blob/e97fc7f1/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgePartitionBuilder.scala
----------------------------------------------------------------------
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgePartitionBuilder.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgePartitionBuilder.scala
index b122969..da3db3c 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgePartitionBuilder.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgePartitionBuilder.scala
@@ -38,9 +38,9 @@ class EdgePartitionBuilder[@specialized(Long, Int, Double) ED: ClassTag, VD: Cla
val edgeArray = edges.trim().array
new Sorter(Edge.edgeArraySortDataFormat[ED])
.sort(edgeArray, 0, edgeArray.length, Edge.lexicographicOrdering)
- val localSrcIds = new Array[Int](edgeArray.size)
- val localDstIds = new Array[Int](edgeArray.size)
- val data = new Array[ED](edgeArray.size)
+ val localSrcIds = new Array[Int](edgeArray.length)
+ val localDstIds = new Array[Int](edgeArray.length)
+ val data = new Array[ED](edgeArray.length)
val index = new GraphXPrimitiveKeyOpenHashMap[VertexId, Int]
val global2local = new GraphXPrimitiveKeyOpenHashMap[VertexId, Int]
val local2global = new PrimitiveVector[VertexId]
@@ -52,7 +52,7 @@ class EdgePartitionBuilder[@specialized(Long, Int, Double) ED: ClassTag, VD: Cla
var currSrcId: VertexId = edgeArray(0).srcId
var currLocalId = -1
var i = 0
- while (i < edgeArray.size) {
+ while (i < edgeArray.length) {
val srcId = edgeArray(i).srcId
val dstId = edgeArray(i).dstId
localSrcIds(i) = global2local.changeValue(srcId,
@@ -98,9 +98,9 @@ class ExistingEdgePartitionBuilder[
val edgeArray = edges.trim().array
new Sorter(EdgeWithLocalIds.edgeArraySortDataFormat[ED])
.sort(edgeArray, 0, edgeArray.length, EdgeWithLocalIds.lexicographicOrdering)
- val localSrcIds = new Array[Int](edgeArray.size)
- val localDstIds = new Array[Int](edgeArray.size)
- val data = new Array[ED](edgeArray.size)
+ val localSrcIds = new Array[Int](edgeArray.length)
+ val localDstIds = new Array[Int](edgeArray.length)
+ val data = new Array[ED](edgeArray.length)
val index = new GraphXPrimitiveKeyOpenHashMap[VertexId, Int]
// Copy edges into columnar structures, tracking the beginnings of source vertex id clusters and
// adding them to the index
@@ -108,7 +108,7 @@ class ExistingEdgePartitionBuilder[
index.update(edgeArray(0).srcId, 0)
var currSrcId: VertexId = edgeArray(0).srcId
var i = 0
- while (i < edgeArray.size) {
+ while (i < edgeArray.length) {
localSrcIds(i) = edgeArray(i).localSrcId
localDstIds(i) = edgeArray(i).localDstId
data(i) = edgeArray(i).attr
http://git-wip-us.apache.org/repos/asf/spark/blob/e97fc7f1/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgeRDDImpl.scala
----------------------------------------------------------------------
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgeRDDImpl.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgeRDDImpl.scala
index 6e153b7..98e082c 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgeRDDImpl.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgeRDDImpl.scala
@@ -45,7 +45,7 @@ class EdgeRDDImpl[ED: ClassTag, VD: ClassTag] private[graphx] (
* partitioner that allows co-partitioning with `partitionsRDD`.
*/
override val partitioner =
- partitionsRDD.partitioner.orElse(Some(new HashPartitioner(partitions.size)))
+ partitionsRDD.partitioner.orElse(Some(new HashPartitioner(partitions.length)))
override def collect(): Array[Edge[ED]] = this.map(_.copy()).collect()
http://git-wip-us.apache.org/repos/asf/spark/blob/e97fc7f1/graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala
----------------------------------------------------------------------
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala
index 699731b..7903caa 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala
@@ -93,7 +93,7 @@ class GraphImpl[VD: ClassTag, ED: ClassTag] protected (
}
override def partitionBy(partitionStrategy: PartitionStrategy): Graph[VD, ED] = {
- partitionBy(partitionStrategy, edges.partitions.size)
+ partitionBy(partitionStrategy, edges.partitions.length)
}
override def partitionBy(
@@ -352,7 +352,8 @@ object GraphImpl {
edgeStorageLevel: StorageLevel,
vertexStorageLevel: StorageLevel): GraphImpl[VD, ED] = {
val edgesCached = edges.withTargetStorageLevel(edgeStorageLevel).cache()
- val vertices = VertexRDD.fromEdges(edgesCached, edgesCached.partitions.size, defaultVertexAttr)
+ val vertices =
+ VertexRDD.fromEdges(edgesCached, edgesCached.partitions.length, defaultVertexAttr)
.withTargetStorageLevel(vertexStorageLevel)
fromExistingRDDs(vertices, edgesCached)
}
http://git-wip-us.apache.org/repos/asf/spark/blob/e97fc7f1/graphx/src/main/scala/org/apache/spark/graphx/impl/RoutingTablePartition.scala
----------------------------------------------------------------------
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/RoutingTablePartition.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/RoutingTablePartition.scala
index 3fd7690..13e25b4 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/impl/RoutingTablePartition.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/RoutingTablePartition.scala
@@ -108,10 +108,10 @@ private[graphx]
class RoutingTablePartition(
private val routingTable: Array[(Array[VertexId], BitSet, BitSet)]) extends Serializable {
/** The maximum number of edge partitions this `RoutingTablePartition` is built to join with. */
- val numEdgePartitions: Int = routingTable.size
+ val numEdgePartitions: Int = routingTable.length
/** Returns the number of vertices that will be sent to the specified edge partition. */
- def partitionSize(pid: PartitionID): Int = routingTable(pid)._1.size
+ def partitionSize(pid: PartitionID): Int = routingTable(pid)._1.length
/** Returns an iterator over all vertex ids stored in this `RoutingTablePartition`. */
def iterator: Iterator[VertexId] = routingTable.iterator.flatMap(_._1.iterator)
http://git-wip-us.apache.org/repos/asf/spark/blob/e97fc7f1/graphx/src/main/scala/org/apache/spark/graphx/impl/ShippableVertexPartition.scala
----------------------------------------------------------------------
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/ShippableVertexPartition.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/ShippableVertexPartition.scala
index 96d807f..6dab465 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/impl/ShippableVertexPartition.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/ShippableVertexPartition.scala
@@ -28,7 +28,7 @@ private[graphx]
class VertexAttributeBlock[VD: ClassTag](val vids: Array[VertexId], val attrs: Array[VD])
extends Serializable {
def iterator: Iterator[(VertexId, VD)] =
- (0 until vids.size).iterator.map { i => (vids(i), attrs(i)) }
+ (0 until vids.length).iterator.map { i => (vids(i), attrs(i)) }
}
private[graphx]
http://git-wip-us.apache.org/repos/asf/spark/blob/e97fc7f1/graphx/src/main/scala/org/apache/spark/graphx/lib/TriangleCount.scala
----------------------------------------------------------------------
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/lib/TriangleCount.scala b/graphx/src/main/scala/org/apache/spark/graphx/lib/TriangleCount.scala
index 51bcdf2..026fb8b 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/lib/TriangleCount.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/lib/TriangleCount.scala
@@ -70,7 +70,7 @@ object TriangleCount {
graph.collectNeighborIds(EdgeDirection.Either).mapValues { (vid, nbrs) =>
val set = new VertexSet(nbrs.length)
var i = 0
- while (i < nbrs.size) {
+ while (i < nbrs.length) {
// prevent self cycle
if (nbrs(i) != vid) {
set.add(nbrs(i))
http://git-wip-us.apache.org/repos/asf/spark/blob/e97fc7f1/graphx/src/test/scala/org/apache/spark/graphx/VertexRDDSuite.scala
----------------------------------------------------------------------
diff --git a/graphx/src/test/scala/org/apache/spark/graphx/VertexRDDSuite.scala b/graphx/src/test/scala/org/apache/spark/graphx/VertexRDDSuite.scala
index f1aa685..0bb9e0a 100644
--- a/graphx/src/test/scala/org/apache/spark/graphx/VertexRDDSuite.scala
+++ b/graphx/src/test/scala/org/apache/spark/graphx/VertexRDDSuite.scala
@@ -32,7 +32,7 @@ class VertexRDDSuite extends SparkFunSuite with LocalSparkContext {
val n = 100
val verts = vertices(sc, n)
val evens = verts.filter(q => ((q._2 % 2) == 0))
- assert(evens.count === (0 to n).filter(_ % 2 == 0).size)
+ assert(evens.count === (0 to n).count(_ % 2 == 0))
}
}
http://git-wip-us.apache.org/repos/asf/spark/blob/e97fc7f1/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala
index 18be5c0..3b4209b 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala
@@ -166,7 +166,7 @@ class MinMaxScalerModel private[ml] (
// 0 in sparse vector will probably be rescaled to non-zero
val values = vector.toArray
- val size = values.size
+ val size = values.length
var i = 0
while (i < size) {
val raw = if (originalRange(i) != 0) (values(i) - minArray(i)) / originalRange(i) else 0.5
http://git-wip-us.apache.org/repos/asf/spark/blob/e97fc7f1/mllib/src/main/scala/org/apache/spark/ml/feature/QuantileDiscretizer.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/QuantileDiscretizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/QuantileDiscretizer.scala
index 769f440..d75b3ef 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/QuantileDiscretizer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/QuantileDiscretizer.scala
@@ -166,7 +166,7 @@ object QuantileDiscretizer extends DefaultParamsReadable[QuantileDiscretizer] wi
* needed, and adding a default split value of 0 if no good candidates are found.
*/
private[feature] def getSplits(candidates: Array[Double]): Array[Double] = {
- val effectiveValues = if (candidates.size != 0) {
+ val effectiveValues = if (candidates.nonEmpty) {
if (candidates.head == Double.NegativeInfinity
&& candidates.last == Double.PositiveInfinity) {
candidates.drop(1).dropRight(1)
@@ -181,7 +181,7 @@ object QuantileDiscretizer extends DefaultParamsReadable[QuantileDiscretizer] wi
candidates
}
- if (effectiveValues.size == 0) {
+ if (effectiveValues.isEmpty) {
Array(Double.NegativeInfinity, 0, Double.PositiveInfinity)
} else {
Array(Double.NegativeInfinity) ++ effectiveValues ++ Array(Double.PositiveInfinity)
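[Editor's note: size != 0 and size == 0 become nonEmpty and isEmpty, the idiomatic equivalents. A short sketch of the equivalence, using hypothetical candidate values:]

object NonEmptySketch {
  def main(args: Array[String]): Unit = {
    val candidates = Array(Double.NegativeInfinity, 0.5, Double.PositiveInfinity)

    // Same answers; nonEmpty/isEmpty avoid computing a size
    // only to compare it with zero.
    assert((candidates.size != 0) == candidates.nonEmpty)
    assert((candidates.size == 0) == candidates.isEmpty)
  }
}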
http://git-wip-us.apache.org/repos/asf/spark/blob/e97fc7f1/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
index ca0ed95..cf17689 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
@@ -1297,7 +1297,7 @@ private[spark] object SerDe extends Serializable {
def saveState(obj: Object, out: OutputStream, pickler: Pickler): Unit = {
val m: DenseMatrix = obj.asInstanceOf[DenseMatrix]
- val bytes = new Array[Byte](8 * m.values.size)
+ val bytes = new Array[Byte](8 * m.values.length)
val order = ByteOrder.nativeOrder()
val isTransposed = if (m.isTransposed) 1 else 0
ByteBuffer.wrap(bytes).order(order).asDoubleBuffer().put(m.values)
@@ -1389,7 +1389,7 @@ private[spark] object SerDe extends Serializable {
def saveState(obj: Object, out: OutputStream, pickler: Pickler): Unit = {
val v: SparseVector = obj.asInstanceOf[SparseVector]
- val n = v.indices.size
+ val n = v.indices.length
val indiceBytes = new Array[Byte](4 * n)
val order = ByteOrder.nativeOrder()
ByteBuffer.wrap(indiceBytes).order(order).asIntBuffer().put(v.indices)
http://git-wip-us.apache.org/repos/asf/spark/blob/e97fc7f1/mllib/src/main/scala/org/apache/spark/mllib/classification/impl/GLMClassificationModel.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/impl/GLMClassificationModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/impl/GLMClassificationModel.scala
index 2910c02..4308ae0 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/classification/impl/GLMClassificationModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/impl/GLMClassificationModel.scala
@@ -77,7 +77,7 @@ private[classification] object GLMClassificationModel {
val sqlContext = SQLContext.getOrCreate(sc)
val dataRDD = sqlContext.read.parquet(datapath)
val dataArray = dataRDD.select("weights", "intercept", "threshold").take(1)
- assert(dataArray.size == 1, s"Unable to load $modelClass data from: $datapath")
+ assert(dataArray.length == 1, s"Unable to load $modelClass data from: $datapath")
val data = dataArray(0)
assert(data.size == 3, s"Unable to load $modelClass data from: $datapath")
val (weights, intercept) = data match {
http://git-wip-us.apache.org/repos/asf/spark/blob/e97fc7f1/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeans.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeans.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeans.scala
index 54bf510..f0b9d64 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeans.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeans.scala
@@ -19,6 +19,7 @@ package org.apache.spark.mllib.clustering
import java.util.Random
+import scala.annotation.tailrec
import scala.collection.mutable
import org.apache.spark.Logging
@@ -467,6 +468,7 @@ private[clustering] class ClusteringTreeNode private[clustering] (
* @param cost the cost to the current center
* @return (predicted leaf cluster index, cost)
*/
+ @tailrec
private def predict(pointWithNorm: VectorWithNorm, cost: Double): (Int, Double) = {
if (isLeaf) {
(index, cost)
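[Editor's note: the @tailrec annotation added here, and in several files further down, does not change behavior; it asks the compiler to fail the build if the marked recursion ever stops being a tail call. A self-contained sketch with a hypothetical tree type, not the Spark class:]

import scala.annotation.tailrec

object TailRecSketch {
  // Hypothetical node type for illustration only.
  final case class Node(children: List[Node]) {
    def isLeaf: Boolean = children.isEmpty
  }

  // With @tailrec, the compiler rejects this method if the recursive
  // call is ever moved out of tail position, so it stays loop-like.
  @tailrec
  def depthOfLeftmostLeaf(node: Node, depth: Int = 0): Int =
    if (node.isLeaf) depth
    else depthOfLeftmostLeaf(node.children.head, depth + 1)
}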
http://git-wip-us.apache.org/repos/asf/spark/blob/e97fc7f1/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala
index 3b91fe8..439e4f8 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala
@@ -144,7 +144,7 @@ object KMeansModel extends Loader[KMeansModel] {
val centroids = sqlContext.read.parquet(Loader.dataPath(path))
Loader.checkSchema[Cluster](centroids.schema)
val localCentroids = centroids.rdd.map(Cluster.apply).collect()
- assert(k == localCentroids.size)
+ assert(k == localCentroids.length)
new KMeansModel(localCentroids.sortBy(_.id).map(_.point))
}
}
http://git-wip-us.apache.org/repos/asf/spark/blob/e97fc7f1/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala
index 3029b15..5dde2bd 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala
@@ -66,7 +66,7 @@ class MulticlassMetrics @Since("1.1.0") (predictionAndLabels: RDD[(Double, Doubl
*/
@Since("1.1.0")
def confusionMatrix: Matrix = {
- val n = labels.size
+ val n = labels.length
val values = Array.ofDim[Double](n * n)
var i = 0
while (i < n) {
http://git-wip-us.apache.org/repos/asf/spark/blob/e97fc7f1/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MultilabelMetrics.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MultilabelMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MultilabelMetrics.scala
index daf6ff4..95b2fef 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MultilabelMetrics.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MultilabelMetrics.scala
@@ -58,8 +58,8 @@ class MultilabelMetrics @Since("1.2.0") (predictionAndLabels: RDD[(Array[Double]
*/
@Since("1.2.0")
lazy val accuracy: Double = predictionAndLabels.map { case (predictions, labels) =>
- labels.intersect(predictions).size.toDouble /
- (labels.size + predictions.size - labels.intersect(predictions).size)}.sum / numDocs
+ labels.intersect(predictions).length.toDouble /
+ (labels.length + predictions.length - labels.intersect(predictions).length)}.sum / numDocs
/**
@@ -67,7 +67,7 @@ class MultilabelMetrics @Since("1.2.0") (predictionAndLabels: RDD[(Array[Double]
*/
@Since("1.2.0")
lazy val hammingLoss: Double = predictionAndLabels.map { case (predictions, labels) =>
- labels.size + predictions.size - 2 * labels.intersect(predictions).size
+ labels.length + predictions.length - 2 * labels.intersect(predictions).length
}.sum / (numDocs * numLabels)
/**
@@ -75,8 +75,8 @@ class MultilabelMetrics @Since("1.2.0") (predictionAndLabels: RDD[(Array[Double]
*/
@Since("1.2.0")
lazy val precision: Double = predictionAndLabels.map { case (predictions, labels) =>
- if (predictions.size > 0) {
- predictions.intersect(labels).size.toDouble / predictions.size
+ if (predictions.length > 0) {
+ predictions.intersect(labels).length.toDouble / predictions.length
} else {
0
}
@@ -87,7 +87,7 @@ class MultilabelMetrics @Since("1.2.0") (predictionAndLabels: RDD[(Array[Double]
*/
@Since("1.2.0")
lazy val recall: Double = predictionAndLabels.map { case (predictions, labels) =>
- labels.intersect(predictions).size.toDouble / labels.size
+ labels.intersect(predictions).length.toDouble / labels.length
}.sum / numDocs
/**
@@ -95,7 +95,7 @@ class MultilabelMetrics @Since("1.2.0") (predictionAndLabels: RDD[(Array[Double]
*/
@Since("1.2.0")
lazy val f1Measure: Double = predictionAndLabels.map { case (predictions, labels) =>
- 2.0 * predictions.intersect(labels).size / (predictions.size + labels.size)
+ 2.0 * predictions.intersect(labels).length / (predictions.length + labels.length)
}.sum / numDocs
private lazy val tpPerClass = predictionAndLabels.flatMap { case (predictions, labels) =>
http://git-wip-us.apache.org/repos/asf/spark/blob/e97fc7f1/mllib/src/main/scala/org/apache/spark/mllib/feature/IDF.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/IDF.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/IDF.scala
index cffa9fb..9457c6e 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/feature/IDF.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/IDF.scala
@@ -88,7 +88,7 @@ private object IDF {
}
doc match {
case SparseVector(size, indices, values) =>
- val nnz = indices.size
+ val nnz = indices.length
var k = 0
while (k < nnz) {
if (values(k) > 0) {
@@ -97,7 +97,7 @@ private object IDF {
k += 1
}
case DenseVector(values) =>
- val n = values.size
+ val n = values.length
var j = 0
while (j < n) {
if (values(j) > 0.0) {
@@ -211,7 +211,7 @@ private object IDFModel {
val n = v.size
v match {
case SparseVector(size, indices, values) =>
- val nnz = indices.size
+ val nnz = indices.length
val newValues = new Array[Double](nnz)
var k = 0
while (k < nnz) {
http://git-wip-us.apache.org/repos/asf/spark/blob/e97fc7f1/mllib/src/main/scala/org/apache/spark/mllib/feature/Normalizer.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/Normalizer.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/Normalizer.scala
index af0c8e1..99fcb36 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/feature/Normalizer.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/Normalizer.scala
@@ -55,7 +55,7 @@ class Normalizer @Since("1.1.0") (p: Double) extends VectorTransformer {
vector match {
case DenseVector(vs) =>
val values = vs.clone()
- val size = values.size
+ val size = values.length
var i = 0
while (i < size) {
values(i) /= norm
@@ -64,7 +64,7 @@ class Normalizer @Since("1.1.0") (p: Double) extends VectorTransformer {
Vectors.dense(values)
case SparseVector(size, ids, vs) =>
val values = vs.clone()
- val nnz = values.size
+ val nnz = values.length
var i = 0
while (i < nnz) {
values(i) /= norm
http://git-wip-us.apache.org/repos/asf/spark/blob/e97fc7f1/mllib/src/main/scala/org/apache/spark/mllib/feature/StandardScaler.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/StandardScaler.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/StandardScaler.scala
index 6fe573c..500187a 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/feature/StandardScaler.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/StandardScaler.scala
@@ -132,7 +132,7 @@ class StandardScalerModel @Since("1.3.0") (
vector match {
case DenseVector(vs) =>
val values = vs.clone()
- val size = values.size
+ val size = values.length
if (withStd) {
var i = 0
while (i < size) {
@@ -153,7 +153,7 @@ class StandardScalerModel @Since("1.3.0") (
vector match {
case DenseVector(vs) =>
val values = vs.clone()
- val size = values.size
+ val size = values.length
var i = 0
while(i < size) {
values(i) *= (if (std(i) != 0.0) 1.0 / std(i) else 0.0)
@@ -164,7 +164,7 @@ class StandardScalerModel @Since("1.3.0") (
// For sparse vector, the `index` array inside sparse vector object will not be changed,
// so we can re-use it to save memory.
val values = vs.clone()
- val nnz = values.size
+ val nnz = values.length
var i = 0
while (i < nnz) {
values(i) *= (if (std(indices(i)) != 0.0) 1.0 / std(indices(i)) else 0.0)
http://git-wip-us.apache.org/repos/asf/spark/blob/e97fc7f1/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala
index 3241ebe..b046f68 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala
@@ -346,9 +346,9 @@ class Word2Vec extends Serializable with Logging {
if (alpha < learningRate * 0.0001) alpha = learningRate * 0.0001
logInfo("wordCount = " + wordCount + ", alpha = " + alpha)
}
- wc += sentence.size
+ wc += sentence.length
var pos = 0
- while (pos < sentence.size) {
+ while (pos < sentence.length) {
val word = sentence(pos)
val b = random.nextInt(window)
// Train Skip-gram
@@ -356,7 +356,7 @@ class Word2Vec extends Serializable with Logging {
while (a < window * 2 + 1 - b) {
if (a != window) {
val c = pos - window + a
- if (c >= 0 && c < sentence.size) {
+ if (c >= 0 && c < sentence.length) {
val lastWord = sentence(c)
val l1 = lastWord * vectorSize
val neu1e = new Array[Float](vectorSize)
@@ -579,7 +579,7 @@ object Word2VecModel extends Loader[Word2VecModel] {
private def buildWordVectors(model: Map[String, Array[Float]]): Array[Float] = {
require(model.nonEmpty, "Word2VecMap should be non-empty")
- val (vectorSize, numWords) = (model.head._2.size, model.size)
+ val (vectorSize, numWords) = (model.head._2.length, model.size)
val wordList = model.keys.toArray
val wordVectors = new Array[Float](vectorSize * numWords)
var i = 0
@@ -615,7 +615,7 @@ object Word2VecModel extends Loader[Word2VecModel] {
val sqlContext = SQLContext.getOrCreate(sc)
import sqlContext.implicits._
- val vectorSize = model.values.head.size
+ val vectorSize = model.values.head.length
val numWords = model.size
val metadata = compact(render(
("class" -> classNameV1_0) ~ ("version" -> formatVersionV1_0) ~
@@ -646,7 +646,7 @@ object Word2VecModel extends Loader[Word2VecModel] {
(loadedClassName, loadedVersion) match {
case (classNameV1_0, "1.0") =>
val model = SaveLoadV1_0.load(sc, path)
- val vectorSize = model.getVectors.values.head.size
+ val vectorSize = model.getVectors.values.head.length
val numWords = model.getVectors.size
require(expectedVectorSize == vectorSize,
s"Word2VecModel requires each word to be mapped to a vector of size " +
http://git-wip-us.apache.org/repos/asf/spark/blob/e97fc7f1/mllib/src/main/scala/org/apache/spark/mllib/fpm/FPGrowth.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/fpm/FPGrowth.scala b/mllib/src/main/scala/org/apache/spark/mllib/fpm/FPGrowth.scala
index b35d721..f5b4f25 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/fpm/FPGrowth.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/fpm/FPGrowth.scala
@@ -232,7 +232,7 @@ class FPGrowth private (
partitioner: Partitioner): Array[Item] = {
data.flatMap { t =>
val uniq = t.toSet
- if (t.size != uniq.size) {
+ if (t.length != uniq.size) {
throw new SparkException(s"Items in a transaction must be unique but got ${t.toSeq}.")
}
t
http://git-wip-us.apache.org/repos/asf/spark/blob/e97fc7f1/mllib/src/main/scala/org/apache/spark/mllib/linalg/BLAS.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/BLAS.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/BLAS.scala
index df9f4ae..d2687dc 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/BLAS.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/BLAS.scala
@@ -75,7 +75,7 @@ private[spark] object BLAS extends Serializable with Logging {
val xValues = x.values
val xIndices = x.indices
val yValues = y.values
- val nnz = xIndices.size
+ val nnz = xIndices.length
if (a == 1.0) {
var k = 0
@@ -135,7 +135,7 @@ private[spark] object BLAS extends Serializable with Logging {
val xValues = x.values
val xIndices = x.indices
val yValues = y.values
- val nnz = xIndices.size
+ val nnz = xIndices.length
var sum = 0.0
var k = 0
@@ -154,8 +154,8 @@ private[spark] object BLAS extends Serializable with Logging {
val xIndices = x.indices
val yValues = y.values
val yIndices = y.indices
- val nnzx = xIndices.size
- val nnzy = yIndices.size
+ val nnzx = xIndices.length
+ val nnzy = yIndices.length
var kx = 0
var ky = 0
@@ -188,7 +188,7 @@ private[spark] object BLAS extends Serializable with Logging {
val sxIndices = sx.indices
val sxValues = sx.values
val dyValues = dy.values
- val nnz = sxIndices.size
+ val nnz = sxIndices.length
var i = 0
var k = 0
http://git-wip-us.apache.org/repos/asf/spark/blob/e97fc7f1/mllib/src/main/scala/org/apache/spark/mllib/linalg/CholeskyDecomposition.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/CholeskyDecomposition.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/CholeskyDecomposition.scala
index ffdcdde..e449479 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/CholeskyDecomposition.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/CholeskyDecomposition.scala
@@ -33,7 +33,7 @@ private[spark] object CholeskyDecomposition {
* @return the solution array
*/
def solve(A: Array[Double], bx: Array[Double]): Array[Double] = {
- val k = bx.size
+ val k = bx.length
val info = new intW(0)
lapack.dppsv("U", k, 1, A, bx, k, info)
val code = info.`val`
http://git-wip-us.apache.org/repos/asf/spark/blob/e97fc7f1/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala
index b08da4f..0fdb402 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala
@@ -987,7 +987,7 @@ object Matrices {
def horzcat(matrices: Array[Matrix]): Matrix = {
if (matrices.isEmpty) {
return new DenseMatrix(0, 0, Array[Double]())
- } else if (matrices.size == 1) {
+ } else if (matrices.length == 1) {
return matrices(0)
}
val numRows = matrices(0).numRows
@@ -1046,7 +1046,7 @@ object Matrices {
def vertcat(matrices: Array[Matrix]): Matrix = {
if (matrices.isEmpty) {
return new DenseMatrix(0, 0, Array[Double]())
- } else if (matrices.size == 1) {
+ } else if (matrices.length == 1) {
return matrices(0)
}
val numCols = matrices(0).numCols
http://git-wip-us.apache.org/repos/asf/spark/blob/e97fc7f1/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala
index 09527dc..ae1faf6 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala
@@ -176,7 +176,7 @@ class BlockMatrix @Since("1.3.0") (
val numColBlocks = math.ceil(numCols() * 1.0 / colsPerBlock).toInt
private[mllib] def createPartitioner(): GridPartitioner =
- GridPartitioner(numRowBlocks, numColBlocks, suggestedNumPartitions = blocks.partitions.size)
+ GridPartitioner(numRowBlocks, numColBlocks, suggestedNumPartitions = blocks.partitions.length)
private lazy val blockInfo = blocks.mapValues(block => (block.numRows, block.numCols)).cache()
http://git-wip-us.apache.org/repos/asf/spark/blob/e97fc7f1/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala
index e8de515..06b9c4a 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala
@@ -120,9 +120,9 @@ class IndexedRowMatrix @Since("1.0.0") (
val rowIndex = row.index
row.vector match {
case SparseVector(size, indices, values) =>
- Iterator.tabulate(indices.size)(i => MatrixEntry(rowIndex, indices(i), values(i)))
+ Iterator.tabulate(indices.length)(i => MatrixEntry(rowIndex, indices(i), values(i)))
case DenseVector(values) =>
- Iterator.tabulate(values.size)(i => MatrixEntry(rowIndex, i, values(i)))
+ Iterator.tabulate(values.length)(i => MatrixEntry(rowIndex, i, values(i)))
}
}
new CoordinateMatrix(entries, numRows(), numCols())
http://git-wip-us.apache.org/repos/asf/spark/blob/e97fc7f1/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala
index 3e619c4..a7a843a 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala
@@ -226,12 +226,12 @@ class ALS private (
val sc = ratings.context
val numUserBlocks = if (this.numUserBlocks == -1) {
- math.max(sc.defaultParallelism, ratings.partitions.size / 2)
+ math.max(sc.defaultParallelism, ratings.partitions.length / 2)
} else {
this.numUserBlocks
}
val numProductBlocks = if (this.numProductBlocks == -1) {
- math.max(sc.defaultParallelism, ratings.partitions.size / 2)
+ math.max(sc.defaultParallelism, ratings.partitions.length / 2)
} else {
this.numProductBlocks
}
http://git-wip-us.apache.org/repos/asf/spark/blob/e97fc7f1/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala
index 73da899..f7e3c5c 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala
@@ -350,7 +350,7 @@ abstract class GeneralizedLinearAlgorithm[M <: GeneralizedLinearModel]
val partialWeightsArray = scaler.transform(
Vectors.dense(weightsArray.slice(start, end))).toArray
- System.arraycopy(partialWeightsArray, 0, weightsArray, start, partialWeightsArray.size)
+ System.arraycopy(partialWeightsArray, 0, weightsArray, start, partialWeightsArray.length)
i += 1
}
weights = Vectors.dense(weightsArray)
http://git-wip-us.apache.org/repos/asf/spark/blob/e97fc7f1/mllib/src/main/scala/org/apache/spark/mllib/regression/impl/GLMRegressionModel.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/impl/GLMRegressionModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/impl/GLMRegressionModel.scala
index 02af281..a6e1767 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/impl/GLMRegressionModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/impl/GLMRegressionModel.scala
@@ -74,7 +74,7 @@ private[regression] object GLMRegressionModel {
val sqlContext = SQLContext.getOrCreate(sc)
val dataRDD = sqlContext.read.parquet(datapath)
val dataArray = dataRDD.select("weights", "intercept").take(1)
- assert(dataArray.size == 1, s"Unable to load $modelClass data from: $datapath")
+ assert(dataArray.length == 1, s"Unable to load $modelClass data from: $datapath")
val data = dataArray(0)
assert(data.size == 2, s"Unable to load $modelClass data from: $datapath")
data match {
http://git-wip-us.apache.org/repos/asf/spark/blob/e97fc7f1/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala
index 40440d5..76c3220 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala
@@ -17,6 +17,7 @@
package org.apache.spark.mllib.tree
+import scala.annotation.tailrec
import scala.collection.JavaConverters._
import scala.collection.mutable
@@ -286,6 +287,7 @@ object DecisionTree extends Serializable with Logging {
* This index is different from the index used during training a particular
* group of nodes on one call to [[findBestSplits()]].
*/
+ @tailrec
private def predictNodeIndex(
node: Node,
binnedFeatures: Array[Int],
@@ -350,7 +352,7 @@ object DecisionTree extends Serializable with Logging {
featuresForNode: Option[Array[Int]]): Unit = {
val numFeaturesPerNode = if (featuresForNode.nonEmpty) {
// Use subsampled features
- featuresForNode.get.size
+ featuresForNode.get.length
} else {
// Use all features
agg.metadata.numFeatures
@@ -411,7 +413,7 @@ object DecisionTree extends Serializable with Logging {
if (featuresForNode.nonEmpty) {
// Use subsampled features
var featureIndexIdx = 0
- while (featureIndexIdx < featuresForNode.get.size) {
+ while (featureIndexIdx < featuresForNode.get.length) {
val binIndex = treePoint.binnedFeatures(featuresForNode.get.apply(featureIndexIdx))
agg.update(featureIndexIdx, binIndex, label, instanceWeight)
featureIndexIdx += 1
@@ -483,7 +485,7 @@ object DecisionTree extends Serializable with Logging {
*/
// numNodes: Number of nodes in this group
- val numNodes = nodesForGroup.values.map(_.size).sum
+ val numNodes = nodesForGroup.values.map(_.length).sum
logDebug("numNodes = " + numNodes)
logDebug("numFeatures = " + metadata.numFeatures)
logDebug("numClasses = " + metadata.numClasses)
http://git-wip-us.apache.org/repos/asf/spark/blob/e97fc7f1/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Variance.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Variance.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Variance.scala
index a741972..09017d4 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Variance.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Variance.scala
@@ -104,9 +104,9 @@ private[tree] class VarianceAggregator()
*/
private[spark] class VarianceCalculator(stats: Array[Double]) extends ImpurityCalculator(stats) {
- require(stats.size == 3,
+ require(stats.length == 3,
s"VarianceCalculator requires sufficient statistics array stats to be of length 3," +
- s" but was given array of length ${stats.size}.")
+ s" but was given array of length ${stats.length}.")
/**
* Make a deep copy of this [[ImpurityCalculator]].
http://git-wip-us.apache.org/repos/asf/spark/blob/e97fc7f1/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala
index ec5d7b9..e007ee1 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala
@@ -250,7 +250,7 @@ object DecisionTreeModel extends Loader[DecisionTreeModel] with Logging {
val nodes = dataRDD.rdd.map(NodeData.apply)
// Build node data into a tree.
val trees = constructTrees(nodes)
- assert(trees.size == 1,
+ assert(trees.length == 1,
"Decision tree should contain exactly one tree but got ${trees.size} trees.")
val model = new DecisionTreeModel(trees(0), Algo.fromString(algo))
assert(model.numNodes == numNodes, s"Unable to load DecisionTreeModel data from: $datapath." +
@@ -266,7 +266,7 @@ object DecisionTreeModel extends Loader[DecisionTreeModel] with Logging {
.map { case (treeId, data) =>
(treeId, constructTree(data))
}.sortBy(_._1)
- val numTrees = trees.size
+ val numTrees = trees.length
val treeIndices = trees.map(_._1).toSeq
assert(treeIndices == (0 until numTrees),
s"Tree indices must start from 0 and increment by 1, but we found $treeIndices.")
http://git-wip-us.apache.org/repos/asf/spark/blob/e97fc7f1/mllib/src/test/java/org/apache/spark/mllib/fpm/JavaFPGrowthSuite.java
----------------------------------------------------------------------
diff --git a/mllib/src/test/java/org/apache/spark/mllib/fpm/JavaFPGrowthSuite.java b/mllib/src/test/java/org/apache/spark/mllib/fpm/JavaFPGrowthSuite.java
index eeeabfe..916fff1 100644
--- a/mllib/src/test/java/org/apache/spark/mllib/fpm/JavaFPGrowthSuite.java
+++ b/mllib/src/test/java/org/apache/spark/mllib/fpm/JavaFPGrowthSuite.java
@@ -95,7 +95,9 @@ public class JavaFPGrowthSuite implements Serializable {
try {
model.save(sc.sc(), outputPath);
- FPGrowthModel newModel = FPGrowthModel.load(sc.sc(), outputPath);
+ @SuppressWarnings("unchecked")
+ FPGrowthModel<String> newModel =
+ (FPGrowthModel<String>) FPGrowthModel.load(sc.sc(), outputPath);
List<FPGrowth.FreqItemset<String>> freqItemsets = newModel.freqItemsets().toJavaRDD()
.collect();
assertEquals(18, freqItemsets.size());
http://git-wip-us.apache.org/repos/asf/spark/blob/e97fc7f1/mllib/src/test/scala/org/apache/spark/mllib/regression/RidgeRegressionSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/regression/RidgeRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/regression/RidgeRegressionSuite.scala
index 8fb8886..a200e94 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/regression/RidgeRegressionSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/regression/RidgeRegressionSuite.scala
@@ -38,7 +38,7 @@ class RidgeRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
def predictionError(predictions: Seq[Double], input: Seq[LabeledPoint]): Double = {
predictions.zip(input).map { case (prediction, expected) =>
(prediction - expected.label) * (prediction - expected.label)
- }.reduceLeft(_ + _) / predictions.size
+ }.sum / predictions.size
}
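[Editor's note: reduceLeft(_ + _) is replaced by sum; they agree on non-empty input, and sum also handles the empty case by returning 0.0 instead of throwing. A sketch with made-up squared errors:]

object SumSketch {
  def main(args: Array[String]): Unit = {
    val squaredErrors = Seq(0.1, 0.4, 0.25)

    // Equivalent on non-empty sequences; sum is the clearer idiom.
    val before = squaredErrors.reduceLeft(_ + _)
    val after  = squaredErrors.sum

    assert(math.abs(before - after) < 1e-12)
  }
}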
test("ridge regression can help avoid overfitting") {
http://git-wip-us.apache.org/repos/asf/spark/blob/e97fc7f1/mllib/src/test/scala/org/apache/spark/mllib/stat/StreamingTestSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/stat/StreamingTestSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/stat/StreamingTestSuite.scala
index 5044181..0921fdb 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/stat/StreamingTestSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/stat/StreamingTestSuite.scala
@@ -164,7 +164,7 @@ class StreamingTestSuite extends SparkFunSuite with TestSuiteBase {
// number of batches seen so far does not exceed testWindow, expect counts to continue growing
for (i <- 0 until testWindow) {
- assert(outputCounts.drop(2 * i).take(2).forall(_ == (i + 1) * pointsPerBatch / 2))
+ assert(outputCounts.slice(2 * i, 2 * i + 2).forall(_ == (i + 1) * pointsPerBatch / 2))
}
// number of batches seen exceeds testWindow, expect counts to be constant
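[Editor's note: drop(2 * i).take(2) becomes slice(2 * i, 2 * i + 2); both select the same window, and slice expresses both bounds in one call. A sketch with hypothetical counts:]

object SliceSketch {
  def main(args: Array[String]): Unit = {
    val outputCounts = Seq(5, 5, 10, 10, 15, 15)
    val i = 1

    // Same two-element window either way.
    assert(outputCounts.drop(2 * i).take(2) == outputCounts.slice(2 * i, 2 * i + 2))
  }
}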
http://git-wip-us.apache.org/repos/asf/spark/blob/e97fc7f1/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index 36eb59e..fbbc3ee 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -19,6 +19,7 @@ package org.apache.spark.sql.catalyst.analysis
import java.lang.reflect.Modifier
+import scala.annotation.tailrec
import scala.collection.mutable.ArrayBuffer
import org.apache.spark.sql.AnalysisException
@@ -689,6 +690,7 @@ class Analyzer(
* Resolve the expression on a specified logical plan and it's child (recursively), until
* the expression is resolved or meet a non-unary node or Subquery.
*/
+ @tailrec
private def resolveExpressionRecursively(expr: Expression, plan: LogicalPlan): Expression = {
val resolved = resolveExpression(expr, plan)
if (resolved.resolved) {
http://git-wip-us.apache.org/repos/asf/spark/blob/e97fc7f1/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
index 1072158..a965cc8 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
@@ -925,7 +925,7 @@ case class Cast(child: Expression, dataType: DataType) extends UnaryExpression {
(c, evPrim, evNull) =>
s"""
- final $rowClass $result = new $rowClass(${fieldsCasts.size});
+ final $rowClass $result = new $rowClass(${fieldsCasts.length});
final InternalRow $tmpRow = $c;
$fieldsEvalCode
$evPrim = $result.copy();
http://git-wip-us.apache.org/repos/asf/spark/blob/e97fc7f1/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala
index 4cb6af9..cf73e36 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala
@@ -17,6 +17,8 @@
package org.apache.spark.sql.catalyst.expressions.codegen
+import scala.annotation.tailrec
+
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.expressions.aggregate.NoOp
import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, GenericArrayData}
@@ -120,6 +122,7 @@ object GenerateSafeProjection extends CodeGenerator[Seq[Expression], Projection]
ExprCode(code, "false", output)
}
+ @tailrec
private def convertToSafe(
ctx: CodegenContext,
input: String,
http://git-wip-us.apache.org/repos/asf/spark/blob/e97fc7f1/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala
index 0df8101..87e4342 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala
@@ -159,7 +159,7 @@ case class CreateNamedStruct(children: Seq[Expression]) extends Expression {
TypeCheckResult.TypeCheckFailure(
s"Only foldable StringType expressions are allowed to appear at odd position , got :" +
s" ${invalidNames.mkString(",")}")
- } else if (names.forall(_ != null)){
+ } else if (!names.contains(null)){
TypeCheckResult.TypeCheckSuccess
} else {
TypeCheckResult.TypeCheckFailure("Field name should not be null")
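[Editor's note: forall(_ != null) is rewritten as !contains(null); both ask whether every element is non-null, and the contains form reads as a single membership check. A sketch with hypothetical field names:]

object NullCheckSketch {
  def main(args: Array[String]): Unit = {
    val names = Seq("a", "b", "c")

    // Equivalent predicates over the same sequence.
    assert(names.forall(_ != null) == !names.contains(null))
  }
}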
http://git-wip-us.apache.org/repos/asf/spark/blob/e97fc7f1/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala
index 33bd3f2..8f260ad 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala
@@ -20,6 +20,8 @@ package org.apache.spark.sql.catalyst.expressions
import java.security.{MessageDigest, NoSuchAlgorithmException}
import java.util.zip.CRC32
+import scala.annotation.tailrec
+
import org.apache.commons.codec.digest.DigestUtils
import org.apache.spark.sql.catalyst.InternalRow
@@ -352,6 +354,7 @@ case class Murmur3Hash(children: Seq[Expression], seed: Int) extends Expression
}
}
+ @tailrec
private def computeHash(
input: String,
dataType: DataType,
http://git-wip-us.apache.org/repos/asf/spark/blob/e97fc7f1/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects.scala
index 737346d..75ecbaa 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects.scala
@@ -17,6 +17,7 @@
package org.apache.spark.sql.catalyst.expressions
+import scala.annotation.tailrec
import scala.language.existentials
import scala.reflect.ClassTag
@@ -370,6 +371,7 @@ case class MapObjects private(
lambdaFunction: Expression,
inputData: Expression) extends Expression with NonSQLExpression {
+ @tailrec
private def itemAccessorMethod(dataType: DataType): String => String = dataType match {
case NullType =>
val nullTypeClassName = NullType.getClass.getName + ".MODULE$"
http://git-wip-us.apache.org/repos/asf/spark/blob/e97fc7f1/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
index 059d8ff..c83ec0f 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
@@ -17,6 +17,7 @@
package org.apache.spark.sql.catalyst.optimizer
+import scala.annotation.tailrec
import scala.collection.immutable.HashSet
import org.apache.spark.sql.catalyst.analysis.{CleanupAliases, EliminateSubqueryAliases}
@@ -915,6 +916,7 @@ object ReorderJoin extends Rule[LogicalPlan] with PredicateHelper {
* @param input a list of LogicalPlans to join.
* @param conditions a list of condition for join.
*/
+ @tailrec
def createOrderedJoin(input: Seq[LogicalPlan], conditions: Seq[Expression]): LogicalPlan = {
assert(input.size >= 2)
if (input.size == 2) {
http://git-wip-us.apache.org/repos/asf/spark/blob/e97fc7f1/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala
index 9ff41f5..7f96db1 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala
@@ -16,6 +16,8 @@
*/
package org.apache.spark.sql.catalyst.parser
+import scala.annotation.tailrec
+
import org.antlr.runtime._
import org.antlr.runtime.tree.CommonTree
@@ -71,6 +73,7 @@ object ParseDriver extends Logging {
logInfo(s"Parse completed.")
// Find the non null token tree in the result.
+ @tailrec
def nonNullToken(tree: CommonTree): CommonTree = {
if (tree.token != null || tree.getChildCount == 0) tree
else nonNullToken(tree.getChild(0).asInstanceOf[CommonTree])
http://git-wip-us.apache.org/repos/asf/spark/blob/e97fc7f1/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
index f184d72..5393cb8 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
@@ -22,6 +22,8 @@ import java.text.{DateFormat, SimpleDateFormat}
import java.util.{Calendar, TimeZone}
import javax.xml.bind.DatatypeConverter
+import scala.annotation.tailrec
+
import org.apache.spark.unsafe.types.UTF8String
/**
@@ -117,6 +119,7 @@ object DateTimeUtils {
}
}
+ @tailrec
def stringToTime(s: String): java.util.Date = {
val indexOfGMT = s.indexOf("GMT")
if (indexOfGMT != -1) {
http://git-wip-us.apache.org/repos/asf/spark/blob/e97fc7f1/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala
index 43f707f..d9a9b61 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala
@@ -104,12 +104,12 @@ package object util {
}
def sideBySide(left: Seq[String], right: Seq[String]): Seq[String] = {
- val maxLeftSize = left.map(_.size).max
+ val maxLeftSize = left.map(_.length).max
val leftPadded = left ++ Seq.fill(math.max(right.size - left.size, 0))("")
val rightPadded = right ++ Seq.fill(math.max(left.size - right.size, 0))("")
leftPadded.zip(rightPadded).map {
- case (l, r) => (if (l == r) " " else "!") + l + (" " * ((maxLeftSize - l.size) + 3)) + r
+ case (l, r) => (if (l == r) " " else "!") + l + (" " * ((maxLeftSize - l.length) + 3)) + r
}
}
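
The .size-to-.length changes here and in later hunks matter because String and Array do not define .size directly; that call only resolves after an implicit wrap into StringOps/ArrayOps, whereas .length is the direct accessor. A small self-contained illustration with made-up values:

object LengthVsSize {
  def main(args: Array[String]): Unit = {
    val s = "spark"
    val a = Array(1, 2, 3)

    // .length is defined directly on String and Array; .size returns the same
    // value but only after an implicit conversion to StringOps/ArrayOps.
    assert(s.length == 5)
    assert(a.length == 3)
  }
}
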
http://git-wip-us.apache.org/repos/asf/spark/blob/e97fc7f1/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala
index 5ff5435..271ca95 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala
@@ -292,7 +292,7 @@ case class StructType(fields: Array[StructField]) extends DataType with Seq[Stru
builder.append("struct<")
builder.append(fieldTypes.mkString(", "))
if (fields.length > 2) {
- if (fields.length - fieldTypes.size == 1) {
+ if (fields.length - fieldTypes.length == 1) {
builder.append(" ... 1 more field")
} else {
builder.append(" ... " + (fields.length - 2) + " more fields")
http://git-wip-us.apache.org/repos/asf/spark/blob/e97fc7f1/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGeneratorSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGeneratorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGeneratorSuite.scala
index 9fba792..3c2f8a2 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGeneratorSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGeneratorSuite.scala
@@ -40,7 +40,7 @@ class RandomDataGeneratorSuite extends SparkFunSuite {
if (nullable) {
assert(Iterator.fill(100)(generator()).contains(null))
} else {
- assert(Iterator.fill(100)(generator()).forall(_ != null))
+ assert(!Iterator.fill(100)(generator()).contains(null))
}
for (_ <- 1 to 10) {
val generatedValue = generator()
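
Both forms of the rewritten assertion short-circuit at the first null; the new one reads as the direct negation of the nullable branch above it. A tiny sketch with hypothetical data:

object ContainsNullSketch {
  def main(args: Array[String]): Unit = {
    val values: Seq[Any] = Seq("a", "b", "c")

    // Equivalent checks; the second states "no null anywhere" directly.
    assert(values.forall(_ != null))
    assert(!values.contains(null))
  }
}
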
http://git-wip-us.apache.org/repos/asf/spark/blob/e97fc7f1/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SubexpressionEliminationSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SubexpressionEliminationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SubexpressionEliminationSuite.scala
index 43a3eb9..5d688e2 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SubexpressionEliminationSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SubexpressionEliminationSuite.scala
@@ -97,7 +97,7 @@ class SubexpressionEliminationSuite extends SparkFunSuite {
equivalence.addExprTree(add2, true)
// Should only have one equivalence for `one + two`
- assert(equivalence.getAllEquivalentExprs.filter(_.size > 1).size == 1)
+ assert(equivalence.getAllEquivalentExprs.count(_.size > 1) == 1)
assert(equivalence.getAllEquivalentExprs.filter(_.size > 1).head.size == 4)
// Set up the expressions
@@ -116,7 +116,7 @@ class SubexpressionEliminationSuite extends SparkFunSuite {
equivalence.addExprTree(sum, true)
// (one * two), (one * two) * (one * two) and sqrt( (one * two) * (one * two) ) should be found
- assert(equivalence.getAllEquivalentExprs.filter(_.size > 1).size == 3)
+ assert(equivalence.getAllEquivalentExprs.count(_.size > 1) == 3)
assert(equivalence.getEquivalentExprs(mul).size == 3)
assert(equivalence.getEquivalentExprs(mul2).size == 3)
assert(equivalence.getEquivalentExprs(sqrt).size == 2)
@@ -144,7 +144,7 @@ class SubexpressionEliminationSuite extends SparkFunSuite {
equivalence.addExprTree(price, false)
equivalence.addExprTree(discount, false)
// quantity, price, discount and (price * (1 - discount))
- assert(equivalence.getAllEquivalentExprs.filter(_.size > 1).size == 4)
+ assert(equivalence.getAllEquivalentExprs.count(_.size > 1) == 4)
}
test("Expression equivalence - non deterministic") {
@@ -164,7 +164,7 @@ class SubexpressionEliminationSuite extends SparkFunSuite {
var equivalence = new EquivalentExpressions
equivalence.addExprTree(add, true)
// the `two` inside `explode` should not be added
- assert(equivalence.getAllEquivalentExprs.filter(_.size > 1).size == 0)
- assert(equivalence.getAllEquivalentExprs.filter(_.size == 1).size == 3) // add, two, explode
+ assert(equivalence.getAllEquivalentExprs.count(_.size > 1) == 0)
+ assert(equivalence.getAllEquivalentExprs.count(_.size == 1) == 3) // add, two, explode
}
}
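
The filter(p).size-to-count(p) rewrites in this suite keep the assertions' meaning while avoiding an intermediate collection built only to be measured. A minimal sketch with hypothetical data shaped like the equivalence classes above:

object CountVsFilterSize {
  def main(args: Array[String]): Unit = {
    val groups = Seq(Seq(1), Seq(1, 2), Seq(1, 2, 3))

    // Same result; count folds in a single pass without building the filtered Seq.
    assert(groups.filter(_.size > 1).size == 2)
    assert(groups.count(_.size > 1) == 2)
  }
}
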
http://git-wip-us.apache.org/repos/asf/spark/blob/e97fc7f1/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnarBatch.java
----------------------------------------------------------------------
diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnarBatch.java b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnarBatch.java
index 8a0d7f8..2a78058 100644
--- a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnarBatch.java
+++ b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnarBatch.java
@@ -251,7 +251,6 @@ public final class ColumnarBatch {
@Override
public Row next() {
- assert(hasNext());
while (rowId < maxRows && ColumnarBatch.this.filteredRows[rowId]) {
++rowId;
}
http://git-wip-us.apache.org/repos/asf/spark/blob/e97fc7f1/sql/core/src/main/scala/org/apache/spark/sql/ContinuousQueryManager.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/ContinuousQueryManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/ContinuousQueryManager.scala
index 13142d0..0a156ea 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/ContinuousQueryManager.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/ContinuousQueryManager.scala
@@ -55,9 +55,8 @@ class ContinuousQueryManager(sqlContext: SQLContext) {
* @since 2.0.0
*/
def get(name: String): ContinuousQuery = activeQueriesLock.synchronized {
- activeQueries.get(name).getOrElse {
- throw new IllegalArgumentException(s"There is no active query with name $name")
- }
+ activeQueries.getOrElse(name,
+ throw new IllegalArgumentException(s"There is no active query with name $name"))
}
/**
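
map.get(key).getOrElse { default } allocates an Option only to unwrap it; Map.getOrElse(key, default) does the lookup and the fallback in one call, and because the second argument is by-name the throw is evaluated only on a miss. A sketch with a hypothetical lookup helper (the same pattern recurs in the HiveMetastoreCatalog hunk below):

object GetOrElseSketch {
  def lookup(activeQueries: Map[String, Int], name: String): Int =
    // By-name default: the exception is constructed and thrown only if the key is absent.
    activeQueries.getOrElse(name,
      throw new IllegalArgumentException(s"There is no active query with name $name"))

  def main(args: Array[String]): Unit = {
    val active = Map("q1" -> 1)
    assert(lookup(active, "q1") == 1)
  }
}
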
http://git-wip-us.apache.org/repos/asf/spark/blob/e97fc7f1/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala
index 68a2517..d8af799 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala
@@ -94,7 +94,7 @@ private[r] object SQLUtils {
}
def createDF(rdd: RDD[Array[Byte]], schema: StructType, sqlContext: SQLContext): DataFrame = {
- val num = schema.fields.size
+ val num = schema.fields.length
val rowRDD = rdd.map(bytesToRow(_, schema))
sqlContext.createDataFrame(rowRDD, schema)
}
http://git-wip-us.apache.org/repos/asf/spark/blob/e97fc7f1/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnAccessor.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnAccessor.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnAccessor.scala
index fee36f6..78664ba 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnAccessor.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnAccessor.scala
@@ -19,6 +19,8 @@ package org.apache.spark.sql.execution.columnar
import java.nio.{ByteBuffer, ByteOrder}
+import scala.annotation.tailrec
+
import org.apache.spark.sql.catalyst.expressions.{MutableRow, UnsafeArrayData, UnsafeMapData, UnsafeRow}
import org.apache.spark.sql.execution.columnar.compression.CompressibleColumnAccessor
import org.apache.spark.sql.types._
@@ -120,6 +122,7 @@ private[columnar] class MapColumnAccessor(buffer: ByteBuffer, dataType: MapType)
with NullableColumnAccessor
private[columnar] object ColumnAccessor {
+ @tailrec
def apply(dataType: DataType, buffer: ByteBuffer): ColumnAccessor = {
val buf = buffer.order(ByteOrder.nativeOrder)
http://git-wip-us.apache.org/repos/asf/spark/blob/e97fc7f1/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnType.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnType.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnType.scala
index 9c908b2..3ec0118 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnType.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnType.scala
@@ -20,6 +20,7 @@ package org.apache.spark.sql.execution.columnar
import java.math.{BigDecimal, BigInteger}
import java.nio.ByteBuffer
+import scala.annotation.tailrec
import scala.reflect.runtime.universe.TypeTag
import org.apache.spark.sql.catalyst.InternalRow
@@ -548,7 +549,7 @@ private[columnar] object LARGE_DECIMAL {
private[columnar] case class STRUCT(dataType: StructType)
extends ColumnType[UnsafeRow] with DirectCopyColumnType[UnsafeRow] {
- private val numOfFields: Int = dataType.fields.size
+ private val numOfFields: Int = dataType.fields.length
override def defaultSize: Int = 20
@@ -663,6 +664,7 @@ private[columnar] case class MAP(dataType: MapType)
}
private[columnar] object ColumnType {
+ @tailrec
def apply(dataType: DataType): ColumnType[_] = {
dataType match {
case NullType => NULL
http://git-wip-us.apache.org/repos/asf/spark/blob/e97fc7f1/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryColumnarTableScan.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryColumnarTableScan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryColumnarTableScan.scala
index 22d4278..1f964b1 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryColumnarTableScan.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryColumnarTableScan.scala
@@ -147,7 +147,7 @@ private[sql] case class InMemoryRelation(
// may result malformed rows, causing ArrayIndexOutOfBoundsException, which is somewhat
// hard to decipher.
assert(
- row.numFields == columnBuilders.size,
+ row.numFields == columnBuilders.length,
s"Row column number mismatch, expected ${output.size} columns, " +
s"but got ${row.numFields}." +
s"\nRow content: $row")
http://git-wip-us.apache.org/repos/asf/spark/blob/e97fc7f1/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVRelation.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVRelation.scala
index e9afee1..d2d7996 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVRelation.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVRelation.scala
@@ -204,16 +204,16 @@ object CSVRelation extends Logging {
val rowArray = new Array[Any](safeRequiredIndices.length)
val requiredSize = requiredFields.length
tokenizedRDD.flatMap { tokens =>
- if (params.dropMalformed && schemaFields.length != tokens.size) {
+ if (params.dropMalformed && schemaFields.length != tokens.length) {
logWarning(s"Dropping malformed line: ${tokens.mkString(params.delimiter.toString)}")
None
- } else if (params.failFast && schemaFields.length != tokens.size) {
+ } else if (params.failFast && schemaFields.length != tokens.length) {
throw new RuntimeException(s"Malformed line in FAILFAST mode: " +
s"${tokens.mkString(params.delimiter.toString)}")
} else {
- val indexSafeTokens = if (params.permissive && schemaFields.length > tokens.size) {
- tokens ++ new Array[String](schemaFields.length - tokens.size)
- } else if (params.permissive && schemaFields.length < tokens.size) {
+ val indexSafeTokens = if (params.permissive && schemaFields.length > tokens.length) {
+ tokens ++ new Array[String](schemaFields.length - tokens.length)
+ } else if (params.permissive && schemaFields.length < tokens.length) {
tokens.take(schemaFields.length)
} else {
tokens
http://git-wip-us.apache.org/repos/asf/spark/blob/e97fc7f1/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala
index ed02b3f..4dd3c50 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala
@@ -212,14 +212,14 @@ private[sql] object JDBCRDD extends Logging {
// We can't compile Or filter unless both sub-filters are compiled successfully.
// It applies too for the following And filter.
// If we can make sure compileFilter supports all filters, we can remove this check.
- val or = Seq(f1, f2).map(compileFilter(_)).flatten
+ val or = Seq(f1, f2).flatMap(compileFilter(_))
if (or.size == 2) {
or.map(p => s"($p)").mkString(" OR ")
} else {
null
}
case And(f1, f2) =>
- val and = Seq(f1, f2).map(compileFilter(_)).flatten
+ val and = Seq(f1, f2).flatMap(compileFilter(_))
if (and.size == 2) {
and.map(p => s"($p)").mkString(" AND ")
} else {
@@ -304,7 +304,7 @@ private[sql] class JDBCRDD(
* `filters`, but as a WHERE clause suitable for injection into a SQL query.
*/
private val filterWhereClause: String =
- filters.map(JDBCRDD.compileFilter).flatten.mkString(" AND ")
+ filters.flatMap(JDBCRDD.compileFilter).mkString(" AND ")
/**
* A WHERE clause representing both `filters`, if any, and the current partition.
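
Seq(f1, f2).map(compileFilter(_)).flatten first builds a Seq[Option[String]] and then flattens it; flatMap fuses the two steps without changing the result. A self-contained sketch using a hypothetical compile step that may fail:

object FlatMapSketch {
  // Hypothetical stand-in for a partial compilation step returning Option.
  def compile(n: Int): Option[String] = if (n > 0) Some(s"x > $n") else None

  def main(args: Array[String]): Unit = {
    val inputs = Seq(1, -2, 3)

    // Identical results; flatMap skips the intermediate Seq[Option[String]].
    assert(inputs.map(compile).flatten == Seq("x > 1", "x > 3"))
    assert(inputs.flatMap(compile) == Seq("x > 1", "x > 3"))
  }
}
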
http://git-wip-us.apache.org/repos/asf/spark/blob/e97fc7f1/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLListener.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLListener.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLListener.scala
index 835e7ba..f9d1029 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLListener.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLListener.scala
@@ -335,7 +335,7 @@ private[spark] class SQLHistoryListener(conf: SparkConf, sparkUI: SparkUI)
taskEnd.taskInfo.accumulables.flatMap { a =>
// Filter out accumulators that are not SQL metrics
// For now we assume all SQL metrics are Long's that have been JSON serialized as String's
- if (a.metadata.exists(_ == SQLMetrics.ACCUM_IDENTIFIER)) {
+ if (a.metadata.contains(SQLMetrics.ACCUM_IDENTIFIER)) {
val newValue = new LongSQLMetricValue(a.update.map(_.toString.toLong).getOrElse(0L))
Some(a.copy(update = Some(newValue)))
} else {
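
On Option, exists(_ == x) and contains(x) (available since Scala 2.11) are equivalent; contains states the membership test directly. A one-liner sketch with a made-up metadata value:

object OptionContainsSketch {
  def main(args: Array[String]): Unit = {
    val metadata: Option[String] = Some("sql")

    // Equivalent on Option; contains reads as a direct membership check.
    assert(metadata.exists(_ == "sql"))
    assert(metadata.contains("sql"))
  }
}
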
http://git-wip-us.apache.org/repos/asf/spark/blob/e97fc7f1/sql/core/src/test/java/test/org/apache/spark/sql/JavaDataFrameSuite.java
----------------------------------------------------------------------
diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/JavaDataFrameSuite.java b/sql/core/src/test/java/test/org/apache/spark/sql/JavaDataFrameSuite.java
index 0d4c128..ee85626 100644
--- a/sql/core/src/test/java/test/org/apache/spark/sql/JavaDataFrameSuite.java
+++ b/sql/core/src/test/java/test/org/apache/spark/sql/JavaDataFrameSuite.java
@@ -355,27 +355,27 @@ public class JavaDataFrameSuite {
DataFrame df = context.range(1000);
BloomFilter filter1 = df.stat().bloomFilter("id", 1000, 0.03);
- assert (filter1.expectedFpp() - 0.03 < 1e-3);
+ Assert.assertTrue(filter1.expectedFpp() - 0.03 < 1e-3);
for (int i = 0; i < 1000; i++) {
- assert (filter1.mightContain(i));
+ Assert.assertTrue(filter1.mightContain(i));
}
BloomFilter filter2 = df.stat().bloomFilter(col("id").multiply(3), 1000, 0.03);
- assert (filter2.expectedFpp() - 0.03 < 1e-3);
+ Assert.assertTrue(filter2.expectedFpp() - 0.03 < 1e-3);
for (int i = 0; i < 1000; i++) {
- assert (filter2.mightContain(i * 3));
+ Assert.assertTrue(filter2.mightContain(i * 3));
}
BloomFilter filter3 = df.stat().bloomFilter("id", 1000, 64 * 5);
- assert (filter3.bitSize() == 64 * 5);
+ Assert.assertTrue(filter3.bitSize() == 64 * 5);
for (int i = 0; i < 1000; i++) {
- assert (filter3.mightContain(i));
+ Assert.assertTrue(filter3.mightContain(i));
}
BloomFilter filter4 = df.stat().bloomFilter(col("id").multiply(3), 1000, 64 * 5);
- assert (filter4.bitSize() == 64 * 5);
+ Assert.assertTrue(filter4.bitSize() == 64 * 5);
for (int i = 0; i < 1000; i++) {
- assert (filter4.mightContain(i * 3));
+ Assert.assertTrue(filter4.mightContain(i * 3));
}
}
}
http://git-wip-us.apache.org/repos/asf/spark/blob/e97fc7f1/sql/core/src/test/java/test/org/apache/spark/sql/JavaDatasetSuite.java
----------------------------------------------------------------------
diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/JavaDatasetSuite.java b/sql/core/src/test/java/test/org/apache/spark/sql/JavaDatasetSuite.java
index 1181244..e0e56f3 100644
--- a/sql/core/src/test/java/test/org/apache/spark/sql/JavaDatasetSuite.java
+++ b/sql/core/src/test/java/test/org/apache/spark/sql/JavaDatasetSuite.java
@@ -304,15 +304,12 @@ public class JavaDatasetSuite implements Serializable {
Assert.assertEquals(Arrays.asList("abc", "abc"), subtracted.collectAsList());
}
- private <T> Set<T> toSet(List<T> records) {
- Set<T> set = new HashSet<T>();
- for (T record : records) {
- set.add(record);
- }
- return set;
+ private static <T> Set<T> toSet(List<T> records) {
+ return new HashSet<>(records);
}
- private <T> Set<T> asSet(T... records) {
+ @SafeVarargs
+ private static <T> Set<T> asSet(T... records) {
return toSet(Arrays.asList(records));
}
@@ -529,7 +526,7 @@ public class JavaDatasetSuite implements Serializable {
Encoders.kryo(PrivateClassTest.class);
}
- public class SimpleJavaBean implements Serializable {
+ public static class SimpleJavaBean implements Serializable {
private boolean a;
private int b;
private byte[] c;
@@ -612,7 +609,7 @@ public class JavaDatasetSuite implements Serializable {
}
}
- public class SimpleJavaBean2 implements Serializable {
+ public static class SimpleJavaBean2 implements Serializable {
private Timestamp a;
private Date b;
private java.math.BigDecimal c;
@@ -650,7 +647,7 @@ public class JavaDatasetSuite implements Serializable {
}
}
- public class NestedJavaBean implements Serializable {
+ public static class NestedJavaBean implements Serializable {
private SimpleJavaBean a;
public SimpleJavaBean getA() {
@@ -745,7 +742,7 @@ public class JavaDatasetSuite implements Serializable {
ds.collect();
}
- public class SmallBean implements Serializable {
+ public static class SmallBean implements Serializable {
private String a;
private int b;
@@ -780,7 +777,7 @@ public class JavaDatasetSuite implements Serializable {
}
}
- public class NestedSmallBean implements Serializable {
+ public static class NestedSmallBean implements Serializable {
private SmallBean f;
public SmallBean getF() {
http://git-wip-us.apache.org/repos/asf/spark/blob/e97fc7f1/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
index 84f30c0..a824759 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
@@ -603,7 +603,7 @@ class DataFrameSuite extends QueryTest with SharedSQLContext {
assert(parquetDF.inputFiles.nonEmpty)
val unioned = jsonDF.unionAll(parquetDF).inputFiles.sorted
- val allFiles = (jsonDF.inputFiles ++ parquetDF.inputFiles).toSet.toArray.sorted
+ val allFiles = (jsonDF.inputFiles ++ parquetDF.inputFiles).distinct.sorted
assert(unioned === allFiles)
}
}
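
toSet.toArray deduplicates by way of an intermediate Set and gives no ordering guarantee; distinct deduplicates in one pass and keeps first-occurrence order (both results are sorted afterwards here in any case). A minimal sketch with hypothetical file names:

object DistinctSketch {
  def main(args: Array[String]): Unit = {
    val files = Array("a.json", "b.parquet", "a.json")

    // distinct removes duplicates without building an intermediate Set.
    assert(files.distinct.sorted.sameElements(Array("a.json", "b.parquet")))
  }
}
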
http://git-wip-us.apache.org/repos/asf/spark/blob/e97fc7f1/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 16e769f..f59faa0 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -1562,16 +1562,15 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
e.message.contains("Cannot save interval data type into external storage")
})
- def checkIntervalParseError(s: String): Unit = {
- val e = intercept[AnalysisException] {
- sql(s)
- }
- e.message.contains("at least one time unit should be given for interval literal")
+ val e1 = intercept[AnalysisException] {
+ sql("select interval")
}
-
- checkIntervalParseError("select interval")
+ assert(e1.message.contains("at least one time unit should be given for interval literal"))
// Currently we don't yet support nanosecond
- checkIntervalParseError("select interval 23 nanosecond")
+ val e2 = intercept[AnalysisException] {
+ sql("select interval 23 nanosecond")
+ }
+ assert(e2.message.contains("cannot recognize input near"))
}
test("SPARK-8945: add and subtract expressions for interval type") {
http://git-wip-us.apache.org/repos/asf/spark/blob/e97fc7f1/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala
index 5d84feb..3e91569 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala
@@ -385,7 +385,7 @@ private[hive] trait HiveInspectors {
(o: Any) =>
if (o != null) {
val s = o.asInstanceOf[UTF8String].toString
- new HiveVarchar(s, s.size)
+ new HiveVarchar(s, s.length)
} else {
null
}
@@ -394,7 +394,7 @@ private[hive] trait HiveInspectors {
(o: Any) =>
if (o != null) {
val s = o.asInstanceOf[UTF8String].toString
- new HiveChar(s, s.size)
+ new HiveChar(s, s.length)
} else {
null
}
http://git-wip-us.apache.org/repos/asf/spark/blob/e97fc7f1/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
index 3788736..ee8ec2d 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
@@ -149,11 +149,10 @@ private[hive] class HiveMetastoreCatalog(val client: HiveClient, hive: HiveConte
def getColumnNames(colType: String): Seq[String] = {
table.properties.get(s"spark.sql.sources.schema.num${colType.capitalize}Cols").map {
numCols => (0 until numCols.toInt).map { index =>
- table.properties.get(s"spark.sql.sources.schema.${colType}Col.$index").getOrElse {
+ table.properties.getOrElse(s"spark.sql.sources.schema.${colType}Col.$index",
throw new AnalysisException(
s"Could not read $colType columns from the metastore because it is corrupted " +
- s"(missing part $index of it, $numCols parts are expected).")
- }
+ s"(missing part $index of it, $numCols parts are expected)."))
}
}.getOrElse(Nil)
}