You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@spark.apache.org by "zhengruifeng (Jira)" <ji...@apache.org> on 2020/05/15 07:34:00 UTC
[jira] [Comment Edited] (SPARK-31714) Performance test on java
vectorization vs dot vs gemv vs gemm
[ https://issues.apache.org/jira/browse/SPARK-31714?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17108018#comment-17108018 ]
zhengruifeng edited comment on SPARK-31714 at 5/15/20, 7:33 AM:
----------------------------------------------------------------
additionally test on impl of gemv:
{code:java}
test("performance: gemv vs while-gemv") {
  // Baseline: hand-written gemv over a column-major (non-transposed) DenseMatrix,
  // accumulating output += A(:, j) * x(j) one column at a time with while loops.
  def whileGemv(mat: DenseMatrix, vec: DenseVector): DenseVector = {
    require(!mat.isTransposed)
    val m = mat.numRows
    val n = mat.numCols
    require(vec.size == n)
    val matValues = mat.values
    val vecValues = vec.values
    val output = Array.ofDim[Double](m)
    var i = 0
    var j = 0
    while (j < n) {
      // Column j begins at offset m * j in the column-major value array.
      val startIdx = m * j
      val v = vecValues(j)
      i = 0
      while (i < m) {
        output(i) += matValues(startIdx + i) * v
        i += 1
      }
      j += 1
    }
    new DenseVector(output)
  }

  val shapeBuffer = mutable.ArrayBuilder.make[String]()
  val ratioBuffer = mutable.ArrayBuilder.make[Double]()
  for (numRows <- Seq(16, 64, 256, 1024, 4096); numCols <- Seq(16, 64, 256, 1024, 4096)) {
    // Fixed seed so every shape is benchmarked on reproducible data.
    val rng = new Random(123)
    val matrix = Matrices.dense(numRows, numCols,
      Array.fill(numRows * numCols)(rng.nextDouble)).toDense
    val coefVec = Vectors.dense(Array.fill(numCols)(rng.nextDouble)).toDense
    // (Removed unused `vectors = matrix.rowIter.toArray` — it materialized every
    // row as a fresh vector per shape without being read — and unused `coefArr`.)

    // NOTE(review): no warm-up pass before timing — JIT compilation likely skews
    // the first measured shape (see the 16 X 16 outlier); consider an untimed
    // warm-up loop or JMH for publishable numbers.
    val start1 = System.nanoTime
    Seq.range(0, 100).foreach { _ => matrix.multiply(coefVec) }
    val dur1 = System.nanoTime - start1

    val start2 = System.nanoTime
    Seq.range(0, 100).foreach { _ => whileGemv(matrix, coefVec) }
    val dur2 = System.nanoTime - start2

    shapeBuffer += s"$numRows X $numCols"
    ratioBuffer += dur1 / dur2.toDouble
    println(s"numRows=$numRows, numCols=$numCols, gemv: $dur1, whileGemv: $dur2, " +
      s"gemv/whileGemv: ${dur1.toDouble / dur2}")
  }
  println(s"shapes: ${shapeBuffer.result().mkString(",")}")
  println(s"ratios: ${ratioBuffer.result().mkString(",")}")
}
{code}
Duration ratios of BLAS(OpenBLAS) : BLAS(Java) : Java vectorization (whileGemv), normalized so whileGemv = 1.
Smaller is better.
16 X 16: 10.102879222350534 : 9.959393672790585 : 1
16 X 64: 0.6313347039650034 : 1.5069314081915879 : 1
16 X 256: 0.910207085544699 : 1.6170097903436782 : 1
16 X 1024: 0.14076750751831094 : 1.6376539758035005 : 1
16 X 4096: 0.3820137565286111 : 1.4649140200740003 : 1
64 X 16: 2.8673586429725364 : 1.969880827023684 : 1
64 X 64: 1.0055741530692275 : 1.3026339290803859 : 1
64 X 256: 0.5070096449300102 : 1.2295682324328647 : 1
64 X 1024: 0.3274242265593191 : 1.2509151212941314 : 1
64 X 4096: 0.3128853980795693 : 1.2300961378942419 : 1
256 X 16: 0.706246615744421 : 1.2293631722237384 : 1
256 X 64: 0.4953318665588364 : 1.0788036857858834 : 1
256 X 256: 0.3683838887701576 : 1.1598682179753397 : 1
256 X 1024: 0.310782477418242 : 1.1286869048387194 : 1
256 X 4096: 0.5179985507534923 : 1.113165303546807 : 1
1024 X 16: 0.5854246295743595 : 1.2117608900770562 : 1
1024 X 64: 0.4417717319177173 : 1.1725839824047304 : 1
1024 X 256: 0.3816961486090574 : 1.1040280425824138 : 1
1024 X 1024: 0.4209589414251511 : 1.1066541963615741 : 1
1024 X 4096: 0.5353395921250336 : 1.078501530540412 : 1
4096 X 16: 0.5745857849841409 : 1.1618592112098773 : 1
4096 X 64: 0.47592411311765476 : 1.129909923930711 : 1
4096 X 256: 0.4784264781542997 : 1.1055266185525001 : 1
4096 X 1024: 0.5531428334840445 : 1.0856731602285508 : 1
4096 X 4096: 0.592064493623388 : 1.060620615275768 : 1
was (Author: podongfeng):
additionally test on impl of gemv:
{code:java}
test("performance: gemv vs while-gemv") {
// Baseline: hand-written gemv over a column-major (non-transposed) DenseMatrix,
// accumulating output += A(:, j) * x(j) one column at a time with while loops.
def whileGemv(mat: DenseMatrix, vec: DenseVector): DenseVector = {
require(!mat.isTransposed)
val m = mat.numRows
val n = mat.numCols
require(vec.size == n)
val matValues = mat.values
val vecValues = vec.values
val output = Array.ofDim[Double](m)
var i = 0
var j = 0
while (j < n) {
// Column j begins at offset m * j in the column-major value array.
val startIdx = m * j
val v = vecValues(j)
i = 0
while (i < m) {
output(i) += matValues(startIdx + i) * v
i += 1
}
j += 1
}
new DenseVector(output)
}
val shapeBuffer = mutable.ArrayBuilder.make[String]()
val ratioBuffer = mutable.ArrayBuilder.make[Double]()
for (numRows <- Seq(16, 64, 256, 1024, 4096); numCols <- Seq(16, 64, 256, 1024, 4096)) {
// Fixed seed so every shape is benchmarked on reproducible data.
val rng = new Random(123)
val matrix = Matrices.dense(numRows, numCols,
Array.fill(numRows * numCols)(rng.nextDouble)).toDense
// NOTE(review): `vectors` and `coefArr` below are never read.
val vectors = matrix.rowIter.toArray
val coefVec = Vectors.dense(Array.fill(numCols)(rng.nextDouble)).toDense
val coefArr = coefVec.toArray
// Time 100 calls of MLlib's gemv (matrix.multiply) vs 100 calls of whileGemv.
val start1 = System.nanoTime
Seq.range(0, 100).foreach { _ => matrix.multiply(coefVec) }
val dur1 = System.nanoTime - start1
val start2 = System.nanoTime
Seq.range(0, 100).foreach { _ => whileGemv(matrix, coefVec) }
val dur2 = System.nanoTime - start2
shapeBuffer += s"$numRows X $numCols"
ratioBuffer += dur1 / dur2.toDouble
println(s"numRows=$numRows, numCols=$numCols, gemv: $dur1, whileGemv: $dur2, " +
s"gemv/whileGemv: ${dur1.toDouble / dur2}")
}
println(s"shapes: ${shapeBuffer.result().mkString(",")}")
println(s"ratios: ${ratioBuffer.result().mkString(",")}")
}
{code}
Duration ratios of BLAS(OpenBLAS) : BLAS(Java) : Java vectorization (whileGemv), normalized so whileGemv = 1; smaller is better.
16 X 16: 10.102879222350534 : 9.959393672790585 : 1
16 X 64: 0.6313347039650034 : 1.5069314081915879 : 1
16 X 256: 0.910207085544699 : 1.6170097903436782 : 1
16 X 1024: 0.14076750751831094 : 1.6376539758035005 : 1
16 X 4096: 0.3820137565286111 : 1.4649140200740003 : 1
64 X 16: 2.8673586429725364 : 1.969880827023684 : 1
64 X 64: 1.0055741530692275 : 1.3026339290803859 : 1
64 X 256: 0.5070096449300102 : 1.2295682324328647 : 1
64 X 1024: 0.3274242265593191 : 1.2509151212941314 : 1
64 X 4096: 0.3128853980795693 : 1.2300961378942419 : 1
256 X 16: 0.706246615744421 : 1.2293631722237384 : 1
256 X 64: 0.4953318665588364 : 1.0788036857858834 : 1
256 X 256: 0.3683838887701576 : 1.1598682179753397 : 1
256 X 1024: 0.310782477418242 : 1.1286869048387194 : 1
256 X 4096: 0.5179985507534923 : 1.113165303546807 : 1
1024 X 16: 0.5854246295743595 : 1.2117608900770562 : 1
1024 X 64: 0.4417717319177173 : 1.1725839824047304 : 1
1024 X 256: 0.3816961486090574 : 1.1040280425824138 : 1
1024 X 1024: 0.4209589414251511 : 1.1066541963615741 : 1
1024 X 4096: 0.5353395921250336 : 1.078501530540412 : 1
4096 X 16: 0.5745857849841409 : 1.1618592112098773 : 1
4096 X 64: 0.47592411311765476 : 1.129909923930711 : 1
4096 X 256: 0.4784264781542997 : 1.1055266185525001 : 1
4096 X 1024: 0.5531428334840445 : 1.0856731602285508 : 1
4096 X 4096: 0.592064493623388 : 1.060620615275768 : 1
> Performance test on java vectorization vs dot vs gemv vs gemm
> -------------------------------------------------------------
>
> Key: SPARK-31714
> URL: https://issues.apache.org/jira/browse/SPARK-31714
> Project: Spark
> Issue Type: Sub-task
> Components: ML
> Affects Versions: 3.1.0
> Reporter: zhengruifeng
> Assignee: zhengruifeng
> Priority: Minor
> Attachments: BLASSuite.scala, blas-perf
>
>
--
This message was sent by Atlassian Jira
(v8.3.4#803005)
---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org
For additional commands, e-mail: issues-help@spark.apache.org