You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2021/03/15 03:16:14 UTC
[lucene] branch main updated: LUCENE-9837: try to improve
performance of VectorUtil.dotProduct (#17)
This is an automated email from the ASF dual-hosted git repository.
rmuir pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/lucene.git
The following commit(s) were added to refs/heads/main by this push:
new d48193e LUCENE-9837: try to improve performance of VectorUtil.dotProduct (#17)
d48193e is described below
commit d48193e8cff854aea6f5e00c6b6bb7b176b97c72
Author: Robert Muir <rm...@apache.org>
AuthorDate: Sun Mar 14 23:16:08 2021 -0400
LUCENE-9837: try to improve performance of VectorUtil.dotProduct (#17)
More loop unrolling for VectorUtil.dotProduct to eke out a bit more short-term performance.
---
.../java/org/apache/lucene/util/VectorUtil.java | 64 ++++++++++++++++------
1 file changed, 47 insertions(+), 17 deletions(-)
diff --git a/lucene/core/src/java/org/apache/lucene/util/VectorUtil.java b/lucene/core/src/java/org/apache/lucene/util/VectorUtil.java
index cc9cd15..546d13d 100644
--- a/lucene/core/src/java/org/apache/lucene/util/VectorUtil.java
+++ b/lucene/core/src/java/org/apache/lucene/util/VectorUtil.java
@@ -42,25 +42,55 @@ public final class VectorUtil {
if (a.length < 8) {
return res;
}
- float s0 = 0f;
- float s1 = 0f;
- float s2 = 0f;
- float s3 = 0f;
- float s4 = 0f;
- float s5 = 0f;
- float s6 = 0f;
- float s7 = 0f;
+ for (; i + 31 < a.length; i += 32) {
+ res +=
+ b[i + 0] * a[i + 0]
+ + b[i + 1] * a[i + 1]
+ + b[i + 2] * a[i + 2]
+ + b[i + 3] * a[i + 3]
+ + b[i + 4] * a[i + 4]
+ + b[i + 5] * a[i + 5]
+ + b[i + 6] * a[i + 6]
+ + b[i + 7] * a[i + 7];
+ res +=
+ b[i + 8] * a[i + 8]
+ + b[i + 9] * a[i + 9]
+ + b[i + 10] * a[i + 10]
+ + b[i + 11] * a[i + 11]
+ + b[i + 12] * a[i + 12]
+ + b[i + 13] * a[i + 13]
+ + b[i + 14] * a[i + 14]
+ + b[i + 15] * a[i + 15];
+ res +=
+ b[i + 16] * a[i + 16]
+ + b[i + 17] * a[i + 17]
+ + b[i + 18] * a[i + 18]
+ + b[i + 19] * a[i + 19]
+ + b[i + 20] * a[i + 20]
+ + b[i + 21] * a[i + 21]
+ + b[i + 22] * a[i + 22]
+ + b[i + 23] * a[i + 23];
+ res +=
+ b[i + 24] * a[i + 24]
+ + b[i + 25] * a[i + 25]
+ + b[i + 26] * a[i + 26]
+ + b[i + 27] * a[i + 27]
+ + b[i + 28] * a[i + 28]
+ + b[i + 29] * a[i + 29]
+ + b[i + 30] * a[i + 30]
+ + b[i + 31] * a[i + 31];
+ }
for (; i + 7 < a.length; i += 8) {
- s0 += b[i] * a[i];
- s1 += b[i + 1] * a[i + 1];
- s2 += b[i + 2] * a[i + 2];
- s3 += b[i + 3] * a[i + 3];
- s4 += b[i + 4] * a[i + 4];
- s5 += b[i + 5] * a[i + 5];
- s6 += b[i + 6] * a[i + 6];
- s7 += b[i + 7] * a[i + 7];
+ res +=
+ b[i + 0] * a[i + 0]
+ + b[i + 1] * a[i + 1]
+ + b[i + 2] * a[i + 2]
+ + b[i + 3] * a[i + 3]
+ + b[i + 4] * a[i + 4]
+ + b[i + 5] * a[i + 5]
+ + b[i + 6] * a[i + 6]
+ + b[i + 7] * a[i + 7];
}
- res += s0 + s1 + s2 + s3 + s4 + s5 + s6 + s7;
return res;
}