You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by ss...@apache.org on 2014/06/18 22:31:52 UTC
git commit: MAHOUT-1580 Optimize getNumNonZeroElements() closes
apache/mahout#17
Repository: mahout
Updated Branches:
refs/heads/master 6fbf3663b -> 45d88031c
MAHOUT-1580 Optimize getNumNonZeroElements() closes apache/mahout#17
Project: http://git-wip-us.apache.org/repos/asf/mahout/repo
Commit: http://git-wip-us.apache.org/repos/asf/mahout/commit/45d88031
Tree: http://git-wip-us.apache.org/repos/asf/mahout/tree/45d88031
Diff: http://git-wip-us.apache.org/repos/asf/mahout/diff/45d88031
Branch: refs/heads/master
Commit: 45d88031c8e782a624eaf6cefea9700600805647
Parents: 6fbf366
Author: ssc <ss...@apache.org>
Authored: Wed Jun 18 22:31:34 2014 +0200
Committer: ssc <ss...@apache.org>
Committed: Wed Jun 18 22:31:34 2014 +0200
----------------------------------------------------------------------
CHANGELOG | 2 ++
.../org/apache/mahout/math/DenseVector.java | 11 +++++++
.../apache/mahout/math/PermutedVectorView.java | 13 +++++---
.../mahout/math/RandomAccessSparseVector.java | 14 +++++++++
.../math/SequentialAccessSparseVector.java | 13 ++++++++
.../java/org/apache/mahout/math/VectorTest.java | 32 ++++++++++++++++++++
6 files changed, 80 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mahout/blob/45d88031/CHANGELOG
----------------------------------------------------------------------
diff --git a/CHANGELOG b/CHANGELOG
index 1d5bd3d..47aacf6 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -2,6 +2,8 @@ Mahout Change Log
Release 1.0 - unreleased
+ MAHOUT-1580 Optimize getNumNonZeroElements() (ssc)
+
MAHOUT-1464: Cooccurrence Analysis on Spark (pat)
MAHOUT-1578: Optimizations in matrix serialization (ssc)
http://git-wip-us.apache.org/repos/asf/mahout/blob/45d88031/math/src/main/java/org/apache/mahout/math/DenseVector.java
----------------------------------------------------------------------
diff --git a/math/src/main/java/org/apache/mahout/math/DenseVector.java b/math/src/main/java/org/apache/mahout/math/DenseVector.java
index 3c3d6b4..5b3dea7 100644
--- a/math/src/main/java/org/apache/mahout/math/DenseVector.java
+++ b/math/src/main/java/org/apache/mahout/math/DenseVector.java
@@ -159,6 +159,17 @@ public class DenseVector extends AbstractVector {
return values.length;
}
+ @Override
+ public int getNumNonZeroElements() {
+ int numNonZeros = 0;
+ for (int index = 0; index < values.length; index++) {
+ if (values[index] != 0) {
+ numNonZeros++;
+ }
+ }
+ return numNonZeros;
+ }
+
public Vector assign(DenseVector vector) {
// make sure the data field has the correct length
if (vector.values.length != this.values.length) {
http://git-wip-us.apache.org/repos/asf/mahout/blob/45d88031/math/src/main/java/org/apache/mahout/math/PermutedVectorView.java
----------------------------------------------------------------------
diff --git a/math/src/main/java/org/apache/mahout/math/PermutedVectorView.java b/math/src/main/java/org/apache/mahout/math/PermutedVectorView.java
index e2d2261..f34f2b0 100644
--- a/math/src/main/java/org/apache/mahout/math/PermutedVectorView.java
+++ b/math/src/main/java/org/apache/mahout/math/PermutedVectorView.java
@@ -215,17 +215,20 @@ public class PermutedVectorView extends AbstractVector {
vector.setQuick(pivot[index], value);
}
- /**
- * Return the number of values in the recipient
- *
- * @return an int
- */
+ /** Return the number of values in the recipient */
@Override
public int getNumNondefaultElements() {
return vector.getNumNondefaultElements();
}
@Override
+ public int getNumNonZeroElements() {
+ // Return the number of nonzeros in the recipient,
+ // so potentially don't have to go through our iterator
+ return vector.getNumNonZeroElements();
+ }
+
+ @Override
public double getLookupCost() {
return vector.getLookupCost();
}
http://git-wip-us.apache.org/repos/asf/mahout/blob/45d88031/math/src/main/java/org/apache/mahout/math/RandomAccessSparseVector.java
----------------------------------------------------------------------
diff --git a/math/src/main/java/org/apache/mahout/math/RandomAccessSparseVector.java b/math/src/main/java/org/apache/mahout/math/RandomAccessSparseVector.java
index 70523ff..dbe5d3a 100644
--- a/math/src/main/java/org/apache/mahout/math/RandomAccessSparseVector.java
+++ b/math/src/main/java/org/apache/mahout/math/RandomAccessSparseVector.java
@@ -20,6 +20,7 @@ package org.apache.mahout.math;
import java.util.Iterator;
import java.util.NoSuchElementException;
+import org.apache.mahout.math.list.DoubleArrayList;
import org.apache.mahout.math.map.OpenIntDoubleHashMap;
import org.apache.mahout.math.map.OpenIntDoubleHashMap.MapElement;
import org.apache.mahout.math.set.AbstractSet;
@@ -146,6 +147,19 @@ public class RandomAccessSparseVector extends AbstractVector {
}
@Override
+ public int getNumNonZeroElements() {
+ DoubleArrayList elementValues = values.values();
+ int numMappedElements = elementValues.size();
+ int numNonZeros = 0;
+ for (int index = 0; index < numMappedElements; index++) {
+ if (elementValues.getQuick(index) != 0) {
+ numNonZeros++;
+ }
+ }
+ return numNonZeros;
+ }
+
+ @Override
public double getLookupCost() {
return 1;
}
http://git-wip-us.apache.org/repos/asf/mahout/blob/45d88031/math/src/main/java/org/apache/mahout/math/SequentialAccessSparseVector.java
----------------------------------------------------------------------
diff --git a/math/src/main/java/org/apache/mahout/math/SequentialAccessSparseVector.java b/math/src/main/java/org/apache/mahout/math/SequentialAccessSparseVector.java
index f3067ef..331662c 100644
--- a/math/src/main/java/org/apache/mahout/math/SequentialAccessSparseVector.java
+++ b/math/src/main/java/org/apache/mahout/math/SequentialAccessSparseVector.java
@@ -185,6 +185,19 @@ public class SequentialAccessSparseVector extends AbstractVector {
}
@Override
+ public int getNumNonZeroElements() {
+ double[] elementValues = values.getValues();
+ int numMappedElements = values.getNumMappings();
+ int numNonZeros = 0;
+ for (int index = 0; index < numMappedElements; index++) {
+ if (elementValues[index] != 0) {
+ numNonZeros++;
+ }
+ }
+ return numNonZeros;
+ }
+
+ @Override
public double getLookupCost() {
return Math.max(1, Math.round(Functions.LOG2.apply(getNumNondefaultElements())));
}
http://git-wip-us.apache.org/repos/asf/mahout/blob/45d88031/math/src/test/java/org/apache/mahout/math/VectorTest.java
----------------------------------------------------------------------
diff --git a/math/src/test/java/org/apache/mahout/math/VectorTest.java b/math/src/test/java/org/apache/mahout/math/VectorTest.java
index d15e7d9..67dc1e9 100644
--- a/math/src/test/java/org/apache/mahout/math/VectorTest.java
+++ b/math/src/test/java/org/apache/mahout/math/VectorTest.java
@@ -1021,6 +1021,38 @@ public final class VectorTest extends MahoutTestCase {
}
}
+ @Test
+ public void testNumNonZerosDense() {
+ DenseVector vector = new DenseVector(10);
+ vector.assign(1);
+ vector.setQuick(3, 0);
+ vector.set(5, 0);
+
+ assertEquals(8, vector.getNumNonZeroElements());
+ }
+
+ @Test
+ public void testNumNonZerosRandomAccessSparse() {
+ RandomAccessSparseVector vector = new RandomAccessSparseVector(10);
+ vector.setQuick(3, 1);
+ vector.set(5, 1);
+ vector.setQuick(7, 0);
+ vector.set(9, 0);
+
+ assertEquals(2, vector.getNumNonZeroElements());
+ }
+
+ @Test
+ public void testNumNonZerosSequentialAccessSparse() {
+ SequentialAccessSparseVector vector = new SequentialAccessSparseVector(10);
+ vector.setQuick(3, 1);
+ vector.set(5, 1);
+ vector.setQuick(7, 0);
+ vector.set(9, 0);
+
+ assertEquals(2, vector.getNumNonZeroElements());
+ }
+
// Test NonZeroIterator on an list with 1 elements
private static void testSingleNonZeroIterator(Vector vector) {
vector.set(1, 6);