You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by pa...@apache.org on 2014/06/14 18:43:31 UTC
git commit: MAHOUT-1464: fixed a bug that counted only positive column
elements; now counts all non-zero elements (pat). Closes apache/mahout#18
Repository: mahout
Updated Branches:
refs/heads/master c1ca30872 -> c20eee89c
MAHOUT-1464: fixed a bug that counted only positive column elements; now counts all non-zero elements (pat). Closes apache/mahout#18
Project: http://git-wip-us.apache.org/repos/asf/mahout/repo
Commit: http://git-wip-us.apache.org/repos/asf/mahout/commit/c20eee89
Tree: http://git-wip-us.apache.org/repos/asf/mahout/tree/c20eee89
Diff: http://git-wip-us.apache.org/repos/asf/mahout/diff/c20eee89
Branch: refs/heads/master
Commit: c20eee89c6cc669494cf7edbb80255a83e194a15
Parents: c1ca308
Author: pferrel <pa...@occamsmachete.com>
Authored: Sat Jun 14 09:24:30 2014 -0700
Committer: pferrel <pa...@occamsmachete.com>
Committed: Sat Jun 14 09:24:30 2014 -0700
----------------------------------------------------------------------
.../apache/mahout/math/scalabindings/MatrixOps.scala | 7 ++++---
.../mahout/math/scalabindings/MatrixOpsSuite.scala | 12 ++++++++++++
.../org/apache/mahout/math/function/Functions.java | 15 +++++++++++++--
.../apache/mahout/sparkbindings/SparkEngine.scala | 6 ++----
.../apache/mahout/cf/CooccurrenceAnalysisSuite.scala | 4 ++--
.../mahout/sparkbindings/drm/RLikeDrmOpsSuite.scala | 5 +++--
6 files changed, 36 insertions(+), 13 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mahout/blob/c20eee89/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/MatrixOps.scala
----------------------------------------------------------------------
diff --git a/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/MatrixOps.scala b/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/MatrixOps.scala
index 149feca..28acc5a 100644
--- a/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/MatrixOps.scala
+++ b/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/MatrixOps.scala
@@ -176,7 +176,7 @@ class MatrixOps(val m: Matrix) {
def rowMeans() = if (m.ncol == 0) rowSums() else rowSums() /= m.ncol
- def numNonZeroElementsPerColumn() = m.aggregateColumns(vectorCountFunc)
+ def numNonZeroElementsPerColumn() = m.aggregateColumns(vectorCountNonZeroElementsFunc)
}
object MatrixOps {
@@ -188,8 +188,9 @@ object MatrixOps {
def apply(f: Vector): Double = f.sum
}
- private def vectorCountFunc = new VectorFunction {
- def apply(f: Vector): Double = f.aggregate(Functions.PLUS, Functions.greater(0))
+ private def vectorCountNonZeroElementsFunc = new VectorFunction {
+ //def apply(f: Vector): Double = f.aggregate(Functions.PLUS, Functions.notEqual(0))
+ def apply(f: Vector): Double = f.getNumNonZeroElements().toDouble
}
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/mahout/blob/c20eee89/math-scala/src/test/scala/org/apache/mahout/math/scalabindings/MatrixOpsSuite.scala
----------------------------------------------------------------------
diff --git a/math-scala/src/test/scala/org/apache/mahout/math/scalabindings/MatrixOpsSuite.scala b/math-scala/src/test/scala/org/apache/mahout/math/scalabindings/MatrixOpsSuite.scala
index d59d3a5..8374a9b 100644
--- a/math-scala/src/test/scala/org/apache/mahout/math/scalabindings/MatrixOpsSuite.scala
+++ b/math-scala/src/test/scala/org/apache/mahout/math/scalabindings/MatrixOpsSuite.scala
@@ -123,4 +123,16 @@ class MatrixOpsSuite extends FunSuite with MahoutSuite {
}
+ test("numNonZeroElementsPerColumn") {
+ val a = dense(
+ (2, 3, 4),
+ (3, 4, 5),
+ (-5, 0, -1),
+ (0, 0, 1)
+ )
+
+ a.numNonZeroElementsPerColumn() should equal(dvec(3,2,4))
+
+ }
+
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/mahout/blob/c20eee89/math/src/main/java/org/apache/mahout/math/function/Functions.java
----------------------------------------------------------------------
diff --git a/math/src/main/java/org/apache/mahout/math/function/Functions.java b/math/src/main/java/org/apache/mahout/math/function/Functions.java
index 7a3db98..64315ce 100644
--- a/math/src/main/java/org/apache/mahout/math/function/Functions.java
+++ b/math/src/main/java/org/apache/mahout/math/function/Functions.java
@@ -27,11 +27,11 @@ It is provided "as is" without expressed or implied warranty.
package org.apache.mahout.math.function;
-import java.util.Date;
-
import com.google.common.base.Preconditions;
import org.apache.mahout.math.jet.random.engine.MersenneTwister;
+import java.util.Date;
+
/**
* Function objects to be passed to generic methods. Contains the functions of {@link java.lang.Math} as function
@@ -1393,6 +1393,17 @@ public final class Functions {
};
}
+ /** Constructs a function that returns <tt>a != b ? 1 : 0</tt>. <tt>a</tt> is a variable, <tt>b</tt> is fixed. */
+ public static DoubleFunction notEqual(final double b) {
+ return new DoubleFunction() {
+
+ @Override
+ public double apply(double a) {
+ return a != b ? 1 : 0;
+ }
+ };
+ }
+
/** Constructs a function that returns <tt>a > b ? 1 : 0</tt>. <tt>a</tt> is a variable, <tt>b</tt> is fixed. */
public static DoubleFunction greater(final double b) {
return new DoubleFunction() {
http://git-wip-us.apache.org/repos/asf/mahout/blob/c20eee89/spark/src/main/scala/org/apache/mahout/sparkbindings/SparkEngine.scala
----------------------------------------------------------------------
diff --git a/spark/src/main/scala/org/apache/mahout/sparkbindings/SparkEngine.scala b/spark/src/main/scala/org/apache/mahout/sparkbindings/SparkEngine.scala
index 7a1fb2d..a4eef9d 100644
--- a/spark/src/main/scala/org/apache/mahout/sparkbindings/SparkEngine.scala
+++ b/spark/src/main/scala/org/apache/mahout/sparkbindings/SparkEngine.scala
@@ -60,10 +60,8 @@ object SparkEngine extends DistributedEngine {
.map(_._2)
// Fold() doesn't work with kryo still. So work around it.
.mapPartitions(iter => {
- val acc = ((new DenseVector(n): Vector) /: iter){(acc, v) =>
- v.nonZeroes().foreach { elem =>
- if (elem.get() > 0) acc(elem.index) += 1
- }
+ val acc = ((new DenseVector(n): Vector) /: iter) { (acc, v) =>
+ v.nonZeroes().foreach { elem => acc(elem.index) += 1}
acc
}
Iterator(acc)
http://git-wip-us.apache.org/repos/asf/mahout/blob/c20eee89/spark/src/test/scala/org/apache/mahout/cf/CooccurrenceAnalysisSuite.scala
----------------------------------------------------------------------
diff --git a/spark/src/test/scala/org/apache/mahout/cf/CooccurrenceAnalysisSuite.scala b/spark/src/test/scala/org/apache/mahout/cf/CooccurrenceAnalysisSuite.scala
index 3c05a42..2db5f50 100644
--- a/spark/src/test/scala/org/apache/mahout/cf/CooccurrenceAnalysisSuite.scala
+++ b/spark/src/test/scala/org/apache/mahout/cf/CooccurrenceAnalysisSuite.scala
@@ -118,8 +118,8 @@ class CooccurrenceAnalysisSuite extends FunSuite with MahoutSuite with MahoutLoc
}
test("cooccurrence [A'A], [B'A] integer data using LLR") {
- val a = dense((1000, 10, 0, 0, 0), (0, 0, 10000, 10, 0), (0, 0, 0, 0, 100), (10000, 0, 0, 1000, 0))
- val b = dense((100, 1000, 10000, 10000, 0), (10000, 1000, 100, 10, 0), (0, 0, 10, 0, 100), (10, 100, 0, 1000, 0))
+ val a = dense((1000, 10, 0, 0, 0), (0, 0, -10000, 10, 0), (0, 0, 0, 0, 100), (10000, 0, 0, 1000, 0))
+ val b = dense((100, 1000, -10000, 10000, 0), (10000, 1000, 100, 10, 0), (0, 0, 10, 0, -100), (10, 100, 0, 1000, 0))
val drmA = drmParallelize(m = a, numPartitions = 2)
val drmB = drmParallelize(m = b, numPartitions = 2)
http://git-wip-us.apache.org/repos/asf/mahout/blob/c20eee89/spark/src/test/scala/org/apache/mahout/sparkbindings/drm/RLikeDrmOpsSuite.scala
----------------------------------------------------------------------
diff --git a/spark/src/test/scala/org/apache/mahout/sparkbindings/drm/RLikeDrmOpsSuite.scala b/spark/src/test/scala/org/apache/mahout/sparkbindings/drm/RLikeDrmOpsSuite.scala
index 30a602b..3cd49cd 100644
--- a/spark/src/test/scala/org/apache/mahout/sparkbindings/drm/RLikeDrmOpsSuite.scala
+++ b/spark/src/test/scala/org/apache/mahout/sparkbindings/drm/RLikeDrmOpsSuite.scala
@@ -466,8 +466,9 @@ class RLikeDrmOpsSuite extends FunSuite with Matchers with MahoutLocalContext {
test("numNonZeroElementsPerColumn") {
val inCoreA = dense(
(0, 2),
- (3, 4),
- (0, 30)
+ (3, 0),
+ (0, -30)
+
)
val drmA = drmParallelize(inCoreA, numPartitions = 2)