You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by pw...@apache.org on 2014/04/22 20:06:33 UTC
git commit: fix bugs of dot in python
Repository: spark
Updated Branches:
refs/heads/master 0f87e6ad4 -> c919798f0
fix bugs of dot in python
If `transpose()` is not applied to `self.theta`, a
*ValueError: matrices are not aligned*
is raised. The former test case simply ignored this situation.
Author: Xusen Yin <yi...@gmail.com>
Closes #463 from yinxusen/python-naive-bayes and squashes the following commits:
fcbe3bc [Xusen Yin] fix bugs of dot in python
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c919798f
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c919798f
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c919798f
Branch: refs/heads/master
Commit: c919798f0912dc03c8365b9a384d9ee6d5b25c51
Parents: 0f87e6a
Author: Xusen Yin <yi...@gmail.com>
Authored: Tue Apr 22 11:06:18 2014 -0700
Committer: Patrick Wendell <pw...@gmail.com>
Committed: Tue Apr 22 11:06:18 2014 -0700
----------------------------------------------------------------------
python/pyspark/mllib/classification.py | 2 +-
python/pyspark/mllib/tests.py | 8 ++++----
2 files changed, 5 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/c919798f/python/pyspark/mllib/classification.py
----------------------------------------------------------------------
diff --git a/python/pyspark/mllib/classification.py b/python/pyspark/mllib/classification.py
index 3a23e08..c584459 100644
--- a/python/pyspark/mllib/classification.py
+++ b/python/pyspark/mllib/classification.py
@@ -154,7 +154,7 @@ class NaiveBayesModel(object):
def predict(self, x):
"""Return the most likely class for a data vector x"""
- return self.labels[numpy.argmax(self.pi + _dot(x, self.theta))]
+ return self.labels[numpy.argmax(self.pi + _dot(x, self.theta.transpose()))]
class NaiveBayes(object):
@classmethod
http://git-wip-us.apache.org/repos/asf/spark/blob/c919798f/python/pyspark/mllib/tests.py
----------------------------------------------------------------------
diff --git a/python/pyspark/mllib/tests.py b/python/pyspark/mllib/tests.py
index d4771d7..1ee96bb 100644
--- a/python/pyspark/mllib/tests.py
+++ b/python/pyspark/mllib/tests.py
@@ -104,10 +104,10 @@ class ListTests(PySparkTestCase):
def test_classification(self):
from pyspark.mllib.classification import LogisticRegressionWithSGD, SVMWithSGD, NaiveBayes
data = [
- LabeledPoint(0.0, [1, 0]),
- LabeledPoint(1.0, [0, 1]),
- LabeledPoint(0.0, [2, 0]),
- LabeledPoint(1.0, [0, 2])
+ LabeledPoint(0.0, [1, 0, 0]),
+ LabeledPoint(1.0, [0, 1, 1]),
+ LabeledPoint(0.0, [2, 0, 0]),
+ LabeledPoint(1.0, [0, 2, 1])
]
rdd = self.sc.parallelize(data)
features = [p.features.tolist() for p in data]