You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by me...@apache.org on 2015/09/21 07:55:28 UTC
spark git commit: [SPARK-10631] [DOCUMENTATION, MLLIB,
PYSPARK] Added documentation for few APIs
Repository: spark
Updated Branches:
refs/heads/master 0c498717b -> 014403951
[SPARK-10631] [DOCUMENTATION, MLLIB, PYSPARK] Added documentation for few APIs
There are some missing API docs in pyspark.mllib.linalg.Vector (including DenseVector and SparseVector). We should add them based on their Scala counterparts.
Author: vinodkc <vi...@gmail.com>
Closes #8834 from vinodkc/fix_SPARK-10631.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/01440395
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/01440395
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/01440395
Branch: refs/heads/master
Commit: 01440395176bdbb2662480f03b27851cb860f385
Parents: 0c49871
Author: vinodkc <vi...@gmail.com>
Authored: Sun Sep 20 22:55:24 2015 -0700
Committer: Xiangrui Meng <me...@databricks.com>
Committed: Sun Sep 20 22:55:24 2015 -0700
----------------------------------------------------------------------
python/pyspark/mllib/linalg/__init__.py | 22 +++++++++++++++++-----
1 file changed, 17 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/01440395/python/pyspark/mllib/linalg/__init__.py
----------------------------------------------------------------------
diff --git a/python/pyspark/mllib/linalg/__init__.py b/python/pyspark/mllib/linalg/__init__.py
index 4829acb..f929e3e 100644
--- a/python/pyspark/mllib/linalg/__init__.py
+++ b/python/pyspark/mllib/linalg/__init__.py
@@ -301,11 +301,14 @@ class DenseVector(Vector):
return DenseVector, (self.array.tostring(),)
def numNonzeros(self):
+ """
+ Number of nonzero elements. This scans all active values and counts the nonzeros.
+ """
return np.count_nonzero(self.array)
def norm(self, p):
"""
- Calculte the norm of a DenseVector.
+ Calculates the norm of a DenseVector.
>>> a = DenseVector([0, -1, 2, -3])
>>> a.norm(2)
@@ -397,10 +400,16 @@ class DenseVector(Vector):
return np.dot(diff, diff)
def toArray(self):
+ """
+ Returns a numpy.ndarray
+ """
return self.array
@property
def values(self):
+ """
+ Returns the values as a numpy.ndarray
+ """
return self.array
def __getitem__(self, item):
@@ -479,8 +488,8 @@ class SparseVector(Vector):
:param size: Size of the vector.
:param args: Active entries, as a dictionary {index: value, ...},
- a list of tuples [(index, value), ...], or a list of strictly i
- ncreasing indices and a list of corresponding values [index, ...],
+ a list of tuples [(index, value), ...], or a list of strictly
+ increasing indices and a list of corresponding values [index, ...],
[value, ...]. Inactive entries are treated as zeros.
>>> SparseVector(4, {1: 1.0, 3: 5.5})
@@ -521,11 +530,14 @@ class SparseVector(Vector):
raise TypeError("indices array must be sorted")
def numNonzeros(self):
+ """
+ Number of nonzero elements. This scans all active values and counts the nonzeros.
+ """
return np.count_nonzero(self.values)
def norm(self, p):
"""
- Calculte the norm of a SparseVector.
+ Calculates the norm of a SparseVector.
>>> a = SparseVector(4, [0, 1], [3., -4.])
>>> a.norm(1)
@@ -797,7 +809,7 @@ class Vectors(object):
values (sorted by index).
:param size: Size of the vector.
- :param args: Non-zero entries, as a dictionary, list of tupes,
+ :param args: Non-zero entries, as a dictionary, list of tuples,
or two sorted lists containing indices and values.
>>> Vectors.sparse(4, {1: 1.0, 3: 5.5})
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org