You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by jk...@apache.org on 2015/10/17 00:53:30 UTC

spark git commit: [SPARK-11084] [ML] [PYTHON] Check if index can contain non-zero value before binary search

Repository: spark
Updated Branches:
  refs/heads/master 10046ea76 -> 8ac71d62d


[SPARK-11084] [ML] [PYTHON] Check if index can contain non-zero value before binary search

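At the moment, `SparseVector.__getitem__` executes `np.searchsorted` first and only afterwards checks whether the result is in the expected range. It is possible to check whether the requested index can hold a non-zero value at all (i.e. the vector stores at least one index and the requested index is not greater than the last stored index) before executing `np.searchsorted`, and to return 0 immediately when it cannot.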

Author: zero323 <ma...@gmail.com>

Closes #9098 from zero323/sparse_vector_getitem_improved.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8ac71d62
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8ac71d62
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8ac71d62

Branch: refs/heads/master
Commit: 8ac71d62d976bbfd0159cac6816dd8fa580ae1cb
Parents: 10046ea
Author: zero323 <ma...@gmail.com>
Authored: Fri Oct 16 15:53:26 2015 -0700
Committer: Joseph K. Bradley <jo...@databricks.com>
Committed: Fri Oct 16 15:53:26 2015 -0700

----------------------------------------------------------------------
 python/pyspark/mllib/linalg/__init__.py |  4 ++--
 python/pyspark/mllib/tests.py           | 10 ++++++++++
 2 files changed, 12 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/8ac71d62/python/pyspark/mllib/linalg/__init__.py
----------------------------------------------------------------------
diff --git a/python/pyspark/mllib/linalg/__init__.py b/python/pyspark/mllib/linalg/__init__.py
index 5276eb4..ae9ce58 100644
--- a/python/pyspark/mllib/linalg/__init__.py
+++ b/python/pyspark/mllib/linalg/__init__.py
@@ -770,10 +770,10 @@ class SparseVector(Vector):
         if index < 0:
             index += self.size
 
-        insert_index = np.searchsorted(inds, index)
-        if insert_index >= inds.size:
+        if (inds.size == 0) or (index > inds.item(-1)):
             return 0.
 
+        insert_index = np.searchsorted(inds, index)
         row_ind = inds[insert_index]
         if row_ind == index:
             return vals[insert_index]

http://git-wip-us.apache.org/repos/asf/spark/blob/8ac71d62/python/pyspark/mllib/tests.py
----------------------------------------------------------------------
diff --git a/python/pyspark/mllib/tests.py b/python/pyspark/mllib/tests.py
index 2a6a5cd..2ad69a0 100644
--- a/python/pyspark/mllib/tests.py
+++ b/python/pyspark/mllib/tests.py
@@ -252,6 +252,16 @@ class VectorTests(MLlibTestCase):
         for ind in [7.8, '1']:
             self.assertRaises(TypeError, sv.__getitem__, ind)
 
+        zeros = SparseVector(4, {})
+        self.assertEqual(zeros[0], 0.0)
+        self.assertEqual(zeros[3], 0.0)
+        for ind in [4, -5]:
+            self.assertRaises(ValueError, zeros.__getitem__, ind)
+
+        empty = SparseVector(0, {})
+        for ind in [-1, 0, 1]:
+            self.assertRaises(ValueError, empty.__getitem__, ind)
+
     def test_matrix_indexing(self):
         mat = DenseMatrix(3, 2, [0, 1, 4, 6, 8, 10])
         expected = [[0, 6], [1, 8], [4, 10]]


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org