You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by jd...@apache.org on 2016/02/05 21:53:47 UTC

[3/5] incubator-kudu git commit: [python] - Allow to scan batch-by-batch

[python] - Allow to scan batch-by-batch

Currently the Python client only allows reading all the tuples
from a scan at once, which can easily cause out-of-memory (OOM) issues.

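This patch makes a couple of changes necessary to allow scanning batch by batch
(Row.as_tuple and Scanner.next_batch change from cdef to cpdef so that they
can be called from Python) and adds a test to make sure it works.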

Change-Id: Ifced955be40943dc4ad648d90a24db98b27eed70
Reviewed-on: http://gerrit.cloudera.org:8080/2052
Reviewed-by: David Ribeiro Alves <da...@cloudera.com>
Tested-by: Kudu Jenkins
Reviewed-by: Todd Lipcon <to...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/incubator-kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kudu/commit/786834d1
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kudu/tree/786834d1
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kudu/diff/786834d1

Branch: refs/heads/master
Commit: 786834d1d4b4313535c2763db7022a25ce1e1dd0
Parents: 6d74508
Author: David Alves <da...@cloudera.com>
Authored: Thu Feb 4 15:01:19 2016 -0800
Committer: David Ribeiro Alves <da...@cloudera.com>
Committed: Fri Feb 5 20:41:26 2016 +0000

----------------------------------------------------------------------
 python/kudu/client.pyx            |  4 ++--
 python/kudu/tests/test_scanner.py | 18 ++++++++++++++++++
 2 files changed, 20 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/786834d1/python/kudu/client.pyx
----------------------------------------------------------------------
diff --git a/python/kudu/client.pyx b/python/kudu/client.pyx
index 3802361..bb9a5d7 100644
--- a/python/kudu/client.pyx
+++ b/python/kudu/client.pyx
@@ -793,7 +793,7 @@ cdef class Row:
     def __dealloc__(self):
         pass
 
-    cdef tuple as_tuple(self):
+    cpdef tuple as_tuple(self):
         """
         Return the row as a Python tuple
         """
@@ -1098,7 +1098,7 @@ cdef class Scanner:
     def read_next_batch_tuples(self):
         return self.next_batch().as_tuples()
 
-    cdef RowBatch next_batch(self):
+    cpdef RowBatch next_batch(self):
         """
         Retrieve the next batch of rows from the scanner.
 

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/786834d1/python/kudu/tests/test_scanner.py
----------------------------------------------------------------------
diff --git a/python/kudu/tests/test_scanner.py b/python/kudu/tests/test_scanner.py
index 9699beb..0ae2036 100644
--- a/python/kudu/tests/test_scanner.py
+++ b/python/kudu/tests/test_scanner.py
@@ -121,3 +121,21 @@ class TestScanner(KuduTestBase, unittest.TestCase):
 
         with self.assertRaises(kudu.KuduInvalidArgument):
             scanner.add_predicates([sv >= 1])
+
+    def test_scan_batch_by_batch(self):
+        scanner = self.table.scanner()
+        scanner.set_fault_tolerant()
+        lower_bound = scanner.new_bound()
+        lower_bound['key'] = 10
+        scanner.add_lower_bound(lower_bound)
+        upper_bound = scanner.new_bound()
+        upper_bound['key'] = 90
+        scanner.add_exclusive_upper_bound(upper_bound)
+        scanner.open()
+
+        tuples = []
+        while scanner.has_more_rows():
+            batch = scanner.next_batch()
+            tuples.extend(batch.as_tuples())
+
+        self.assertEqual(sorted(tuples), self.tuples[10:90])