You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by jd...@apache.org on 2016/02/05 21:53:47 UTC
[3/5] incubator-kudu git commit: [python] - Allow to scan
batch-by-batch
[python] - Allow to scan batch-by-batch
Currently the Python client only allows reading all the tuples
from a scan at once, which can easily cause out-of-memory (OOM) issues.
This patch makes the two changes needed to scan batch-by-batch (Row.as_tuple and
Scanner.next_batch change from cdef to cpdef) and adds a test to make sure it works.
Change-Id: Ifced955be40943dc4ad648d90a24db98b27eed70
Reviewed-on: http://gerrit.cloudera.org:8080/2052
Reviewed-by: David Ribeiro Alves <da...@cloudera.com>
Tested-by: Kudu Jenkins
Reviewed-by: Todd Lipcon <to...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/incubator-kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kudu/commit/786834d1
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kudu/tree/786834d1
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kudu/diff/786834d1
Branch: refs/heads/master
Commit: 786834d1d4b4313535c2763db7022a25ce1e1dd0
Parents: 6d74508
Author: David Alves <da...@cloudera.com>
Authored: Thu Feb 4 15:01:19 2016 -0800
Committer: David Ribeiro Alves <da...@cloudera.com>
Committed: Fri Feb 5 20:41:26 2016 +0000
----------------------------------------------------------------------
python/kudu/client.pyx | 4 ++--
python/kudu/tests/test_scanner.py | 18 ++++++++++++++++++
2 files changed, 20 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/786834d1/python/kudu/client.pyx
----------------------------------------------------------------------
diff --git a/python/kudu/client.pyx b/python/kudu/client.pyx
index 3802361..bb9a5d7 100644
--- a/python/kudu/client.pyx
+++ b/python/kudu/client.pyx
@@ -793,7 +793,7 @@ cdef class Row:
def __dealloc__(self):
pass
- cdef tuple as_tuple(self):
+ cpdef tuple as_tuple(self):
"""
Return the row as a Python tuple
"""
@@ -1098,7 +1098,7 @@ cdef class Scanner:
def read_next_batch_tuples(self):
return self.next_batch().as_tuples()
- cdef RowBatch next_batch(self):
+ cpdef RowBatch next_batch(self):
"""
Retrieve the next batch of rows from the scanner.
http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/786834d1/python/kudu/tests/test_scanner.py
----------------------------------------------------------------------
diff --git a/python/kudu/tests/test_scanner.py b/python/kudu/tests/test_scanner.py
index 9699beb..0ae2036 100644
--- a/python/kudu/tests/test_scanner.py
+++ b/python/kudu/tests/test_scanner.py
@@ -121,3 +121,21 @@ class TestScanner(KuduTestBase, unittest.TestCase):
with self.assertRaises(kudu.KuduInvalidArgument):
scanner.add_predicates([sv >= 1])
+
+ def test_scan_batch_by_batch(self):
+ scanner = self.table.scanner()
+ scanner.set_fault_tolerant()
+ lower_bound = scanner.new_bound()
+ lower_bound['key'] = 10
+ scanner.add_lower_bound(lower_bound)
+ upper_bound = scanner.new_bound()
+ upper_bound['key'] = 90
+ scanner.add_exclusive_upper_bound(upper_bound)
+ scanner.open()
+
+ tuples = []
+ while scanner.has_more_rows():
+ batch = scanner.next_batch()
+ tuples.extend(batch.as_tuples())
+
+ self.assertEqual(sorted(tuples), self.tuples[10:90])