You are viewing a plain text version of this content. The canonical link for it is here.
Posted to notifications@libcloud.apache.org by to...@apache.org on 2012/11/08 07:10:23 UTC

svn commit: r1406938 - in /libcloud/trunk: ./ libcloud/dns/ libcloud/dns/drivers/ libcloud/storage/ libcloud/storage/drivers/

Author: tomaz
Date: Thu Nov  8 06:10:22 2012
New Revision: 1406938

URL: http://svn.apache.org/viewvc?rev=1406938&view=rev
Log:
Deprecate the LazyList method of iterating over large paginated collections
and use a new, more efficient generator-based approach which doesn't
require the iterator to be pre-exhausted or all of the values to be buffered
in memory.

Existing list_* methods which previously used the LazyList class
preserve the old behavior, and new iterate_* methods which use the new
generator-based approach have been added.

Contributed by Mahendra M, part of LIBCLOUD-254.

Modified:
    libcloud/trunk/CHANGES
    libcloud/trunk/libcloud/dns/base.py
    libcloud/trunk/libcloud/dns/drivers/zerigo.py
    libcloud/trunk/libcloud/storage/base.py
    libcloud/trunk/libcloud/storage/drivers/atmos.py
    libcloud/trunk/libcloud/storage/drivers/cloudfiles.py
    libcloud/trunk/libcloud/storage/drivers/local.py
    libcloud/trunk/libcloud/storage/drivers/s3.py

Modified: libcloud/trunk/CHANGES
URL: http://svn.apache.org/viewvc/libcloud/trunk/CHANGES?rev=1406938&r1=1406937&r2=1406938&view=diff
==============================================================================
--- libcloud/trunk/CHANGES (original)
+++ libcloud/trunk/CHANGES Thu Nov  8 06:10:22 2012
@@ -2,6 +2,18 @@
 
 Changes with Apache Libcloud in development:
 
+  *) General
+
+    - Deprecate LazyList method of iteration over large paginated collections
+      and use a new, more efficient generator based approach which doesn't
+      require the iterator to be pre-exhausted and buffering all of the values
+      in memory.
+
+      Existing list_* methods which previously used LazyList class are
+      preserving the old behavior and new iterate_* methods which use a new
+      generator based approach have been added.
+      [Mahendra M]
+
   *) Compute
 
     - Fix string interpolation bug in __repr__ methods in the IBM SCE driver. ;

Modified: libcloud/trunk/libcloud/dns/base.py
URL: http://svn.apache.org/viewvc/libcloud/trunk/libcloud/dns/base.py?rev=1406938&r1=1406937&r2=1406938&view=diff
==============================================================================
--- libcloud/trunk/libcloud/dns/base.py (original)
+++ libcloud/trunk/libcloud/dns/base.py Thu Nov  8 06:10:22 2012
@@ -164,14 +164,34 @@ class DNSDriver(BaseDriver):
         """
         return list(self.RECORD_TYPE_MAP.keys())
 
+    def iterate_zones(self):
+        """
+        Return a generator to iterate over available zones.
+
+        @rtype: C{generator} of L{Zone}
+        """
+        raise NotImplementedError(
+            'iterate_zones not implemented for this driver')
+
     def list_zones(self):
         """
         Return a list of zones.
 
         @rtype: C{list} of L{Zone}
         """
+        return list(self.iterate_zones())
+
+    def iterate_records(self, zone):
+        """
+        Return a generator to iterate over records for the provided zone.
+
+        @param zone: Zone to list records for.
+        @type zone: L{Zone}
+
+        @rtype: C{generator} of L{Record}
+        """
         raise NotImplementedError(
-            'list_zones not implemented for this driver')
+            'iterate_records not implemented for this driver')
 
     def list_records(self, zone):
         """
@@ -182,8 +202,7 @@ class DNSDriver(BaseDriver):
 
         @rtype: C{list} of L{Record}
         """
-        raise NotImplementedError(
-            'list_records not implemented for this driver')
+        return list(self.iterate_records(zone))
 
     def get_zone(self, zone_id):
         """

Modified: libcloud/trunk/libcloud/dns/drivers/zerigo.py
URL: http://svn.apache.org/viewvc/libcloud/trunk/libcloud/dns/drivers/zerigo.py?rev=1406938&r1=1406937&r2=1406938&view=diff
==============================================================================
--- libcloud/trunk/libcloud/dns/drivers/zerigo.py (original)
+++ libcloud/trunk/libcloud/dns/drivers/zerigo.py Thu Nov  8 06:10:22 2012
@@ -30,7 +30,7 @@ from libcloud.utils.misc import merge_va
 from libcloud.utils.xml import findtext, findall
 from libcloud.common.base import XmlResponse, ConnectionUserAndKey
 from libcloud.common.types import InvalidCredsError, LibcloudError
-from libcloud.common.types import MalformedResponseError, LazyList
+from libcloud.common.types import MalformedResponseError
 from libcloud.dns.types import Provider, RecordType
 from libcloud.dns.types import ZoneDoesNotExistError, RecordDoesNotExistError
 from libcloud.dns.base import DNSDriver, Zone, Record
@@ -143,13 +143,11 @@ class ZerigoDNSDriver(DNSDriver):
         RecordType.URL: 'URL',
     }
 
-    def list_zones(self):
-        value_dict = {'type': 'zones'}
-        return LazyList(get_more=self._get_more, value_dict=value_dict)
-
-    def list_records(self, zone):
-        value_dict = {'type': 'records', 'zone': zone}
-        return LazyList(get_more=self._get_more, value_dict=value_dict)
+    def iterate_zones(self):
+        return self._get_more('zones')
+
+    def iterate_records(self, zone):
+        return self._get_more('records', zone=zone)
 
     def get_zone(self, zone_id):
         path = API_ROOT + 'zones/%s.xml' % (zone_id)
@@ -433,36 +431,44 @@ class ZerigoDNSDriver(DNSDriver):
                         zone=zone, driver=self, extra=extra)
         return record
 
-    def _get_more(self, last_key, value_dict):
+    def _get_more(self, rtype, **kwargs):
+        exhausted = False
+        last_key = None
+
+        while not exhausted:
+            items, last_key, exhausted = self._get_data(
+                                            rtype, last_key, **kwargs)
+
+            for item in items:
+                yield item
+
+    def _get_data(self, rtype, last_key, **kwargs):
         # Note: last_key in this case really is a "last_page".
         # TODO: Update base driver and change last_key to something more
         # generic - e.g. marker
         params = {}
         params['per_page'] = ITEMS_PER_PAGE
         params['page'] = last_key + 1 if last_key else 1
-        transform_func_kwargs = {}
 
-        if value_dict['type'] == 'zones':
+        if rtype == 'zones':
             path = API_ROOT + 'zones.xml'
             response = self.connection.request(path)
             transform_func = self._to_zones
-        elif value_dict['type'] == 'records':
-            zone = value_dict['zone']
+        elif rtype == 'records':
+            zone = kwargs['zone']
             path = API_ROOT + 'zones/%s/hosts.xml' % (zone.id)
             self.connection.set_context({'resource': 'zone', 'id': zone.id})
             response = self.connection.request(path, params=params)
             transform_func = self._to_records
-            transform_func_kwargs['zone'] = value_dict['zone']
 
         exhausted = False
         result_count = int(response.headers.get('x-query-count', 0))
-        transform_func_kwargs['elem'] = response.object
 
         if (params['page'] * ITEMS_PER_PAGE) >= result_count:
             exhausted = True
 
         if response.status == httplib.OK:
-            items = transform_func(**transform_func_kwargs)
+            items = transform_func(elem=response.object, **kwargs)
             return items, params['page'], exhausted
         else:
             return [], None, True

Modified: libcloud/trunk/libcloud/storage/base.py
URL: http://svn.apache.org/viewvc/libcloud/trunk/libcloud/storage/base.py?rev=1406938&r1=1406937&r2=1406938&view=diff
==============================================================================
--- libcloud/trunk/libcloud/storage/base.py (original)
+++ libcloud/trunk/libcloud/storage/base.py Thu Nov  8 06:10:22 2012
@@ -118,6 +118,9 @@ class Container(object):
         self.extra = extra or {}
         self.driver = driver
 
+    def iterate_objects(self):
+        return self.driver.iterate_container_objects(container=self)
+
     def list_objects(self):
         return self.driver.list_container_objects(container=self)
 
@@ -185,6 +188,19 @@ class StorageDriver(BaseDriver):
         raise NotImplementedError(
             'list_containers not implemented for this driver')
 
+    def iterate_container_objects(self, container):
+        """
+        Return a generator of objects for the given container.
+
+        @param container: Container instance
+        @type container: L{Container}
+
+        @return: A generator of Object instances.
+        @rtype: C{generator} of L{Object}
+        """
+        raise NotImplementedError(
+            'iterate_container_objects not implemented for this driver')
+
     def list_container_objects(self, container):
         """
         Return a list of objects for the given container.
@@ -195,8 +211,7 @@ class StorageDriver(BaseDriver):
         @return: A list of Object instances.
         @rtype: C{list} of L{Object}
         """
-        raise NotImplementedError(
-            'list_objects not implemented for this driver')
+        return list(self.iterate_container_objects(container))
 
     def get_container(self, container_name):
         """

Modified: libcloud/trunk/libcloud/storage/drivers/atmos.py
URL: http://svn.apache.org/viewvc/libcloud/trunk/libcloud/storage/drivers/atmos.py?rev=1406938&r1=1406937&r2=1406938&view=diff
==============================================================================
--- libcloud/trunk/libcloud/storage/drivers/atmos.py (original)
+++ libcloud/trunk/libcloud/storage/drivers/atmos.py Thu Nov  8 06:10:22 2012
@@ -33,7 +33,7 @@ if PY3:
 
 from libcloud.utils.files import read_in_chunks, guess_file_mime_type
 from libcloud.common.base import ConnectionUserAndKey, XmlResponse
-from libcloud.common.types import LazyList, LibcloudError
+from libcloud.common.types import LibcloudError
 
 from libcloud.storage.base import Object, Container, StorageDriver, CHUNK_SIZE
 from libcloud.storage.types import ContainerAlreadyExistsError, \
@@ -384,10 +384,6 @@ class AtmosDriver(StorageDriver):
             raise ObjectDoesNotExistError(e, self, obj.name)
         return True
 
-    def list_container_objects(self, container):
-        value_dict = {'container': container}
-        return LazyList(get_more=self._get_more, value_dict=value_dict)
-
     def enable_object_cdn(self, obj):
         return True
 
@@ -468,8 +464,7 @@ class AtmosDriver(StorageDriver):
         meta = meta.split(', ')
         return dict([x.split('=', 1) for x in meta])
 
-    def _get_more(self, last_key, value_dict):
-        container = value_dict['container']
+    def iterate_container_objects(self, container):
         headers = {'x-emc-include-meta': '1'}
         path = self._namespace_path(container.name) + '/'
         result = self.connection.request(path, headers=headers)
@@ -477,6 +472,4 @@ class AtmosDriver(StorageDriver):
         objects = []
         for entry in entries:
             metadata = {'object_id': entry['id']}
-            objects.append(Object(entry['name'], 0, '', {}, metadata,
-                                  container, self))
-        return objects, None, True
+            yield Object(entry['name'], 0, '', {}, metadata, container, self)

Modified: libcloud/trunk/libcloud/storage/drivers/cloudfiles.py
URL: http://svn.apache.org/viewvc/libcloud/trunk/libcloud/storage/drivers/cloudfiles.py?rev=1406938&r1=1406937&r2=1406938&view=diff
==============================================================================
--- libcloud/trunk/libcloud/storage/drivers/cloudfiles.py (original)
+++ libcloud/trunk/libcloud/storage/drivers/cloudfiles.py Thu Nov  8 06:10:22 2012
@@ -45,7 +45,6 @@ from libcloud.storage.types import Conta
 from libcloud.storage.types import ObjectDoesNotExistError
 from libcloud.storage.types import ObjectHashMismatchError
 from libcloud.storage.types import InvalidContainerNameError
-from libcloud.common.types import LazyList
 from libcloud.common.openstack import OpenStackBaseConnection
 from libcloud.common.openstack import OpenStackDriverMixin
 
@@ -229,10 +228,6 @@ class CloudFilesStorageDriver(StorageDri
 
         raise LibcloudError('Unexpected status code: %s' % (response.status))
 
-    def list_container_objects(self, container):
-        value_dict = {'container': container}
-        return LazyList(get_more=self._get_more, value_dict=value_dict)
-
     def get_container(self, container_name):
         response = self.connection.request('/%s' % (container_name),
                                            method='HEAD')
@@ -618,30 +613,30 @@ class CloudFilesStorageDriver(StorageDri
 
         return obj
 
-    def _get_more(self, last_key, value_dict):
-        container = value_dict['container']
+    def iterate_container_objects(self, container):
         params = {}
 
-        if last_key:
-            params['marker'] = last_key
-
-        response = self.connection.request('/%s' % (container.name),
-                                           params=params)
-
-        if response.status == httplib.NO_CONTENT:
-            # Empty or inexistent container
-            return [], None, True
-        elif response.status == httplib.OK:
-            objects = self._to_object_list(json.loads(response.body),
-                                           container)
-
-            # TODO: Is this really needed?
-            if len(objects) == 0:
-                return [], None, True
-
-            return objects, objects[-1].name, False
-
-        raise LibcloudError('Unexpected status code: %s' % (response.status))
+        while True:
+            response = self.connection.request('/%s' % (container.name),
+                                               params=params)
+
+            if response.status == httplib.NO_CONTENT:
+                # Empty or non-existent container
+                break
+            elif response.status == httplib.OK:
+                objects = self._to_object_list(json.loads(response.body),
+                                               container)
+
+                if len(objects) == 0:
+                    break
+
+                for obj in objects:
+                    yield obj
+                params['marker'] = obj.name
+
+            else:
+                raise LibcloudError('Unexpected status code: %s' %
+                                    (response.status))
 
     def _put_object(self, container, object_name, upload_func,
                     upload_func_kwargs, extra=None, file_path=None,

Modified: libcloud/trunk/libcloud/storage/drivers/local.py
URL: http://svn.apache.org/viewvc/libcloud/trunk/libcloud/storage/drivers/local.py?rev=1406938&r1=1406937&r2=1406938&view=diff
==============================================================================
--- libcloud/trunk/libcloud/storage/drivers/local.py (original)
+++ libcloud/trunk/libcloud/storage/drivers/local.py Thu Nov  8 06:10:22 2012
@@ -33,7 +33,7 @@ except ImportError:
 from libcloud.utils.files import read_in_chunks
 from libcloud.common.base import Connection
 from libcloud.storage.base import Object, Container, StorageDriver
-from libcloud.common.types import LibcloudError, LazyList
+from libcloud.common.types import LibcloudError
 from libcloud.storage.types import ContainerAlreadyExistsError
 from libcloud.storage.types import ContainerDoesNotExistError
 from libcloud.storage.types import ContainerIsNotEmptyError
@@ -213,28 +213,18 @@ class LocalStorageDriver(StorageDriver):
                 object_name = os.path.relpath(full_path, start=cpath)
                 yield self._make_object(container, object_name)
 
-    def _get_more(self, last_key, value_dict):
+    def iterate_container_objects(self, container):
         """
-        A handler for using with LazyList
-        """
-        container = value_dict['container']
-        objects = [obj for obj in self._get_objects(container)]
-
-        return (objects, None, True)
-
-    def list_container_objects(self, container):
-        """
-        Return a list of objects for the given container.
+        Returns a generator of objects for the given container.
 
         @param container: Container instance
         @type container: L{Container}
 
-        @return: A list of Object instances.
-        @rtype: C{list} of L{Object}
+        @return: A generator of Object instances.
+        @rtype: C{generator} of L{Object}
         """
 
-        value_dict = {'container': container}
-        return LazyList(get_more=self._get_more, value_dict=value_dict)
+        return self._get_objects(container)
 
     def get_container(self, container_name):
         """

Modified: libcloud/trunk/libcloud/storage/drivers/s3.py
URL: http://svn.apache.org/viewvc/libcloud/trunk/libcloud/storage/drivers/s3.py?rev=1406938&r1=1406937&r2=1406938&view=diff
==============================================================================
--- libcloud/trunk/libcloud/storage/drivers/s3.py (original)
+++ libcloud/trunk/libcloud/storage/drivers/s3.py Thu Nov  8 06:10:22 2012
@@ -38,7 +38,6 @@ from libcloud.storage.types import Inval
 from libcloud.storage.types import ContainerDoesNotExistError
 from libcloud.storage.types import ObjectDoesNotExistError
 from libcloud.storage.types import ObjectHashMismatchError
-from libcloud.common.types import LazyList
 
 # How long before the token expires
 EXPIRATION_SECONDS = 15 * 60
@@ -182,9 +181,32 @@ class S3StorageDriver(StorageDriver):
         raise LibcloudError('Unexpected status code: %s' % (response.status),
                             driver=self)
 
-    def list_container_objects(self, container):
-        value_dict = {'container': container}
-        return LazyList(get_more=self._get_more, value_dict=value_dict)
+    def iterate_container_objects(self, container):
+        params = {}
+        last_key = None
+        exhausted = False
+
+        while not exhausted:
+            if last_key:
+                params['marker'] = last_key
+
+            response = self.connection.request('/%s' % (container.name),
+                                               params=params)
+
+            if response.status != httplib.OK:
+                raise LibcloudError('Unexpected status code: %s' %
+                                    (response.status), driver=self)
+
+            objects = self._to_objs(obj=response.object,
+                                    xpath='Contents', container=container)
+            is_truncated = response.object.findtext(fixxpath(
+                    xpath='IsTruncated', namespace=self.namespace)).lower()
+            exhausted = (is_truncated == 'false')
+
+            last_key = None
+            for obj in objects:
+                last_key = obj.name
+                yield obj
 
     def get_container(self, container_name):
         # This is very inefficient, but afaik it's the only way to do it
@@ -355,32 +377,6 @@ class S3StorageDriver(StorageDriver):
         name = urlquote(name)
         return name
 
-    def _get_more(self, last_key, value_dict):
-        container = value_dict['container']
-        params = {}
-
-        if last_key:
-            params['marker'] = last_key
-
-        response = self.connection.request('/%s' % (container.name),
-                                           params=params)
-
-        if response.status == httplib.OK:
-            objects = self._to_objs(obj=response.object,
-                                    xpath='Contents', container=container)
-            is_truncated = response.object.findtext(fixxpath(
-                xpath='IsTruncated', namespace=self.namespace)).lower()
-            exhausted = (is_truncated == 'false')
-
-            if (len(objects) > 0):
-                last_key = objects[-1].name
-            else:
-                last_key = None
-            return objects, last_key, exhausted
-
-        raise LibcloudError('Unexpected status code: %s' % (response.status),
-                            driver=self)
-
     def _put_object(self, container, object_name, upload_func,
                     upload_func_kwargs, extra=None, file_path=None,
                     iterator=None, verify_hash=True, storage_class=None):