You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@buildstream.apache.org by ro...@apache.org on 2020/12/29 13:36:19 UTC

[buildstream] 01/05: _yaml.py: Rip out ChainMap(), node_chain_copy(), node_list_copy()

This is an automated email from the ASF dual-hosted git repository.

root pushed a commit to branch jennis/add_yaml_roundtrip_module
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit 9e6abded683baa77051186ede4189d225b16febf
Author: James Ennis <ja...@codethink.co.uk>
AuthorDate: Wed Mar 6 17:09:26 2019 +0000

    _yaml.py: Rip out ChainMap(), node_chain_copy(), node_list_copy()
    
    This class and these two functions exist as they were intended to
    bring efficiency. Benchmarking this patch against the debian-stack.bst
    element in the debian-like project [0] showed that although this
    took 15M more RAM (peak usage), there was a ~20s gain in the time taken
    to 'show' the stack. Thus the class and functions have been removed.
    
    This also has the advantage of removing a lot of duplicate and unnecessary
    code.
    
    [0] https://gitlab.com/jennis/debian-stretch-bst
---
 buildstream/_includes.py |   2 +-
 buildstream/_yaml.py     | 155 +++--------------------------------------------
 buildstream/element.py   |  20 +++---
 buildstream/source.py    |   2 +-
 tests/internals/yaml.py  |   8 +--
 5 files changed, 26 insertions(+), 161 deletions(-)

diff --git a/buildstream/_includes.py b/buildstream/_includes.py
index f4a1627..8db12bd 100644
--- a/buildstream/_includes.py
+++ b/buildstream/_includes.py
@@ -72,7 +72,7 @@ class Includes:
                 # Because the included node will be modified, we need
                 # to copy it so that we do not modify the toplevel
                 # node of the provenance.
-                include_node = _yaml.node_chain_copy(include_node)
+                include_node = _yaml.node_copy(include_node)
 
                 try:
                     included.add(file_path)
diff --git a/buildstream/_yaml.py b/buildstream/_yaml.py
index 74ed1bb..6a9fb71 100644
--- a/buildstream/_yaml.py
+++ b/buildstream/_yaml.py
@@ -623,7 +623,7 @@ def composite_list_prepend(target_node, target_key, source_node, source_key):
     if target_node.get(target_key) is None:
         target_node[target_key] = []
 
-    source_list = list_chain_copy(source_list)
+    source_list = list_copy(source_list)
     target_list = target_node[target_key]
 
     for element in reversed(source_list):
@@ -663,7 +663,7 @@ def composite_list_append(target_node, target_key, source_node, source_key):
     if target_node.get(target_key) is None:
         target_node[target_key] = []
 
-    source_list = list_chain_copy(source_list)
+    source_list = list_copy(source_list)
     target_list = target_node[target_key]
 
     target_list.extend(source_list)
@@ -702,7 +702,7 @@ def composite_list_overwrite(target_node, target_key, source_node, source_key):
     target_provenance = node_get_provenance(target_node)
     source_provenance = node_get_provenance(source_node)
 
-    target_node[target_key] = list_chain_copy(source_list)
+    target_node[target_key] = list_copy(source_list)
     target_provenance.members[target_key] = source_provenance.members[source_key].clone()
 
     return True
@@ -763,7 +763,7 @@ def composite_list(target_node, source_node, key):
             source_provenance = node_get_provenance(source_node)
             target_provenance = node_get_provenance(target_node)
 
-            target_node[key] = node_chain_copy(source_value)
+            target_node[key] = node_copy(source_value)
             target_provenance.members[key] = source_provenance.members[key].clone()
 
         # If the target is a literal list, then composition
@@ -951,8 +951,8 @@ def node_sanitize(node):
     elif node_type is list:
         return [node_sanitize(elt) for elt in node]
 
-    # Finally ChainMap and dict, and other Mappings need special handling
-    if node_type in (dict, ChainMap) or isinstance(node, collections.abc.Mapping):
+    # Finally dict, and other Mappings need special handling
+    if node_type is dict or isinstance(node, collections.abc.Mapping):
         result = SanitizedDict()
 
         key_list = [key for key, _ in node_items(node)]
@@ -996,123 +996,24 @@ def node_validate(node, valid_keys):
                         "{}: Unexpected key: {}".format(provenance, invalid))
 
 
-# ChainMap
-#
-# This is a derivative of collections.ChainMap(), but supports
-# explicit deletions of keys.
-#
-# The purpose of this is to create a virtual copy-on-write
-# copy of a dictionary, so that mutating it in any way does
-# not affect the underlying dictionaries.
-#
-# collections.ChainMap covers this already mostly, but fails
-# to record internal state so as to hide keys which have been
-# explicitly deleted.
-#
-class ChainMap(collections.ChainMap):
-
-    def __init__(self, *maps):
-        super().__init__(*maps)
-        self.__deletions = set()
-
-    def __getitem__(self, key):
-
-        # Honor deletion state of 'key'
-        if key in self.__deletions:
-            return self.__missing__(key)
-
-        return super().__getitem__(key)
-
-    def __len__(self):
-        return len(set().union(*self.maps) - self.__deletions)
-
-    def __iter__(self):
-        return iter(set().union(*self.maps) - self.__deletions)
-
-    def __contains__(self, key):
-        if key in self.__deletions:
-            return False
-        return any(key in m for m in self.maps)
-
-    def __bool__(self):
-        # Attempt to preserve 'any' optimization
-        any_keys = any(self.maps)
-
-        # Something existed, try again with deletions subtracted
-        if any_keys:
-            return any(set().union(*self.maps) - self.__deletions)
-
-        return False
-
-    def __setitem__(self, key, value):
-        self.__deletions.discard(key)
-        super().__setitem__(key, value)
-
-    def __delitem__(self, key):
-        if key in self.__deletions:
-            raise KeyError('Key was already deleted from this mapping: {!r}'.format(key))
-
-        # Ignore KeyError if it's not in the first map, just save the deletion state
-        try:
-            super().__delitem__(key)
-        except KeyError:
-            pass
-
-        # Store deleted state
-        self.__deletions.add(key)
-
-    def popitem(self):
-        poppable = set().union(*self.maps) - self.__deletions
-        for key in poppable:
-            return self.pop(key)
-
-        raise KeyError('No keys found.')
-
-    __marker = object()
-
-    def pop(self, key, default=__marker):
-        # Reimplement MutableMapping's behavior here
-        try:
-            value = self[key]
-        except KeyError:
-            if default is self.__marker:
-                raise
-            return default
-        else:
-            del self[key]
-            return value
-
-    def clear(self):
-        clearable = set().union(*self.maps) - self.__deletions
-        for key in clearable:
-            del self[key]
-
-    def get(self, key, default=None):
-        try:
-            return self[key]
-        except KeyError:
-            return default
-
-
 # Node copying
 #
 # Unfortunately we copy nodes a *lot* and `isinstance()` is super-slow when
 # things from collections.abc get involved.  The result is the following
 # intricate but substantially faster group of tuples and the use of `in`.
 #
-# If any of the {node,list}_{chain_,}_copy routines raise a ValueError
+# If any of the {node,list}_copy routines raise a ValueError
 # then it's likely additional types need adding to these tuples.
 
-# When chaining a copy, these types are skipped since the ChainMap will
-# retrieve them from the source node when needed.  Other copiers might copy
-# them, so we call them __QUICK_TYPES.
+
+# These types just have their value copied
 __QUICK_TYPES = (str, bool,
                  yaml.scalarstring.PreservedScalarString,
                  yaml.scalarstring.SingleQuotedScalarString,
                  yaml.scalarstring.DoubleQuotedScalarString)
 
 # These types have to be iterated like a dictionary
-__DICT_TYPES = (dict, ChainMap, yaml.comments.CommentedMap)
+__DICT_TYPES = (dict, yaml.comments.CommentedMap)
 
 # These types have to be iterated like a list
 __LIST_TYPES = (list, yaml.comments.CommentedSeq)
@@ -1126,42 +1027,6 @@ __PROVENANCE_TYPES = (Provenance, DictProvenance, MemberProvenance, ElementProve
 __NODE_ASSERT_COMPOSITION_DIRECTIVES = ('(>)', '(<)', '(=)')
 
 
-def node_chain_copy(source):
-    copy = ChainMap({}, source)
-    for key, value in source.items():
-        value_type = type(value)
-        if value_type in __DICT_TYPES:
-            copy[key] = node_chain_copy(value)
-        elif value_type in __LIST_TYPES:
-            copy[key] = list_chain_copy(value)
-        elif value_type in __PROVENANCE_TYPES:
-            copy[key] = value.clone()
-        elif value_type in __QUICK_TYPES:
-            pass  # No need to copy these, the chainmap deals with it
-        else:
-            raise ValueError("Unable to be quick about node_chain_copy of {}".format(value_type))
-
-    return copy
-
-
-def list_chain_copy(source):
-    copy = []
-    for item in source:
-        item_type = type(item)
-        if item_type in __DICT_TYPES:
-            copy.append(node_chain_copy(item))
-        elif item_type in __LIST_TYPES:
-            copy.append(list_chain_copy(item))
-        elif item_type in __PROVENANCE_TYPES:
-            copy.append(item.clone())
-        elif item_type in __QUICK_TYPES:
-            copy.append(item)
-        else:  # Fallback
-            raise ValueError("Unable to be quick about list_chain_copy of {}".format(item_type))
-
-    return copy
-
-
 def node_copy(source):
     copy = {}
     for key, value in source.items():
diff --git a/buildstream/element.py b/buildstream/element.py
index 47ca04c..191cc73 100644
--- a/buildstream/element.py
+++ b/buildstream/element.py
@@ -2371,11 +2371,11 @@ class Element(Plugin):
         element_splits = _yaml.node_get(element_bst, Mapping, 'split-rules', default_value={})
 
         if self.__is_junction:
-            splits = _yaml.node_chain_copy(element_splits)
+            splits = _yaml.node_copy(element_splits)
         else:
             assert project._splits is not None
 
-            splits = _yaml.node_chain_copy(project._splits)
+            splits = _yaml.node_copy(project._splits)
             # Extend project wide split rules with any split rules defined by the element
             _yaml.composite(splits, element_splits)
 
@@ -2428,7 +2428,7 @@ class Element(Plugin):
             environment = {}
         else:
             project = self._get_project()
-            environment = _yaml.node_chain_copy(project.base_environment)
+            environment = _yaml.node_copy(project.base_environment)
 
         _yaml.composite(environment, default_env)
         _yaml.composite(environment, meta.environment)
@@ -2468,10 +2468,10 @@ class Element(Plugin):
 
         project = self._get_project()
         if self.__is_junction:
-            variables = _yaml.node_chain_copy(project.first_pass_config.base_variables)
+            variables = _yaml.node_copy(project.first_pass_config.base_variables)
         else:
             project.ensure_fully_loaded()
-            variables = _yaml.node_chain_copy(project.base_variables)
+            variables = _yaml.node_copy(project.base_variables)
 
         _yaml.composite(variables, default_vars)
         _yaml.composite(variables, meta.variables)
@@ -2493,7 +2493,7 @@ class Element(Plugin):
 
         # The default config is already composited with the project overrides
         config = _yaml.node_get(self.__defaults, Mapping, 'config', default_value={})
-        config = _yaml.node_chain_copy(config)
+        config = _yaml.node_copy(config)
 
         _yaml.composite(config, meta.config)
         _yaml.node_final_assertions(config)
@@ -2509,7 +2509,7 @@ class Element(Plugin):
         else:
             project = self._get_project()
             project.ensure_fully_loaded()
-            sandbox_config = _yaml.node_chain_copy(project._sandbox)
+            sandbox_config = _yaml.node_copy(project._sandbox)
 
         # Get the platform to ask for host architecture
         platform = Platform.get_platform()
@@ -2518,7 +2518,7 @@ class Element(Plugin):
 
         # The default config is already composited with the project overrides
         sandbox_defaults = _yaml.node_get(self.__defaults, Mapping, 'sandbox', default_value={})
-        sandbox_defaults = _yaml.node_chain_copy(sandbox_defaults)
+        sandbox_defaults = _yaml.node_copy(sandbox_defaults)
 
         _yaml.composite(sandbox_config, sandbox_defaults)
         _yaml.composite(sandbox_config, meta.sandbox)
@@ -2544,12 +2544,12 @@ class Element(Plugin):
     #
     def __extract_public(self, meta):
         base_public = _yaml.node_get(self.__defaults, Mapping, 'public', default_value={})
-        base_public = _yaml.node_chain_copy(base_public)
+        base_public = _yaml.node_copy(base_public)
 
         base_bst = _yaml.node_get(base_public, Mapping, 'bst', default_value={})
         base_splits = _yaml.node_get(base_bst, Mapping, 'split-rules', default_value={})
 
-        element_public = _yaml.node_chain_copy(meta.public)
+        element_public = _yaml.node_copy(meta.public)
         element_bst = _yaml.node_get(element_public, Mapping, 'bst', default_value={})
         element_splits = _yaml.node_get(element_bst, Mapping, 'split-rules', default_value={})
 
diff --git a/buildstream/source.py b/buildstream/source.py
index 97995a8..b0ba1c7 100644
--- a/buildstream/source.py
+++ b/buildstream/source.py
@@ -1135,7 +1135,7 @@ class Source(Plugin):
     #
     def __extract_config(self, meta):
         config = _yaml.node_get(self.__defaults, Mapping, 'config', default_value={})
-        config = _yaml.node_chain_copy(config)
+        config = _yaml.node_copy(config)
 
         _yaml.composite(config, meta.config)
         _yaml.node_final_assertions(config)
diff --git a/tests/internals/yaml.py b/tests/internals/yaml.py
index b2d9625..a7b4457 100644
--- a/tests/internals/yaml.py
+++ b/tests/internals/yaml.py
@@ -124,9 +124,9 @@ def test_node_get(datafiles):
     assert (exc.value.reason == LoadErrorReason.INVALID_DATA)
 
 
-# Really this is testing _yaml.node_chain_copy(), we want to
-# be sure that when using a ChainMap copy, compositing values
-# still preserves the original values in the copied dict.
+# Really this is testing _yaml.node_copy(), we want to
+# be sure that compositing values still preserves the original
+# values in the copied dict.
 #
 @pytest.mark.datafiles(os.path.join(DATA_DIR))
 def test_composite_preserve_originals(datafiles):
@@ -140,7 +140,7 @@ def test_composite_preserve_originals(datafiles):
 
     base = _yaml.load(filename)
     overlay = _yaml.load(overlayfile)
-    base_copy = _yaml.node_chain_copy(base)
+    base_copy = _yaml.node_copy(base)
     _yaml.composite_dict(base_copy, overlay)
 
     copy_extra = _yaml.node_get(base_copy, Mapping, 'extra')