You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@buildstream.apache.org by tv...@apache.org on 2021/02/04 08:09:25 UTC

[buildstream] branch jennis/YAML_optimisations created (now 681fc33)

This is an automated email from the ASF dual-hosted git repository.

tvb pushed a change to branch jennis/YAML_optimisations
in repository https://gitbox.apache.org/repos/asf/buildstream.git.


      at 681fc33  _yaml.py: Add fast_load switch to enable loading with pyyaml

This branch includes the following new commits:

     new 681fc33  _yaml.py: Add fast_load switch to enable loading with pyyaml

The 1 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



[buildstream] 01/01: _yaml.py: Add fast_load switch to enable loading with pyyaml

Posted by tv...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tvb pushed a commit to branch jennis/YAML_optimisations
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit 681fc33182c483c32faf7ec9231aed850f23535a
Author: James Ennis <ja...@codethink.co.uk>
AuthorDate: Wed Feb 27 15:41:36 2019 +0000

    _yaml.py: Add fast_load switch to enable loading with pyyaml
    
    * This also requires skipping ensure_provenance calls when necessary
    * pyyaml has been added to the relevant requirement files
---
 buildstream/_loader/loadelement.py |  3 +-
 buildstream/_yaml.py               | 78 +++++++++++++++++++++++---------------
 requirements/requirements.in       |  1 +
 requirements/requirements.txt      |  1 +
 4 files changed, 52 insertions(+), 31 deletions(-)

diff --git a/buildstream/_loader/loadelement.py b/buildstream/_loader/loadelement.py
index 6773354..5239ce2 100644
--- a/buildstream/_loader/loadelement.py
+++ b/buildstream/_loader/loadelement.py
@@ -176,6 +176,7 @@ def _extract_depends_from_node(node, *, key=None):
         output_deps.append(dependency)
 
     # Now delete the field, we dont want it anymore
-    del node[key]
+    if key in node:
+        del node[key]
 
     return output_deps
diff --git a/buildstream/_yaml.py b/buildstream/_yaml.py
index 230094c..80864f7 100644
--- a/buildstream/_yaml.py
+++ b/buildstream/_yaml.py
@@ -24,7 +24,8 @@ from copy import deepcopy
 from contextlib import ExitStack
 from pathlib import Path
 
-from ruamel import yaml
+import yaml  # this is pyyaml
+from ruamel import yaml as ruamelyaml
 from ruamel.yaml.representer import SafeRepresenter, RoundTripRepresenter
 from ruamel.yaml.constructor import RoundTripConstructor
 from ._exceptions import LoadError, LoadErrorReason
@@ -184,12 +185,13 @@ class CompositeTypeError(CompositeError):
 #    copy_tree (bool): Whether to make a copy, preserving the original toplevels
 #                      for later serialization
 #    yaml_cache (YamlCache): A yaml cache to consult rather than parsing
+#    fast_load (bool): whether to load the YAML with pyyaml (instead of ruamel.yaml)
 #
 # Returns (dict): A loaded copy of the YAML file with provenance information
 #
 # Raises: LoadError
 #
-def load(filename, shortname=None, copy_tree=False, *, project=None, yaml_cache=None):
+def load(filename, shortname=None, copy_tree=False, *, project=None, yaml_cache=None, fast_load=False):
     if not shortname:
         shortname = filename
 
@@ -203,7 +205,7 @@ def load(filename, shortname=None, copy_tree=False, *, project=None, yaml_cache=
             data, key = yaml_cache.get(project, filename, contents, copy_tree)
 
         if not data:
-            data = load_data(contents, file, copy_tree=copy_tree)
+            data = load_data(contents, file, copy_tree=copy_tree, fast_load=fast_load)
             if yaml_cache:
                 yaml_cache.put_from_key(project, filename, key, data)
 
@@ -219,13 +221,22 @@ def load(filename, shortname=None, copy_tree=False, *, project=None, yaml_cache=
 
 # Like load(), but doesnt require the data to be in a file
 #
-def load_data(data, file=None, copy_tree=False):
+def load_data(data, file=None, copy_tree=False, fast_load=True):
 
-    try:
-        contents = yaml.load(data, yaml.loader.RoundTripLoader, preserve_quotes=True)
-    except (yaml.scanner.ScannerError, yaml.composer.ComposerError, yaml.parser.ParserError) as e:
-        raise LoadError(LoadErrorReason.INVALID_YAML,
-                        "Malformed YAML:\n\n{}\n\n{}\n".format(e.problem, e.problem_mark)) from e
+    if fast_load:
+        try:
+            contents = yaml.load(data, Loader=yaml.BaseLoader)  # BaseLoader returns strings... There is also a CBaseLoader for speed
+        except yaml.YAMLError as e:
+            mark = e.problem_mark
+            raise LoadError(LoadErrorReason.INVALID_YAML,
+                            "{}:\nMalformed YAML: Line {}, Column: {}".format(file.name, mark.line+1, mark.column+1))
+
+    else:
+        try:
+            contents = ruamelyaml.load(data, ruamelyaml.loader.RoundTripLoader, preserve_quotes=True)
+        except (ruamelyaml.scanner.ScannerError, ruamelyaml.composer.ComposerError, ruamelyaml.parser.ParserError) as e:
+            raise LoadError(LoadErrorReason.INVALID_YAML,
+                            "Malformed YAML:\n\n{}\n\n{}\n".format(e.problem, e.problem_mark)) from e
 
     if not isinstance(contents, dict):
         # Special case allowance for None, when the loaded file has only comments in it.
@@ -236,7 +247,10 @@ def load_data(data, file=None, copy_tree=False):
                             "YAML file has content of type '{}' instead of expected type 'dict': {}"
                             .format(type(contents).__name__, file.name))
 
-    return node_decorated_copy(file, contents, copy_tree=copy_tree)
+    if fast_load:
+        return contents
+    else:
+        return node_decorated_copy(file, contents, copy_tree=copy_tree)
 
 
 # Dumps a previously loaded YAML node to a file
@@ -252,7 +266,7 @@ def dump(node, filename=None):
             f = stack.enter_context(utils.save_file_atomic(filename, 'w'))
         else:
             f = sys.stdout
-        yaml.round_trip_dump(node, f)
+        ruamelyaml.round_trip_dump(node, f)
 
 
 # node_decorated_copy()
@@ -554,7 +568,7 @@ def is_ruamel_str(value):
 
     if isinstance(value, str):
         return True
-    elif isinstance(value, yaml.scalarstring.ScalarString):
+    elif isinstance(value, ruamelyaml.scalarstring.ScalarString):
         return True
 
     return False
@@ -829,9 +843,10 @@ def composite_list(target_node, source_node, key):
 # This is useful for overriding configuration files and element
 # configurations.
 #
-def composite_dict(target, source, path=None):
-    target_provenance = ensure_provenance(target)
-    source_provenance = ensure_provenance(source)
+def composite_dict(target, source, path=None, skip_provenance=False):
+    if not skip_provenance:
+        target_provenance = ensure_provenance(target)
+        source_provenance = ensure_provenance(source)
 
     for key, source_value in node_items(source):
 
@@ -854,19 +869,20 @@ def composite_dict(target, source, path=None):
                 target_value = {}
                 target[key] = target_value
 
-                # Give the new dict provenance
-                value_provenance = source_value.get(PROVENANCE_KEY)
-                if value_provenance:
-                    target_value[PROVENANCE_KEY] = value_provenance.clone()
+                if not skip_provenance:
+                    # Give the new dict provenance
+                    value_provenance = source_value.get(PROVENANCE_KEY)
+                    if value_provenance:
+                        target_value[PROVENANCE_KEY] = value_provenance.clone()
 
-                # Add a new provenance member element to the containing dict
-                target_provenance.members[key] = source_provenance.members[key]
+                        # Add a new provenance member element to the containing dict
+                        target_provenance.members[key] = source_provenance.members[key]
 
             if not isinstance(target_value, collections.abc.Mapping):
                 raise CompositeTypeError(thispath, type(target_value), type(source_value))
 
             # Recurse into matching dictionary
-            composite_dict(target_value, source_value, path=thispath)
+            composite_dict(target_value, source_value, path=thispath, skip_provenance=skip_provenance)
 
         else:
 
@@ -879,7 +895,8 @@ def composite_dict(target, source, path=None):
                     raise CompositeTypeError(thispath, type(target_value), type(source_value))
 
             # Overwrite simple values, lists and mappings have already been handled
-            target_provenance.members[key] = source_provenance.members[key].clone()
+            if not skip_provenance:
+                target_provenance.members[key] = source_provenance.members[key].clone()
             target[key] = source_value
 
 
@@ -890,7 +907,7 @@ def composite(target, source):
 
     source_provenance = node_get_provenance(source)
     try:
-        composite_dict(target, source)
+        composite_dict(target, source, skip_provenance=source_provenance is None)
     except CompositeTypeError as e:
         error_prefix = ""
         if source_provenance:
@@ -1095,15 +1112,15 @@ class ChainMap(collections.ChainMap):
 # retrieve them from the source node when needed.  Other copiers might copy
 # them, so we call them __QUICK_TYPES.
 __QUICK_TYPES = (str, bool,
-                 yaml.scalarstring.PreservedScalarString,
-                 yaml.scalarstring.SingleQuotedScalarString,
-                 yaml.scalarstring.DoubleQuotedScalarString)
+                 ruamelyaml.scalarstring.PreservedScalarString,
+                 ruamelyaml.scalarstring.SingleQuotedScalarString,
+                 ruamelyaml.scalarstring.DoubleQuotedScalarString)
 
 # These types have to be iterated like a dictionary
-__DICT_TYPES = (dict, ChainMap, yaml.comments.CommentedMap)
+__DICT_TYPES = (dict, ChainMap, ruamelyaml.comments.CommentedMap)
 
 # These types have to be iterated like a list
-__LIST_TYPES = (list, yaml.comments.CommentedSeq)
+__LIST_TYPES = (list, ruamelyaml.comments.CommentedSeq)
 
 # These are the provenance types, which have to be cloned rather than any other
 # copying tactic.
@@ -1165,7 +1182,8 @@ def node_copy(source):
         else:
             raise ValueError("Unable to be quick about node_copy of {}".format(value_type))
 
-    ensure_provenance(copy)
+    #ensure_provenance(source)
+    # Let's not ensure that...
 
     return copy
 
diff --git a/requirements/requirements.in b/requirements/requirements.in
index 4f08969..52df032 100644
--- a/requirements/requirements.in
+++ b/requirements/requirements.in
@@ -12,6 +12,7 @@ psutil
 #
 # See issues #571 and #790.
 ruamel.yaml >= 0.15.41, < 0.15.52
+pyyaml
 setuptools
 pyroaring
 ujson
diff --git a/requirements/requirements.txt b/requirements/requirements.txt
index d46f925..aaf787d 100644
--- a/requirements/requirements.txt
+++ b/requirements/requirements.txt
@@ -14,6 +14,7 @@ psutil==5.4.8
 ruamel.yaml==0.15.51
 setuptools==39.0.1
 pyroaring==0.2.6
+pyyaml==3.13
 ujson==1.35
 ## The following requirements were added by pip freeze:
 MarkupSafe==1.1.0