You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@buildstream.apache.org by tv...@apache.org on 2021/02/04 07:39:19 UTC

[buildstream] branch tristan/variables-refactor created (now a307e29)

This is an automated email from the ASF dual-hosted git repository.

tvb pushed a change to branch tristan/variables-refactor
in repository https://gitbox.apache.org/repos/asf/buildstream.git.


      at a307e29  _variables.pyx: Fast and slow paths now exist

This branch includes the following new commits:

     new 331ac50  Do not check for all variables in junctions
     new 725a6c5  _variables.pyx: Renaming some things, and documenting for better readability.
     new 814a3e8  tests/format/variables.py: Added some new tests
     new 06a4788  tests/frontend/overlaps.py: Test undefined variables
     new 47cfa63  exceptions.py: Add circular reference error for variables
     new 29ccada  _variables.pyx: Make _expand_var() and _expand_variable_expression() methods
     new 72f8bd2  _variables.pyx: Some more renaming, make subst() take a ScalarNode.
     new 30478ad  _variables.pyx: Fixed provenance from Variables.subst()
     new 8f27d43  element.py: Pass ScalarNode to Variable.subst() when substituting the whitelists
     new 3e298a0  _variables.pyx: Separate public/private APIs and improve consistency of doc comments
     new fdcaa7b  _variables.pyx: Setting up code for split of fast/slow paths
     new 23510da  _variables.pyx: Improve _fast_expand_var()
     new d60ba68  _variables.pyx: Return to cpdef for Variables.subst() and Variables.expand()
     new 1b1a9f8  _variables.pyx: Try to improve fast path performance
     new 0566089  _variables.pyx: Restore the calling node context in _expand_value_expression()
     new a307e29  _variables.pyx: Fast and slow paths now exist

The 16 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



[buildstream] 14/16: _variables.pyx: Try to improve fast path performance

Posted by tv...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tvb pushed a commit to branch tristan/variables-refactor
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit 1b1a9f8d8a611f900c3578a413b2f6d175c8ed2c
Author: Tristan van Berkom <tr...@codethink.co.uk>
AuthorDate: Sat Jul 18 16:15:56 2020 +0900

    _variables.pyx: Try to improve fast path performance
    
    Try using enumerate() for look instead of repeatedly indexing
    the list throughout a while loop.
---
 src/buildstream/_variables.pyx | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/src/buildstream/_variables.pyx b/src/buildstream/_variables.pyx
index c4f86b6..53a5696 100644
--- a/src/buildstream/_variables.pyx
+++ b/src/buildstream/_variables.pyx
@@ -374,18 +374,15 @@ cdef class Variables:
         if counter > 1000:
             raise RecursionError()
 
-        cdef Py_ssize_t idx = 0
-        cdef Py_ssize_t value_len = len(value)
-        cdef str sub
+        cdef Py_ssize_t idx
+        cdef object val
         cdef list acc = []
 
-        while idx < value_len:
-            acc.append(value[idx])
-            idx += 1
-
-            if idx < value_len:
-                acc.append(self._fast_expand_var(<str> value[idx], counter + 1))
-            idx += 1
+        for idx, val in enumerate(value):
+            if (idx % 2) == 0:
+                acc.append(val)
+            else:
+                acc.append(self._fast_expand_var(<str> val, counter + 1))
 
         return "".join(acc)
 


[buildstream] 12/16: _variables.pyx: Improve _fast_expand_var()

Posted by tv...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tvb pushed a commit to branch tristan/variables-refactor
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit 23510dab2732e5cd10868335bed61ef25fab079a
Author: Tristan van Berkom <tr...@codethink.co.uk>
AuthorDate: Sat Jul 18 15:37:57 2020 +0900

    _variables.pyx: Improve _fast_expand_var()
    
      * Reduce typechecking using some typecasts
    
      * Reduce dictionary lookups by using a temporary variable
---
 src/buildstream/_variables.pyx | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/src/buildstream/_variables.pyx b/src/buildstream/_variables.pyx
index b5d62c1..d6384f9 100644
--- a/src/buildstream/_variables.pyx
+++ b/src/buildstream/_variables.pyx
@@ -374,12 +374,15 @@ cdef class Variables:
     #################################################################
     cdef str _fast_expand_var(self, str name, int counter = 0):
         cdef str sub
+        cdef list value_expression
 
-        if len(self._values[name]) > 1:
-            sub = self._fast_expand_value_expression(<list> self._values[name], counter)
-            self._values[name] = [sys.intern(sub)]
+        value_expression = <list> self._values[name]
+        if len(value_expression) > 1:
+            sub = self._fast_expand_value_expression(value_expression, counter)
+            value_expression = [sys.intern(sub)]
+            self._values[name] = value_expression
 
-        return self._values[name][0]
+        return <str> value_expression[0]
 
     cdef str _fast_expand_value_expression(self, list value, int counter = 0):
         if counter > 1000:


[buildstream] 05/16: exceptions.py: Add circular reference error for variables

Posted by tv...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tvb pushed a commit to branch tristan/variables-refactor
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit 47cfa6326fdda46efa49b608549739776db79813
Author: Tristan van Berkom <tr...@codethink.co.uk>
AuthorDate: Fri Jul 17 15:41:05 2020 +0900

    exceptions.py: Add circular reference error for variables
---
 src/buildstream/exceptions.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/buildstream/exceptions.py b/src/buildstream/exceptions.py
index 0eefec3..a7db247 100644
--- a/src/buildstream/exceptions.py
+++ b/src/buildstream/exceptions.py
@@ -142,3 +142,6 @@ class LoadErrorReason(Enum):
 
     CIRCULAR_REFERENCE = 26
     """A circular element reference was detected"""
+
+    CIRCULAR_REFERENCE_VARIABLE = 27
+    """A circular variable reference was detected"""


[buildstream] 15/16: _variables.pyx: Restore the calling node context in _expand_value_expression()

Posted by tv...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tvb pushed a commit to branch tristan/variables-refactor
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit 056608990f7c541e4aaca31798d7e9ef16f58af7
Author: Tristan van Berkom <tr...@codethink.co.uk>
AuthorDate: Sat Jul 18 16:33:56 2020 +0900

    _variables.pyx: Restore the calling node context in _expand_value_expression()
    
    This will be required in the slower algorithm with error reporting, and
    is already used to fix error reporting for references to undefined variables
    in toplevel variable substitutions.
---
 src/buildstream/_variables.pyx | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/src/buildstream/_variables.pyx b/src/buildstream/_variables.pyx
index 53a5696..a8eebb4 100644
--- a/src/buildstream/_variables.pyx
+++ b/src/buildstream/_variables.pyx
@@ -196,7 +196,7 @@ cdef class Variables:
     # Substitutes any variables in 'string' and returns the result.
     #
     # Args:
-    #    (ScalarNode): The ScalarNode to substitute variables in
+    #    node (ScalarNode): The ScalarNode to substitute variables in
     #
     # Returns:
     #    (str): The new string with any substitutions made
@@ -207,7 +207,7 @@ cdef class Variables:
     #
     cpdef str subst(self, ScalarNode node):
         value_expression = _parse_value_expression(node.as_str())
-        return self._expand_value_expression(value_expression)
+        return self._expand_value_expression(value_expression, node)
 
     #################################################################
     #                          Private API                          #
@@ -316,11 +316,11 @@ cdef class Variables:
 
     # _expand_value_expression()
     #
-    # Helper to expand a given top level expansion string tuple in the context
-    # of the given dictionary of expansion strings.
+    # Expands a given top level expansion string.
     #
     # Args:
     #     value_expression (list): The parsed value expression to be expanded
+    #     node (ScalarNode): The toplevel ScalarNode who is asking for an expansion
     #
     # Returns:
     #     (str): The expanded value expression
@@ -329,7 +329,7 @@ cdef class Variables:
     #     KeyError, if any expansion is missing
     #     RecursionError, if recursion required for evaluation is too deep
     #
-    cdef str _expand_value_expression(self, list value_expression):
+    cdef str _expand_value_expression(self, list value_expression, ScalarNode node):
         try:
             return self._fast_expand_value_expression(value_expression)
         except (KeyError, RecursionError):
@@ -343,7 +343,8 @@ cdef class Variables:
                     unmatched.append(var)
 
             if unmatched:
-                message = "Unresolved variable{}: {}".format(
+                message = "{}: Unresolved variable{}: {}".format(
+                    node.get_provenance(),
                     "s" if len(unmatched) > 1 else "",
                     ", ".join(unmatched)
                 )


[buildstream] 04/16: tests/frontend/overlaps.py: Test undefined variables

Posted by tv...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tvb pushed a commit to branch tristan/variables-refactor
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit 06a4788c215c8184e21e85471e26cdc1a5c7294b
Author: Tristan van Berkom <tr...@codethink.co.uk>
AuthorDate: Mon Jun 29 19:59:53 2020 +0900

    tests/frontend/overlaps.py: Test undefined variables
    
    Ensure that we get the expected provenance when expanding a variable
    included in an overlap whitelist entry.
---
 tests/frontend/overlaps.py                      | 15 ++++++++++++++-
 tests/frontend/overlaps/whitelist-undefined.bst | 13 +++++++++++++
 2 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/tests/frontend/overlaps.py b/tests/frontend/overlaps.py
index a45fc70..28bf8a7 100644
--- a/tests/frontend/overlaps.py
+++ b/tests/frontend/overlaps.py
@@ -4,7 +4,7 @@
 import os
 import pytest
 from buildstream.testing.runcli import cli  # pylint: disable=unused-import
-from buildstream.exceptions import ErrorDomain
+from buildstream.exceptions import ErrorDomain, LoadErrorReason
 from buildstream import _yaml
 from buildstream.plugin import CoreWarnings
 from tests.testutils import generate_junction
@@ -71,6 +71,19 @@ def test_overlaps_whitelist_on_overlapper(cli, datafiles):
 
 
 @pytest.mark.datafiles(DATA_DIR)
+def test_overlaps_whitelist_undefined_variable(cli, datafiles):
+    project_dir = str(datafiles)
+    gen_project(project_dir, False)
+    result = cli.run(project=project_dir, silent=True, args=["build", "whitelist-undefined.bst"])
+
+    # Assert that we get the expected undefined variable error,
+    # and that it has the provenance we expect from whitelist-undefined.bst
+    #
+    result.assert_main_error(ErrorDomain.LOAD, LoadErrorReason.UNRESOLVED_VARIABLE)
+    assert "whitelist-undefined.bst [line 13 column 6]" in result.stderr
+
+
+@pytest.mark.datafiles(DATA_DIR)
 @pytest.mark.parametrize("use_fatal_warnings", [True, False])
 def test_overlaps_script(cli, datafiles, use_fatal_warnings):
     # Test overlaps with script element to test
diff --git a/tests/frontend/overlaps/whitelist-undefined.bst b/tests/frontend/overlaps/whitelist-undefined.bst
new file mode 100644
index 0000000..5cb0c64
--- /dev/null
+++ b/tests/frontend/overlaps/whitelist-undefined.bst
@@ -0,0 +1,13 @@
+kind: import
+config:
+  source: /
+  target: /
+depends:
+- b-whitelisted.bst
+sources:
+- kind: local
+  path: "a"
+public:
+  bst:
+    overlap-whitelist:
+    - "%{undefined-variable}/*"


[buildstream] 02/16: _variables.pyx: Renaming some things, and documenting for better readability.

Posted by tv...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tvb pushed a commit to branch tristan/variables-refactor
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit 725a6c51f5a7aba540c45608b175614da9882bfa
Author: Tristan van Berkom <tr...@codethink.co.uk>
AuthorDate: Fri Jul 17 15:27:17 2020 +0900

    _variables.pyx: Renaming some things, and documenting for better readability.
---
 src/buildstream/_variables.pyx | 50 +++++++++++++++++++++++++++++-------------
 1 file changed, 35 insertions(+), 15 deletions(-)

diff --git a/src/buildstream/_variables.pyx b/src/buildstream/_variables.pyx
index bc4b92d..83d0eaa 100644
--- a/src/buildstream/_variables.pyx
+++ b/src/buildstream/_variables.pyx
@@ -48,8 +48,8 @@ PARSE_EXPANSION = re.compile(r"\%\{([a-zA-Z][a-zA-Z0-9_-]*)\}")
 # is added to the concatenation for the result.  Finally ", how are you?" is
 # copied in and the whole lot concatenated for return.
 #
-# To see how strings are parsed, see `_parse_expstr()` after the class, and
-# to see how expansion strings are expanded, see `_expand_expstr()` after that.
+# To see how strings are parsed, see `_parse_value_expression()` after the class, and
+# to see how expansion strings are expanded, see `_expand_value_expression()` after that.
 
 
 # The Variables helper object will resolve the variable references in
@@ -74,9 +74,6 @@ cdef class Variables:
         self.original = node
         self._expstr_map = self._resolve(node)
 
-    def check(self):
-        self._check_variables()
-
     def __getitem__(self, str name):
         if name not in self._expstr_map:
             raise KeyError(name)
@@ -86,6 +83,17 @@ cdef class Variables:
             self._check_variables(subset=[name])
             raise
 
+    # __contains__()
+    #
+    # Checks whether a given variable exists, allows
+    # supporting `if 'foo' in variables` expressions.
+    #
+    # Args:
+    #    name (str): The name of the variable to check for
+    #
+    # Returns:
+    #    (bool): True if `name` is a valid variable
+    #
     def __contains__(self, str name):
         return name in self._expstr_map
 
@@ -99,6 +107,19 @@ cdef class Variables:
     def __iter__(self):
         return _VariablesIterator(self._expstr_map)
 
+    # check()
+    #
+    # Assert that all variables declared on this Variables
+    # instance have been resolved properly, and reports errors
+    # for undefined references and circular references.
+    #
+    # Raises:
+    #   (LoadError): In the case of references to undefined variables
+    #                or circular variable references.
+    #
+    def check(self):
+        self._check_variables()
+
     # get()
     #
     # Expand definition of variable by name. If the variable is not
@@ -150,10 +171,10 @@ cdef class Variables:
     #    LoadError, if the string contains unresolved variable references.
     #
     cpdef subst(self, str string):
-        expstr = _parse_expstr(string)
+        expstr = _parse_value_expression(string)
 
         try:
-            return _expand_expstr(self._expstr_map, expstr)
+            return _expand_value_expression(self._expstr_map, expstr)
         except (KeyError, RecursionError):
             # We also check for unmatch for recursion errors since _check_variables expects
             # subset to be defined
@@ -196,10 +217,9 @@ cdef class Variables:
 
         for key in node.keys():
             value = node.get_str(key)
-            ret[sys.intern(key)] = _parse_expstr(value)
+            ret[sys.intern(key)] = _parse_value_expression(value)
         return ret
 
-
     def _check_variables(self, *, subset=None):
         summary = []
 
@@ -239,7 +259,7 @@ cdef class Variables:
 # something which might "waste" memory, in reality each of these
 # will live as long as the element which uses it, which is the
 # vast majority of the memory usage across the execution of BuildStream.
-cdef dict PARSE_CACHE = {
+cdef dict VALUE_EXPRESSION_CACHE = {
     # Prime the cache with the empty string since otherwise that can
     # cause issues with the parser, complications to which cause slowdown
     "": [""],
@@ -249,11 +269,11 @@ cdef dict PARSE_CACHE = {
 # Helper to parse a string into an expansion string tuple, caching
 # the results so that future parse requests don't need to think about
 # the string
-cdef list _parse_expstr(str instr):
+cdef list _parse_value_expression(str instr):
     cdef list ret
 
     try:
-        return <list> PARSE_CACHE[instr]
+        return <list> VALUE_EXPRESSION_CACHE[instr]
     except KeyError:
         # This use of the regex turns a string like "foo %{bar} baz" into
         # a list ["foo ", "bar", " baz"]
@@ -268,7 +288,7 @@ cdef list _parse_expstr(str instr):
         # but this is measurably cheaper than calculating it each time during
         # string expansion.
         ret = [sys.intern(<str> s) for s in <list> splits]
-        PARSE_CACHE[instr] = ret
+        VALUE_EXPRESSION_CACHE[instr] = ret
         return ret
 
 
@@ -291,7 +311,7 @@ cdef str _expand_var(dict content, str name, int counter = 0):
     cdef str sub
 
     if len(content[name]) > 1:
-        sub = _expand_expstr(content, <list> content[name], counter)
+        sub = _expand_value_expression(content, <list> content[name], counter)
         content[name] = [sys.intern(sub)]
 
     return content[name][0]
@@ -312,7 +332,7 @@ cdef str _expand_var(dict content, str name, int counter = 0):
 #     KeyError, if any expansion is missing
 #     RecursionError, if recursion required for evaluation is too deep
 #
-cdef str _expand_expstr(dict content, list value, int counter = 0):
+cdef str _expand_value_expression(dict content, list value, int counter = 0):
     if counter > 1000:
         raise RecursionError()
 


[buildstream] 13/16: _variables.pyx: Return to cpdef for Variables.subst() and Variables.expand()

Posted by tv...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tvb pushed a commit to branch tristan/variables-refactor
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit d60ba68e777dbee021e11e9280235dc248247028
Author: Tristan van Berkom <tr...@codethink.co.uk>
AuthorDate: Sat Jul 18 16:00:43 2020 +0900

    _variables.pyx: Return to cpdef for Variables.subst() and Variables.expand()
    
    Seems to add a slight calling overhead to separate these implementations
    as cdef.
---
 src/buildstream/_variables.pyx | 38 ++++++++++++--------------------------
 1 file changed, 12 insertions(+), 26 deletions(-)

diff --git a/src/buildstream/_variables.pyx b/src/buildstream/_variables.pyx
index d6384f9..c4f86b6 100644
--- a/src/buildstream/_variables.pyx
+++ b/src/buildstream/_variables.pyx
@@ -180,7 +180,16 @@ cdef class Variables:
     #                 a cyclic variable reference
     #
     cpdef expand(self, Node node):
-        self._expand(node)
+        if isinstance(node, ScalarNode):
+            (<ScalarNode> node).value = self.subst(<ScalarNode> node)
+        elif isinstance(node, SequenceNode):
+            for entry in (<SequenceNode> node).value:
+                self.expand(entry)
+        elif isinstance(node, MappingNode):
+            for entry in (<MappingNode> node).value.values():
+                self.expand(entry)
+        else:
+            assert False, "Unknown 'Node' type"
 
     # subst():
     #
@@ -197,7 +206,8 @@ cdef class Variables:
     #                 a cyclic variable reference
     #
     cpdef str subst(self, ScalarNode node):
-        return self._subst(node)
+        value_expression = _parse_value_expression(node.as_str())
+        return self._expand_value_expression(value_expression)
 
     #################################################################
     #                          Private API                          #
@@ -224,30 +234,6 @@ cdef class Variables:
             ret[sys.intern(key)] = _parse_value_expression(value)
         return ret
 
-    # _expand():
-    #
-    # Internal pure cython implementation of Variables.expand().
-    #
-    cdef _expand(self, Node node):
-        if isinstance(node, ScalarNode):
-            (<ScalarNode> node).value = self._subst(<ScalarNode> node)
-        elif isinstance(node, SequenceNode):
-            for entry in (<SequenceNode> node).value:
-                self._expand(entry)
-        elif isinstance(node, MappingNode):
-            for entry in (<MappingNode> node).value.values():
-                self._expand(entry)
-        else:
-            assert False, "Unknown 'Node' type"
-
-    # _subst():
-    #
-    # Internal pure cython implementation of Variables.subst().
-    #
-    cdef str _subst(self, ScalarNode node):
-        value_expression = _parse_value_expression(node.as_str())
-        return self._expand_value_expression(value_expression)
-
     # _check_variables()
     #
     # Raises a user facing error in the case that an error was detected


[buildstream] 01/16: Do not check for all variables in junctions

Posted by tv...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tvb pushed a commit to branch tristan/variables-refactor
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit 331ac50347551bc4211fea2d0a061b5f7f6dda33
Author: Valentin David <va...@codethink.co.uk>
AuthorDate: Tue Jun 2 13:13:28 2020 +0200

    Do not check for all variables in junctions
    
    Junctions are loaded before all variables can loaded. So there should
    not be errors for unused variables that are defined using undefined
    variables.
    
    Also this checks for undefined variables in the same time as looking
    for cycles. This will allow to show more errors to the user when both
    type of errors happen in the same time. This also simplify error
    handling in `_variables.pyx`.
---
 src/buildstream/_variables.pyx                     | 77 +++++++++++++---------
 src/buildstream/element.py                         |  2 +
 src/buildstream/exceptions.py                      |  7 +-
 tests/format/variables.py                          | 14 +++-
 tests/format/variables/partial_context/base.bst    |  4 ++
 .../variables/partial_context/base/project.conf    |  3 +
 .../format/variables/partial_context/base/vars.yml |  2 +
 .../format/variables/partial_context/project.conf  |  8 +++
 tests/format/variables/partial_context/test.bst    |  3 +
 9 files changed, 82 insertions(+), 38 deletions(-)

diff --git a/src/buildstream/_variables.pyx b/src/buildstream/_variables.pyx
index e3fcfc3..bc4b92d 100644
--- a/src/buildstream/_variables.pyx
+++ b/src/buildstream/_variables.pyx
@@ -22,6 +22,7 @@
 
 import re
 import sys
+import itertools
 
 from ._exceptions import LoadError
 from .exceptions import LoadErrorReason
@@ -72,11 +73,18 @@ cdef class Variables:
     def __init__(self, MappingNode node):
         self.original = node
         self._expstr_map = self._resolve(node)
-        self._check_for_missing()
-        self._check_for_cycles()
+
+    def check(self):
+        self._check_variables()
 
     def __getitem__(self, str name):
-        return _expand_var(self._expstr_map, name)
+        if name not in self._expstr_map:
+            raise KeyError(name)
+        try:
+            return _expand_var(self._expstr_map, name)
+        except (KeyError, RecursionError):
+            self._check_variables(subset=[name])
+            raise
 
     def __contains__(self, str name):
         return name in self._expstr_map
@@ -105,7 +113,7 @@ cdef class Variables:
     cpdef str get(self, str name):
         if name not in self._expstr_map:
             return None
-        return _expand_var(self._expstr_map, name)
+        return self[name]
 
     # expand()
     #
@@ -146,7 +154,9 @@ cdef class Variables:
 
         try:
             return _expand_expstr(self._expstr_map, expstr)
-        except KeyError:
+        except (KeyError, RecursionError):
+            # We also check for unmatch for recursion errors since _check_variables expects
+            # subset to be defined
             unmatched = []
 
             # Look for any unmatched variable names in the expansion string
@@ -161,6 +171,9 @@ cdef class Variables:
                 )
 
                 raise LoadError(message, LoadErrorReason.UNRESOLVED_VARIABLE)
+
+            # Otherwise the missing key is from a variable definition
+            self._check_variables(subset=expstr[1::2])
             # Otherwise, re-raise the KeyError since it clearly came from some
             # other unknowable cause.
             raise
@@ -186,39 +199,41 @@ cdef class Variables:
             ret[sys.intern(key)] = _parse_expstr(value)
         return ret
 
-    def _check_for_missing(self):
-        # First the check for anything unresolvable
+
+    def _check_variables(self, *, subset=None):
         summary = []
-        for key, expstr in self._expstr_map.items():
-            for var in expstr[1::2]:
-                if var not in self._expstr_map:
-                    line = "  unresolved variable '{unmatched}' in declaration of '{variable}' at: {provenance}"
-                    provenance = self.original.get_scalar(key).get_provenance()
-                    summary.append(line.format(unmatched=var, variable=key, provenance=provenance))
-        if summary:
-            raise LoadError("Failed to resolve one or more variable:\n{}\n".format("\n".join(summary)),
-                            LoadErrorReason.UNRESOLVED_VARIABLE)
 
-    def _check_for_cycles(self):
-        # And now the cycle checks
-        def cycle_check(expstr, visited, cleared):
+        def rec_check(name, expstr, visited, cleared):
             for var in expstr[1::2]:
                 if var in cleared:
                     continue
-                if var in visited:
-                    raise LoadError("{}: ".format(self.original.get_scalar(var).get_provenance()) +
-                                    ("Variable '{}' expands to contain a reference to itself. " +
-                                     "Perhaps '{}' contains '%{{{}}}").format(var, visited[-1], var),
-                                     LoadErrorReason.RECURSIVE_VARIABLE)
-                visited.append(var)
-                cycle_check(self._expstr_map[var], visited, cleared)
-                visited.pop()
-                cleared.add(var)
+                elif var in visited:
+                    chain = list(itertools.takewhile(lambda x: x != var, reversed(visited)))
+                    chain.append(var)
+                    chain.reverse()
+                    for i in range(len(chain)-1):
+                        line = "  Variable '{variable}' recusively uses '{rec}' at: {provenance}"
+                        provenance = self.original.get_scalar(chain[i]).get_provenance()
+                        summary.append(line.format(rec=chain[i+1], variable=chain[i], provenance=provenance))
+                elif var not in self._expstr_map:
+                    line = "  unresolved variable '{unmatched}' in declaration of '{variable}' at: {provenance}"
+                    provenance = self.original.get_scalar(name).get_provenance()
+                    summary.append(line.format(unmatched=var, variable=name, provenance=provenance))
+                else:
+                    visited.append(var)
+                    rec_check(var, self._expstr_map[var], visited, cleared)
+                    visited.pop()
+                    cleared.add(var)
 
         cleared = set()
-        for key, expstr in self._expstr_map.items():
-            if key not in cleared:
-                cycle_check(expstr, [key], cleared)
+        for key in subset if subset is not None else self._expstr_map:
+            visited = []
+            rec_check(key, self._expstr_map[key], visited, cleared)
+            cleared.add(key)
+
+        if summary:
+            raise LoadError("Failed to resolve one or more variable:\n{}\n".format("\n".join(summary)),
+                            LoadErrorReason.UNRESOLVED_VARIABLE)
 
 # Cache for the parsed expansion strings.  While this is nominally
 # something which might "waste" memory, in reality each of these
diff --git a/src/buildstream/element.py b/src/buildstream/element.py
index 6a0fa5f..d152630 100644
--- a/src/buildstream/element.py
+++ b/src/buildstream/element.py
@@ -285,6 +285,8 @@ class Element(Plugin):
         variables = self.__extract_variables(project, meta)
         variables["element-name"] = self.name
         self.__variables = Variables(variables)
+        if not meta.is_junction:
+            self.__variables.check()
 
         # Collect the composited environment now that we have variables
         unexpanded_env = self.__extract_environment(project, meta)
diff --git a/src/buildstream/exceptions.py b/src/buildstream/exceptions.py
index e77d64f..0eefec3 100644
--- a/src/buildstream/exceptions.py
+++ b/src/buildstream/exceptions.py
@@ -131,13 +131,10 @@ class LoadErrorReason(Enum):
     RECURSIVE_INCLUDE = 21
     """A recursive include has been encountered"""
 
-    RECURSIVE_VARIABLE = 22
-    """A recursive variable has been encountered"""
-
-    PROTECTED_VARIABLE_REDEFINED = 23
+    PROTECTED_VARIABLE_REDEFINED = 22
     """An attempt was made to set the value of a protected variable"""
 
-    DUPLICATE_DEPENDENCY = 24
+    DUPLICATE_DEPENDENCY = 23
     """A duplicate dependency was detected"""
 
     LINK_FORBIDDEN_DEPENDENCIES = 25
diff --git a/tests/format/variables.py b/tests/format/variables.py
index c5e8eeb..5fad104 100644
--- a/tests/format/variables.py
+++ b/tests/format/variables.py
@@ -67,7 +67,7 @@ def test_simple_cyclic_variables(cli, datafiles):
     print_warning("Performing cyclic test, if this test times out it will " + "exit the test sequence")
     project = str(datafiles)
     result = cli.run(project=project, silent=True, args=["build", "simple-cyclic.bst"])
-    result.assert_main_error(ErrorDomain.LOAD, LoadErrorReason.RECURSIVE_VARIABLE)
+    result.assert_main_error(ErrorDomain.LOAD, LoadErrorReason.UNRESOLVED_VARIABLE)
 
 
 @pytest.mark.timeout(15, method="signal")
@@ -76,7 +76,7 @@ def test_cyclic_variables(cli, datafiles):
     print_warning("Performing cyclic test, if this test times out it will " + "exit the test sequence")
     project = str(datafiles)
     result = cli.run(project=project, silent=True, args=["build", "cyclic.bst"])
-    result.assert_main_error(ErrorDomain.LOAD, LoadErrorReason.RECURSIVE_VARIABLE)
+    result.assert_main_error(ErrorDomain.LOAD, LoadErrorReason.UNRESOLVED_VARIABLE)
 
 
 @pytest.mark.parametrize("protected_var", PROTECTED_VARIABLES)
@@ -168,3 +168,13 @@ def test_variables_resolving_errors_in_public_section(cli, datafiles):
 
     result = cli.run(project=project, args=["show", "--format", "%{public}", "public_unresolved.bst"])
     result.assert_main_error(ErrorDomain.LOAD, LoadErrorReason.UNRESOLVED_VARIABLE)
+
+
+@pytest.mark.datafiles(os.path.join(DATA_DIR, "partial_context"))
+def test_partial_context_junctions(cli, datafiles):
+    project = str(datafiles)
+
+    result = cli.run(project=project, args=["show", "--format", "%{vars}", "test.bst"])
+    result.assert_success()
+    result_vars = _yaml.load_data(result.output)
+    assert result_vars.get_str("eltvar") == "/bar/foo/baz"
diff --git a/tests/format/variables/partial_context/base.bst b/tests/format/variables/partial_context/base.bst
new file mode 100644
index 0000000..10ce559
--- /dev/null
+++ b/tests/format/variables/partial_context/base.bst
@@ -0,0 +1,4 @@
+kind: junction
+sources:
+- kind: local
+  path: base
diff --git a/tests/format/variables/partial_context/base/project.conf b/tests/format/variables/partial_context/base/project.conf
new file mode 100644
index 0000000..91511bf
--- /dev/null
+++ b/tests/format/variables/partial_context/base/project.conf
@@ -0,0 +1,3 @@
+name: base
+min-version: 2.0
+
diff --git a/tests/format/variables/partial_context/base/vars.yml b/tests/format/variables/partial_context/base/vars.yml
new file mode 100644
index 0000000..3a91e13
--- /dev/null
+++ b/tests/format/variables/partial_context/base/vars.yml
@@ -0,0 +1,2 @@
+variables:
+  subvar: "/bar"
diff --git a/tests/format/variables/partial_context/project.conf b/tests/format/variables/partial_context/project.conf
new file mode 100644
index 0000000..e8a8ed0
--- /dev/null
+++ b/tests/format/variables/partial_context/project.conf
@@ -0,0 +1,8 @@
+name: test
+min-version: 2.0
+
+(@): base.bst:vars.yml
+
+variables:
+  var: "%{subvar}/foo"
+
diff --git a/tests/format/variables/partial_context/test.bst b/tests/format/variables/partial_context/test.bst
new file mode 100644
index 0000000..8276763
--- /dev/null
+++ b/tests/format/variables/partial_context/test.bst
@@ -0,0 +1,3 @@
+kind: manual
+variables:
+  eltvar: "%{var}/baz"


[buildstream] 03/16: tests/format/variables.py: Added some new tests

Posted by tv...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tvb pushed a commit to branch tristan/variables-refactor
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit 814a3e8d17433be602a99e32fdfa1ca122edf6d6
Author: Tristan van Berkom <tr...@codethink.co.uk>
AuthorDate: Sun Jun 28 23:59:42 2020 +0900

    tests/format/variables.py: Added some new tests
    
      * Test scenarios where a junction needs to resolve variables
        in order to configure a subproject, but where some other variables
        may be derived from the same subproject.
    
        In this scenario we allow partial resolution of variables
        for junction elements.
    
      * Enhanced the undefined variables and circular reference tests
        to also check for the expected provenances.
---
 tests/format/variables.py                          | 59 ++++++++++++++++------
 tests/format/variables/cyclic_variables/cyclic.bst |  2 +-
 .../variables/cyclic_variables/indirect-cyclic.bst |  8 +++
 .../variables/cyclic_variables/self-reference.bst  |  4 ++
 .../format/variables/missing_variables/manual3.bst | 10 ++++
 5 files changed, 67 insertions(+), 16 deletions(-)

diff --git a/tests/format/variables.py b/tests/format/variables.py
index 5fad104..7074732 100644
--- a/tests/format/variables.py
+++ b/tests/format/variables.py
@@ -53,30 +53,59 @@ def test_overrides(cli, datafiles, target, varname, expected):
     assert result_vars.get_str(varname) == expected
 
 
-@pytest.mark.parametrize("element", ["manual.bst", "manual2.bst"])
+@pytest.mark.parametrize(
+    "element,provenance",
+    [
+        # This test makes a reference to an undefined variable in a build command
+        ("manual.bst", "manual.bst [line 5 column 6]"),
+        # This test makes a reference to an undefined variable by another variable,
+        # ensuring that we validate variables even when they are unused
+        ("manual2.bst", "manual2.bst [line 4 column 8]"),
+        # This test uses a build command to refer to some variables which ultimately
+        # refer to an undefined variable, testing a more complex case.
+        ("manual3.bst", "manual3.bst [line 6 column 8]"),
+    ],
+    ids=["build-command", "variables", "complex"],
+)
 @pytest.mark.datafiles(os.path.join(DATA_DIR, "missing_variables"))
-def test_missing_variable(cli, datafiles, element):
+def test_undefined(cli, datafiles, element, provenance):
     project = str(datafiles)
     result = cli.run(project=project, silent=True, args=["show", "--deps", "none", "--format", "%{config}", element])
     result.assert_main_error(ErrorDomain.LOAD, LoadErrorReason.UNRESOLVED_VARIABLE)
+    assert provenance in result.stderr
 
 
+@pytest.mark.parametrize(
+    "element,provenances",
+    [
+        # Test a simple a -> b and b -> a reference
+        ("simple-cyclic.bst", ["simple-cyclic.bst [line 4 column 5]", "simple-cyclic.bst [line 5 column 5]"]),
+        # Test a simple a -> b and b -> a reference with some text involved
+        ("cyclic.bst", ["cyclic.bst [line 5 column 10]", "cyclic.bst [line 4 column 5]"]),
+        # Test an indirect circular dependency
+        (
+            "indirect-cyclic.bst",
+            [
+                "indirect-cyclic.bst [line 5 column 5]",
+                "indirect-cyclic.bst [line 6 column 5]",
+                "indirect-cyclic.bst [line 7 column 5]",
+                "indirect-cyclic.bst [line 8 column 5]",
+            ],
+        ),
+        # Test an indirect circular dependency
+        ("self-reference.bst", ["self-reference.bst [line 4 column 5]"]),
+    ],
+    ids=["simple", "simple-text", "indirect", "self-reference"],
+)
 @pytest.mark.timeout(15, method="signal")
 @pytest.mark.datafiles(os.path.join(DATA_DIR, "cyclic_variables"))
-def test_simple_cyclic_variables(cli, datafiles):
-    print_warning("Performing cyclic test, if this test times out it will " + "exit the test sequence")
-    project = str(datafiles)
-    result = cli.run(project=project, silent=True, args=["build", "simple-cyclic.bst"])
-    result.assert_main_error(ErrorDomain.LOAD, LoadErrorReason.UNRESOLVED_VARIABLE)
-
-
-@pytest.mark.timeout(15, method="signal")
-@pytest.mark.datafiles(os.path.join(DATA_DIR, "cyclic_variables"))
-def test_cyclic_variables(cli, datafiles):
-    print_warning("Performing cyclic test, if this test times out it will " + "exit the test sequence")
+def test_circular_reference(cli, datafiles, element, provenances):
+    print_warning("Performing cyclic test, if this test times out it will exit the test sequence")
     project = str(datafiles)
-    result = cli.run(project=project, silent=True, args=["build", "cyclic.bst"])
-    result.assert_main_error(ErrorDomain.LOAD, LoadErrorReason.UNRESOLVED_VARIABLE)
+    result = cli.run(project=project, silent=True, args=["build", element])
+    result.assert_main_error(ErrorDomain.LOAD, LoadErrorReason.CIRCULAR_REFERENCE_VARIABLE)
+    for provenance in provenances:
+        assert provenance in result.stderr
 
 
 @pytest.mark.parametrize("protected_var", PROTECTED_VARIABLES)
diff --git a/tests/format/variables/cyclic_variables/cyclic.bst b/tests/format/variables/cyclic_variables/cyclic.bst
index a05a40b..38832fa 100644
--- a/tests/format/variables/cyclic_variables/cyclic.bst
+++ b/tests/format/variables/cyclic_variables/cyclic.bst
@@ -2,4 +2,4 @@ kind: manual
 
 variables:
   a: "%{prefix}/a"
-  prefix: "%{a}/some_prefix/"
\ No newline at end of file
+  prefix: "%{a}/some_prefix/"
diff --git a/tests/format/variables/cyclic_variables/indirect-cyclic.bst b/tests/format/variables/cyclic_variables/indirect-cyclic.bst
new file mode 100644
index 0000000..fb06fb0
--- /dev/null
+++ b/tests/format/variables/cyclic_variables/indirect-cyclic.bst
@@ -0,0 +1,8 @@
+kind: manual
+
+variables:
+  foo: "%{a}"
+  a: "%{b}"
+  b: "%{c}"
+  c: "%{d}"
+  d: "%{a}"
diff --git a/tests/format/variables/cyclic_variables/self-reference.bst b/tests/format/variables/cyclic_variables/self-reference.bst
new file mode 100644
index 0000000..2e9829d
--- /dev/null
+++ b/tests/format/variables/cyclic_variables/self-reference.bst
@@ -0,0 +1,4 @@
+kind: manual
+
+variables:
+  a: "Referencing itself with %{a}"
diff --git a/tests/format/variables/missing_variables/manual3.bst b/tests/format/variables/missing_variables/manual3.bst
new file mode 100644
index 0000000..ff3c8d5
--- /dev/null
+++ b/tests/format/variables/missing_variables/manual3.bst
@@ -0,0 +1,10 @@
+kind: manual
+
+variables:
+  hello: "Hello mister %{pony}"
+  greeting: "The %{hello} string twice: %{hello} again"
+  pony: "The pony is %{undefined}"
+  
+config:
+  build-commands:
+  - Some indirectly undefined variable %{greeting}


[buildstream] 16/16: _variables.pyx: Fast and slow paths now exist

Posted by tv...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tvb pushed a commit to branch tristan/variables-refactor
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit a307e29ff15bd20c836e03602c22e50deba94100
Author: Tristan van Berkom <tr...@codethink.co.uk>
AuthorDate: Sun Jul 19 23:40:45 2020 +0900

    _variables.pyx: Fast and slow paths now exist
---
 src/buildstream/_variables.pyx | 309 +++++++++++++++++++++++++++++++----------
 1 file changed, 235 insertions(+), 74 deletions(-)

diff --git a/src/buildstream/_variables.pyx b/src/buildstream/_variables.pyx
index a8eebb4..25bf987 100644
--- a/src/buildstream/_variables.pyx
+++ b/src/buildstream/_variables.pyx
@@ -1,5 +1,5 @@
 #
-#  Copyright (C) 2016 Codethink Limited
+#  Copyright (C) 2020 Codethink Limited
 #  Copyright (C) 2019 Bloomberg L.P.
 #
 #  This program is free software; you can redistribute it and/or
@@ -26,12 +26,13 @@ import itertools
 
 from ._exceptions import LoadError
 from .exceptions import LoadErrorReason
-from .node cimport MappingNode, Node, ScalarNode, SequenceNode
+from .node cimport MappingNode, Node, ScalarNode, SequenceNode, ProvenanceInformation
 
 # Variables are allowed to have dashes here
 #
 PARSE_EXPANSION = re.compile(r"\%\{([a-zA-Z][a-zA-Z0-9_-]*)\}")
 
+cdef Py_ssize_t MAX_RECURSION_DEPTH = 200
 
 # Throughout this code you will see variables named things like `expstr`.
 # These hold data structures called "expansion strings" and are the parsed
@@ -148,7 +149,11 @@ cdef class Variables:
     #                 a cyclic variable reference
     #
     cpdef check(self):
-        self._check_variables()
+        cdef object key
+
+        # Just resolve all variables.
+        for key in self._values.keys():
+            self._expand_var(<str> key)
 
     # get()
     #
@@ -234,47 +239,6 @@ cdef class Variables:
             ret[sys.intern(key)] = _parse_value_expression(value)
         return ret
 
-    # _check_variables()
-    #
-    # Raises a user facing error in the case that an error was detected
-    # while attempting to resolve a variable.
-    #
-    cdef _check_variables(self, list subset=None):
-        summary = []
-
-        def rec_check(name, expstr, visited, cleared):
-            for var in expstr[1::2]:
-                if var in cleared:
-                    continue
-                elif var in visited:
-                    chain = list(itertools.takewhile(lambda x: x != var, reversed(visited)))
-                    chain.append(var)
-                    chain.reverse()
-                    for i in range(len(chain)-1):
-                        line = "  Variable '{variable}' recusively uses '{rec}' at: {provenance}"
-                        provenance = self._original.get_scalar(chain[i]).get_provenance()
-                        summary.append(line.format(rec=chain[i+1], variable=chain[i], provenance=provenance))
-                elif var not in self._values:
-                    line = "  unresolved variable '{unmatched}' in declaration of '{variable}' at: {provenance}"
-                    provenance = self._original.get_scalar(name).get_provenance()
-                    summary.append(line.format(unmatched=var, variable=name, provenance=provenance))
-                else:
-                    visited.append(var)
-                    rec_check(var, self._values[var], visited, cleared)
-                    visited.pop()
-                    cleared.add(var)
-
-        cleared = set()
-        for key in subset if subset is not None else self._values:
-            visited = []
-            rec_check(key, self._values[key], visited, cleared)
-            cleared.add(key)
-
-        if summary:
-            raise LoadError("Failed to resolve one or more variable:\n{}\n".format("\n".join(summary)),
-                            LoadErrorReason.UNRESOLVED_VARIABLE)
-
-
     #################################################################
     #                     Resolution algorithm                      #
     #################################################################
@@ -311,50 +275,28 @@ cdef class Variables:
         try:
             return self._fast_expand_var(name)
         except (KeyError, RecursionError):
-            self._check_variables(subset=[name])
-            raise
+            return self._slow_expand_var(name)
 
     # _expand_value_expression()
     #
     # Expands a given top level expansion string.
     #
     # Args:
-    #     value_expression (list): The parsed value expression to be expanded
-    #     node (ScalarNode): The toplevel ScalarNode who is asking for an expansion
+    #    value_expression (list): The parsed value expression to be expanded
+    #    node (ScalarNode): The toplevel ScalarNode who is asking for an expansion
     #
     # Returns:
-    #     (str): The expanded value expression
+    #    (str): The expanded value expression
     #
     # Raises:
-    #     KeyError, if any expansion is missing
-    #     RecursionError, if recursion required for evaluation is too deep
+    #    KeyError, if any expansion is missing
+    #    RecursionError, if recursion required for evaluation is too deep
     #
     cdef str _expand_value_expression(self, list value_expression, ScalarNode node):
         try:
             return self._fast_expand_value_expression(value_expression)
         except (KeyError, RecursionError):
-            # We also check for unmatch for recursion errors since _check_variables expects
-            # subset to be defined
-            unmatched = []
-
-            # Look for any unmatched variable names in the expansion string
-            for var in value_expression[1::2]:
-                if var not in self._values:
-                    unmatched.append(var)
-
-            if unmatched:
-                message = "{}: Unresolved variable{}: {}".format(
-                    node.get_provenance(),
-                    "s" if len(unmatched) > 1 else "",
-                    ", ".join(unmatched)
-                )
-                raise LoadError(message, LoadErrorReason.UNRESOLVED_VARIABLE)
-
-            # Otherwise the missing key is from a variable definition
-            self._check_variables(subset=value_expression[1::2])
-            # Otherwise, re-raise the KeyError since it clearly came from some
-            # other unknowable cause.
-            raise
+            return self._slow_expand_value_expression(None, value_expression, node)
 
     #################################################################
     #             Resolution algorithm: fast path                   #
@@ -372,7 +314,7 @@ cdef class Variables:
         return <str> value_expression[0]
 
     cdef str _fast_expand_value_expression(self, list value, int counter = 0):
-        if counter > 1000:
+        if counter > MAX_RECURSION_DEPTH:
             raise RecursionError()
 
         cdef Py_ssize_t idx
@@ -387,6 +329,225 @@ cdef class Variables:
 
         return "".join(acc)
 
+    #################################################################
+    #             Resolution algorithm: slow path                   #
+    #################################################################
+
+    # _get_checked_value_expression()
+    #
+    # Fetches a value expression from the value table and raises a user
+    # facing error if the value is undefined.
+    #
+    # Args:
+    #    varname (str): The variable name to fetch
+    #    referee (str): The variable name referring to `varname`, or None
+    #    node (ScalarNode): The ScalarNode for which we need to resolve `name`
+    #
+    # Returns:
+    #    (list): The value expression for varname
+    #
+    # Raises:
+    #    (LoadError): An appropriate error in case of undefined variables
+    #
+    cdef list _get_checked_value_expression(self, str varname, str referee, ScalarNode node):
+        cdef ProvenanceInformation provenance = None
+        cdef Node referee_value
+        cdef str error_message
+
+        #
+        # Fetch the value and detect undefined references
+        #
+        try:
+            return <list> self._values[varname]
+        except KeyError as e:
+
+            # Either the provenance is the toplevel calling provenance,
+            # or it is the provenance of the direct referee
+            referee_node = self._original.get_node(referee, allowed_types=None, allow_none=True)
+            if referee_node:
+                provenance = referee_node.get_provenance()
+            elif node:
+                provenance = node.get_provenance()
+
+            error_message = "Reference to undefined variable '{}'".format(varname)
+            if provenance:
+                error_message = "{}: {}".format(provenance, error_message)
+            raise LoadError(error_message, LoadErrorReason.UNRESOLVED_VARIABLE) from e
+
+    cdef str _slow_expand_var(self, str name):
+        cdef list value_expression
+        cdef str expanded
+
+        value_expression = self._get_checked_value_expression(name, None, None)
+        if len(value_expression) > 1:
+            expanded = self._slow_expand_value_expression(name, value_expression, None)
+            value_expression = [sys.intern(expanded)]
+            self._values[name] = value_expression
+
+        return <str> value_expression[0]
+
+    cdef str _slow_expand_value_expression(self, str varname, list value_expression, ScalarNode node):
+        cdef ResolutionStep step
+        cdef ResolutionStep new_step
+        cdef ResolutionStep this_step
+        cdef list iter_value_expression
+        cdef Py_ssize_t idx = 0
+        cdef object value
+        cdef str resolved_varname
+        cdef str resolved_value = None
+
+        # We will collect the varnames and value expressions which need
+        # to be resolved in the loop, sorted by dependency, and then
+        # finally reverse through them resolving them one at a time
+        #
+        cdef list resolved_varnames = []
+        cdef list resolved_values = []
+        
+        step = ResolutionStep()
+        step.init(varname, value_expression, None)
+        while step:
+            # Keep a hold of the current overall step
+            this_step = step
+            step = step.prev
+
+            # Check for circular dependencies
+            this_step.check_circular(self._original)
+
+            for idx, value in enumerate(this_step.value_expression):
+
+                # Skip literal parts of the value expression
+                if (idx % 2) == 0:
+                    continue
+
+                iter_value_expression = self._get_checked_value_expression(<str> value, this_step.referee, node)
+
+                # Queue up this value.
+                #
+                # Even if the value was already resolved, we need it in context to resolve
+                # previously enqueued variables
+                resolved_values.append(iter_value_expression)
+                resolved_varnames.append(value)
+
+                # Queue up the values dependencies.
+                #
+                if len(iter_value_expression) > 1:
+                    new_step = ResolutionStep()
+                    new_step.init(<str> value, iter_value_expression, this_step)
+
+                    # Link it to the end of the stack
+                    new_step.prev = step
+                    step = new_step
+
+        # We've now constructed the dependencies queue such that
+        # later dependencies are on the right, we can now safely peddle
+        # backwards and the last (leftmost) resolved value is the one
+        # we want to return.
+        #
+        idx = len(resolved_values) -1
+        while idx >= 0:
+            # Values in, strings out
+            #
+            iter_value_expression = <list> resolved_values[idx]
+            resolved_varname = <str> resolved_varnames[idx]
+
+            # Resolve as needed
+            #
+            if len(iter_value_expression) > 1:
+                resolved_value = self._resolve_value_expression(iter_value_expression)
+                iter_value_expression = [resolved_value]
+                if resolved_varname is not None:
+                    self._values[resolved_varname] = iter_value_expression
+
+            idx -= 1
+
+        return resolved_value
+
+    cdef str _resolve_value_expression(self, list value_expression):
+        cdef Py_ssize_t idx
+        cdef object value
+        cdef list acc = []
+
+        for idx, value in enumerate(value_expression):
+            if (idx % 2) == 0:
+                acc.append(value)
+            else:
+                acc.append(self._values[value][0])
+
+        return "".join(acc)
+
+
+# ResolutionStep()
+#
+# The context for a single iteration in variable resolution.
+#
+# This only exists for better performance than constructing
+# and unpacking tuples.
+#
+cdef class ResolutionStep:
+    cdef str referee
+    cdef list value_expression
+    cdef ResolutionStep parent
+    cdef ResolutionStep prev
+
+    # init()
+    #
+    # Initialize this ResolutionStep
+    #
+    # Args:
+    #    referee (str): The name of the referring variable
+    #    value_expression (list): The parsed value expression to be expanded
+    #    parent (ResolutionStep): The parent ResolutionStep
+    #
+    cdef init(self, str referee, list value_expression, ResolutionStep parent):
+        self.referee = referee
+        self.value_expression = value_expression
+        self.parent = parent
+        self.prev = None
+
+    # check_circular()
+    #
+    # Check for circular references in this step.
+    #
+    # Args:
+    #    original_values (MappingNode): The original MappingNode for the Variables
+    #
+    # Raises:
+    #    (LoadError): Will raise a user facing LoadError with
+    #                 LoadErrorReason.CIRCULAR_REFERENCE_VARIABLE in case
+    #                 circular references were encountered.
+    #
+    cdef check_circular(self, MappingNode original_values):
+        cdef ResolutionStep step = self.parent
+        while step:
+            if self.referee is step.referee:
+                self._raise_circular_reference_error(step, original_values)
+            step = step.parent
+
+    # _raise_circular_reference_error()
+    #
+    # Helper function to construct a full report and raise the circular reference error.
+    #
+    cdef _raise_circular_reference_error(self, ResolutionStep conflict, MappingNode original_values):
+        cdef list error_lines = []
+        cdef ResolutionStep step = self
+        cdef ScalarNode node
+        cdef str referee
+
+        while step is not conflict:
+            if step.parent:
+                referee = step.parent.referee
+            else:
+                referee = self.referee
+
+            node = original_values.get_scalar(referee)
+
+            error_lines.append("{}: Variable '{}' refers to variable '{}'".format(node.get_provenance(), referee, step.referee))
+            step = step.parent
+
+        raise LoadError("Circular dependency detected on variable '{}'".format(self.referee),
+                        LoadErrorReason.CIRCULAR_REFERENCE_VARIABLE,
+                        detail="\n".join(reversed(error_lines)))
+
 
 # Cache for the parsed expansion strings.  While this is nominally
 # something which might "waste" memory, in reality each of these


[buildstream] 08/16: _variables.pyx: Fixed provenance from Variables.subst()

Posted by tv...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tvb pushed a commit to branch tristan/variables-refactor
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit 30478ad6b22e626736308c99bbf9e254e42b5eff
Author: Tristan van Berkom <tr...@codethink.co.uk>
AuthorDate: Fri Jul 17 17:18:14 2020 +0900

    _variables.pyx: Fixed provenance from Variables.subst()
    
    Report provenance of the input ScalarNode when it refers to an
    undefined variable.
---
 src/buildstream/_variables.pyx | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/buildstream/_variables.pyx b/src/buildstream/_variables.pyx
index 9dd4102..433b328 100644
--- a/src/buildstream/_variables.pyx
+++ b/src/buildstream/_variables.pyx
@@ -197,7 +197,8 @@ cdef class Variables:
                     unmatched.append(var)
 
             if unmatched:
-                message = "Unresolved variable{}: {}".format(
+                message = "{}: Unresolved variable{}: {}".format(
+                    node.get_provenance(),
                     "s" if len(unmatched) > 1 else "",
                     ", ".join(unmatched)
                 )


[buildstream] 11/16: _variables.pyx: Setting up code for split of fast/slow paths

Posted by tv...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tvb pushed a commit to branch tristan/variables-refactor
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit fdcaa7bdd6ba777ca093bfe0e7d8135f2603dae6
Author: Tristan van Berkom <tr...@codethink.co.uk>
AuthorDate: Sat Jul 18 15:26:23 2020 +0900

    _variables.pyx: Setting up code for split of fast/slow paths
    
    Use _expand_var() and _expand_value_expression() as gateways for
    handling KeyError and RecursionError and still calling _check_variables()
    for now as a fallback.
    
    Moved implementations to _fast_expand_var() and _fast_expand_value_expression()
    and in a later commit we will fallback to the non-recursive algorithm, and
    remove _check_variables() completely as this will be obsoleted.
---
 src/buildstream/_variables.pyx | 116 +++++++++++++++++++++++++----------------
 1 file changed, 70 insertions(+), 46 deletions(-)

diff --git a/src/buildstream/_variables.pyx b/src/buildstream/_variables.pyx
index ef9f939..b5d62c1 100644
--- a/src/buildstream/_variables.pyx
+++ b/src/buildstream/_variables.pyx
@@ -106,11 +106,8 @@ cdef class Variables:
     def __getitem__(self, str name):
         if name not in self._values:
             raise KeyError(name)
-        try:
-            return self._expand_var(name)
-        except (KeyError, RecursionError):
-            self._check_variables(subset=[name])
-            raise
+
+        return self._expand_var(name)
 
     # __contains__()
     #
@@ -249,33 +246,7 @@ cdef class Variables:
     #
     cdef str _subst(self, ScalarNode node):
         value_expression = _parse_value_expression(node.as_str())
-
-        try:
-            return self._expand_value_expression(value_expression)
-        except (KeyError, RecursionError):
-            # We also check for unmatch for recursion errors since _check_variables expects
-            # subset to be defined
-            unmatched = []
-
-            # Look for any unmatched variable names in the expansion string
-            for var in value_expression[1::2]:
-                if var not in self._values:
-                    unmatched.append(var)
-
-            if unmatched:
-                message = "{}: Unresolved variable{}: {}".format(
-                    node.get_provenance(),
-                    "s" if len(unmatched) > 1 else "",
-                    ", ".join(unmatched)
-                )
-
-                raise LoadError(message, LoadErrorReason.UNRESOLVED_VARIABLE)
-
-            # Otherwise the missing key is from a variable definition
-            self._check_variables(subset=value_expression[1::2])
-            # Otherwise, re-raise the KeyError since it clearly came from some
-            # other unknowable cause.
-            raise
+        return self._expand_value_expression(value_expression)
 
     # _check_variables()
     #
@@ -317,13 +288,31 @@ cdef class Variables:
             raise LoadError("Failed to resolve one or more variable:\n{}\n".format("\n".join(summary)),
                             LoadErrorReason.UNRESOLVED_VARIABLE)
 
+
+    #################################################################
+    #                     Resolution algorithm                      #
+    #################################################################
+    #
+    # This is split into a fast path and a slower path, with a small
+    # calling layer in between for expanding variables and for expanding
+    # value expressions which refer to variables expected to be defined
+    # in this Variables instance.
+    #
+    # The fast path is initially used, it supports limited variable
+    # expansion depth due to it's recursive nature, and does not support
+    # full error reporting.
+    #
+    # The fallback slower path is non-recursive and reports user facing
+    # errors, it is called in the case that KeyError or RecursionError
+    # are reported from the faster path.
+    #
+
     # _expand_var()
     #
     # Helper to expand and cache a variable definition.
     #
     # Args:
     #     name (str): Name of the variable to expand
-    #     counter (int): Recursion counter
     #
     # Returns:
     #     (str): The expanded value of variable
@@ -332,14 +321,12 @@ cdef class Variables:
     #     KeyError, if any expansion is missing
     #     RecursionError, if recursion required for evaluation is too deep
     #
-    cdef str _expand_var(self, str name, int counter = 0):
-        cdef str sub
-
-        if len(self._values[name]) > 1:
-            sub = self._expand_value_expression(<list> self._values[name], counter)
-            self._values[name] = [sys.intern(sub)]
-
-        return self._values[name][0]
+    cdef str _expand_var(self, str name):
+        try:
+            return self._fast_expand_var(name)
+        except (KeyError, RecursionError):
+            self._check_variables(subset=[name])
+            raise
 
     # _expand_value_expression()
     #
@@ -347,17 +334,54 @@ cdef class Variables:
     # of the given dictionary of expansion strings.
     #
     # Args:
-    #     name (str): Name of the variable to expand
-    #     counter (int): Recursion counter
+    #     value_expression (list): The parsed value expression to be expanded
     #
     # Returns:
-    #     (str): The expanded value of variable
+    #     (str): The expanded value expression
     #
     # Raises:
     #     KeyError, if any expansion is missing
     #     RecursionError, if recursion required for evaluation is too deep
     #
-    cdef str _expand_value_expression(self, list value, int counter = 0):
+    cdef str _expand_value_expression(self, list value_expression):
+        try:
+            return self._fast_expand_value_expression(value_expression)
+        except (KeyError, RecursionError):
+            # We also check for unmatch for recursion errors since _check_variables expects
+            # subset to be defined
+            unmatched = []
+
+            # Look for any unmatched variable names in the expansion string
+            for var in value_expression[1::2]:
+                if var not in self._values:
+                    unmatched.append(var)
+
+            if unmatched:
+                message = "Unresolved variable{}: {}".format(
+                    "s" if len(unmatched) > 1 else "",
+                    ", ".join(unmatched)
+                )
+                raise LoadError(message, LoadErrorReason.UNRESOLVED_VARIABLE)
+
+            # Otherwise the missing key is from a variable definition
+            self._check_variables(subset=value_expression[1::2])
+            # Otherwise, re-raise the KeyError since it clearly came from some
+            # other unknowable cause.
+            raise
+
+    #################################################################
+    #             Resolution algorithm: fast path                   #
+    #################################################################
+    cdef str _fast_expand_var(self, str name, int counter = 0):
+        cdef str sub
+
+        if len(self._values[name]) > 1:
+            sub = self._fast_expand_value_expression(<list> self._values[name], counter)
+            self._values[name] = [sys.intern(sub)]
+
+        return self._values[name][0]
+
+    cdef str _fast_expand_value_expression(self, list value, int counter = 0):
         if counter > 1000:
             raise RecursionError()
 
@@ -371,7 +395,7 @@ cdef class Variables:
             idx += 1
 
             if idx < value_len:
-                acc.append(self._expand_var(<str> value[idx], counter + 1))
+                acc.append(self._fast_expand_var(<str> value[idx], counter + 1))
             idx += 1
 
         return "".join(acc)


[buildstream] 09/16: element.py: Pass ScalarNode to Variable.subst() when substituting the whitelists

Posted by tv...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tvb pushed a commit to branch tristan/variables-refactor
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit 8f27d432f50ff7588f80db305aa466f44e993375
Author: Tristan van Berkom <tr...@codethink.co.uk>
AuthorDate: Fri Jul 17 17:18:18 2020 +0900

    element.py: Pass ScalarNode to Variable.subst() when substituting the whitelists
---
 src/buildstream/element.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/buildstream/element.py b/src/buildstream/element.py
index d152630..6b98ae9 100644
--- a/src/buildstream/element.py
+++ b/src/buildstream/element.py
@@ -2819,8 +2819,8 @@ class Element(Plugin):
         # If this ever changes, things will go wrong unexpectedly.
         if not self.__whitelist_regex:
             bstdata = self.get_public_data("bst")
-            whitelist = bstdata.get_str_list("overlap-whitelist", default=[])
-            whitelist_expressions = [utils._glob2re(self.__variables.subst(exp.strip())) for exp in whitelist]
+            whitelist = bstdata.get_sequence("overlap-whitelist", default=[])
+            whitelist_expressions = [utils._glob2re(self.__variables.subst(node)) for node in whitelist]
             expression = "^(?:" + "|".join(whitelist_expressions) + ")$"
             self.__whitelist_regex = re.compile(expression)
         return self.__whitelist_regex.match(os.path.join(os.sep, path))


[buildstream] 07/16: _variables.pyx: Some more renaming, make subst() take a ScalarNode.

Posted by tv...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tvb pushed a commit to branch tristan/variables-refactor
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit 72f8bd25eee9aeb893018db3f7bc6e8fd24bcec0
Author: Tristan van Berkom <tr...@codethink.co.uk>
AuthorDate: Fri Jul 17 17:12:36 2020 +0900

    _variables.pyx: Some more renaming, make subst() take a ScalarNode.
---
 src/buildstream/_variables.pyx | 59 +++++++++++++++++++++++++-----------------
 1 file changed, 35 insertions(+), 24 deletions(-)

diff --git a/src/buildstream/_variables.pyx b/src/buildstream/_variables.pyx
index 05ca4df..9dd4102 100644
--- a/src/buildstream/_variables.pyx
+++ b/src/buildstream/_variables.pyx
@@ -67,15 +67,26 @@ PARSE_EXPANSION = re.compile(r"\%\{([a-zA-Z][a-zA-Z0-9_-]*)\}")
 #
 cdef class Variables:
 
-    cdef MappingNode original
-    cdef dict _expstr_map
+    cdef MappingNode _original
+    cdef dict _values
 
     def __init__(self, MappingNode node):
-        self.original = node
-        self._expstr_map = self._resolve(node)
+
+        # The original MappingNode, we need to keep this
+        # around for proper error reporting.
+        #
+        self._original = node
+
+        # The value map, this dictionary contains either unresolved
+        # value expressions, or resolved values.
+        #
+        # Each mapping value is a list, in the case that the value
+        # is resolved, then the list is only 1 element long.
+        #
+        self._values = self._init_values(node)
 
     def __getitem__(self, str name):
-        if name not in self._expstr_map:
+        if name not in self._values:
             raise KeyError(name)
         try:
             return self._expand_var(name)
@@ -95,7 +106,7 @@ cdef class Variables:
     #    (bool): True if `name` is a valid variable
     #
     def __contains__(self, str name):
-        return name in self._expstr_map
+        return name in self._values
 
     # __iter__()
     #
@@ -132,7 +143,7 @@ cdef class Variables:
     #   (str|None): The expanded value for the variable or None variable was not defined.
     #
     cpdef str get(self, str name):
-        if name not in self._expstr_map:
+        if name not in self._values:
             return None
         return self[name]
 
@@ -147,7 +158,7 @@ cdef class Variables:
     #
     cpdef expand(self, Node node):
         if isinstance(node, ScalarNode):
-            (<ScalarNode> node).value = self.subst((<ScalarNode> node).value)
+            (<ScalarNode> node).value = self.subst(<ScalarNode> node)
         elif isinstance(node, SequenceNode):
             for entry in (<SequenceNode> node).value:
                 self.expand(entry)
@@ -162,7 +173,7 @@ cdef class Variables:
     # Substitutes any variables in 'string' and returns the result.
     #
     # Args:
-    #    (string): The string to substitute
+    #    (ScalarNode): The ScalarNode to substitute variables in
     #
     # Returns:
     #    (string): The new string with any substitutions made
@@ -170,8 +181,8 @@ cdef class Variables:
     # Raises:
     #    LoadError, if the string contains unresolved variable references.
     #
-    cpdef subst(self, str string):
-        value_expression = _parse_value_expression(string)
+    cpdef subst(self, ScalarNode node):
+        value_expression = _parse_value_expression(node.as_str())
 
         try:
             return self._expand_value_expression(value_expression)
@@ -182,7 +193,7 @@ cdef class Variables:
 
             # Look for any unmatched variable names in the expansion string
             for var in value_expression[1::2]:
-                if var not in self._expstr_map:
+                if var not in self._values:
                     unmatched.append(var)
 
             if unmatched:
@@ -203,7 +214,7 @@ cdef class Variables:
     #
     # Here we resolve all of our inputs into a dictionary, ready for use
     # in subst()
-    cdef dict _resolve(self, MappingNode node):
+    cdef dict _init_values(self, MappingNode node):
         # Special case, if notparallel is specified in the variables for this
         # element, then override max-jobs to be 1.
         # Initialize it as a string as all variables are processed as strings.
@@ -233,22 +244,22 @@ cdef class Variables:
                     chain.reverse()
                     for i in range(len(chain)-1):
                         line = "  Variable '{variable}' recusively uses '{rec}' at: {provenance}"
-                        provenance = self.original.get_scalar(chain[i]).get_provenance()
+                        provenance = self._original.get_scalar(chain[i]).get_provenance()
                         summary.append(line.format(rec=chain[i+1], variable=chain[i], provenance=provenance))
-                elif var not in self._expstr_map:
+                elif var not in self._values:
                     line = "  unresolved variable '{unmatched}' in declaration of '{variable}' at: {provenance}"
-                    provenance = self.original.get_scalar(name).get_provenance()
+                    provenance = self._original.get_scalar(name).get_provenance()
                     summary.append(line.format(unmatched=var, variable=name, provenance=provenance))
                 else:
                     visited.append(var)
-                    rec_check(var, self._expstr_map[var], visited, cleared)
+                    rec_check(var, self._values[var], visited, cleared)
                     visited.pop()
                     cleared.add(var)
 
         cleared = set()
-        for key in subset if subset is not None else self._expstr_map:
+        for key in subset if subset is not None else self._values:
             visited = []
-            rec_check(key, self._expstr_map[key], visited, cleared)
+            rec_check(key, self._values[key], visited, cleared)
             cleared.add(key)
 
         if summary:
@@ -274,11 +285,11 @@ cdef class Variables:
     cdef str _expand_var(self, str name, int counter = 0):
         cdef str sub
 
-        if len(self._expstr_map[name]) > 1:
-            sub = self._expand_value_expression(<list> self._expstr_map[name], counter)
-            self._expstr_map[name] = [sys.intern(sub)]
+        if len(self._values[name]) > 1:
+            sub = self._expand_value_expression(<list> self._values[name], counter)
+            self._values[name] = [sys.intern(sub)]
 
-        return self._expstr_map[name][0]
+        return self._values[name][0]
 
     # _expand_value_expression()
     #
@@ -361,7 +372,7 @@ cdef class _VariablesIterator:
 
     def __init__(self, Variables variables):
         self._variables = variables
-        self._iter = iter(variables._expstr_map)
+        self._iter = iter(variables._values)
 
     def __iter__(self):
         return self


[buildstream] 10/16: _variables.pyx: Separate public/private APIs and improve consistency of doc comments

Posted by tv...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tvb pushed a commit to branch tristan/variables-refactor
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit 3e298a0a3c346dabc440dc324de2b5072b7c9173
Author: Tristan van Berkom <tr...@codethink.co.uk>
AuthorDate: Sat Jul 18 14:54:14 2020 +0900

    _variables.pyx: Separate public/private APIs and improve consistency of doc comments
    
    Now the public facing APIs use `cpdef` and all implementations use `cdef`,
    this is a more explict approach for ensuring fast paths are used whenever
    invoking cython code from cython functions.
---
 src/buildstream/_variables.pyx | 131 ++++++++++++++++++++++++++++-------------
 1 file changed, 90 insertions(+), 41 deletions(-)

diff --git a/src/buildstream/_variables.pyx b/src/buildstream/_variables.pyx
index 433b328..ef9f939 100644
--- a/src/buildstream/_variables.pyx
+++ b/src/buildstream/_variables.pyx
@@ -70,6 +70,9 @@ cdef class Variables:
     cdef MappingNode _original
     cdef dict _values
 
+    #################################################################
+    #                        Magic Methods                          #
+    #################################################################
     def __init__(self, MappingNode node):
 
         # The original MappingNode, we need to keep this
@@ -85,6 +88,21 @@ cdef class Variables:
         #
         self._values = self._init_values(node)
 
+    # __getitem__()
+    #
+    # Fetches a resolved variable by it's name, allows
+    # addressing the Variables instance like a dictionary.
+    #
+    # Args:
+    #    name (str): The name of the variable
+    #
+    # Returns:
+    #    (str): The resolved variable value
+    #
+    # Raises:
+    #    (LoadError): In the case of an undefined variable or
+    #                 a cyclic variable reference
+    #
     def __getitem__(self, str name):
         if name not in self._values:
             raise KeyError(name)
@@ -118,6 +136,10 @@ cdef class Variables:
     def __iter__(self):
         return _VariablesIterator(self)
 
+    #################################################################
+    #                          Public API                           #
+    #################################################################
+
     # check()
     #
     # Assert that all variables declared on this Variables
@@ -125,10 +147,10 @@ cdef class Variables:
     # for undefined references and circular references.
     #
     # Raises:
-    #   (LoadError): In the case of references to undefined variables
-    #                or circular variable references.
+    #    (LoadError): In the case of an undefined variable or
+    #                 a cyclic variable reference
     #
-    def check(self):
+    cpdef check(self):
         self._check_variables()
 
     # get()
@@ -137,10 +159,10 @@ cdef class Variables:
     # defined, it will return None instead of failing.
     #
     # Args:
-    #   name (str): Name of the variable to expand
+    #    name (str): Name of the variable to expand
     #
     # Returns:
-    #   (str|None): The expanded value for the variable or None variable was not defined.
+    #    (str|None): The expanded value for the variable or None variable was not defined.
     #
     cpdef str get(self, str name):
         if name not in self._values:
@@ -154,19 +176,14 @@ cdef class Variables:
     # the node untouched, you should use `node.clone()` beforehand
     #
     # Args:
-    #   (Node): A node for which to substitute the values
+    #    (Node): A node for which to substitute the values
+    #
+    # Raises:
+    #    (LoadError): In the case of an undefined variable or
+    #                 a cyclic variable reference
     #
     cpdef expand(self, Node node):
-        if isinstance(node, ScalarNode):
-            (<ScalarNode> node).value = self.subst(<ScalarNode> node)
-        elif isinstance(node, SequenceNode):
-            for entry in (<SequenceNode> node).value:
-                self.expand(entry)
-        elif isinstance(node, MappingNode):
-            for entry in (<MappingNode> node).value.values():
-                self.expand(entry)
-        else:
-            assert False, "Unknown 'Node' type"
+        self._expand(node)
 
     # subst():
     #
@@ -176,12 +193,61 @@ cdef class Variables:
     #    (ScalarNode): The ScalarNode to substitute variables in
     #
     # Returns:
-    #    (string): The new string with any substitutions made
+    #    (str): The new string with any substitutions made
     #
     # Raises:
-    #    LoadError, if the string contains unresolved variable references.
+    #    (LoadError): In the case of an undefined variable or
+    #                 a cyclic variable reference
+    #
+    cpdef str subst(self, ScalarNode node):
+        return self._subst(node)
+
+    #################################################################
+    #                          Private API                          #
+    #################################################################
+
+    # Variable resolving code
     #
-    cpdef subst(self, ScalarNode node):
+    # Here we resolve all of our inputs into a dictionary, ready for use
+    # in subst()
+    cdef dict _init_values(self, MappingNode node):
+        # Special case, if notparallel is specified in the variables for this
+        # element, then override max-jobs to be 1.
+        # Initialize it as a string as all variables are processed as strings.
+        #
+        if node.get_bool('notparallel', False):
+            node['max-jobs'] = str(1)
+
+        cdef dict ret = {}
+        cdef str key
+        cdef str value
+
+        for key in node.keys():
+            value = node.get_str(key)
+            ret[sys.intern(key)] = _parse_value_expression(value)
+        return ret
+
+    # _expand():
+    #
+    # Internal pure cython implementation of Variables.expand().
+    #
+    cdef _expand(self, Node node):
+        if isinstance(node, ScalarNode):
+            (<ScalarNode> node).value = self._subst(<ScalarNode> node)
+        elif isinstance(node, SequenceNode):
+            for entry in (<SequenceNode> node).value:
+                self._expand(entry)
+        elif isinstance(node, MappingNode):
+            for entry in (<MappingNode> node).value.values():
+                self._expand(entry)
+        else:
+            assert False, "Unknown 'Node' type"
+
+    # _subst():
+    #
+    # Internal pure cython implementation of Variables.subst().
+    #
+    cdef str _subst(self, ScalarNode node):
         value_expression = _parse_value_expression(node.as_str())
 
         try:
@@ -211,28 +277,12 @@ cdef class Variables:
             # other unknowable cause.
             raise
 
-    # Variable resolving code
+    # _check_variables()
     #
-    # Here we resolve all of our inputs into a dictionary, ready for use
-    # in subst()
-    cdef dict _init_values(self, MappingNode node):
-        # Special case, if notparallel is specified in the variables for this
-        # element, then override max-jobs to be 1.
-        # Initialize it as a string as all variables are processed as strings.
-        #
-        if node.get_bool('notparallel', False):
-            node['max-jobs'] = str(1)
-
-        cdef dict ret = {}
-        cdef str key
-        cdef str value
-
-        for key in node.keys():
-            value = node.get_str(key)
-            ret[sys.intern(key)] = _parse_value_expression(value)
-        return ret
-
-    def _check_variables(self, *, subset=None):
+    # Raises a user facing error in the case that an error was detected
+    # while attempting to resolve a variable.
+    #
+    cdef _check_variables(self, list subset=None):
         summary = []
 
         def rec_check(name, expstr, visited, cleared):
@@ -267,7 +317,6 @@ cdef class Variables:
             raise LoadError("Failed to resolve one or more variable:\n{}\n".format("\n".join(summary)),
                             LoadErrorReason.UNRESOLVED_VARIABLE)
 
-
     # _expand_var()
     #
     # Helper to expand and cache a variable definition.


[buildstream] 06/16: _variables.pyx: Make _expand_var() and _expand_variable_expression() methods

Posted by tv...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tvb pushed a commit to branch tristan/variables-refactor
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit 29ccadabfc20b2ab2de8323c6add67f571610a96
Author: Tristan van Berkom <tr...@codethink.co.uk>
AuthorDate: Fri Jul 17 16:06:28 2020 +0900

    _variables.pyx: Make _expand_var() and _expand_variable_expression() methods
    
    Instead of functions at the toplevel.
---
 src/buildstream/_variables.pyx | 143 +++++++++++++++++++++--------------------
 1 file changed, 72 insertions(+), 71 deletions(-)

diff --git a/src/buildstream/_variables.pyx b/src/buildstream/_variables.pyx
index 83d0eaa..05ca4df 100644
--- a/src/buildstream/_variables.pyx
+++ b/src/buildstream/_variables.pyx
@@ -78,7 +78,7 @@ cdef class Variables:
         if name not in self._expstr_map:
             raise KeyError(name)
         try:
-            return _expand_var(self._expstr_map, name)
+            return self._expand_var(name)
         except (KeyError, RecursionError):
             self._check_variables(subset=[name])
             raise
@@ -105,7 +105,7 @@ cdef class Variables:
     #   (Iterator[Tuple[str, str]])
     #
     def __iter__(self):
-        return _VariablesIterator(self._expstr_map)
+        return _VariablesIterator(self)
 
     # check()
     #
@@ -171,17 +171,17 @@ cdef class Variables:
     #    LoadError, if the string contains unresolved variable references.
     #
     cpdef subst(self, str string):
-        expstr = _parse_value_expression(string)
+        value_expression = _parse_value_expression(string)
 
         try:
-            return _expand_value_expression(self._expstr_map, expstr)
+            return self._expand_value_expression(value_expression)
         except (KeyError, RecursionError):
             # We also check for unmatch for recursion errors since _check_variables expects
             # subset to be defined
             unmatched = []
 
             # Look for any unmatched variable names in the expansion string
-            for var in expstr[1::2]:
+            for var in value_expression[1::2]:
                 if var not in self._expstr_map:
                     unmatched.append(var)
 
@@ -194,7 +194,7 @@ cdef class Variables:
                 raise LoadError(message, LoadErrorReason.UNRESOLVED_VARIABLE)
 
             # Otherwise the missing key is from a variable definition
-            self._check_variables(subset=expstr[1::2])
+            self._check_variables(subset=value_expression[1::2])
             # Otherwise, re-raise the KeyError since it clearly came from some
             # other unknowable cause.
             raise
@@ -255,6 +255,67 @@ cdef class Variables:
             raise LoadError("Failed to resolve one or more variable:\n{}\n".format("\n".join(summary)),
                             LoadErrorReason.UNRESOLVED_VARIABLE)
 
+
+    # _expand_var()
+    #
+    # Helper to expand and cache a variable definition.
+    #
+    # Args:
+    #     name (str): Name of the variable to expand
+    #     counter (int): Recursion counter
+    #
+    # Returns:
+    #     (str): The expanded value of variable
+    #
+    # Raises:
+    #     KeyError, if any expansion is missing
+    #     RecursionError, if recursion required for evaluation is too deep
+    #
+    cdef str _expand_var(self, str name, int counter = 0):
+        cdef str sub
+
+        if len(self._expstr_map[name]) > 1:
+            sub = self._expand_value_expression(<list> self._expstr_map[name], counter)
+            self._expstr_map[name] = [sys.intern(sub)]
+
+        return self._expstr_map[name][0]
+
+    # _expand_value_expression()
+    #
+    # Helper to expand a given top level expansion string tuple in the context
+    # of the given dictionary of expansion strings.
+    #
+    # Args:
+    #     name (str): Name of the variable to expand
+    #     counter (int): Recursion counter
+    #
+    # Returns:
+    #     (str): The expanded value of variable
+    #
+    # Raises:
+    #     KeyError, if any expansion is missing
+    #     RecursionError, if recursion required for evaluation is too deep
+    #
+    cdef str _expand_value_expression(self, list value, int counter = 0):
+        if counter > 1000:
+            raise RecursionError()
+
+        cdef Py_ssize_t idx = 0
+        cdef Py_ssize_t value_len = len(value)
+        cdef str sub
+        cdef list acc = []
+
+        while idx < value_len:
+            acc.append(value[idx])
+            idx += 1
+
+            if idx < value_len:
+                acc.append(self._expand_var(<str> value[idx], counter + 1))
+            idx += 1
+
+        return "".join(acc)
+
+
 # Cache for the parsed expansion strings.  While this is nominally
 # something which might "waste" memory, in reality each of these
 # will live as long as the element which uses it, which is the
@@ -292,79 +353,19 @@ cdef list _parse_value_expression(str instr):
         return ret
 
 
-# Helper to expand and cache a variable definition in the context of
-# the given dictionary of expansion strings.
-#
-# Args:
-#     content (dict): Dictionary of expansion strings
-#     name (str): Name of the variable to expand
-#     counter (int): Recursion counter
-#
-# Returns:
-#     (str): The expanded value of variable
-#
-# Raises:
-#     KeyError, if any expansion is missing
-#     RecursionError, if recursion required for evaluation is too deep
-#
-cdef str _expand_var(dict content, str name, int counter = 0):
-    cdef str sub
-
-    if len(content[name]) > 1:
-        sub = _expand_value_expression(content, <list> content[name], counter)
-        content[name] = [sys.intern(sub)]
-
-    return content[name][0]
-
-
-# Helper to expand a given top level expansion string tuple in the context
-# of the given dictionary of expansion strings.
-#
-# Args:
-#     content (dict): Dictionary of expansion strings
-#     name (str): Name of the variable to expand
-#     counter (int): Recursion counter
-#
-# Returns:
-#     (str): The expanded value of variable
-#
-# Raises:
-#     KeyError, if any expansion is missing
-#     RecursionError, if recursion required for evaluation is too deep
-#
-cdef str _expand_value_expression(dict content, list value, int counter = 0):
-    if counter > 1000:
-        raise RecursionError()
-
-    cdef Py_ssize_t idx = 0
-    cdef Py_ssize_t value_len = len(value)
-    cdef str sub
-    cdef list acc = []
-
-    while idx < value_len:
-        acc.append(value[idx])
-        idx += 1
-
-        if idx < value_len:
-            acc.append(_expand_var(content, <str> value[idx], counter + 1))
-        idx += 1
-
-    return "".join(acc)
-
-
 # Iterator for all flatten variables.
 # Used by Variables.__iter__
 cdef class _VariablesIterator:
-    cdef dict _expstr_map
+    cdef Variables _variables
     cdef object _iter
 
-    def __init__(self, dict expstr_map):
-        self._expstr_map = expstr_map
-        self._iter = iter(expstr_map)
+    def __init__(self, Variables variables):
+        self._variables = variables
+        self._iter = iter(variables._expstr_map)
 
     def __iter__(self):
         return self
 
     def __next__(self):
         name = next(self._iter)
-        return name, _expand_var(self._expstr_map, name)
+        return name, self._variables._expand_var(name)