You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@buildstream.apache.org by gi...@apache.org on 2020/12/29 13:01:50 UTC

[buildstream] branch tristan/partial-variables-manual-string-join created (now 87added)

This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a change to branch tristan/partial-variables-manual-string-join
in repository https://gitbox.apache.org/repos/asf/buildstream.git.


      at 87added  _variables.pyx: Change ResolutionStep() to be an allocated struct

This branch includes the following new commits:

     new e125da0  _variables.pyx: Rewrite of variables code.
     new b55f31b  tests/format/variables.py: Added some new tests
     new 24503c3  tests/frontend/overlaps.py: Test undefined variables
     new f4b1c34  _variables.py: Some microoptimizations, remove tuplization and some typchecking overheads
     new 081d85d  _variables.pyx: Remove lists from the loops, use links instead
     new 310b294  _variables.pyx: Use a linked list for ValuePart instead of a list.
     new 727533c  _variables.py: Revert to using `list` time in favor of ValueLink objects
     new c67fe08  _variables: Convert ValuePart to C struct
     new a1c6a7e  _variables.pyx: Change the variable_names set for an array
     new 0bac401  _variables.pyx: Don't hold onto ProvenanceInformation, hold onto nodes instead.
     new 116eec0  _variables.pyx: Pass around provenance node instead of provenance itself
     new 667da2a  _variables.pyx: Restructuring and removing yellowness
     new c43bbf9  _variables.pyx: Only intern the value class strings, not the variable names
     new a85b3e5  _variables.pyx: Added ObjectArray, using for ValueClass varnames
     new d5aab43  _variables.pyx: Use ObjectArray to accumulate deps in _resolve()
     new 4a148bc  _variables: ENABLE PROFILING
     new 3889321  _variables.pyx: Remove refcounting from ObjectArray
     new c763970  _variables.pyx: Avoid dictionary lookups in Value.resolve()
     new 8694922  _variables.py: Remove dependencies, use only parts.
     new 8ba877d  _variables.pyx: Try optimizing "".join() with C API
     new b1713ea  _variables.pyx: Value.resolve() now takes strings as input, no more _do_resolve()
     new 20db0e1  _variables.pyx: Early return from _resolve()
     new 07d5b63  _variables.pyx: Reorganize loop for better early return
     new c61359c  _variables.pyx: Move the meat of value resolution to ValueClass
     new a3ed24f  _variables.pyx: Minor improvement on resolve() APIs (Pass PyObject **)
     new 1757a42  _variables.pyx: Cache the array used to accumulate variables in _resolve_value()
     new 87added  _variables.pyx: Change ResolutionStep() to be an allocated struct

The 27 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



[buildstream] 17/27: _variables.pyx: Remove refcounting from ObjectArray

Posted by gi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch tristan/partial-variables-manual-string-join
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit 3889321a5b58fb3dffacff4f5cb41a3fd41674ba
Author: Tristan van Berkom <tr...@codethink.co.uk>
AuthorDate: Fri Jul 3 17:12:36 2020 +0900

    _variables.pyx: Remove refcounting from ObjectArray
---
 src/buildstream/_variables.pyx | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/src/buildstream/_variables.pyx b/src/buildstream/_variables.pyx
index c1d7b92..c2e0b29 100644
--- a/src/buildstream/_variables.pyx
+++ b/src/buildstream/_variables.pyx
@@ -774,7 +774,7 @@ cdef void object_array_append(ObjectArray *array, PyObject *obj):
         if not array.array:
             raise MemoryError()
 
-    Py_XINCREF(obj)
+    # Py_XINCREF(obj)
     array.array[array.length] = obj
     array.length += 1
 
@@ -787,11 +787,10 @@ cdef void object_array_append(ObjectArray *array, PyObject *obj):
 #    array (ObjectArray *): The array to free up
 #
 cdef void object_array_free(ObjectArray *array):
-    cdef Py_ssize_t idx = 0
-    while idx < array.length:
-        Py_XDECREF(array.array[idx])
-        idx += 1
-
+    #cdef Py_ssize_t idx = 0
+    #while idx < array.length:
+    #    Py_XDECREF(array.array[idx])
+    #    idx += 1
     if array._size > 0:
         PyMem_Free(array.array)
 


[buildstream] 05/27: _variables.pyx: Remove lists from the loops, use links instead

Posted by gi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch tristan/partial-variables-manual-string-join
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit 081d85d638b20c05ba5ec5f82f5dceb33c82006b
Author: Tristan van Berkom <tr...@codethink.co.uk>
AuthorDate: Wed Jul 1 18:18:38 2020 +0900

    _variables.pyx: Remove lists from the loops, use links instead
---
 src/buildstream/_variables.pyx | 47 +++++++++++++++++++++++++++++-------------
 1 file changed, 33 insertions(+), 14 deletions(-)

diff --git a/src/buildstream/_variables.pyx b/src/buildstream/_variables.pyx
index 0fa64fa..9730948 100644
--- a/src/buildstream/_variables.pyx
+++ b/src/buildstream/_variables.pyx
@@ -230,10 +230,12 @@ cdef class Variables:
     cdef str _resolve(self, str name, ProvenanceInformation provenance):
         cdef ResolutionStep step
         cdef ResolutionStep new_step
+        cdef ResolutionStep this_step
         cdef Value iter_value
         cdef object iter_name_object
         cdef str iter_name
         cdef set iter_value_deps
+        cdef str resolved_value
 
         # While iterating over the first loop, we collect all of the variable
         # dependencies, and perform all required validation.
@@ -241,24 +243,25 @@ cdef class Variables:
         # Each iteration processes a ResolutionStep object and has the possibility
         # to enque more ResolutionStep objects as a result.
         #
-        cdef list pending;
-        cdef list deps = []
+        cdef ValueLink deps = None
+        cdef ValueLink dep = None
         cdef bint first_iteration = True
 
         step = ResolutionStep()
         step.init(None, { sys.intern(name) }, None)
-        pending = [step]
 
-        while pending:
-            step = pending.pop()
+        while step:
+            # Keep a hold of the current overall step
+            this_step = step
+            step = step.prev
 
             # Check for circular dependencies
-            step.check_circular(self._values)
+            this_step.check_circular(self._values)
 
             # For each dependency stemming from this provenance
-            for iter_name_object in step.varnames:
+            for iter_name_object in this_step.varnames:
                 iter_name = <str> iter_name_object
-                iter_value = self._get_checked_value(iter_name, step.referee, provenance)
+                iter_value = self._get_checked_value(iter_name, this_step.referee, provenance)
 
                 # Earliest return for an already resolved value
                 #
@@ -269,24 +272,29 @@ cdef class Variables:
 
                 # Queue up this value to be resolved in the next loop
                 if iter_value._resolved is None:
-                    deps.append(iter_value)
+                    dep = ValueLink()
+                    dep.value = iter_value
+                    dep.prev = deps
+                    deps = dep
 
                     # Queue up it's dependencies for resolution
                     iter_value_deps = iter_value.dependencies()
                     if iter_value_deps:
                         new_step = ResolutionStep()
-                        new_step.init(iter_name, iter_value_deps, step)
-                        pending.append(new_step)
+                        new_step.init(iter_name, iter_value_deps, this_step)
+
+                        # Link it to the end of the stack
+                        new_step.prev = step
+                        step = new_step
 
         # We've now constructed the dependencies queue such that
         # later dependencies are on the right, we can now safely peddle
         # backwards and the last (leftmost) resolved value is the one
         # we want to return.
         #
-        cdef str resolved_value
         while deps:
-            iter_value = deps.pop()
-            resolved_value = iter_value.resolve(self._values)
+            resolved_value = deps.value.resolve(self._values)
+            deps = deps.prev
 
         return resolved_value
 
@@ -344,6 +352,7 @@ cdef class ResolutionStep:
     cdef str referee
     cdef set varnames
     cdef ResolutionStep parent
+    cdef ResolutionStep prev
 
     # init()
     #
@@ -358,6 +367,7 @@ cdef class ResolutionStep:
         self.referee = referee
         self.varnames = varnames
         self.parent = parent
+        self.prev = None
 
     # check_circular()
     #
@@ -403,6 +413,15 @@ cdef class ResolutionStep:
                         detail="\n".join(reversed(error_lines)))
 
 
+# ValueLink
+#
+# A link list for Values.
+#
+cdef class ValueLink:
+    cdef Value value
+    cdef ValueLink prev
+
+
 # ValuePart()
 #
 # Represents a part of a value (a string and an indicator


[buildstream] 19/27: _variables.py: Remove dependencies, use only parts.

Posted by gi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch tristan/partial-variables-manual-string-join
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit 869492255fa4fc8e725e581ef6fd99e2a1c47f15
Author: Tristan van Berkom <tr...@codethink.co.uk>
AuthorDate: Fri Jul 3 20:06:59 2020 +0900

    _variables.py: Remove dependencies, use only parts.
---
 src/buildstream/_variables.pyx | 100 +++++++++++++++--------------------------
 1 file changed, 35 insertions(+), 65 deletions(-)

diff --git a/src/buildstream/_variables.pyx b/src/buildstream/_variables.pyx
index 6f439c6..d8e6fcc 100644
--- a/src/buildstream/_variables.pyx
+++ b/src/buildstream/_variables.pyx
@@ -218,28 +218,23 @@ cdef class Variables:
     cpdef str _subst(self, ScalarNode node):
         cdef Value value = Value()
         cdef Value iter_value
-        cdef ObjectArray *dependencies
-        cdef Py_ssize_t idx = 0
-        cdef str dep_name
-        cdef str resolve_value
+        cdef str resolved_value
+        cdef ValuePart *part
 
         cdef ObjectArray values
         object_array_init(&(values), -1)
 
         value.init(node)
-        dependencies = value.dependencies()
-        while idx < dependencies.length:
-            dep_name = <str> dependencies.array[idx]
-            iter_value = self._do_resolve(dep_name, node)
-
-            object_array_append(&(values), <PyObject *>iter_value)
-
-            idx += 1
+        part = value._value_class.parts
+        while part:
+            if part.is_variable:
+                iter_value = self._do_resolve(<str> part.text, node)
+                object_array_append(&(values), <PyObject *>iter_value)
+            part = part.next_part
 
         resolved_value = value.resolve(&values, 0)
 
         object_array_free(&(values))
-
         return resolved_value
 
     # _expand()
@@ -292,11 +287,7 @@ cdef class Variables:
         cdef ResolutionStep step
         cdef ResolutionStep new_step
         cdef ResolutionStep this_step
-
         cdef Value iter_value
-        cdef str iter_name
-
-        cdef ObjectArray *iter_value_deps
         cdef Py_ssize_t idx = 0
 
         # We'll be collecting the values to resolve at the end in here
@@ -309,12 +300,13 @@ cdef class Variables:
         # Each iteration processes a ResolutionStep object and has the possibility
         # to enque more ResolutionStep objects as a result.
         #
-        cdef ObjectArray initial_deps
-        object_array_init(&(initial_deps), 1)
-        object_array_append(&(initial_deps), <PyObject *>name)
+        cdef ValuePart initial_part
+        initial_part.text = <PyObject *>name
+        initial_part.is_variable = True
+        initial_part.next_part = NULL
 
         step = ResolutionStep()
-        step.init(None, &(initial_deps), None)
+        step.init(None, &initial_part, None)
 
         while step:
             # Keep a hold of the current overall step
@@ -324,11 +316,13 @@ cdef class Variables:
             # Check for circular dependencies
             this_step.check_circular(self._values)
 
-            idx = 0
-            while idx < this_step.varnames.length:
-                iter_name = <str> this_step.varnames.array[idx]
-                iter_value = self._get_checked_value(iter_name, this_step.referee, pnode)
-                idx += 1
+            part = this_step.parts
+            while part:
+                if not part.is_variable:
+                    part = part.next_part
+                    continue
+
+                iter_value = self._get_checked_value(<str> part.text, this_step.referee, pnode)
 
                 # Queue up this value.
                 #
@@ -339,14 +333,18 @@ cdef class Variables:
                 # Queue up the values dependencies.
                 #
                 # These will be NULL if this value has previously been resolved.
-                iter_value_deps = iter_value.dependencies()
-                if iter_value_deps:
-                    new_step = ResolutionStep()
-                    new_step.init(iter_name, iter_value_deps, this_step)
+                if iter_value._resolved is None:
+                    iter_value_parts = iter_value._value_class.parts
+                    if iter_value_parts:
+                        new_step = ResolutionStep()
+                        new_step.init(<str> part.text, iter_value_parts, this_step)
 
-                    # Link it to the end of the stack
-                    new_step.prev = step
-                    step = new_step
+                        # Link it to the end of the stack
+                        new_step.prev = step
+                        step = new_step
+
+                # Next part of this variable
+                part = part.next_part
 
         # We've now constructed the dependencies queue such that
         # later dependencies are on the right, we can now safely peddle
@@ -361,7 +359,6 @@ cdef class Variables:
 
         # Cleanup
         #
-        object_array_free(&(initial_deps))
         object_array_free(&(values))
 
         return iter_value
@@ -423,7 +420,7 @@ cdef class ResolutionStep:
     cdef str referee
     cdef ResolutionStep parent
     cdef ResolutionStep prev
-    cdef ObjectArray *varnames
+    cdef ValuePart *parts
 
     # init()
     #
@@ -431,12 +428,12 @@ cdef class ResolutionStep:
     #
     # Args:
     #    referee (str): The name of the referring variable
-    #    varnames (set): A set of variable names which referee refers to.
+    #    parts (ValuePart *): A link list of ValueParts which `referee` refers to
     #    parent (ResolutionStep): The parent ResolutionStep
     #
-    cdef init(self, str referee, ObjectArray *varnames, ResolutionStep parent):
+    cdef init(self, str referee, ValuePart *parts, ResolutionStep parent):
         self.referee = referee
-        self.varnames = varnames
+        self.parts = parts
         self.parent = parent
         self.prev = None
 
@@ -556,20 +553,6 @@ cdef class Value:
 
         return self._resolved
 
-    # dependencies()
-    #
-    # Returns the array of dependency variable names
-    #
-    # Returns:
-    #    (ObjectArray *): The array of variable names, or NULL
-    #
-    cdef ObjectArray *dependencies(self):
-        if self._resolved is None:
-            return &(self._value_class.varnames)
-
-        # If we're already resolved, we don't have any dependencies anymore
-        return NULL
-
     # _load_value_class()
     #
     # Load the ValueClass for this Value, possibly reusing
@@ -627,7 +610,6 @@ cdef class ValueClass:
     # Public
     #
     cdef ValuePart *parts
-    cdef ObjectArray varnames
 
     # __dealloc__()
     #
@@ -635,7 +617,6 @@ cdef class ValueClass:
     #
     def __dealloc__(self):
         free_value_parts(self.parts)
-        object_array_free(&(self.varnames))
 
     # init():
     #
@@ -678,7 +659,6 @@ cdef class ValueClass:
         cdef object split_object
         cdef str split
         cdef Py_ssize_t idx = 0
-        cdef Py_ssize_t n_variables = 0
         cdef int is_variable
 
         # Adding parts
@@ -697,7 +677,6 @@ cdef class ValueClass:
                 if (idx % 2) == 0:
                     is_variable = False
                 else:
-                    n_variables += 1
                     is_variable = True
 
                 part = new_value_part(split, is_variable)
@@ -709,15 +688,6 @@ cdef class ValueClass:
 
             idx += 1
 
-        # Initialize the variables array
-        #
-        object_array_init(&(self.varnames), n_variables)
-        part = self.parts
-        while part:
-            if part.is_variable:
-                object_array_append(&(self.varnames), part.text)
-            part = part.next_part
-
 # ValueIterator()
 #
 # Iterator for all flatten variables.


[buildstream] 25/27: _variables.pyx: Minor improvement on resolve() APIs (Pass PyObject **)

Posted by gi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch tristan/partial-variables-manual-string-join
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit a3ed24fa91bd1226b3505712dff97ddcfc34d085
Author: Tristan van Berkom <tr...@codethink.co.uk>
AuthorDate: Sat Jul 11 14:58:15 2020 +0900

    _variables.pyx: Minor improvement on resolve() APIs (Pass PyObject **)
---
 src/buildstream/_variables.pyx | 42 ++++++++++++++++++++++++++++--------------
 1 file changed, 28 insertions(+), 14 deletions(-)

diff --git a/src/buildstream/_variables.pyx b/src/buildstream/_variables.pyx
index 36708c9..a084e6f 100644
--- a/src/buildstream/_variables.pyx
+++ b/src/buildstream/_variables.pyx
@@ -261,7 +261,7 @@ cdef class Variables:
                 object_array_append(&(values), <PyObject *>iter_value)
             part = part.next_part
 
-        resolved_value = value.resolve(&values, 0)
+        resolved_value = value.resolve(values.array)
 
         object_array_free(&(values))
         return resolved_value
@@ -390,14 +390,14 @@ cdef class Variables:
                 # expansion though, because of how variables are
                 # sorted.
                 #
-                iter_value.resolve(&values, idx + 1)
+                iter_value.resolve(&(values.array[idx + 1]))
 
             values.array[idx] = <PyObject *>iter_value._resolved
             idx -= 1
 
         # Save the return of Value.resolve from the toplevel value
         iter_value = <Value>values.array[0]
-        resolved_value = iter_value.resolve(&values, 1)
+        resolved_value = iter_value.resolve(&(values.array[1]))
 
         # Cleanup
         #
@@ -559,19 +559,24 @@ cdef class Value:
     # resolve()
     #
     # Resolve the value of this variable, this function expects
-    # all dependency values to already be resolved, otherwise
-    # it will fail due to an undefined variable.
+    # that variables referred to by this Value's ValueClass be
+    # already resolved and prepared as an array of strings.
+    #
+    # The array of strings will be used as needed while resolving
+    # this value from left to right (so the array of strings *must*
+    # contain appropriately resolved values for any variables referred
+    # to in this value, in the correct order).
     #
     # Args:
-    #    values (PyObject **): Array of resolved strings to fill in the values
+    #    values (PyObject **): Array of string objects to fill in variable values
     #
     # Returns:
     #    (str): The resolved value
     #
-    cdef str resolve(self, ObjectArray *values, Py_ssize_t value_idx):
+    cdef str resolve(self, PyObject **values):
 
         if self._resolved is None:
-            self._resolved = self._value_class.resolve(values, value_idx)
+            self._resolved = self._value_class.resolve(values)
 
         return self._resolved
 
@@ -653,13 +658,22 @@ cdef class ValueClass:
 
     # resolve()
     #
+    # Resolve an expanded string for this ValueClass, by filling
+    # in any variables referred to by this ValueClass using the
+    # strings provided in the array, from left to right.
+    #
+    # Args:
+    #    values (PyObject **): Array of string objects to fill in variable values
     #
-    cdef str resolve(self, ObjectArray *values, Py_ssize_t value_idx):
+    # Returns:
+    #    (str): The resolved value
+    #
+    cdef str resolve(self, PyObject **values):
         cdef ValuePart *part
         cdef Py_UCS4 maxchar = 0
         cdef Py_UCS4 part_maxchar
         cdef Py_ssize_t full_length = 0
-        cdef Py_ssize_t idx
+        cdef Py_ssize_t idx = 0
         cdef Py_ssize_t offset = 0
         cdef Py_ssize_t part_length
         cdef PyObject *resolved
@@ -667,11 +681,11 @@ cdef class ValueClass:
 
         # Calculate the number of codepoints and maximum character width
         # required for the strings involved.
-        idx = value_idx
+        idx = 0
         part = self.parts
         while part:
             if part.is_variable:
-                part_object = values.array[idx]
+                part_object = values[idx]
                 idx += 1
             else:
                 part_object = part.text
@@ -687,11 +701,11 @@ cdef class ValueClass:
         resolved = PyUnicode_New(full_length, maxchar)
 
         # This time copy characters as we loop through the parts
-        idx = value_idx
+        idx = 0
         part = self.parts
         while part:
             if part.is_variable:
-                part_object = values.array[idx]
+                part_object = values[idx]
                 idx += 1
             else:
                 part_object = part.text


[buildstream] 07/27: _variables.py: Revert to using `list` time in favor of ValueLink objects

Posted by gi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch tristan/partial-variables-manual-string-join
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit 727533c666d67f875b64dbe8889ca7de75157447
Author: Tristan van Berkom <tr...@codethink.co.uk>
AuthorDate: Wed Jul 1 20:00:24 2020 +0900

    _variables.py: Revert to using `list` time in favor of ValueLink objects
    
    Since ValueLink needs to be allocated in the loop, it's unclear whether
    this is more performant than pushing values onto a python list and then
    popping them off later on.
---
 src/buildstream/_variables.pyx | 21 ++++-----------------
 1 file changed, 4 insertions(+), 17 deletions(-)

diff --git a/src/buildstream/_variables.pyx b/src/buildstream/_variables.pyx
index 2a01b1c..245cbab 100644
--- a/src/buildstream/_variables.pyx
+++ b/src/buildstream/_variables.pyx
@@ -243,8 +243,7 @@ cdef class Variables:
         # Each iteration processes a ResolutionStep object and has the possibility
         # to enque more ResolutionStep objects as a result.
         #
-        cdef ValueLink deps = None
-        cdef ValueLink dep = None
+        cdef list deps = []
         cdef bint first_iteration = True
 
         step = ResolutionStep()
@@ -272,10 +271,7 @@ cdef class Variables:
 
                 # Queue up this value to be resolved in the next loop
                 if iter_value._resolved is None:
-                    dep = ValueLink()
-                    dep.value = iter_value
-                    dep.prev = deps
-                    deps = dep
+                    deps.append(iter_value)
 
                     # Queue up it's dependencies for resolution
                     iter_value_deps = iter_value.dependencies()
@@ -293,8 +289,8 @@ cdef class Variables:
         # we want to return.
         #
         while deps:
-            resolved_value = deps.value.resolve(self._values)
-            deps = deps.prev
+            iter_value = deps.pop()
+            resolved_value = iter_value.resolve(self._values)
 
         return resolved_value
 
@@ -413,15 +409,6 @@ cdef class ResolutionStep:
                         detail="\n".join(reversed(error_lines)))
 
 
-# ValueLink
-#
-# A link list for Values.
-#
-cdef class ValueLink:
-    cdef Value value
-    cdef ValueLink prev
-
-
 # ValuePart()
 #
 # Represents a part of a value (a string and an indicator


[buildstream] 01/27: _variables.pyx: Rewrite of variables code.

Posted by gi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch tristan/partial-variables-manual-string-join
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit e125da043c4b2a0c57cc19313adb9c0084338b6d
Author: Tristan van Berkom <tr...@codethink.co.uk>
AuthorDate: Sun Jun 28 23:51:55 2020 +0900

    _variables.pyx: Rewrite of variables code.
    
    This includes some structural refactoring from Valentin David,
    which introduces the `Variables.check()` method in order to make
    the checks for unresolved variables optional, and avoids performing
    this check on junction elements where it is expected that not all
    variables are in context.
    
    This changes the Variables implementation so that it is split in
    three parts:
    
      * The ValueClass class is a class of value.
    
        This is the broken down representation of a value string
        which might have variable references in it, such as "Hello %{world}".
    
        Instead of caching lists, we now cache these more helpful
        ValueClass objects.
    
      * The Value class is the stateful class which represents a value,
        and has the ability to resolve the value, given other resolved
        values and it's ValueClass
    
      * The Variables class takes care of high level variable resolution
        and providing the main interface for variable resolution and access.
    
    Notably, the variables code no longer does any recursive resolution
    algorithms, and does not need to raise RecursionError anymore.
    
    Related changes:
    
      * exceptions.py: Renamed RECURSIVE_VARIABLE to CIRCULAR_REFERENCE_VARIABLE
    
        This is a more accurate description of what the error actually is
    
      * element.py:
    
        - Only validate the variables if the element is not a junction
    
        - Pass the ScalarNodes to Variables.subst() instead of raw strings
          when substituting variables in overlap whitelists.
    
          This is now required, and will result in proper provenance reporting
          in the case that overlap whitelists make references to undefined variables.
---
 src/buildstream/_variables.pyx | 668 +++++++++++++++++++++++++++++------------
 src/buildstream/element.py     |   6 +-
 src/buildstream/exceptions.py  |   4 +-
 3 files changed, 475 insertions(+), 203 deletions(-)

diff --git a/src/buildstream/_variables.pyx b/src/buildstream/_variables.pyx
index e3fcfc3..e73658d 100644
--- a/src/buildstream/_variables.pyx
+++ b/src/buildstream/_variables.pyx
@@ -1,5 +1,5 @@
 #
-#  Copyright (C) 2016 Codethink Limited
+#  Copyright (C) 2020 Codethink Limited
 #  Copyright (C) 2019 Bloomberg L.P.
 #
 #  This program is free software; you can redistribute it and/or
@@ -22,33 +22,11 @@
 
 import re
 import sys
+import itertools
 
 from ._exceptions import LoadError
 from .exceptions import LoadErrorReason
-from .node cimport MappingNode, Node, ScalarNode, SequenceNode
-
-# Variables are allowed to have dashes here
-#
-PARSE_EXPANSION = re.compile(r"\%\{([a-zA-Z][a-zA-Z0-9_-]*)\}")
-
-
-# Throughout this code you will see variables named things like `expstr`.
-# These hold data structures called "expansion strings" and are the parsed
-# form of the strings which are the input to this subsystem.  Strings
-# such as "Hello %{name}, how are you?" are parsed into the form:
-# ["Hello ", "name", ", how are you?"]
-# i.e. a list which consists of one or more strings.
-# Strings in even indices of the list (0, 2, 4, etc) are constants which
-# are copied into the output of the expansion algorithm.  Strings in the
-# odd indices (1, 3, 5, etc) are the names of further expansions to make.
-# In the example above, first "Hello " is copied, then "name" is expanded
-# and so must be another named expansion string passed in to the constructor
-# of the Variables class, and whatever is yielded from the expansion of "name"
-# is added to the concatenation for the result.  Finally ", how are you?" is
-# copied in and the whole lot concatenated for return.
-#
-# To see how strings are parsed, see `_parse_expstr()` after the class, and
-# to see how expansion strings are expanded, see `_expand_expstr()` after that.
+from .node cimport MappingNode, Node, ScalarNode, SequenceNode, ProvenanceInformation
 
 
 # The Variables helper object will resolve the variable references in
@@ -66,30 +44,61 @@ PARSE_EXPANSION = re.compile(r"\%\{([a-zA-Z][a-zA-Z0-9_-]*)\}")
 #
 cdef class Variables:
 
-    cdef MappingNode original
-    cdef dict _expstr_map
+    cdef dict _values  # The Value objects
 
     def __init__(self, MappingNode node):
-        self.original = node
-        self._expstr_map = self._resolve(node)
-        self._check_for_missing()
-        self._check_for_cycles()
 
-    def __getitem__(self, str name):
-        return _expand_var(self._expstr_map, name)
+        # Special case, if notparallel is specified in the variables for this
+        # element, then override max-jobs to be 1.
+        # Initialize it as a string as all variables are processed as strings.
+        #
+        if node.get_bool('notparallel', False):
+            #
+            # The MappingNode API will automatically convert this `str(1)`
+            # into a ScalarNode, no need to manually create the ScalarNode here.
+            #
+            node['max-jobs'] = str(1)
 
-    def __contains__(self, str name):
-        return name in self._expstr_map
+        self._values = self._init_values(node)
+
+    # __getitem__()
+    #
+    # Enables indexing access to variables.
+    #
+    # Args:
+    #    name (str): The key
+    #
+    # Returns:
+    #    (str): The value
+    #
+    def __getitem__(self, str name) -> str:
+        return self._resolve(name, None)
+
+    # __contains__()
+    #
+    # Implements syntaxes like `if "foo" in variables`
+    #
+    # Args:
+    #    name (str): The key
+    #
+    # Returns:
+    #    (bool): Whether the name exists as a key in this variables mapping
+    #
+    def __contains__(self, str name) -> bool:
+        return name in self._values
 
     # __iter__()
     #
-    # Provide an iterator for all variables effective values
+    # Implements the iterator interface so that we can iterate over the
+    # Variables type, this also allows transformation of the Variables
+    # type into a simple dictionary where the keys are the variable names
+    # and the values are the fully resolve values.
     #
     # Returns:
-    #   (Iterator[Tuple[str, str]])
+    #    (Iterator[Tuple[str, str]]): The variable names and resolved values
     #
     def __iter__(self):
-        return _VariablesIterator(self._expstr_map)
+        return ValueIterator(self, self._values)
 
     # get()
     #
@@ -103,9 +112,9 @@ cdef class Variables:
     #   (str|None): The expanded value for the variable or None variable was not defined.
     #
     cpdef str get(self, str name):
-        if name not in self._expstr_map:
+        if name not in self._values:
             return None
-        return _expand_var(self._expstr_map, name)
+        return self[name]
 
     # expand()
     #
@@ -117,219 +126,480 @@ cdef class Variables:
     #   (Node): A node for which to substitute the values
     #
     cpdef expand(self, Node node):
+        self._expand(node)
+
+    # subst():
+    #
+    # Substitutes any variables in 'string' and returns the result.
+    #
+    # Args:
+    #    (string): The string to substitute
+    #
+    # Returns:
+    #    (string): The new string with any substitutions made
+    #
+    # Raises:
+    #    LoadError, if the string contains unresolved variable references.
+    #
+    cpdef subst(self, ScalarNode node):
+        cdef Value value = Value(node)
+        cdef str dep_name
+
+        for dep_name in value.dependencies():
+            self._resolve(dep_name, node.get_provenance())
+
+        return value.resolve(self._values)
+
+    # check()
+    #
+    # Checks the variables for unresolved references
+    #
+    # Raises:
+    #    (LoadError): If there are unresolved references, then a LoadError
+    #                 with LoadErrorReason.UNRESOLVED_VARIABLE reason will
+    #                 be raised.
+    #
+    cpdef check(self):
+
+        # Resolve all variables.
+        for key in self._values.keys():
+            self._resolve(key, None)
+
+    # _init_values()
+    #
+    # Here we initialize the Value() table, which contains
+    # as of yet unresolved variables.
+    #
+    cdef dict _init_values(self, MappingNode node):
+        cdef dict ret = {}
+        cdef str key
+        cdef ScalarNode value
+
+        for key, value in node.items():
+            ret[sys.intern(key)] = Value(value)
+
+        return ret
+
+    # _expand()
+    #
+    # Internal implementation of Variables.expand()
+    #
+    # Args:
+    #   (Node): A node for which to substitute the values
+    #
+    cdef _expand(self, Node node):
         if isinstance(node, ScalarNode):
-            (<ScalarNode> node).value = self.subst((<ScalarNode> node).value)
+            (<ScalarNode> node).value = self.subst(node)
         elif isinstance(node, SequenceNode):
             for entry in (<SequenceNode> node).value:
-                self.expand(entry)
+                self._expand(entry)
         elif isinstance(node, MappingNode):
             for entry in (<MappingNode> node).value.values():
-                self.expand(entry)
+                self._expand(entry)
         else:
             assert False, "Unknown 'Node' type"
 
-    # subst():
+    # _resolve()
     #
-    # Substitutes any variables in 'string' and returns the result.
+    # Helper to expand and cache a variable definition in the context of
+    # the given dictionary of expansion strings.
     #
     # Args:
-    #    (string): The string to substitute
+    #     name (str): Name of the variable to expand
+    #     provenance (ProvenanceInformation): Whence this variable was refered from
     #
     # Returns:
-    #    (string): The new string with any substitutions made
+    #     (str): The expanded value of variable
     #
     # Raises:
-    #    LoadError, if the string contains unresolved variable references.
+    #     (LoadError): In case there was any undefined variables or circular
+    #                  references encountered when resolving the variable.
     #
-    cpdef subst(self, str string):
-        expstr = _parse_expstr(string)
+    cdef str _resolve(self, str name, ProvenanceInformation provenance):
+        cdef ResolutionStep step
+        cdef Value referee_value
+        cdef Value iter_value
+        cdef object iter_name_object
+        cdef str iter_name
+        cdef set iter_value_deps
+        cdef str cyclic_variable
+
+        # While iterating over the first loop, we collect all of the variable
+        # dependencies, and perform all required validation.
+        #
+        # Each iteration processes a ResolutionStep object and has the possibility
+        # to enque more ResolutionStep objects as a result.
+        #
+        cdef list pending = [ResolutionStep(None, { sys.intern(name) }, None)]
+        cdef list deps = []
+        cdef bint first_iteration = True
 
-        try:
-            return _expand_expstr(self._expstr_map, expstr)
-        except KeyError:
-            unmatched = []
+        while pending:
+            step = pending.pop()
+
+            #
+            # Handle circular deps
+            #
+            step.check_circular(self._values)
 
-            # Look for any unmatched variable names in the expansion string
-            for var in expstr[1::2]:
-                if var not in self._expstr_map:
-                    unmatched.append(var)
+            # For each dependency stemming from this provenance
+            for iter_name_object in step.varnames:
+                iter_name = <str> iter_name_object
 
-            if unmatched:
-                message = "Unresolved variable{}: {}".format(
-                    "s" if len(unmatched) > 1 else "",
-                    ", ".join(unmatched)
-                )
+                iter_value = self._get_checked_value(iter_name, step.referee, provenance)
 
-                raise LoadError(message, LoadErrorReason.UNRESOLVED_VARIABLE)
-            # Otherwise, re-raise the KeyError since it clearly came from some
-            # other unknowable cause.
-            raise
+                # Earliest return for an already resolved value
+                #
+                if first_iteration:
+                    if iter_value._resolved is not None:
+                        return iter_value.resolve(self._values)
+                    first_iteration = False
 
-    # Variable resolving code
+                # Queue up this value to be resolved in the next loop
+                if iter_value._resolved is None:
+                    deps.append(iter_value)
+
+                    # Queue up it's dependencies for resolution
+                    iter_value_deps = iter_value.dependencies()
+                    if iter_value_deps:
+                        pending.append(ResolutionStep(iter_name, iter_value_deps, step))
+
+        #
+        # Now that we've pushed all of the required dependencies onto the deps queue,
+        # we know that there are no undefined variables or circular references, we
+        # also know that the deps queue is ordered such that the earlier items in
+        # the list depend on resolution of later items in the list.
+        #
+        # Now we can bubble back up iterating backwards over the list, and
+        # the last resolved value is the one we want to return.
+        #
+        cdef str resolved_value
+        while deps:
+            iter_value = deps.pop()
+            resolved_value = iter_value.resolve(self._values)
+
+        return resolved_value
+
+    # _get_checked_value()
     #
-    # Here we resolve all of our inputs into a dictionary, ready for use
-    # in subst()
-    cdef dict _resolve(self, MappingNode node):
-        # Special case, if notparallel is specified in the variables for this
-        # element, then override max-jobs to be 1.
-        # Initialize it as a string as all variables are processed as strings.
+    # Fetches a value from the value table and raises a user
+    # facing error if the value is undefined.
+    #
+    # Args:
+    #    varname (str): The variable name to fetch
+    #    referee (str): The variable name referring to `varname`, or None
+    #    provenance (ProvenanceInformation): The provenance, incase referee is None.
+    #
+    # Returns:
+    #   (Value): The Value for varname
+    #
+    # Raises:
+    #   (LoadError): An appropriate error in case of undefined variables
+    #
+    cdef Value _get_checked_value(self, str varname, str referee, ProvenanceInformation provenance):
+        cdef Value referee_value
+        cdef str error_message
+
         #
-        if node.get_bool('notparallel', False):
-            node['max-jobs'] = str(1)
+        # Fetch the value and detect undefined references
+        #
+        try:
+            return self._values[varname]
+        except KeyError as e:
 
-        cdef dict ret = {}
-        cdef str key
-        cdef str value
+            # Either the provenance is the toplevel calling provenance,
+            # or it is the provenance of the direct referee
+            try:
+                referee_value = self._values[referee]
+            except KeyError:
+                referee_value = None
 
-        for key in node.keys():
-            value = node.get_str(key)
-            ret[sys.intern(key)] = _parse_expstr(value)
-        return ret
+            if referee_value:
+                provenance = referee_value.provenance
 
-    def _check_for_missing(self):
-        # First the check for anything unresolvable
-        summary = []
-        for key, expstr in self._expstr_map.items():
-            for var in expstr[1::2]:
-                if var not in self._expstr_map:
-                    line = "  unresolved variable '{unmatched}' in declaration of '{variable}' at: {provenance}"
-                    provenance = self.original.get_scalar(key).get_provenance()
-                    summary.append(line.format(unmatched=var, variable=key, provenance=provenance))
-        if summary:
-            raise LoadError("Failed to resolve one or more variable:\n{}\n".format("\n".join(summary)),
-                            LoadErrorReason.UNRESOLVED_VARIABLE)
-
-    def _check_for_cycles(self):
-        # And now the cycle checks
-        def cycle_check(expstr, visited, cleared):
-            for var in expstr[1::2]:
-                if var in cleared:
-                    continue
-                if var in visited:
-                    raise LoadError("{}: ".format(self.original.get_scalar(var).get_provenance()) +
-                                    ("Variable '{}' expands to contain a reference to itself. " +
-                                     "Perhaps '{}' contains '%{{{}}}").format(var, visited[-1], var),
-                                     LoadErrorReason.RECURSIVE_VARIABLE)
-                visited.append(var)
-                cycle_check(self._expstr_map[var], visited, cleared)
-                visited.pop()
-                cleared.add(var)
-
-        cleared = set()
-        for key, expstr in self._expstr_map.items():
-            if key not in cleared:
-                cycle_check(expstr, [key], cleared)
-
-# Cache for the parsed expansion strings.  While this is nominally
-# something which might "waste" memory, in reality each of these
-# will live as long as the element which uses it, which is the
-# vast majority of the memory usage across the execution of BuildStream.
-cdef dict PARSE_CACHE = {
-    # Prime the cache with the empty string since otherwise that can
-    # cause issues with the parser, complications to which cause slowdown
-    "": [""],
-}
-
-
-# Helper to parse a string into an expansion string tuple, caching
-# the results so that future parse requests don't need to think about
-# the string
-cdef list _parse_expstr(str instr):
-    cdef list ret
-
-    try:
-        return <list> PARSE_CACHE[instr]
-    except KeyError:
-        # This use of the regex turns a string like "foo %{bar} baz" into
-        # a list ["foo ", "bar", " baz"]
-        splits = PARSE_EXPANSION.split(instr)
-        # If an expansion ends the string, we get an empty string on the end
-        # which we can optimise away, making the expansion routines not need
-        # a test for this.
-        if splits[-1] == '':
-           del splits [-1]
-        # Cache an interned copy of this.  We intern it to try and reduce the
-        # memory impact of the cache.  It seems odd to cache the list length
-        # but this is measurably cheaper than calculating it each time during
-        # string expansion.
-        ret = [sys.intern(<str> s) for s in <list> splits]
-        PARSE_CACHE[instr] = ret
-        return ret
+            error_message = "Reference to undefined variable '{}'".format(varname)
+            if provenance:
+                error_message = "{}: {}".format(provenance, error_message)
+            raise LoadError(error_message, LoadErrorReason.UNRESOLVED_VARIABLE) from e
 
 
-# Helper to expand and cache a variable definition in the context of
-# the given dictionary of expansion strings.
+# ResolutionStep()
+#
+# The context for a single iteration in variable resolution.
+#
+# This only exists for better performance than constructing
+# and unpacking tuples.
 #
 # Args:
-#     content (dict): Dictionary of expansion strings
-#     name (str): Name of the variable to expand
-#     counter (int): Recursion counter
+#    referee (str): The name of the referring variable
+#    varnames (set): A set of variable names which referee refers to.
 #
-# Returns:
-#     (str): The expanded value of variable
+cdef class ResolutionStep:
+    cdef str referee
+    cdef set varnames
+    cdef ResolutionStep parent
+
+    def __cinit__(self, str referee, set varnames, ResolutionStep parent):
+        self.referee = referee
+        self.varnames = varnames
+        self.parent = parent
+
+    cdef check_circular(self, dict values):
+        cdef ResolutionStep step = self.parent
+        while step:
+            if self.referee is step.referee:
+                self._raise_circular_reference_error(step, values)
+            step = step.parent
+
+    cdef _raise_circular_reference_error(self, ResolutionStep conflict, dict values):
+        cdef list error_lines = []
+        cdef ResolutionStep step = self
+        cdef Value value
+        cdef str referee
+
+        while step is not conflict:
+            if step.parent:
+                referee = step.parent.referee
+            else:
+                referee = self.referee
+            value = values[referee]
+
+            error_lines.append("{}: Variable '{}' refers to variable '{}'".format(value.provenance, referee, step.referee))
+            step = step.parent
+
+        raise LoadError("Circular dependency detected on variable '{}'".format(self.referee),
+                        LoadErrorReason.CIRCULAR_REFERENCE_VARIABLE,
+                        detail="\n".join(reversed(error_lines)))
+
+
+# ValuePart()
 #
-# Raises:
-#     KeyError, if any expansion is missing
-#     RecursionError, if recursion required for evaluation is too deep
+# Represents a part of a value (a string and an indicator
+# of whether the string is a variable or not).
+#
+# This only exists for better performance than constructing
+# and unpacking tuples.
+#
+# Args:
+#    text (str): The text of this part
+#    is_variable (bint): True if the text is a variable, False if it's literal
 #
-cdef str _expand_var(dict content, str name, int counter = 0):
-    cdef str sub
+cdef class ValuePart:
+    cdef str text
+    cdef bint is_variable
 
-    if len(content[name]) > 1:
-        sub = _expand_expstr(content, <list> content[name], counter)
-        content[name] = [sys.intern(sub)]
+    def __cinit__(self, str text, bint is_variable):
+        self.text = text
+        self.is_variable = is_variable
 
-    return content[name][0]
 
+cdef EMPTY_SET = set()
 
-# Helper to expand a given top level expansion string tuple in the context
-# of the given dictionary of expansion strings.
+# Value():
 #
-# Args:
-#     content (dict): Dictionary of expansion strings
-#     name (str): Name of the variable to expand
-#     counter (int): Recursion counter
+# Represents a variable value
+#
+cdef class Value:
+    cdef ProvenanceInformation provenance
+    cdef ValueClass _value_class
+    cdef str _resolved
+
+    def __cinit__(self, ScalarNode node):
+
+        # Public
+        self.provenance = node.get_provenance()
+
+        # Private
+        self._value_class = self._load_value_class(node.as_str())
+        self._resolved = None
+
+    # resolve()
+    #
+    # Resolve the value of this variable, this function expects
+    # all dependency values to already be resolved, otherwise
+    # it will fail due to an undefined variable.
+    #
+    # Args:
+    #    values (dict): The full value table for resolving dependencies
+    #
+    # Returns:
+    #    (str): The resolved value
+    #
+    cdef str resolve(self, dict values):
+        cdef str dep_name
+        cdef Value part_var
+        cdef ValuePart part
+        cdef object part_object
+        cdef list parts = []
+
+        if self._resolved is None:
+            for part_object in self._value_class.parts:
+                part = <ValuePart> part_object
+
+                if part.is_variable:
+                    part_var = values[part.text]
+                    parts.append(part_var._resolved)
+                else:
+                    parts.append(part.text)
+
+            self._resolved = "".join(parts)
+
+        return self._resolved
+
+    # dependencies()
+    #
+    # Returns the set of dependency variable names
+    #
+    # Returns:
+    #    (set): The set of variable names which this ValueClass depends on, or None.
+    #
+    cdef set dependencies(self):
+        if self._resolved is None:
+            return self._value_class.variable_names
+
+        # If we're already resolved, we don't have any dependencies anymore
+        return EMPTY_SET
+
+    # _load_value_class()
+    #
+    # Load the ValueClass for this Value, possibly reusing
+    # a pre-cached ValueClass if one exists.
+    #
+    # Args:
+    #    string (str): The string to parse
+    #
+    # Returns:
+    #    (ValueClass): The ValueClass object
+    #
+    cdef ValueClass _load_value_class(self, str string):
+        cdef ValueClass ret
+        cdef internal_string = sys.intern(string)
+
+        try:
+            ret = VALUE_CLASS_TABLE[internal_string]
+        except KeyError:
+            ret = ValueClass(internal_string)
+            VALUE_CLASS_TABLE[internal_string] = ret
+
+        return ret
+
+
+# Global cache of all ValueClass objects ever instantiated.
 #
-# Returns:
-#     (str): The expanded value of variable
+# While many elements share exactly the same ValueClasses, they
+# all have their own Value instances and can resolve to different
+# string values.
 #
-# Raises:
-#     KeyError, if any expansion is missing
-#     RecursionError, if recursion required for evaluation is too deep
+# Holding on to this avoids ever parsing the same value strings
+# more than once.
+#
+cdef dict VALUE_CLASS_TABLE = {}
+
+
+#
+# The regular expression used for parsing ValueClass strings.
+#
+# Note that Variable names are allowed to have alphanumeric characters
+# and dashes and underscores, but cannot start with a dash, underscore
+# or a digit.
+#
+VALUE_CLASS_PARSE_EXPANSION = re.compile(r"\%\{([a-zA-Z][a-zA-Z0-9_-]*)\}")
+
+
+# ValueClass()
+#
+# A class representing a broken down parse of a value.
 #
-cdef str _expand_expstr(dict content, list value, int counter = 0):
-    if counter > 1000:
-        raise RecursionError()
+# Args:
+#    string (str): The string which can contain variables
+#
+cdef class ValueClass:
+    #
+    # Public
+    #
+    cdef set variable_names  # A set of variable names
+    cdef list parts  # A list of ValuePart objects
 
-    cdef Py_ssize_t idx = 0
-    cdef Py_ssize_t value_len = len(value)
-    cdef str sub
-    cdef list acc = []
+    def __cinit__(self, str string):
+        self.variable_names = set()
+        self.parts = []
+        self._parse_string(string)
 
-    while idx < value_len:
-        acc.append(value[idx])
-        idx += 1
+    # _parse_string()
+    #
+    # Parse the string for this ValueClass, breaking it down into
+    # the parts list, which is an ordered list of literal values
+    # and variable names, which when resolved, can be joined into
+    # the resolved value.
+    #
+    cdef _parse_string(self, str string):
 
-        if idx < value_len:
-            acc.append(_expand_var(content, <str> value[idx], counter + 1))
-        idx += 1
+        # This use of this regex turns a string like
+        # "foo %{bar} baz" into a list ["foo ", "bar", " baz"]
+        #
+        # This split has special properties in that it will
+        # return empty strings, and even/odd members of the
+        # returned list are meaningful.
+        #
+        # The even number indices are slices of the text which
+        # did not match the regular expression, while the odd
+        # number indices represent variable names, with the "%{}"
+        # portions stripped away.
+        #
+        # In case you are still wondering: Yes. This is very, very weird.
+        #
+        # What do you expect ? These are regular expressions after all,
+        # they are *supposed* to be weird.
+        #
+        cdef list splits = VALUE_CLASS_PARSE_EXPANSION.split(string)
+        cdef object split_object
+        cdef str split
+        cdef Py_ssize_t split_idx = 0
+        cdef bint is_variable
 
-    return "".join(acc)
+        #
+        # Collect the weird regex return value into something
+        # more comprehensible.
+        #
+
+        for split_object in splits:
+            split = <str> split_object
+            if split:
+
+                # Use an intern for the part, this will not only
+                # save memory but it will speed up lookups in the
+                # case that the part in question is used to lookup
+                # variable values.
+                split = <str> sys.intern(split)
+
+                if (split_idx % 2) == 0:
+                    is_variable = False
+                else:
+                    is_variable = True
+                    self.variable_names.add(split)
 
+                self.parts.append(ValuePart(split, is_variable))
 
+            split_idx += 1
+
+
+# ValueIterator()
+#
 # Iterator for all flatten variables.
+#
 # Used by Variables.__iter__
-cdef class _VariablesIterator:
-    cdef dict _expstr_map
+#
+cdef class ValueIterator:
+    cdef Variables _variables
     cdef object _iter
 
-    def __init__(self, dict expstr_map):
-        self._expstr_map = expstr_map
-        self._iter = iter(expstr_map)
+    def __cinit__(self, Variables variables, dict values):
+        self._variables = variables
+        self._iter = iter(values)
 
     def __iter__(self):
         return self
 
     def __next__(self):
         name = next(self._iter)
-        return name, _expand_var(self._expstr_map, name)
+        return name, self._variables[name]
diff --git a/src/buildstream/element.py b/src/buildstream/element.py
index 6a0fa5f..e9f6c51 100644
--- a/src/buildstream/element.py
+++ b/src/buildstream/element.py
@@ -285,6 +285,8 @@ class Element(Plugin):
         variables = self.__extract_variables(project, meta)
         variables["element-name"] = self.name
         self.__variables = Variables(variables)
+        if not meta.is_junction:
+            self.__variables.check()
 
         # Collect the composited environment now that we have variables
         unexpanded_env = self.__extract_environment(project, meta)
@@ -2817,8 +2819,8 @@ class Element(Plugin):
         # If this ever changes, things will go wrong unexpectedly.
         if not self.__whitelist_regex:
             bstdata = self.get_public_data("bst")
-            whitelist = bstdata.get_str_list("overlap-whitelist", default=[])
-            whitelist_expressions = [utils._glob2re(self.__variables.subst(exp.strip())) for exp in whitelist]
+            whitelist = bstdata.get_sequence("overlap-whitelist", default=[])
+            whitelist_expressions = [utils._glob2re(self.__variables.subst(exp)) for exp in whitelist]
             expression = "^(?:" + "|".join(whitelist_expressions) + ")$"
             self.__whitelist_regex = re.compile(expression)
         return self.__whitelist_regex.match(os.path.join(os.sep, path))
diff --git a/src/buildstream/exceptions.py b/src/buildstream/exceptions.py
index e77d64f..79142fa 100644
--- a/src/buildstream/exceptions.py
+++ b/src/buildstream/exceptions.py
@@ -131,8 +131,8 @@ class LoadErrorReason(Enum):
     RECURSIVE_INCLUDE = 21
     """A recursive include has been encountered"""
 
-    RECURSIVE_VARIABLE = 22
-    """A recursive variable has been encountered"""
+    CIRCULAR_REFERENCE_VARIABLE = 22
+    """A circular reference was detected in a variable declaration"""
 
     PROTECTED_VARIABLE_REDEFINED = 23
     """An attempt was made to set the value of a protected variable"""


[buildstream] 15/27: _variables.pyx: Use ObjectArray to accumulate deps in _resolve()

Posted by gi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch tristan/partial-variables-manual-string-join
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit d5aab43b018ea05214db744d659650ae13d6c55a
Author: Tristan van Berkom <tr...@codethink.co.uk>
AuthorDate: Fri Jul 3 02:11:56 2020 +0900

    _variables.pyx: Use ObjectArray to accumulate deps in _resolve()
---
 src/buildstream/_variables.pyx | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/src/buildstream/_variables.pyx b/src/buildstream/_variables.pyx
index d6c7445..c1c368a 100644
--- a/src/buildstream/_variables.pyx
+++ b/src/buildstream/_variables.pyx
@@ -274,10 +274,12 @@ cdef class Variables:
         cdef Py_ssize_t idx = 0
 
         cdef str resolved_value = None
-
-        cdef list deps = []
         cdef bint first_iteration = True
 
+        # We'll be collecting the values to resolve at the end in here
+        cdef ObjectArray values
+        object_array_init(&(values), -1)
+
         # While iterating over the first loop, we collect all of the variable
         # dependencies, and perform all required validation.
         #
@@ -314,7 +316,8 @@ cdef class Variables:
 
                 # Queue up this value to be resolved in the next loop
                 if iter_value._resolved is None:
-                    deps.append(iter_value)
+
+                    object_array_append(&(values), <PyObject *>iter_value)
 
                     # Queue up it's dependencies for resolution
                     iter_value_deps = iter_value.dependencies()
@@ -331,13 +334,16 @@ cdef class Variables:
         # backwards and the last (leftmost) resolved value is the one
         # we want to return.
         #
-        while deps:
-            iter_value = deps.pop()
+        idx = values.length -1
+        while idx >= 0:
+            iter_value = <Value>values.array[idx]
             resolved_value = iter_value.resolve(self._values)
+            idx -= 1
 
         # Cleanup
         #
         object_array_free(&(initial_deps))
+        object_array_free(&(values))
 
         return resolved_value
 


[buildstream] 12/27: _variables.pyx: Restructuring and removing yellowness

Posted by gi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch tristan/partial-variables-manual-string-join
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit 667da2ac3e50079e1ce5354e40cfcb142021d41d
Author: Tristan van Berkom <tr...@codethink.co.uk>
AuthorDate: Thu Jul 2 20:35:01 2020 +0900

    _variables.pyx: Restructuring and removing yellowness
---
 src/buildstream/_variables.pyx | 82 ++++++++++++++++++++++++++----------------
 1 file changed, 52 insertions(+), 30 deletions(-)

diff --git a/src/buildstream/_variables.pyx b/src/buildstream/_variables.pyx
index 8020efe..152a8d3 100644
--- a/src/buildstream/_variables.pyx
+++ b/src/buildstream/_variables.pyx
@@ -129,7 +129,10 @@ cdef class Variables:
     # the node untouched, you should use `node.clone()` beforehand
     #
     # Args:
-    #   (Node): A node for which to substitute the values
+    #    (Node): A node for which to substitute the values
+    #
+    # Raises:
+    #    (LoadError): if the string contains unresolved variable references.
     #
     cpdef expand(self, Node node):
         self._expand(node)
@@ -145,23 +148,10 @@ cdef class Variables:
     #    (string): The new string with any substitutions made
     #
     # Raises:
-    #    LoadError, if the string contains unresolved variable references.
+    #    (LoadError): if the string contains unresolved variable references.
     #
-    cpdef subst(self, ScalarNode node):
-        cdef Value value = Value()
-        cdef PyObject **dependencies
-        cdef Py_ssize_t n_dependencies
-        cdef Py_ssize_t idx = 0
-        cdef str dep_name
-
-        value.init(node)
-        dependencies, n_dependencies = value.dependencies()
-        while idx < n_dependencies:
-            dep_name = <str> dependencies[idx]
-            self._resolve(dep_name, node)
-            idx += 1
-
-        return value.resolve(self._values)
+    cpdef str subst(self, ScalarNode node):
+        return self._subst(node)
 
     # check()
     #
@@ -173,10 +163,11 @@ cdef class Variables:
     #                 be raised.
     #
     cpdef check(self):
+        cdef object key
 
         # Resolve all variables.
         for key in self._values.keys():
-            self._resolve(key, None)
+            self._resolve(<str> key, None)
 
     # _init_values()
     #
@@ -185,20 +176,47 @@ cdef class Variables:
     #
     cdef dict _init_values(self, MappingNode node):
         cdef dict ret = {}
-        cdef key_object
-        cdef value_node_object
-        cdef str key
-        cdef ScalarNode value_node
-
-        for key_object, value_object in node.items():
-            key = <str> sys.intern(<str> key_object)
-            value_node = <ScalarNode> value_object
+        cdef object key
+        cdef object value_node
+        cdef Value value
+
+        for key, value_node in node.items():
+            key = <object> sys.intern(<str> key)
             value = Value()
-            value.init(value_node)
+            value.init(<ScalarNode> value_node)
             ret[key] = value
 
         return ret
 
+    # _subst():
+    #
+    # Internal implementation of Variables.subst()
+    #
+    # Args:
+    #    (string): The string to substitute
+    #
+    # Returns:
+    #    (string): The new string with any substitutions made
+    #
+    # Raises:
+    #    (LoadError): if the string contains unresolved variable references.
+    #
+    cpdef str _subst(self, ScalarNode node):
+        cdef Value value = Value()
+        cdef PyObject **dependencies
+        cdef Py_ssize_t n_dependencies
+        cdef Py_ssize_t idx = 0
+        cdef str dep_name
+
+        value.init(node)
+        dependencies, n_dependencies = value.dependencies()
+        while idx < n_dependencies:
+            dep_name = <str> dependencies[idx]
+            self._resolve(dep_name, node)
+            idx += 1
+
+        return value.resolve(self._values)
+
     # _expand()
     #
     # Internal implementation of Variables.expand()
@@ -206,15 +224,19 @@ cdef class Variables:
     # Args:
     #   (Node): A node for which to substitute the values
     #
+    # Raises:
+    #   (LoadError): if the string contains unresolved variable references.
+    #
     cdef _expand(self, Node node):
+        cdef object entry
         if isinstance(node, ScalarNode):
-            (<ScalarNode> node).value = self.subst(node)
+            (<ScalarNode> node).value = self._subst(<ScalarNode> node)
         elif isinstance(node, SequenceNode):
             for entry in (<SequenceNode> node).value:
-                self._expand(entry)
+                self._expand(<Node> entry)
         elif isinstance(node, MappingNode):
             for entry in (<MappingNode> node).value.values():
-                self._expand(entry)
+                self._expand(<Node> entry)
         else:
             assert False, "Unknown 'Node' type"
 


[buildstream] 16/27: _variables: ENABLE PROFILING

Posted by gi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch tristan/partial-variables-manual-string-join
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit 4a148bc3ccdfe509e64240e1b59d186bd6764c13
Author: Tristan van Berkom <tr...@codethink.co.uk>
AuthorDate: Tue Jul 7 22:32:46 2020 +0900

    _variables: ENABLE PROFILING
---
 src/buildstream/_profile.py    |  2 ++
 src/buildstream/_variables.pyx | 21 ++++++++++++++-------
 2 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/src/buildstream/_profile.py b/src/buildstream/_profile.py
index 0219e83..bc3958e 100644
--- a/src/buildstream/_profile.py
+++ b/src/buildstream/_profile.py
@@ -48,6 +48,8 @@ class Topics:
     LOAD_PIPELINE = "load-pipeline"
     LOAD_SELECTION = "load-selection"
     SCHEDULER = "scheduler"
+    VARIABLES_INIT = "variables-init"
+    VARIABLES_CHECK = "variables-check"
     ALL = "all"
 
 
diff --git a/src/buildstream/_variables.pyx b/src/buildstream/_variables.pyx
index c1c368a..c1d7b92 100644
--- a/src/buildstream/_variables.pyx
+++ b/src/buildstream/_variables.pyx
@@ -28,6 +28,7 @@ from cpython.mem cimport PyMem_Malloc, PyMem_Free, PyMem_Realloc
 from cpython.object cimport PyObject
 from cpython.ref cimport Py_XINCREF, Py_XDECREF
 
+from ._profile import Topics, PROFILER
 from ._exceptions import LoadError
 from .exceptions import LoadErrorReason
 from .node cimport MappingNode, Node, ScalarNode, SequenceNode, ProvenanceInformation
@@ -57,9 +58,12 @@ ctypedef struct ObjectArray:
 cdef class Variables:
 
     cdef dict _values  # The Value objects
+    cdef MappingNode _origin
 
     def __init__(self, MappingNode node):
 
+        self._origin = node
+
         # Special case, if notparallel is specified in the variables for this
         # element, then override max-jobs to be 1.
         # Initialize it as a string as all variables are processed as strings.
@@ -173,9 +177,10 @@ cdef class Variables:
     cpdef check(self):
         cdef object key
 
-        # Resolve all variables.
-        for key in self._values.keys():
-            self._resolve(<str> key, None)
+        with PROFILER.profile(Topics.VARIABLES_CHECK, id(self._origin)):
+            # Resolve all variables.
+            for key in self._values.keys():
+                self._resolve(<str> key, None)
 
     # _init_values()
     #
@@ -188,10 +193,12 @@ cdef class Variables:
         cdef object value_node
         cdef Value value
 
-        for key, value_node in node.items():
-            value = Value()
-            value.init(<ScalarNode> value_node)
-            ret[key] = value
+        with PROFILER.profile(Topics.VARIABLES_INIT, id(self._origin)):
+
+            for key, value_node in node.items():
+                value = Value()
+                value.init(<ScalarNode> value_node)
+                ret[key] = value
 
         return ret
 


[buildstream] 18/27: _variables.pyx: Avoid dictionary lookups in Value.resolve()

Posted by gi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch tristan/partial-variables-manual-string-join
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit c763970d83cdeb3748f3e09b17ef750540bde9dc
Author: Tristan van Berkom <tr...@codethink.co.uk>
AuthorDate: Fri Jul 3 18:40:35 2020 +0900

    _variables.pyx: Avoid dictionary lookups in Value.resolve()
    
    Instead feed the list of already resolved variables to the function.
---
 src/buildstream/_variables.pyx | 74 +++++++++++++++++++++++++-----------------
 1 file changed, 45 insertions(+), 29 deletions(-)

diff --git a/src/buildstream/_variables.pyx b/src/buildstream/_variables.pyx
index c2e0b29..6f439c6 100644
--- a/src/buildstream/_variables.pyx
+++ b/src/buildstream/_variables.pyx
@@ -217,18 +217,30 @@ cdef class Variables:
     #
     cpdef str _subst(self, ScalarNode node):
         cdef Value value = Value()
+        cdef Value iter_value
         cdef ObjectArray *dependencies
         cdef Py_ssize_t idx = 0
         cdef str dep_name
+        cdef str resolve_value
+
+        cdef ObjectArray values
+        object_array_init(&(values), -1)
 
         value.init(node)
         dependencies = value.dependencies()
         while idx < dependencies.length:
             dep_name = <str> dependencies.array[idx]
-            self._resolve(dep_name, node)
+            iter_value = self._do_resolve(dep_name, node)
+
+            object_array_append(&(values), <PyObject *>iter_value)
+
             idx += 1
 
-        return value.resolve(self._values)
+        resolved_value = value.resolve(&values, 0)
+
+        object_array_free(&(values))
+
+        return resolved_value
 
     # _expand()
     #
@@ -253,7 +265,14 @@ cdef class Variables:
         else:
             assert False, "Unknown 'Node' type"
 
-    # _resolve()
+    # XXX
+    #
+    cdef str _resolve(self, str name, ScalarNode pnode):
+        cdef Value value
+        value = self._do_resolve(name, pnode)
+        return value._resolved
+
+    # _do_resolve()
     #
     # Helper to expand and cache a variable definition in the context of
     # the given dictionary of expansion strings.
@@ -269,7 +288,7 @@ cdef class Variables:
     #    (LoadError): In case there was any undefined variables or circular
     #                 references encountered when resolving the variable.
     #
-    cdef str _resolve(self, str name, ScalarNode pnode):
+    cdef Value _do_resolve(self, str name, ScalarNode pnode):
         cdef ResolutionStep step
         cdef ResolutionStep new_step
         cdef ResolutionStep this_step
@@ -280,9 +299,6 @@ cdef class Variables:
         cdef ObjectArray *iter_value_deps
         cdef Py_ssize_t idx = 0
 
-        cdef str resolved_value = None
-        cdef bint first_iteration = True
-
         # We'll be collecting the values to resolve at the end in here
         cdef ObjectArray values
         object_array_init(&(values), -1)
@@ -314,27 +330,23 @@ cdef class Variables:
                 iter_value = self._get_checked_value(iter_name, this_step.referee, pnode)
                 idx += 1
 
-                # Earliest return for an already resolved value
+                # Queue up this value.
                 #
-                if first_iteration:
-                    if iter_value._resolved is not None:
-                        return iter_value.resolve(self._values)
-                    first_iteration = False
-
-                # Queue up this value to be resolved in the next loop
-                if iter_value._resolved is None:
+                # Even if the value was already resolved, we need it in context to resolve
+                # previously enqueued variables
+                object_array_append(&(values), <PyObject *>iter_value)
 
-                    object_array_append(&(values), <PyObject *>iter_value)
-
-                    # Queue up it's dependencies for resolution
-                    iter_value_deps = iter_value.dependencies()
-                    if iter_value_deps:
-                        new_step = ResolutionStep()
-                        new_step.init(iter_name, iter_value_deps, this_step)
+                # Queue up the values dependencies.
+                #
+                # These will be NULL if this value has previously been resolved.
+                iter_value_deps = iter_value.dependencies()
+                if iter_value_deps:
+                    new_step = ResolutionStep()
+                    new_step.init(iter_name, iter_value_deps, this_step)
 
-                        # Link it to the end of the stack
-                        new_step.prev = step
-                        step = new_step
+                    # Link it to the end of the stack
+                    new_step.prev = step
+                    step = new_step
 
         # We've now constructed the dependencies queue such that
         # later dependencies are on the right, we can now safely peddle
@@ -344,7 +356,7 @@ cdef class Variables:
         idx = values.length -1
         while idx >= 0:
             iter_value = <Value>values.array[idx]
-            resolved_value = iter_value.resolve(self._values)
+            iter_value.resolve(&values, idx + 1)
             idx -= 1
 
         # Cleanup
@@ -352,7 +364,7 @@ cdef class Variables:
         object_array_free(&(initial_deps))
         object_array_free(&(values))
 
-        return resolved_value
+        return iter_value
 
     # _get_checked_value()
     #
@@ -517,7 +529,7 @@ cdef class Value:
     # Returns:
     #    (str): The resolved value
     #
-    cdef str resolve(self, dict values):
+    cdef str resolve(self, ObjectArray *resolved_values, Py_ssize_t idx):
         cdef str dep_name
         cdef Value part_var
         cdef ValuePart *part
@@ -529,7 +541,11 @@ cdef class Value:
 
             while part:
                 if part.is_variable:
-                    part_var = <Value> values[<str>part.text]
+
+                    # Consume one variable
+                    part_var = <Value> resolved_values.array[idx]
+                    idx += 1
+
                     parts.append(part_var._resolved)
                 else:
                     parts.append(<str>part.text)


[buildstream] 09/27: _variables.pyx: Change the variable_names set for an array

Posted by gi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch tristan/partial-variables-manual-string-join
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit a1c6a7e2ed4f7a6577ea940e1bc200420d012da6
Author: Tristan van Berkom <tr...@codethink.co.uk>
AuthorDate: Thu Jul 2 04:52:22 2020 +0900

    _variables.pyx: Change the variable_names set for an array
---
 src/buildstream/_variables.pyx | 140 ++++++++++++++++++++++++++++++-----------
 1 file changed, 103 insertions(+), 37 deletions(-)

diff --git a/src/buildstream/_variables.pyx b/src/buildstream/_variables.pyx
index 9512ab5..67d41b5 100644
--- a/src/buildstream/_variables.pyx
+++ b/src/buildstream/_variables.pyx
@@ -24,6 +24,10 @@ import re
 import sys
 import itertools
 
+from cpython.mem cimport PyMem_Malloc, PyMem_Free
+from cpython.object cimport PyObject
+from cpython.ref cimport Py_XINCREF, Py_XDECREF
+
 from ._exceptions import LoadError
 from .exceptions import LoadErrorReason
 from .node cimport MappingNode, Node, ScalarNode, SequenceNode, ProvenanceInformation
@@ -145,14 +149,17 @@ cdef class Variables:
     #
     cpdef subst(self, ScalarNode node):
         cdef Value value = Value()
-        cdef object dep_name_object
+        cdef PyObject **dependencies
+        cdef Py_ssize_t n_dependencies
+        cdef Py_ssize_t idx = 0
         cdef str dep_name
 
         value.init(node)
-
-        for dep_name_object in value.dependencies():
-            dep_name = <str> dep_name_object
+        dependencies, n_dependencies = value.dependencies()
+        while idx < n_dependencies:
+            dep_name = <str> dependencies[idx]
             self._resolve(dep_name, node.get_provenance())
+            idx += 1
 
         return value.resolve(self._values)
 
@@ -231,11 +238,18 @@ cdef class Variables:
         cdef ResolutionStep step
         cdef ResolutionStep new_step
         cdef ResolutionStep this_step
+
         cdef Value iter_value
-        cdef object iter_name_object
         cdef str iter_name
-        cdef set iter_value_deps
-        cdef str resolved_value
+
+        cdef PyObject **iter_value_deps
+        cdef Py_ssize_t n_iter_value_deps
+        cdef Py_ssize_t idx = 0
+
+        cdef str resolved_value = None
+
+        cdef list deps = []
+        cdef bint first_iteration = True
 
         # While iterating over the first loop, we collect all of the variable
         # dependencies, and perform all required validation.
@@ -243,11 +257,12 @@ cdef class Variables:
         # Each iteration processes a ResolutionStep object and has the possibility
         # to enque more ResolutionStep objects as a result.
         #
-        cdef list deps = []
-        cdef bint first_iteration = True
+        name = sys.intern(name)
+        cdef PyObject *names[1]
+        names[0] = <PyObject *>name
 
         step = ResolutionStep()
-        step.init(None, { sys.intern(name) }, None)
+        step.init(None, names, 1, None)
 
         while step:
             # Keep a hold of the current overall step
@@ -257,10 +272,11 @@ cdef class Variables:
             # Check for circular dependencies
             this_step.check_circular(self._values)
 
-            # For each dependency stemming from this provenance
-            for iter_name_object in this_step.varnames:
-                iter_name = <str> iter_name_object
+            idx = 0
+            while idx < this_step.n_varnames:
+                iter_name = <str> this_step.varnames[idx]
                 iter_value = self._get_checked_value(iter_name, this_step.referee, provenance)
+                idx += 1
 
                 # Earliest return for an already resolved value
                 #
@@ -274,10 +290,10 @@ cdef class Variables:
                     deps.append(iter_value)
 
                     # Queue up it's dependencies for resolution
-                    iter_value_deps = iter_value.dependencies()
-                    if iter_value_deps:
+                    iter_value_deps, n_iter_value_deps = iter_value.dependencies()
+                    if n_iter_value_deps > 0:
                         new_step = ResolutionStep()
-                        new_step.init(iter_name, iter_value_deps, this_step)
+                        new_step.init(iter_name, iter_value_deps, n_iter_value_deps, this_step)
 
                         # Link it to the end of the stack
                         new_step.prev = step
@@ -346,9 +362,10 @@ cdef class Variables:
 #
 cdef class ResolutionStep:
     cdef str referee
-    cdef set varnames
     cdef ResolutionStep parent
     cdef ResolutionStep prev
+    cdef PyObject **varnames
+    cdef Py_ssize_t n_varnames
 
     # init()
     #
@@ -359,9 +376,10 @@ cdef class ResolutionStep:
     #    varnames (set): A set of variable names which referee refers to.
     #    parent (ResolutionStep): The parent ResolutionStep
     #
-    cdef init(self, str referee, set varnames, ResolutionStep parent):
+    cdef init(self, str referee, PyObject **varnames, Py_ssize_t n_varnames, ResolutionStep parent):
         self.referee = referee
         self.varnames = varnames
+        self.n_varnames = n_varnames
         self.parent = parent
         self.prev = None
 
@@ -473,17 +491,18 @@ cdef class Value:
 
     # dependencies()
     #
-    # Returns the set of dependency variable names
+    # Returns the array of dependency variable names
     #
     # Returns:
-    #    (set): The set of variable names which this ValueClass depends on, or None.
+    #    (PyObject **): The array of variable names which this ValueClass depends on, or NULL
+    #    (int): The length of the returned array
     #
-    cdef set dependencies(self):
+    cdef (PyObject **, Py_ssize_t)dependencies(self):
         if self._resolved is None:
-            return self._value_class.variable_names
+            return self._value_class.variable_names, self._value_class.n_variable_names
 
         # If we're already resolved, we don't have any dependencies anymore
-        return <set> EMPTY_SET
+        return NULL, 0
 
     # _load_value_class()
     #
@@ -540,8 +559,17 @@ cdef class ValueClass:
     #
     # Public
     #
-    cdef set variable_names  # A set of variable names
     cdef ValuePart *parts
+    cdef PyObject **variable_names
+    cdef Py_ssize_t n_variable_names
+
+    # __dealloc__()
+    #
+    # Cleanup stuff which cython wont cleanup automatically
+    #
+    def __dealloc__(self):
+        free_value_parts(self.parts)
+        PyMem_Free(self.variable_names)
 
     # init():
     #
@@ -551,15 +579,11 @@ cdef class ValueClass:
     #    string (str): The string which can contain variables
     #
     cdef init(self, str string):
-
-        self.variable_names = set()
         self.parts = NULL
-
+        self.variable_names = NULL
+        self.n_variable_names = 0
         self._parse_string(string)
 
-    def __dealloc__(self):
-        free_value_parts(self.parts)
-
     # _parse_string()
     #
     # Parse the string for this ValueClass, breaking it down into
@@ -589,7 +613,7 @@ cdef class ValueClass:
         cdef splits = VALUE_CLASS_PARSE_EXPANSION.split(string)
         cdef object split_object
         cdef str split
-        cdef Py_ssize_t split_idx = 0
+        cdef Py_ssize_t idx = 0
         cdef int is_variable
 
         # Adding parts
@@ -611,11 +635,11 @@ cdef class ValueClass:
                 # variable values.
                 split = <str> sys.intern(split)
 
-                if (split_idx % 2) == 0:
+                if (idx % 2) == 0:
                     is_variable = False
                 else:
+                    self.n_variable_names += 1
                     is_variable = True
-                    self.variable_names.add(split)
 
                 part = new_value_part(split, is_variable)
                 if last_part:
@@ -624,8 +648,32 @@ cdef class ValueClass:
                     self.parts = part
                 last_part = part
 
-            split_idx += 1
+            idx += 1
+
+        # Initialize the variables array
+        #
+        # Note that we don't bother ref counting the string objects, as the
+        # ValuePart already takes care of owning the strings.
+        #
+        if self.n_variable_names > 0:
+            self.variable_names = <PyObject **>PyMem_Malloc(self.n_variable_names * sizeof(PyObject *))
+            if not self.variable_names:
+                raise MemoryError()
+
+            part = self.parts
+            idx = 0
+            while part:
+                if part.is_variable:
 
+                    # Record only unique variable names in the variable_names array.
+                    #
+                    if object_array_search(part.text, self.variable_names, idx) < 0:
+                        self.variable_names[idx] = part.text
+                        idx += 1
+                    else:
+                        self.n_variable_names -= 1
+
+                part = part.next_part
 
 # ValueIterator()
 #
@@ -652,9 +700,27 @@ cdef class ValueIterator:
 ############################## BASEMENT ########################################
 
 
-from cpython.mem cimport PyMem_Malloc, PyMem_Free
-from cpython.object cimport PyObject
-from cpython.ref cimport Py_XINCREF, Py_XDECREF
+# object_array_search()
+#
+# Searches for an object pointer in an array of object pointers.
+#
+# Args:
+#    search (PyObject *): The object to search for
+#    array (PyObject **): The array to search in
+#    length (Py_ssize_t): The length of the array
+#
+# Returns:
+#    (Py_ssize_t): The index of `search` in `array`, or -1 if `search` is not found.
+#
+cdef Py_ssize_t object_array_search(PyObject *search, PyObject **array, Py_ssize_t length):
+    cdef Py_ssize_t idx = 0
+
+    while idx < length:
+        if array[idx] == search:
+            return idx
+        idx += 1
+
+    return -1
 
 # ValuePart()
 #


[buildstream] 23/27: _variables.pyx: Reorganize loop for better early return

Posted by gi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch tristan/partial-variables-manual-string-join
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit 07d5b63cc6cabaa1dc580e72a678ceeb76d48994
Author: Tristan van Berkom <tr...@codethink.co.uk>
AuthorDate: Thu Jul 9 20:01:52 2020 +0900

    _variables.pyx: Reorganize loop for better early return
---
 src/buildstream/_variables.pyx | 60 +++++++++++++++++++++---------------------
 1 file changed, 30 insertions(+), 30 deletions(-)

diff --git a/src/buildstream/_variables.pyx b/src/buildstream/_variables.pyx
index 64c3ece..1958dd0 100644
--- a/src/buildstream/_variables.pyx
+++ b/src/buildstream/_variables.pyx
@@ -305,15 +305,19 @@ cdef class Variables:
     #    (LoadError): In case there was any undefined variables or circular
     #                 references encountered when resolving the variable.
     #
+
+
     cdef str _resolve(self, str name, ScalarNode pnode):
+        cdef Value value
 
-        cdef Value iter_value
+        value = self._get_checked_value(name, None, pnode)
+        if value._resolved is None:
+            return self._resolve_value(name, value)
 
-        # Try early return first
-        iter_value = self._get_checked_value(name, None, pnode)
-        if iter_value._resolved:
-            return iter_value._resolved
+        return value._resolved
 
+    cdef str _resolve_value(self, str name, Value value):
+        cdef Value iter_value
         cdef ResolutionStep step
         cdef ResolutionStep new_step
         cdef ResolutionStep this_step
@@ -323,20 +327,10 @@ cdef class Variables:
         # We'll be collecting the values to resolve at the end in here
         cdef ObjectArray values
         object_array_init(&(values), -1)
-
-        # While iterating over the first loop, we collect all of the variable
-        # dependencies, and perform all required validation.
-        #
-        # Each iteration processes a ResolutionStep object and has the possibility
-        # to enque more ResolutionStep objects as a result.
-        #
-        cdef ValuePart initial_part
-        initial_part.text = <PyObject *>name
-        initial_part.is_variable = True
-        initial_part.next_part = NULL
+        object_array_append(&(values), <PyObject *>value)
 
         step = ResolutionStep()
-        step.init(None, &initial_part, None)
+        step.init(name, value._value_class.parts, None)
 
         while step:
             # Keep a hold of the current overall step
@@ -348,11 +342,14 @@ cdef class Variables:
 
             part = this_step.parts
             while part:
+
+                # Skip literal ValueParts
+                #
                 if not part.is_variable:
                     part = part.next_part
                     continue
 
-                iter_value = self._get_checked_value(<str> part.text, this_step.referee, pnode)
+                iter_value = self._get_checked_value(<str> part.text, this_step.referee, None)
 
                 # Queue up this value.
                 #
@@ -362,16 +359,13 @@ cdef class Variables:
 
                 # Queue up the values dependencies.
                 #
-                # These will be NULL if this value has previously been resolved.
                 if iter_value._resolved is None:
-                    iter_value_parts = iter_value._value_class.parts
-                    if iter_value_parts:
-                        new_step = ResolutionStep()
-                        new_step.init(<str> part.text, iter_value_parts, this_step)
+                    new_step = ResolutionStep()
+                    new_step.init(<str> part.text, iter_value._value_class.parts, this_step)
 
-                        # Link it to the end of the stack
-                        new_step.prev = step
-                        step = new_step
+                    # Link it to the end of the stack
+                    new_step.prev = step
+                    step = new_step
 
                 # Next part of this variable
                 part = part.next_part
@@ -382,15 +376,21 @@ cdef class Variables:
         # we want to return.
         #
         idx = values.length -1
-        while idx >= 0:
-
+        while idx > 0:
             # Values in, strings out
             #
             iter_value = <Value>values.array[idx]
-            resolved_value = iter_value.resolve(&values, idx + 1)
-            values.array[idx] = <PyObject *>resolved_value
+
+            if iter_value._resolved is None:
+                iter_value.resolve(&values, idx + 1)
+
+            values.array[idx] = <PyObject *>iter_value._resolved
             idx -= 1
 
+        # Save the return of Value.resolve from the toplevel value
+        iter_value = <Value>values.array[0]
+        resolved_value = iter_value.resolve(&values, 1)
+
         # Cleanup
         #
         object_array_free(&(values))


[buildstream] 10/27: _variables.pyx: Don't hold onto ProvenanceInformation, hold onto nodes instead.

Posted by gi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch tristan/partial-variables-manual-string-join
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit 0bac4011e1c52ba21e10ecccb7cfb176429ccbe6
Author: Tristan van Berkom <tr...@codethink.co.uk>
AuthorDate: Thu Jul 2 16:57:51 2020 +0900

    _variables.pyx: Don't hold onto ProvenanceInformation, hold onto nodes instead.
    
    Since ProvenanceInformation() is generated on the fly, they add overhead
    to generate them and overhead in memory.
---
 src/buildstream/_variables.pyx | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/src/buildstream/_variables.pyx b/src/buildstream/_variables.pyx
index 67d41b5..18772ae 100644
--- a/src/buildstream/_variables.pyx
+++ b/src/buildstream/_variables.pyx
@@ -345,7 +345,7 @@ cdef class Variables:
                 referee_value = None
 
             if referee_value:
-                provenance = referee_value.provenance
+                provenance = referee_value.get_provenance()
 
             error_message = "Reference to undefined variable '{}'".format(varname)
             if provenance:
@@ -419,7 +419,7 @@ cdef class ResolutionStep:
                 referee = self.referee
             value = values[referee]
 
-            error_lines.append("{}: Variable '{}' refers to variable '{}'".format(value.provenance, referee, step.referee))
+            error_lines.append("{}: Variable '{}' refers to variable '{}'".format(value.get_provenance(), referee, step.referee))
             step = step.parent
 
         raise LoadError("Circular dependency detected on variable '{}'".format(self.referee),
@@ -434,7 +434,7 @@ cdef EMPTY_SET = set()
 # Represents a variable value
 #
 cdef class Value:
-    cdef ProvenanceInformation provenance
+    cdef ScalarNode _node
     cdef ValueClass _value_class
     cdef str _resolved
 
@@ -446,14 +446,20 @@ cdef class Value:
     #    node (ScalarNode): The node representing this value.
     #
     cdef init(self, ScalarNode node):
-
-        # Public
-        self.provenance = node.get_provenance()
-
-        # Private
+        self._node = node
         self._value_class = self._load_value_class(node.as_str())
         self._resolved = None
 
+    # get_provenance():
+    #
+    # Fetches the provenance of this Value
+    #
+    # Returns:
+    #    (ProvenanceInformation): The provenance of this Value
+    #
+    cdef get_provenance(self):
+        return self._node.get_provenance()
+
     # resolve()
     #
     # Resolve the value of this variable, this function expects


[buildstream] 04/27: _variables.py: Some microoptimizations, remove tuplization and some typchecking overheads

Posted by gi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch tristan/partial-variables-manual-string-join
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit f4b1c34065f26f82f80a3d4efb970274cf9aa313
Author: Tristan van Berkom <tr...@codethink.co.uk>
AuthorDate: Wed Jul 1 17:53:42 2020 +0900

    _variables.py: Some microoptimizations, remove tuplization and some typchecking overheads
---
 src/buildstream/_variables.pyx | 130 ++++++++++++++++++++++++++++-------------
 1 file changed, 88 insertions(+), 42 deletions(-)

diff --git a/src/buildstream/_variables.pyx b/src/buildstream/_variables.pyx
index e73658d..0fa64fa 100644
--- a/src/buildstream/_variables.pyx
+++ b/src/buildstream/_variables.pyx
@@ -112,9 +112,11 @@ cdef class Variables:
     #   (str|None): The expanded value for the variable or None variable was not defined.
     #
     cpdef str get(self, str name):
-        if name not in self._values:
-            return None
-        return self[name]
+        try:
+            return <str> self._resolve(name, None)
+        except LoadError as e:
+            if e.reason == LoadErrorReason.UNRESOLVED_VARIABLE:
+                return None
 
     # expand()
     #
@@ -142,10 +144,14 @@ cdef class Variables:
     #    LoadError, if the string contains unresolved variable references.
     #
     cpdef subst(self, ScalarNode node):
-        cdef Value value = Value(node)
+        cdef Value value = Value()
+        cdef object dep_name_object
         cdef str dep_name
 
-        for dep_name in value.dependencies():
+        value.init(node)
+
+        for dep_name_object in value.dependencies():
+            dep_name = <str> dep_name_object
             self._resolve(dep_name, node.get_provenance())
 
         return value.resolve(self._values)
@@ -172,11 +178,17 @@ cdef class Variables:
     #
     cdef dict _init_values(self, MappingNode node):
         cdef dict ret = {}
+        cdef key_object
+        cdef value_node_object
         cdef str key
-        cdef ScalarNode value
+        cdef ScalarNode value_node
 
-        for key, value in node.items():
-            ret[sys.intern(key)] = Value(value)
+        for key_object, value_object in node.items():
+            key = <str> sys.intern(<str> key_object)
+            value_node = <ScalarNode> value_object
+            value = Value()
+            value.init(value_node)
+            ret[key] = value
 
         return ret
 
@@ -217,12 +229,11 @@ cdef class Variables:
     #
     cdef str _resolve(self, str name, ProvenanceInformation provenance):
         cdef ResolutionStep step
-        cdef Value referee_value
+        cdef ResolutionStep new_step
         cdef Value iter_value
         cdef object iter_name_object
         cdef str iter_name
         cdef set iter_value_deps
-        cdef str cyclic_variable
 
         # While iterating over the first loop, we collect all of the variable
         # dependencies, and perform all required validation.
@@ -230,22 +241,23 @@ cdef class Variables:
         # Each iteration processes a ResolutionStep object and has the possibility
         # to enque more ResolutionStep objects as a result.
         #
-        cdef list pending = [ResolutionStep(None, { sys.intern(name) }, None)]
+        cdef list pending;
         cdef list deps = []
         cdef bint first_iteration = True
 
+        step = ResolutionStep()
+        step.init(None, { sys.intern(name) }, None)
+        pending = [step]
+
         while pending:
             step = pending.pop()
 
-            #
-            # Handle circular deps
-            #
+            # Check for circular dependencies
             step.check_circular(self._values)
 
             # For each dependency stemming from this provenance
             for iter_name_object in step.varnames:
                 iter_name = <str> iter_name_object
-
                 iter_value = self._get_checked_value(iter_name, step.referee, provenance)
 
                 # Earliest return for an already resolved value
@@ -262,16 +274,14 @@ cdef class Variables:
                     # Queue up it's dependencies for resolution
                     iter_value_deps = iter_value.dependencies()
                     if iter_value_deps:
-                        pending.append(ResolutionStep(iter_name, iter_value_deps, step))
-
-        #
-        # Now that we've pushed all of the required dependencies onto the deps queue,
-        # we know that there are no undefined variables or circular references, we
-        # also know that the deps queue is ordered such that the earlier items in
-        # the list depend on resolution of later items in the list.
-        #
-        # Now we can bubble back up iterating backwards over the list, and
-        # the last resolved value is the one we want to return.
+                        new_step = ResolutionStep()
+                        new_step.init(iter_name, iter_value_deps, step)
+                        pending.append(new_step)
+
+        # We've now constructed the dependencies queue such that
+        # later dependencies are on the right, we can now safely peddle
+        # backwards and the last (leftmost) resolved value is the one
+        # we want to return.
         #
         cdef str resolved_value
         while deps:
@@ -304,7 +314,7 @@ cdef class Variables:
         # Fetch the value and detect undefined references
         #
         try:
-            return self._values[varname]
+            return <Value> self._values[varname]
         except KeyError as e:
 
             # Either the provenance is the toplevel calling provenance,
@@ -330,20 +340,37 @@ cdef class Variables:
 # This only exists for better performance than constructing
 # and unpacking tuples.
 #
-# Args:
-#    referee (str): The name of the referring variable
-#    varnames (set): A set of variable names which referee refers to.
-#
 cdef class ResolutionStep:
     cdef str referee
     cdef set varnames
     cdef ResolutionStep parent
 
-    def __cinit__(self, str referee, set varnames, ResolutionStep parent):
+    # init()
+    #
+    # Initialize this ResolutionStep
+    #
+    # Args:
+    #    referee (str): The name of the referring variable
+    #    varnames (set): A set of variable names which referee refers to.
+    #    parent (ResolutionStep): The parent ResolutionStep
+    #
+    cdef init(self, str referee, set varnames, ResolutionStep parent):
         self.referee = referee
         self.varnames = varnames
         self.parent = parent
 
+    # check_circular()
+    #
+    # Check for circular references in this step.
+    #
+    # Args:
+    #    values (dict): The value dictionary for lookups
+    #
+    # Raises:
+    #    (LoadError): Will raise a user facing LoadError with
+    #                 LoadErrorReason.CIRCULAR_REFERENCE_VARIABLE in case
+    #                 circular references were encountered.
+    #
     cdef check_circular(self, dict values):
         cdef ResolutionStep step = self.parent
         while step:
@@ -351,6 +378,10 @@ cdef class ResolutionStep:
                 self._raise_circular_reference_error(step, values)
             step = step.parent
 
+    # _raise_circular_reference_error()
+    #
+    # Helper function to construct a full report and raise the circular reference error.
+    #
     cdef _raise_circular_reference_error(self, ResolutionStep conflict, dict values):
         cdef list error_lines = []
         cdef ResolutionStep step = self
@@ -388,7 +419,7 @@ cdef class ValuePart:
     cdef str text
     cdef bint is_variable
 
-    def __cinit__(self, str text, bint is_variable):
+    cdef init(self, str text, bint is_variable):
         self.text = text
         self.is_variable = is_variable
 
@@ -404,7 +435,14 @@ cdef class Value:
     cdef ValueClass _value_class
     cdef str _resolved
 
-    def __cinit__(self, ScalarNode node):
+    # init()
+    #
+    # Initialize the Value
+    #
+    # Args:
+    #    node (ScalarNode): The node representing this value.
+    #
+    cdef init(self, ScalarNode node):
 
         # Public
         self.provenance = node.get_provenance()
@@ -437,7 +475,7 @@ cdef class Value:
                 part = <ValuePart> part_object
 
                 if part.is_variable:
-                    part_var = values[part.text]
+                    part_var = <Value> values[part.text]
                     parts.append(part_var._resolved)
                 else:
                     parts.append(part.text)
@@ -458,7 +496,7 @@ cdef class Value:
             return self._value_class.variable_names
 
         # If we're already resolved, we don't have any dependencies anymore
-        return EMPTY_SET
+        return <set> EMPTY_SET
 
     # _load_value_class()
     #
@@ -473,12 +511,13 @@ cdef class Value:
     #
     cdef ValueClass _load_value_class(self, str string):
         cdef ValueClass ret
-        cdef internal_string = sys.intern(string)
+        cdef str internal_string = sys.intern(string)
 
         try:
             ret = VALUE_CLASS_TABLE[internal_string]
         except KeyError:
-            ret = ValueClass(internal_string)
+            ret = ValueClass()
+            ret.init(internal_string)
             VALUE_CLASS_TABLE[internal_string] = ret
 
         return ret
@@ -510,9 +549,6 @@ VALUE_CLASS_PARSE_EXPANSION = re.compile(r"\%\{([a-zA-Z][a-zA-Z0-9_-]*)\}")
 #
 # A class representing a broken down parse of a value.
 #
-# Args:
-#    string (str): The string which can contain variables
-#
 cdef class ValueClass:
     #
     # Public
@@ -520,7 +556,14 @@ cdef class ValueClass:
     cdef set variable_names  # A set of variable names
     cdef list parts  # A list of ValuePart objects
 
-    def __cinit__(self, str string):
+    # init():
+    #
+    # Initialize the ValueClass()
+    #
+    # Args:
+    #    string (str): The string which can contain variables
+    #
+    cdef init(self, str string):
         self.variable_names = set()
         self.parts = []
         self._parse_string(string)
@@ -556,6 +599,7 @@ cdef class ValueClass:
         cdef str split
         cdef Py_ssize_t split_idx = 0
         cdef bint is_variable
+        cdef ValuePart part
 
         #
         # Collect the weird regex return value into something
@@ -578,7 +622,9 @@ cdef class ValueClass:
                     is_variable = True
                     self.variable_names.add(split)
 
-                self.parts.append(ValuePart(split, is_variable))
+                part = ValuePart()
+                part.init(split, is_variable)
+                self.parts.append(part)
 
             split_idx += 1
 


[buildstream] 20/27: _variables.pyx: Try optimizing "".join() with C API

Posted by gi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch tristan/partial-variables-manual-string-join
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit 8ba877da58a5cffcc630109c7c6e87e0de6d8ef4
Author: Tristan van Berkom <tr...@codethink.co.uk>
AuthorDate: Tue Jul 7 22:28:09 2020 +0900

    _variables.pyx: Try optimizing "".join() with C API
---
 src/buildstream/_variables.pyx | 86 +++++++++++++++++++++++++++++++++++++-----
 1 file changed, 76 insertions(+), 10 deletions(-)

diff --git a/src/buildstream/_variables.pyx b/src/buildstream/_variables.pyx
index d8e6fcc..a02f976 100644
--- a/src/buildstream/_variables.pyx
+++ b/src/buildstream/_variables.pyx
@@ -28,6 +28,35 @@ from cpython.mem cimport PyMem_Malloc, PyMem_Free, PyMem_Realloc
 from cpython.object cimport PyObject
 from cpython.ref cimport Py_XINCREF, Py_XDECREF
 
+# Some of this is already imported from cpython.unicode by the cython
+# layer, but since this header is incomplete, let's just import all the
+# necessary bits directly from the C API.
+#
+cdef extern from "Python.h":
+
+    # Returns the length of the unicode string in code points.
+    #
+    Py_ssize_t PyUnicode_GET_LENGTH(PyObject *o)
+
+    # Macro expands to the maximum character width required for
+    # a given existing unicode object (suitable for the `maxchar`
+    # argument of `PyUnicode_New()`).
+    #
+    Py_UCS4 PyUnicode_MAX_CHAR_VALUE(PyObject *o)
+
+    # Creates a new unicode object with a preallocated buffer
+    # of `size` code points, with wide enough code points to
+    # account for codepoints as wide as `maxchar` requires.
+    #
+    PyObject* PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar)
+
+    # Copy characters from one string to another string.
+    #
+    # This will raise an exception automatically if -1 is returned.
+    #
+    Py_ssize_t PyUnicode_CopyCharacters(PyObject *to, Py_ssize_t to_start, PyObject *from_, Py_ssize_t from_start, Py_ssize_t how_many) except -1
+
+
 from ._profile import Topics, PROFILER
 from ._exceptions import LoadError
 from .exceptions import LoadErrorReason
@@ -526,30 +555,67 @@ cdef class Value:
     # Returns:
     #    (str): The resolved value
     #
-    cdef str resolve(self, ObjectArray *resolved_values, Py_ssize_t idx):
-        cdef str dep_name
+    cdef str resolve(self, ObjectArray *resolved_values, Py_ssize_t values_idx):
         cdef Value part_var
         cdef ValuePart *part
-        cdef object part_object
-        cdef list parts = []
+        cdef Py_UCS4 maxchar = 0
+        cdef Py_UCS4 part_maxchar
+        cdef Py_ssize_t full_length = 0
+        cdef Py_ssize_t idx
+        cdef Py_ssize_t offset
+        cdef Py_ssize_t part_length
+        cdef PyObject *resolved
+        cdef PyObject *part_object
 
         if self._resolved is None:
-            part = self._value_class.parts
 
+            # Calculate the number of codepoints and maximum character width
+            # required for the strings involved.
+            idx = values_idx
+            part = self._value_class.parts
             while part:
                 if part.is_variable:
-
-                    # Consume one variable
                     part_var = <Value> resolved_values.array[idx]
                     idx += 1
+                    part_object = <PyObject *>part_var._resolved
+                else:
+                    part_object = part.text
+
+                full_length += PyUnicode_GET_LENGTH(part_object)
+                part_maxchar = PyUnicode_MAX_CHAR_VALUE(part_object)
+                if part_maxchar > maxchar:
+                    maxchar = part_maxchar
+
+                part = part.next_part
 
-                    parts.append(part_var._resolved)
+            # Do the stringy thingy
+            resolved = PyUnicode_New(full_length, maxchar)
+            part = self._value_class.parts
+            idx = values_idx
+            offset = 0
+
+            # This time copy characters as we loop through the parts
+            while part:
+                if part.is_variable:
+                    part_var = <Value> resolved_values.array[idx]
+                    idx += 1
+                    part_object = <PyObject *>part_var._resolved
                 else:
-                    parts.append(<str>part.text)
+                    part_object = part.text
+
+                part_length = PyUnicode_GET_LENGTH(part_object)
+
+                # Does this need to be in a loop and have a maximum copy length ?
+                #
+                # Should we be doing the regular posix thing, handling an exception indicating
+                # a SIGINT or such which means we should resume our copy instead of consider an error ?
+                #
+                PyUnicode_CopyCharacters(resolved, offset, part_object, 0, part_length)
 
+                offset += part_length
                 part = part.next_part
 
-            self._resolved = "".join(parts)
+            self._resolved = <str> resolved
 
         return self._resolved
 


[buildstream] 02/27: tests/format/variables.py: Added some new tests

Posted by gi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch tristan/partial-variables-manual-string-join
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit b55f31b0881bd59fe02d6cef871c2b418ae4e0ed
Author: Tristan van Berkom <tr...@codethink.co.uk>
AuthorDate: Sun Jun 28 23:59:42 2020 +0900

    tests/format/variables.py: Added some new tests
    
      * Test scenarios where a junction needs to resolve variables
        in order to configure a subproject, but where some other variables
        may be derived from the same subproject.
    
        In this scenario we allow partial resolution of variables
        for junction elements.
    
      * Enhanced the undefined variables and circular reference tests
        to also check for the expected provenances.
---
 tests/format/variables.py                          | 69 +++++++++++++++++-----
 tests/format/variables/cyclic_variables/cyclic.bst |  2 +-
 .../variables/cyclic_variables/indirect-cyclic.bst |  8 +++
 .../variables/cyclic_variables/self-reference.bst  |  4 ++
 .../format/variables/missing_variables/manual3.bst | 10 ++++
 tests/format/variables/partial_context/base.bst    |  4 ++
 .../variables/partial_context/base/project.conf    |  3 +
 .../format/variables/partial_context/base/vars.yml |  2 +
 .../format/variables/partial_context/project.conf  |  8 +++
 tests/format/variables/partial_context/test.bst    |  3 +
 10 files changed, 97 insertions(+), 16 deletions(-)

diff --git a/tests/format/variables.py b/tests/format/variables.py
index c5e8eeb..7074732 100644
--- a/tests/format/variables.py
+++ b/tests/format/variables.py
@@ -53,30 +53,59 @@ def test_overrides(cli, datafiles, target, varname, expected):
     assert result_vars.get_str(varname) == expected
 
 
-@pytest.mark.parametrize("element", ["manual.bst", "manual2.bst"])
+@pytest.mark.parametrize(
+    "element,provenance",
+    [
+        # This test makes a reference to an undefined variable in a build command
+        ("manual.bst", "manual.bst [line 5 column 6]"),
+        # This test makes a reference to an undefined variable by another variable,
+        # ensuring that we validate variables even when they are unused
+        ("manual2.bst", "manual2.bst [line 4 column 8]"),
+        # This test uses a build command to refer to some variables which ultimately
+        # refer to an undefined variable, testing a more complex case.
+        ("manual3.bst", "manual3.bst [line 6 column 8]"),
+    ],
+    ids=["build-command", "variables", "complex"],
+)
 @pytest.mark.datafiles(os.path.join(DATA_DIR, "missing_variables"))
-def test_missing_variable(cli, datafiles, element):
+def test_undefined(cli, datafiles, element, provenance):
     project = str(datafiles)
     result = cli.run(project=project, silent=True, args=["show", "--deps", "none", "--format", "%{config}", element])
     result.assert_main_error(ErrorDomain.LOAD, LoadErrorReason.UNRESOLVED_VARIABLE)
+    assert provenance in result.stderr
 
 
+@pytest.mark.parametrize(
+    "element,provenances",
+    [
+        # Test a simple a -> b and b -> a reference
+        ("simple-cyclic.bst", ["simple-cyclic.bst [line 4 column 5]", "simple-cyclic.bst [line 5 column 5]"]),
+        # Test a simple a -> b and b -> a reference with some text involved
+        ("cyclic.bst", ["cyclic.bst [line 5 column 10]", "cyclic.bst [line 4 column 5]"]),
+        # Test an indirect circular dependency
+        (
+            "indirect-cyclic.bst",
+            [
+                "indirect-cyclic.bst [line 5 column 5]",
+                "indirect-cyclic.bst [line 6 column 5]",
+                "indirect-cyclic.bst [line 7 column 5]",
+                "indirect-cyclic.bst [line 8 column 5]",
+            ],
+        ),
+        # Test an indirect circular dependency
+        ("self-reference.bst", ["self-reference.bst [line 4 column 5]"]),
+    ],
+    ids=["simple", "simple-text", "indirect", "self-reference"],
+)
 @pytest.mark.timeout(15, method="signal")
 @pytest.mark.datafiles(os.path.join(DATA_DIR, "cyclic_variables"))
-def test_simple_cyclic_variables(cli, datafiles):
-    print_warning("Performing cyclic test, if this test times out it will " + "exit the test sequence")
-    project = str(datafiles)
-    result = cli.run(project=project, silent=True, args=["build", "simple-cyclic.bst"])
-    result.assert_main_error(ErrorDomain.LOAD, LoadErrorReason.RECURSIVE_VARIABLE)
-
-
-@pytest.mark.timeout(15, method="signal")
-@pytest.mark.datafiles(os.path.join(DATA_DIR, "cyclic_variables"))
-def test_cyclic_variables(cli, datafiles):
-    print_warning("Performing cyclic test, if this test times out it will " + "exit the test sequence")
+def test_circular_reference(cli, datafiles, element, provenances):
+    print_warning("Performing cyclic test, if this test times out it will exit the test sequence")
     project = str(datafiles)
-    result = cli.run(project=project, silent=True, args=["build", "cyclic.bst"])
-    result.assert_main_error(ErrorDomain.LOAD, LoadErrorReason.RECURSIVE_VARIABLE)
+    result = cli.run(project=project, silent=True, args=["build", element])
+    result.assert_main_error(ErrorDomain.LOAD, LoadErrorReason.CIRCULAR_REFERENCE_VARIABLE)
+    for provenance in provenances:
+        assert provenance in result.stderr
 
 
 @pytest.mark.parametrize("protected_var", PROTECTED_VARIABLES)
@@ -168,3 +197,13 @@ def test_variables_resolving_errors_in_public_section(cli, datafiles):
 
     result = cli.run(project=project, args=["show", "--format", "%{public}", "public_unresolved.bst"])
     result.assert_main_error(ErrorDomain.LOAD, LoadErrorReason.UNRESOLVED_VARIABLE)
+
+
+@pytest.mark.datafiles(os.path.join(DATA_DIR, "partial_context"))
+def test_partial_context_junctions(cli, datafiles):
+    project = str(datafiles)
+
+    result = cli.run(project=project, args=["show", "--format", "%{vars}", "test.bst"])
+    result.assert_success()
+    result_vars = _yaml.load_data(result.output)
+    assert result_vars.get_str("eltvar") == "/bar/foo/baz"
diff --git a/tests/format/variables/cyclic_variables/cyclic.bst b/tests/format/variables/cyclic_variables/cyclic.bst
index a05a40b..38832fa 100644
--- a/tests/format/variables/cyclic_variables/cyclic.bst
+++ b/tests/format/variables/cyclic_variables/cyclic.bst
@@ -2,4 +2,4 @@ kind: manual
 
 variables:
   a: "%{prefix}/a"
-  prefix: "%{a}/some_prefix/"
\ No newline at end of file
+  prefix: "%{a}/some_prefix/"
diff --git a/tests/format/variables/cyclic_variables/indirect-cyclic.bst b/tests/format/variables/cyclic_variables/indirect-cyclic.bst
new file mode 100644
index 0000000..fb06fb0
--- /dev/null
+++ b/tests/format/variables/cyclic_variables/indirect-cyclic.bst
@@ -0,0 +1,8 @@
+kind: manual
+
+variables:
+  foo: "%{a}"
+  a: "%{b}"
+  b: "%{c}"
+  c: "%{d}"
+  d: "%{a}"
diff --git a/tests/format/variables/cyclic_variables/self-reference.bst b/tests/format/variables/cyclic_variables/self-reference.bst
new file mode 100644
index 0000000..2e9829d
--- /dev/null
+++ b/tests/format/variables/cyclic_variables/self-reference.bst
@@ -0,0 +1,4 @@
+kind: manual
+
+variables:
+  a: "Referencing itself with %{a}"
diff --git a/tests/format/variables/missing_variables/manual3.bst b/tests/format/variables/missing_variables/manual3.bst
new file mode 100644
index 0000000..ff3c8d5
--- /dev/null
+++ b/tests/format/variables/missing_variables/manual3.bst
@@ -0,0 +1,10 @@
+kind: manual
+
+variables:
+  hello: "Hello mister %{pony}"
+  greeting: "The %{hello} string twice: %{hello} again"
+  pony: "The pony is %{undefined}"
+  
+config:
+  build-commands:
+  - Some indirectly undefined variable %{greeting}
diff --git a/tests/format/variables/partial_context/base.bst b/tests/format/variables/partial_context/base.bst
new file mode 100644
index 0000000..10ce559
--- /dev/null
+++ b/tests/format/variables/partial_context/base.bst
@@ -0,0 +1,4 @@
+kind: junction
+sources:
+- kind: local
+  path: base
diff --git a/tests/format/variables/partial_context/base/project.conf b/tests/format/variables/partial_context/base/project.conf
new file mode 100644
index 0000000..91511bf
--- /dev/null
+++ b/tests/format/variables/partial_context/base/project.conf
@@ -0,0 +1,3 @@
+name: base
+min-version: 2.0
+
diff --git a/tests/format/variables/partial_context/base/vars.yml b/tests/format/variables/partial_context/base/vars.yml
new file mode 100644
index 0000000..3a91e13
--- /dev/null
+++ b/tests/format/variables/partial_context/base/vars.yml
@@ -0,0 +1,2 @@
+variables:
+  subvar: "/bar"
diff --git a/tests/format/variables/partial_context/project.conf b/tests/format/variables/partial_context/project.conf
new file mode 100644
index 0000000..e8a8ed0
--- /dev/null
+++ b/tests/format/variables/partial_context/project.conf
@@ -0,0 +1,8 @@
+name: test
+min-version: 2.0
+
+(@): base.bst:vars.yml
+
+variables:
+  var: "%{subvar}/foo"
+
diff --git a/tests/format/variables/partial_context/test.bst b/tests/format/variables/partial_context/test.bst
new file mode 100644
index 0000000..8276763
--- /dev/null
+++ b/tests/format/variables/partial_context/test.bst
@@ -0,0 +1,3 @@
+kind: manual
+variables:
+  eltvar: "%{var}/baz"


[buildstream] 08/27: _variables: Convert ValuePart to C struct

Posted by gi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch tristan/partial-variables-manual-string-join
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit c67fe08ff1cc9ff5b8203d0bb3742c39b801acb1
Author: Tristan van Berkom <tr...@codethink.co.uk>
AuthorDate: Thu Jul 2 03:45:07 2020 +0900

    _variables: Convert ValuePart to C struct
---
 src/buildstream/_variables.pyx | 100 ++++++++++++++++++++++++++---------------
 1 file changed, 65 insertions(+), 35 deletions(-)

diff --git a/src/buildstream/_variables.pyx b/src/buildstream/_variables.pyx
index 245cbab..9512ab5 100644
--- a/src/buildstream/_variables.pyx
+++ b/src/buildstream/_variables.pyx
@@ -409,29 +409,6 @@ cdef class ResolutionStep:
                         detail="\n".join(reversed(error_lines)))
 
 
-# ValuePart()
-#
-# Represents a part of a value (a string and an indicator
-# of whether the string is a variable or not).
-#
-# This only exists for better performance than constructing
-# and unpacking tuples.
-#
-# Args:
-#    text (str): The text of this part
-#    is_variable (bint): True if the text is a variable, False if it's literal
-#
-cdef class ValuePart:
-    cdef str text
-    cdef bint is_variable
-    cdef ValuePart next_part
-
-    cdef init(self, str text, bint is_variable):
-        self.text = text
-        self.is_variable = is_variable
-        self.next_part = None
-
-
 cdef EMPTY_SET = set()
 
 # Value():
@@ -474,7 +451,7 @@ cdef class Value:
     cdef str resolve(self, dict values):
         cdef str dep_name
         cdef Value part_var
-        cdef ValuePart part
+        cdef ValuePart *part
         cdef object part_object
         cdef list parts = []
 
@@ -483,10 +460,10 @@ cdef class Value:
 
             while part:
                 if part.is_variable:
-                    part_var = <Value> values[part.text]
+                    part_var = <Value> values[<str>part.text]
                     parts.append(part_var._resolved)
                 else:
-                    parts.append(part.text)
+                    parts.append(<str>part.text)
 
                 part = part.next_part
 
@@ -564,7 +541,7 @@ cdef class ValueClass:
     # Public
     #
     cdef set variable_names  # A set of variable names
-    cdef ValuePart parts  # The linked list of ValuePart objects
+    cdef ValuePart *parts
 
     # init():
     #
@@ -574,10 +551,15 @@ cdef class ValueClass:
     #    string (str): The string which can contain variables
     #
     cdef init(self, str string):
+
         self.variable_names = set()
-        self.parts = None
+        self.parts = NULL
+
         self._parse_string(string)
 
+    def __dealloc__(self):
+        free_value_parts(self.parts)
+
     # _parse_string()
     #
     # Parse the string for this ValueClass, breaking it down into
@@ -604,19 +586,21 @@ cdef class ValueClass:
         # What do you expect ? These are regular expressions after all,
         # they are *supposed* to be weird.
         #
-        cdef list splits = VALUE_CLASS_PARSE_EXPANSION.split(string)
+        cdef splits = VALUE_CLASS_PARSE_EXPANSION.split(string)
         cdef object split_object
         cdef str split
         cdef Py_ssize_t split_idx = 0
-        cdef bint is_variable
-        cdef ValuePart part
-        cdef ValuePart last_part = None
+        cdef int is_variable
+
+        # Adding parts
+        #
+        cdef ValuePart *part
+        cdef ValuePart *last_part = NULL
 
         #
         # Collect the weird regex return value into something
         # more comprehensible.
         #
-
         for split_object in splits:
             split = <str> split_object
             if split:
@@ -633,8 +617,7 @@ cdef class ValueClass:
                     is_variable = True
                     self.variable_names.add(split)
 
-                part = ValuePart()
-                part.init(split, is_variable)
+                part = new_value_part(split, is_variable)
                 if last_part:
                     last_part.next_part = part
                 else:
@@ -664,3 +647,50 @@ cdef class ValueIterator:
     def __next__(self):
         name = next(self._iter)
         return name, self._variables[name]
+
+
+############################## BASEMENT ########################################
+
+
+from cpython.mem cimport PyMem_Malloc, PyMem_Free
+from cpython.object cimport PyObject
+from cpython.ref cimport Py_XINCREF, Py_XDECREF
+
+# ValuePart()
+#
+# Represents a part of a value (a string and an indicator
+# of whether the string is a variable or not).
+#
+# This only exists for better performance than constructing
+# and unpacking tuples.
+#
+# Args:
+#    text (str): The text of this part
+#    is_variable (bint): True if the text is a variable, False if it's literal
+#
+ctypedef struct ValuePart:
+    PyObject *text
+    int is_variable
+    ValuePart *next_part
+
+cdef ValuePart *new_value_part(str text, int is_variable):
+    cdef ValuePart *part = <ValuePart *>PyMem_Malloc(sizeof(ValuePart))
+    if not part:
+        raise MemoryError()
+
+    part.text = <PyObject *>text
+    part.is_variable = is_variable
+    part.next_part = NULL
+    Py_XINCREF(part.text)
+    return part
+
+cdef void free_value_part(ValuePart *part):
+    Py_XDECREF(part.text)
+    PyMem_Free(part)
+
+cdef void free_value_parts(ValuePart *part):
+    cdef ValuePart *to_free
+    while part:
+        to_free = part
+        part = part.next_part
+        free_value_part(to_free)


[buildstream] 22/27: _variables.pyx: Early return from _resolve()

Posted by gi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch tristan/partial-variables-manual-string-join
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit 20db0e114099a7e76576d0efdbbe0e0d823a9840
Author: Tristan van Berkom <tr...@codethink.co.uk>
AuthorDate: Thu Jul 9 18:20:42 2020 +0900

    _variables.pyx: Early return from _resolve()
---
 src/buildstream/_variables.pyx | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/src/buildstream/_variables.pyx b/src/buildstream/_variables.pyx
index d455940..64c3ece 100644
--- a/src/buildstream/_variables.pyx
+++ b/src/buildstream/_variables.pyx
@@ -306,10 +306,17 @@ cdef class Variables:
     #                 references encountered when resolving the variable.
     #
     cdef str _resolve(self, str name, ScalarNode pnode):
+
+        cdef Value iter_value
+
+        # Try early return first
+        iter_value = self._get_checked_value(name, None, pnode)
+        if iter_value._resolved:
+            return iter_value._resolved
+
         cdef ResolutionStep step
         cdef ResolutionStep new_step
         cdef ResolutionStep this_step
-        cdef Value iter_value
         cdef str resolved_value
         cdef Py_ssize_t idx = 0
 


[buildstream] 03/27: tests/frontend/overlaps.py: Test undefined variables

Posted by gi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch tristan/partial-variables-manual-string-join
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit 24503c3fc3787b7516801fec43cd35869e992a37
Author: Tristan van Berkom <tr...@codethink.co.uk>
AuthorDate: Mon Jun 29 19:59:53 2020 +0900

    tests/frontend/overlaps.py: Test undefined variables
    
    Ensure that we get the expected provenance when expanding a variable
    included in an overlap whitelist entry.
---
 tests/frontend/overlaps.py                      | 15 ++++++++++++++-
 tests/frontend/overlaps/whitelist-undefined.bst | 13 +++++++++++++
 2 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/tests/frontend/overlaps.py b/tests/frontend/overlaps.py
index a45fc70..28bf8a7 100644
--- a/tests/frontend/overlaps.py
+++ b/tests/frontend/overlaps.py
@@ -4,7 +4,7 @@
 import os
 import pytest
 from buildstream.testing.runcli import cli  # pylint: disable=unused-import
-from buildstream.exceptions import ErrorDomain
+from buildstream.exceptions import ErrorDomain, LoadErrorReason
 from buildstream import _yaml
 from buildstream.plugin import CoreWarnings
 from tests.testutils import generate_junction
@@ -71,6 +71,19 @@ def test_overlaps_whitelist_on_overlapper(cli, datafiles):
 
 
 @pytest.mark.datafiles(DATA_DIR)
+def test_overlaps_whitelist_undefined_variable(cli, datafiles):
+    project_dir = str(datafiles)
+    gen_project(project_dir, False)
+    result = cli.run(project=project_dir, silent=True, args=["build", "whitelist-undefined.bst"])
+
+    # Assert that we get the expected undefined variable error,
+    # and that it has the provenance we expect from whitelist-undefined.bst
+    #
+    result.assert_main_error(ErrorDomain.LOAD, LoadErrorReason.UNRESOLVED_VARIABLE)
+    assert "whitelist-undefined.bst [line 13 column 6]" in result.stderr
+
+
+@pytest.mark.datafiles(DATA_DIR)
 @pytest.mark.parametrize("use_fatal_warnings", [True, False])
 def test_overlaps_script(cli, datafiles, use_fatal_warnings):
     # Test overlaps with script element to test
diff --git a/tests/frontend/overlaps/whitelist-undefined.bst b/tests/frontend/overlaps/whitelist-undefined.bst
new file mode 100644
index 0000000..5cb0c64
--- /dev/null
+++ b/tests/frontend/overlaps/whitelist-undefined.bst
@@ -0,0 +1,13 @@
+kind: import
+config:
+  source: /
+  target: /
+depends:
+- b-whitelisted.bst
+sources:
+- kind: local
+  path: "a"
+public:
+  bst:
+    overlap-whitelist:
+    - "%{undefined-variable}/*"


[buildstream] 21/27: _variables.pyx: Value.resolve() now takes strings as input, no more _do_resolve()

Posted by gi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch tristan/partial-variables-manual-string-join
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit b1713ea5c9745e104d7a22348cfd0d0cd5b2ccb0
Author: Tristan van Berkom <tr...@codethink.co.uk>
AuthorDate: Thu Jul 9 03:42:23 2020 +0900

    _variables.pyx: Value.resolve() now takes strings as input, no more _do_resolve()
---
 src/buildstream/_variables.pyx | 31 +++++++++++++------------------
 1 file changed, 13 insertions(+), 18 deletions(-)

diff --git a/src/buildstream/_variables.pyx b/src/buildstream/_variables.pyx
index a02f976..d455940 100644
--- a/src/buildstream/_variables.pyx
+++ b/src/buildstream/_variables.pyx
@@ -246,7 +246,7 @@ cdef class Variables:
     #
     cpdef str _subst(self, ScalarNode node):
         cdef Value value = Value()
-        cdef Value iter_value
+        cdef str iter_value
         cdef str resolved_value
         cdef ValuePart *part
 
@@ -257,7 +257,7 @@ cdef class Variables:
         part = value._value_class.parts
         while part:
             if part.is_variable:
-                iter_value = self._do_resolve(<str> part.text, node)
+                iter_value = self._resolve(<str> part.text, node)
                 object_array_append(&(values), <PyObject *>iter_value)
             part = part.next_part
 
@@ -289,14 +289,7 @@ cdef class Variables:
         else:
             assert False, "Unknown 'Node' type"
 
-    # XXX
-    #
-    cdef str _resolve(self, str name, ScalarNode pnode):
-        cdef Value value
-        value = self._do_resolve(name, pnode)
-        return value._resolved
-
-    # _do_resolve()
+    # _resolve()
     #
     # Helper to expand and cache a variable definition in the context of
     # the given dictionary of expansion strings.
@@ -312,11 +305,12 @@ cdef class Variables:
     #    (LoadError): In case there was any undefined variables or circular
     #                 references encountered when resolving the variable.
     #
-    cdef Value _do_resolve(self, str name, ScalarNode pnode):
+    cdef str _resolve(self, str name, ScalarNode pnode):
         cdef ResolutionStep step
         cdef ResolutionStep new_step
         cdef ResolutionStep this_step
         cdef Value iter_value
+        cdef str resolved_value
         cdef Py_ssize_t idx = 0
 
         # We'll be collecting the values to resolve at the end in here
@@ -382,15 +376,19 @@ cdef class Variables:
         #
         idx = values.length -1
         while idx >= 0:
+
+            # Values in, strings out
+            #
             iter_value = <Value>values.array[idx]
-            iter_value.resolve(&values, idx + 1)
+            resolved_value = iter_value.resolve(&values, idx + 1)
+            values.array[idx] = <PyObject *>resolved_value
             idx -= 1
 
         # Cleanup
         #
         object_array_free(&(values))
 
-        return iter_value
+        return resolved_value
 
     # _get_checked_value()
     #
@@ -556,7 +554,6 @@ cdef class Value:
     #    (str): The resolved value
     #
     cdef str resolve(self, ObjectArray *resolved_values, Py_ssize_t values_idx):
-        cdef Value part_var
         cdef ValuePart *part
         cdef Py_UCS4 maxchar = 0
         cdef Py_UCS4 part_maxchar
@@ -575,9 +572,8 @@ cdef class Value:
             part = self._value_class.parts
             while part:
                 if part.is_variable:
-                    part_var = <Value> resolved_values.array[idx]
+                    part_object = resolved_values.array[idx]
                     idx += 1
-                    part_object = <PyObject *>part_var._resolved
                 else:
                     part_object = part.text
 
@@ -597,9 +593,8 @@ cdef class Value:
             # This time copy characters as we loop through the parts
             while part:
                 if part.is_variable:
-                    part_var = <Value> resolved_values.array[idx]
+                    part_object = resolved_values.array[idx]
                     idx += 1
-                    part_object = <PyObject *>part_var._resolved
                 else:
                     part_object = part.text
 


[buildstream] 26/27: _variables.pyx: Cache the array used to accumulate variables in _resolve_value()

Posted by gi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch tristan/partial-variables-manual-string-join
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit 1757a42d46eca4080b7ec08e2e9b258e88c34fee
Author: Tristan van Berkom <tr...@codethink.co.uk>
AuthorDate: Sat Jul 11 15:55:53 2020 +0900

    _variables.pyx: Cache the array used to accumulate variables in _resolve_value()
---
 src/buildstream/_variables.pyx | 71 +++++++++++++++++++++++++++++-------------
 1 file changed, 50 insertions(+), 21 deletions(-)

diff --git a/src/buildstream/_variables.pyx b/src/buildstream/_variables.pyx
index a084e6f..785f861 100644
--- a/src/buildstream/_variables.pyx
+++ b/src/buildstream/_variables.pyx
@@ -88,6 +88,7 @@ cdef class Variables:
 
     cdef dict _values  # The Value objects
     cdef MappingNode _origin
+    cdef ObjectArray _resolve_value_cache
 
     def __init__(self, MappingNode node):
 
@@ -106,6 +107,16 @@ cdef class Variables:
 
         self._values = self._init_values(node)
 
+        # Initialize the resolution cache without allocating any vector initially
+        object_array_init(&(self._resolve_value_cache), 0)
+
+    # __dealloc__()
+    #
+    # Cleanup stuff which cython wont cleanup automatically
+    #
+    def __dealloc__(self):
+        object_array_free(&(self._resolve_value_cache))
+
     # __getitem__()
     #
     # Enables indexing access to variables.
@@ -207,10 +218,14 @@ cdef class Variables:
         cdef object key
 
         with PROFILER.profile(Topics.VARIABLES_CHECK, id(self._origin)):
+
             # Resolve all variables.
             for key in self._values.keys():
                 self._resolve(<str> key, None)
 
+            # Free any used resources after resolving a batch
+            object_array_free(&(self._resolve_value_cache))
+
     # _init_values()
     #
     # Here we initialize the Value() table, which contains
@@ -264,6 +279,10 @@ cdef class Variables:
         resolved_value = value.resolve(values.array)
 
         object_array_free(&(values))
+
+        # Free any used resources after substitutions
+        object_array_free(&(self._resolve_value_cache))
+
         return resolved_value
 
     # _expand()
@@ -324,10 +343,9 @@ cdef class Variables:
         cdef str resolved_value
         cdef Py_ssize_t idx = 0
 
-        # We'll be collecting the values to resolve at the end in here
-        cdef ObjectArray values
-        object_array_init(&(values), -1)
-        object_array_append(&(values), <PyObject *>value)
+        # Reset the value array cache
+        object_array_reset(&(self._resolve_value_cache))
+        object_array_append(&(self._resolve_value_cache), <PyObject *>value)
 
         step = ResolutionStep()
         step.init(name, value._value_class.parts, None)
@@ -355,7 +373,7 @@ cdef class Variables:
                 #
                 # Even if the value was already resolved, we need it in context to resolve
                 # previously enqueued variables
-                object_array_append(&(values), <PyObject *>iter_value)
+                object_array_append(&(self._resolve_value_cache), <PyObject *>iter_value)
 
                 # Queue up the values dependencies.
                 #
@@ -375,11 +393,11 @@ cdef class Variables:
         # backwards and the last (leftmost) resolved value is the one
         # we want to return.
         #
-        idx = values.length -1
+        idx = self._resolve_value_cache.length -1
         while idx > 0:
             # Values in, strings out
             #
-            iter_value = <Value>values.array[idx]
+            iter_value = <Value>self._resolve_value_cache.array[idx]
 
             if iter_value._resolved is None:
 
@@ -390,18 +408,14 @@ cdef class Variables:
                 # expansion though, because of how variables are
                 # sorted.
                 #
-                iter_value.resolve(&(values.array[idx + 1]))
+                iter_value.resolve(&(self._resolve_value_cache.array[idx + 1]))
 
-            values.array[idx] = <PyObject *>iter_value._resolved
+            self._resolve_value_cache.array[idx] = <PyObject *>iter_value._resolved
             idx -= 1
 
         # Save the return of Value.resolve from the toplevel value
-        iter_value = <Value>values.array[0]
-        resolved_value = iter_value.resolve(&(values.array[1]))
-
-        # Cleanup
-        #
-        object_array_free(&(values))
+        iter_value = <Value>self._resolve_value_cache.array[0]
+        resolved_value = iter_value.resolve(&(self._resolve_value_cache.array[1]))
 
         return resolved_value
 
@@ -850,12 +864,11 @@ cdef void object_array_append(ObjectArray *array, PyObject *obj):
 
     # Ensure we have enough space for the new item
     if array.length >= array._size:
-        array._size = array._size + OBJECT_ARRAY_BLOCK_SIZE - (array._size % 8)
+        array._size = array._size + OBJECT_ARRAY_BLOCK_SIZE - (array._size % OBJECT_ARRAY_BLOCK_SIZE)
         array.array = <PyObject **>PyMem_Realloc(array.array, array._size * sizeof(PyObject *))
         if not array.array:
             raise MemoryError()
 
-    # Py_XINCREF(obj)
     array.array[array.length] = obj
     array.length += 1
 
@@ -864,16 +877,32 @@ cdef void object_array_append(ObjectArray *array, PyObject *obj):
 #
 # Free the array, releasing references to all the objects
 #
+# This leaves the array in a state as if it had been initialized
+# with a 0 length vector, so a subsequent call to object_array_append()
+# will start by allocating a block.
+#
 # Args:
 #    array (ObjectArray *): The array to free up
 #
 cdef void object_array_free(ObjectArray *array):
-    #cdef Py_ssize_t idx = 0
-    #while idx < array.length:
-    #    Py_XDECREF(array.array[idx])
-    #    idx += 1
     if array._size > 0:
         PyMem_Free(array.array)
+        array._size = 0
+    array.length = 0
+
+
+# object_array_reset()
+#
+# Reset the array, ignoring any existing elements but not
+# freeing the allocated vector.
+#
+# This allows for reuse of an already cached array.
+#
+# Args:
+#    array (ObjectArray *): The array to reset
+#
+cdef void object_array_reset(ObjectArray *array):
+    array.length = 0
 
 
 # ValuePart()


[buildstream] 06/27: _variables.pyx: Use a linked list for ValuePart instead of a list.

Posted by gi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch tristan/partial-variables-manual-string-join
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit 310b29486b2b8f0bd20ce3454f4c2beadcdeb308
Author: Tristan van Berkom <tr...@codethink.co.uk>
AuthorDate: Wed Jul 1 19:56:47 2020 +0900

    _variables.pyx: Use a linked list for ValuePart instead of a list.
---
 src/buildstream/_variables.pyx | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/src/buildstream/_variables.pyx b/src/buildstream/_variables.pyx
index 9730948..2a01b1c 100644
--- a/src/buildstream/_variables.pyx
+++ b/src/buildstream/_variables.pyx
@@ -437,10 +437,12 @@ cdef class ValueLink:
 cdef class ValuePart:
     cdef str text
     cdef bint is_variable
+    cdef ValuePart next_part
 
     cdef init(self, str text, bint is_variable):
         self.text = text
         self.is_variable = is_variable
+        self.next_part = None
 
 
 cdef EMPTY_SET = set()
@@ -490,15 +492,17 @@ cdef class Value:
         cdef list parts = []
 
         if self._resolved is None:
-            for part_object in self._value_class.parts:
-                part = <ValuePart> part_object
+            part = self._value_class.parts
 
+            while part:
                 if part.is_variable:
                     part_var = <Value> values[part.text]
                     parts.append(part_var._resolved)
                 else:
                     parts.append(part.text)
 
+                part = part.next_part
+
             self._resolved = "".join(parts)
 
         return self._resolved
@@ -573,7 +577,7 @@ cdef class ValueClass:
     # Public
     #
     cdef set variable_names  # A set of variable names
-    cdef list parts  # A list of ValuePart objects
+    cdef ValuePart parts  # The linked list of ValuePart objects
 
     # init():
     #
@@ -584,7 +588,7 @@ cdef class ValueClass:
     #
     cdef init(self, str string):
         self.variable_names = set()
-        self.parts = []
+        self.parts = None
         self._parse_string(string)
 
     # _parse_string()
@@ -619,6 +623,7 @@ cdef class ValueClass:
         cdef Py_ssize_t split_idx = 0
         cdef bint is_variable
         cdef ValuePart part
+        cdef ValuePart last_part = None
 
         #
         # Collect the weird regex return value into something
@@ -643,7 +648,11 @@ cdef class ValueClass:
 
                 part = ValuePart()
                 part.init(split, is_variable)
-                self.parts.append(part)
+                if last_part:
+                    last_part.next_part = part
+                else:
+                    self.parts = part
+                last_part = part
 
             split_idx += 1
 


[buildstream] 11/27: _variables.pyx: Pass around provenance node instead of provenance itself

Posted by gi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch tristan/partial-variables-manual-string-join
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit 116eec0b9c3bf9cd0faf72d5fc1550a5d3291d43
Author: Tristan van Berkom <tr...@codethink.co.uk>
AuthorDate: Thu Jul 2 20:35:41 2020 +0900

    _variables.pyx: Pass around provenance node instead of provenance itself
---
 src/buildstream/_variables.pyx | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/src/buildstream/_variables.pyx b/src/buildstream/_variables.pyx
index 18772ae..8020efe 100644
--- a/src/buildstream/_variables.pyx
+++ b/src/buildstream/_variables.pyx
@@ -158,7 +158,7 @@ cdef class Variables:
         dependencies, n_dependencies = value.dependencies()
         while idx < n_dependencies:
             dep_name = <str> dependencies[idx]
-            self._resolve(dep_name, node.get_provenance())
+            self._resolve(dep_name, node)
             idx += 1
 
         return value.resolve(self._values)
@@ -224,17 +224,17 @@ cdef class Variables:
     # the given dictionary of expansion strings.
     #
     # Args:
-    #     name (str): Name of the variable to expand
-    #     provenance (ProvenanceInformation): Whence this variable was refered from
+    #    name (str): Name of the variable to expand
+    #    pnode (ScalarNode): The ScalarNode for which we need to resolve `name`
     #
     # Returns:
-    #     (str): The expanded value of variable
+    #    (str): The expanded value of variable
     #
     # Raises:
-    #     (LoadError): In case there was any undefined variables or circular
-    #                  references encountered when resolving the variable.
+    #    (LoadError): In case there was any undefined variables or circular
+    #                 references encountered when resolving the variable.
     #
-    cdef str _resolve(self, str name, ProvenanceInformation provenance):
+    cdef str _resolve(self, str name, ScalarNode pnode):
         cdef ResolutionStep step
         cdef ResolutionStep new_step
         cdef ResolutionStep this_step
@@ -275,7 +275,7 @@ cdef class Variables:
             idx = 0
             while idx < this_step.n_varnames:
                 iter_name = <str> this_step.varnames[idx]
-                iter_value = self._get_checked_value(iter_name, this_step.referee, provenance)
+                iter_value = self._get_checked_value(iter_name, this_step.referee, pnode)
                 idx += 1
 
                 # Earliest return for an already resolved value
@@ -318,7 +318,7 @@ cdef class Variables:
     # Args:
     #    varname (str): The variable name to fetch
     #    referee (str): The variable name referring to `varname`, or None
-    #    provenance (ProvenanceInformation): The provenance, incase referee is None.
+    #    pnode (ScalarNode): The ScalarNode for which we need to resolve `name`
     #
     # Returns:
     #   (Value): The Value for varname
@@ -326,7 +326,8 @@ cdef class Variables:
     # Raises:
     #   (LoadError): An appropriate error in case of undefined variables
     #
-    cdef Value _get_checked_value(self, str varname, str referee, ProvenanceInformation provenance):
+    cdef Value _get_checked_value(self, str varname, str referee, ScalarNode pnode):
+        cdef ProvenanceInformation provenance = None
         cdef Value referee_value
         cdef str error_message
 
@@ -346,8 +347,10 @@ cdef class Variables:
 
             if referee_value:
                 provenance = referee_value.get_provenance()
-
+            elif pnode:
+                provenance = pnode.get_provenance()
             error_message = "Reference to undefined variable '{}'".format(varname)
+
             if provenance:
                 error_message = "{}: {}".format(provenance, error_message)
             raise LoadError(error_message, LoadErrorReason.UNRESOLVED_VARIABLE) from e


[buildstream] 24/27: _variables.pyx: Move the meat of value resolution to ValueClass

Posted by gi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch tristan/partial-variables-manual-string-join
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit c61359c3c7d7c927ba84bc8b4377cefe429ba476
Author: Tristan van Berkom <tr...@codethink.co.uk>
AuthorDate: Sat Jul 11 15:35:52 2020 +0900

    _variables.pyx: Move the meat of value resolution to ValueClass
---
 src/buildstream/_variables.pyx | 127 +++++++++++++++++++++++------------------
 1 file changed, 70 insertions(+), 57 deletions(-)

diff --git a/src/buildstream/_variables.pyx b/src/buildstream/_variables.pyx
index 1958dd0..36708c9 100644
--- a/src/buildstream/_variables.pyx
+++ b/src/buildstream/_variables.pyx
@@ -382,6 +382,14 @@ cdef class Variables:
             iter_value = <Value>values.array[idx]
 
             if iter_value._resolved is None:
+
+                # For the first iteration, we pass an invalid pointer
+                # outside the bounds of the array.
+                #
+                # The first iteration cannot require any variable
+                # expansion though, because of how variables are
+                # sorted.
+                #
                 iter_value.resolve(&values, idx + 1)
 
             values.array[idx] = <PyObject *>iter_value._resolved
@@ -555,69 +563,15 @@ cdef class Value:
     # it will fail due to an undefined variable.
     #
     # Args:
-    #    values (dict): The full value table for resolving dependencies
+    #    values (PyObject **): Array of resolved strings to fill in the values
     #
     # Returns:
     #    (str): The resolved value
     #
-    cdef str resolve(self, ObjectArray *resolved_values, Py_ssize_t values_idx):
-        cdef ValuePart *part
-        cdef Py_UCS4 maxchar = 0
-        cdef Py_UCS4 part_maxchar
-        cdef Py_ssize_t full_length = 0
-        cdef Py_ssize_t idx
-        cdef Py_ssize_t offset
-        cdef Py_ssize_t part_length
-        cdef PyObject *resolved
-        cdef PyObject *part_object
+    cdef str resolve(self, ObjectArray *values, Py_ssize_t value_idx):
 
         if self._resolved is None:
-
-            # Calculate the number of codepoints and maximum character width
-            # required for the strings involved.
-            idx = values_idx
-            part = self._value_class.parts
-            while part:
-                if part.is_variable:
-                    part_object = resolved_values.array[idx]
-                    idx += 1
-                else:
-                    part_object = part.text
-
-                full_length += PyUnicode_GET_LENGTH(part_object)
-                part_maxchar = PyUnicode_MAX_CHAR_VALUE(part_object)
-                if part_maxchar > maxchar:
-                    maxchar = part_maxchar
-
-                part = part.next_part
-
-            # Do the stringy thingy
-            resolved = PyUnicode_New(full_length, maxchar)
-            part = self._value_class.parts
-            idx = values_idx
-            offset = 0
-
-            # This time copy characters as we loop through the parts
-            while part:
-                if part.is_variable:
-                    part_object = resolved_values.array[idx]
-                    idx += 1
-                else:
-                    part_object = part.text
-
-                part_length = PyUnicode_GET_LENGTH(part_object)
-
-                # Does this need to be in a loop and have a maximum copy length ?
-                #
-                # Should we be doing the regular posix thing, handling an exception indicating
-                # a SIGINT or such which means we should resume our copy instead of consider an error ?
-                #
-                PyUnicode_CopyCharacters(resolved, offset, part_object, 0, part_length)
-
-                offset += part_length
-                part = part.next_part
-
-            self._resolved = <str> resolved
+            self._resolved = self._value_class.resolve(values, value_idx)
 
         return self._resolved
 
@@ -697,6 +651,65 @@ cdef class ValueClass:
         self.parts = NULL
         self._parse_string(string)
 
+    # resolve()
+    #
+    #
+    cdef str resolve(self, ObjectArray *values, Py_ssize_t value_idx):
+        cdef ValuePart *part
+        cdef Py_UCS4 maxchar = 0
+        cdef Py_UCS4 part_maxchar
+        cdef Py_ssize_t full_length = 0
+        cdef Py_ssize_t idx
+        cdef Py_ssize_t offset = 0
+        cdef Py_ssize_t part_length
+        cdef PyObject *resolved
+        cdef PyObject *part_object
+
+        # Calculate the number of codepoints and maximum character width
+        # required for the strings involved.
+        idx = value_idx
+        part = self.parts
+        while part:
+            if part.is_variable:
+                part_object = values.array[idx]
+                idx += 1
+            else:
+                part_object = part.text
+
+            full_length += PyUnicode_GET_LENGTH(part_object)
+            part_maxchar = PyUnicode_MAX_CHAR_VALUE(part_object)
+            if part_maxchar > maxchar:
+                maxchar = part_maxchar
+
+            part = part.next_part
+
+        # Do the stringy thingy
+        resolved = PyUnicode_New(full_length, maxchar)
+
+        # This time copy characters as we loop through the parts
+        idx = value_idx
+        part = self.parts
+        while part:
+            if part.is_variable:
+                part_object = values.array[idx]
+                idx += 1
+            else:
+                part_object = part.text
+
+            part_length = PyUnicode_GET_LENGTH(part_object)
+
+            # Does this need to be in a loop and have a maximum copy length ?
+            #
+            # Should we be doing the regular posix thing, handling an exception indicating
+            # a SIGINT or such which means we should resume our copy instead of consider an error ?
+            #
+            PyUnicode_CopyCharacters(resolved, offset, part_object, 0, part_length)
+
+            offset += part_length
+            part = part.next_part
+
+        return <str> resolved
+
     # _parse_string()
     #
     # Parse the string for this ValueClass, breaking it down into


[buildstream] 14/27: _variables.pyx: Added ObjectArray, using for ValueClass varnames

Posted by gi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch tristan/partial-variables-manual-string-join
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit a85b3e504c7c4e772fc753adbcb10c7a8425f377
Author: Tristan van Berkom <tr...@codethink.co.uk>
AuthorDate: Fri Jul 3 01:59:25 2020 +0900

    _variables.pyx: Added ObjectArray, using for ValueClass varnames
---
 src/buildstream/_variables.pyx | 171 +++++++++++++++++++++++++----------------
 1 file changed, 104 insertions(+), 67 deletions(-)

diff --git a/src/buildstream/_variables.pyx b/src/buildstream/_variables.pyx
index a9a14eb..d6c7445 100644
--- a/src/buildstream/_variables.pyx
+++ b/src/buildstream/_variables.pyx
@@ -24,7 +24,7 @@ import re
 import sys
 import itertools
 
-from cpython.mem cimport PyMem_Malloc, PyMem_Free
+from cpython.mem cimport PyMem_Malloc, PyMem_Free, PyMem_Realloc
 from cpython.object cimport PyObject
 from cpython.ref cimport Py_XINCREF, Py_XDECREF
 
@@ -33,6 +33,14 @@ from .exceptions import LoadErrorReason
 from .node cimport MappingNode, Node, ScalarNode, SequenceNode, ProvenanceInformation
 
 
+ctypedef struct ObjectArray:
+    Py_ssize_t length
+    PyObject **array
+
+    # Private, actual size of the allocated vector
+    Py_ssize_t _size
+
+
 # The Variables helper object will resolve the variable references in
 # the given dictionary, expecting that any dictionary values which contain
 # variable references can be resolved from the same dictionary.
@@ -202,15 +210,14 @@ cdef class Variables:
     #
     cpdef str _subst(self, ScalarNode node):
         cdef Value value = Value()
-        cdef PyObject **dependencies
-        cdef Py_ssize_t n_dependencies
+        cdef ObjectArray *dependencies
         cdef Py_ssize_t idx = 0
         cdef str dep_name
 
         value.init(node)
-        dependencies, n_dependencies = value.dependencies()
-        while idx < n_dependencies:
-            dep_name = <str> dependencies[idx]
+        dependencies = value.dependencies()
+        while idx < dependencies.length:
+            dep_name = <str> dependencies.array[idx]
             self._resolve(dep_name, node)
             idx += 1
 
@@ -263,8 +270,7 @@ cdef class Variables:
         cdef Value iter_value
         cdef str iter_name
 
-        cdef PyObject **iter_value_deps
-        cdef Py_ssize_t n_iter_value_deps
+        cdef ObjectArray *iter_value_deps
         cdef Py_ssize_t idx = 0
 
         cdef str resolved_value = None
@@ -278,11 +284,12 @@ cdef class Variables:
         # Each iteration processes a ResolutionStep object and has the possibility
         # to enque more ResolutionStep objects as a result.
         #
-        cdef PyObject *names[1]
-        names[0] = <PyObject *>name
+        cdef ObjectArray initial_deps
+        object_array_init(&(initial_deps), 1)
+        object_array_append(&(initial_deps), <PyObject *>name)
 
         step = ResolutionStep()
-        step.init(None, names, 1, None)
+        step.init(None, &(initial_deps), None)
 
         while step:
             # Keep a hold of the current overall step
@@ -293,8 +300,8 @@ cdef class Variables:
             this_step.check_circular(self._values)
 
             idx = 0
-            while idx < this_step.n_varnames:
-                iter_name = <str> this_step.varnames[idx]
+            while idx < this_step.varnames.length:
+                iter_name = <str> this_step.varnames.array[idx]
                 iter_value = self._get_checked_value(iter_name, this_step.referee, pnode)
                 idx += 1
 
@@ -310,10 +317,10 @@ cdef class Variables:
                     deps.append(iter_value)
 
                     # Queue up it's dependencies for resolution
-                    iter_value_deps, n_iter_value_deps = iter_value.dependencies()
-                    if n_iter_value_deps > 0:
+                    iter_value_deps = iter_value.dependencies()
+                    if iter_value_deps:
                         new_step = ResolutionStep()
-                        new_step.init(iter_name, iter_value_deps, n_iter_value_deps, this_step)
+                        new_step.init(iter_name, iter_value_deps, this_step)
 
                         # Link it to the end of the stack
                         new_step.prev = step
@@ -328,6 +335,10 @@ cdef class Variables:
             iter_value = deps.pop()
             resolved_value = iter_value.resolve(self._values)
 
+        # Cleanup
+        #
+        object_array_free(&(initial_deps))
+
         return resolved_value
 
     # _get_checked_value()
@@ -387,8 +398,7 @@ cdef class ResolutionStep:
     cdef str referee
     cdef ResolutionStep parent
     cdef ResolutionStep prev
-    cdef PyObject **varnames
-    cdef Py_ssize_t n_varnames
+    cdef ObjectArray *varnames
 
     # init()
     #
@@ -399,10 +409,9 @@ cdef class ResolutionStep:
     #    varnames (set): A set of variable names which referee refers to.
     #    parent (ResolutionStep): The parent ResolutionStep
     #
-    cdef init(self, str referee, PyObject **varnames, Py_ssize_t n_varnames, ResolutionStep parent):
+    cdef init(self, str referee, ObjectArray *varnames, ResolutionStep parent):
         self.referee = referee
         self.varnames = varnames
-        self.n_varnames = n_varnames
         self.parent = parent
         self.prev = None
 
@@ -523,15 +532,14 @@ cdef class Value:
     # Returns the array of dependency variable names
     #
     # Returns:
-    #    (PyObject **): The array of variable names which this ValueClass depends on, or NULL
-    #    (int): The length of the returned array
+    #    (ObjectArray *): The array of variable names, or NULL
     #
-    cdef (PyObject **, Py_ssize_t)dependencies(self):
+    cdef ObjectArray *dependencies(self):
         if self._resolved is None:
-            return self._value_class.variable_names, self._value_class.n_variable_names
+            return &(self._value_class.varnames)
 
         # If we're already resolved, we don't have any dependencies anymore
-        return NULL, 0
+        return NULL
 
     # _load_value_class()
     #
@@ -590,8 +598,7 @@ cdef class ValueClass:
     # Public
     #
     cdef ValuePart *parts
-    cdef PyObject **variable_names
-    cdef Py_ssize_t n_variable_names
+    cdef ObjectArray varnames
 
     # __dealloc__()
     #
@@ -599,7 +606,7 @@ cdef class ValueClass:
     #
     def __dealloc__(self):
         free_value_parts(self.parts)
-        PyMem_Free(self.variable_names)
+        object_array_free(&(self.varnames))
 
     # init():
     #
@@ -610,8 +617,6 @@ cdef class ValueClass:
     #
     cdef init(self, str string):
         self.parts = NULL
-        self.variable_names = NULL
-        self.n_variable_names = 0
         self._parse_string(string)
 
     # _parse_string()
@@ -640,10 +645,11 @@ cdef class ValueClass:
         # What do you expect ? These are regular expressions after all,
         # they are *supposed* to be weird.
         #
-        cdef splits = VALUE_CLASS_PARSE_EXPANSION.split(string)
+        cdef list splits = VALUE_CLASS_PARSE_EXPANSION.split(string)
         cdef object split_object
         cdef str split
         cdef Py_ssize_t idx = 0
+        cdef Py_ssize_t n_variables = 0
         cdef int is_variable
 
         # Adding parts
@@ -662,7 +668,7 @@ cdef class ValueClass:
                 if (idx % 2) == 0:
                     is_variable = False
                 else:
-                    self.n_variable_names += 1
+                    n_variables += 1
                     is_variable = True
 
                 part = new_value_part(split, is_variable)
@@ -676,28 +682,12 @@ cdef class ValueClass:
 
         # Initialize the variables array
         #
-        # Note that we don't bother ref counting the string objects, as the
-        # ValuePart already takes care of owning the strings.
-        #
-        if self.n_variable_names > 0:
-            self.variable_names = <PyObject **>PyMem_Malloc(self.n_variable_names * sizeof(PyObject *))
-            if not self.variable_names:
-                raise MemoryError()
-
-            part = self.parts
-            idx = 0
-            while part:
-                if part.is_variable:
-
-                    # Record only unique variable names in the variable_names array.
-                    #
-                    if object_array_search(part.text, self.variable_names, idx) < 0:
-                        self.variable_names[idx] = part.text
-                        idx += 1
-                    else:
-                        self.n_variable_names -= 1
-
-                part = part.next_part
+        object_array_init(&(self.varnames), n_variables)
+        part = self.parts
+        while part:
+            if part.is_variable:
+                object_array_append(&(self.varnames), part.text)
+            part = part.next_part
 
 # ValueIterator()
 #
@@ -723,28 +713,75 @@ cdef class ValueIterator:
 
 ############################## BASEMENT ########################################
 
+cdef int OBJECT_ARRAY_BLOCK_SIZE = 8
 
-# object_array_search()
+
+# object_array_init()
 #
-# Searches for an object pointer in an array of object pointers.
+# Initialize the object array
 #
 # Args:
-#    search (PyObject *): The object to search for
-#    array (PyObject **): The array to search in
-#    length (Py_ssize_t): The length of the array
+#    array (ObjectArray *): The array to initialize
+#    size (Py_ssize_t): The initial size of the array, or < 0 for an automatic size,
+#                       0 for no allocated buffer initially
 #
-# Returns:
-#    (Py_ssize_t): The index of `search` in `array`, or -1 if `search` is not found.
+# Raises:
+#    (MemoryError): In the case we failed to allocate memory for the array
 #
-cdef Py_ssize_t object_array_search(PyObject *search, PyObject **array, Py_ssize_t length):
-    cdef Py_ssize_t idx = 0
+cdef void object_array_init(ObjectArray *array, Py_ssize_t size):
+    array._size = size
+    if array._size < 0:
+        array._size = OBJECT_ARRAY_BLOCK_SIZE
+    array.length = 0
+    if array._size > 0:
+        array.array = <PyObject **>PyMem_Malloc(array._size * sizeof(PyObject *))
+        if not array.array:
+            raise MemoryError()
+    else:
+        array.array = NULL
+
 
-    while idx < length:
-        if array[idx] == search:
-            return idx
+# object_array_append()
+#
+# Append an object to the array
+#
+# Args:
+#    array (ObjectArray *): The array to append to
+#    obj (PyObject *): The object to append to the array
+#
+# Raises:
+#    (MemoryError): In the case we failed to allocate memory for the array
+#
+cdef void object_array_append(ObjectArray *array, PyObject *obj):
+
+    # Ensure we have enough space for the new item
+    if array.length >= array._size:
+        array._size = array._size + OBJECT_ARRAY_BLOCK_SIZE - (array._size % 8)
+        array.array = <PyObject **>PyMem_Realloc(array.array, array._size * sizeof(PyObject *))
+        if not array.array:
+            raise MemoryError()
+
+    Py_XINCREF(obj)
+    array.array[array.length] = obj
+    array.length += 1
+
+
+# object_array_free()
+#
+# Free the array, releasing references to all the objects
+#
+# Args:
+#    array (ObjectArray *): The array to free up
+#
+cdef void object_array_free(ObjectArray *array):
+    cdef Py_ssize_t idx = 0
+    while idx < array.length:
+        Py_XDECREF(array.array[idx])
         idx += 1
 
-    return -1
+    if array._size > 0:
+        PyMem_Free(array.array)
+
 
 # ValuePart()
 #


[buildstream] 13/27: _variables.pyx: Only intern the value class strings, not the variable names

Posted by gi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch tristan/partial-variables-manual-string-join
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit c43bbf92ddc1c0a17981114557306a8447ffe7d7
Author: Tristan van Berkom <tr...@codethink.co.uk>
AuthorDate: Thu Jul 2 21:58:58 2020 +0900

    _variables.pyx: Only intern the value class strings, not the variable names
    
    It would appear that variable name interning adds more expense than
    what we save on variable lookups.
---
 src/buildstream/_variables.pyx | 17 +++++------------
 1 file changed, 5 insertions(+), 12 deletions(-)

diff --git a/src/buildstream/_variables.pyx b/src/buildstream/_variables.pyx
index 152a8d3..a9a14eb 100644
--- a/src/buildstream/_variables.pyx
+++ b/src/buildstream/_variables.pyx
@@ -181,7 +181,6 @@ cdef class Variables:
         cdef Value value
 
         for key, value_node in node.items():
-            key = <object> sys.intern(<str> key)
             value = Value()
             value.init(<ScalarNode> value_node)
             ret[key] = value
@@ -279,7 +278,6 @@ cdef class Variables:
         # Each iteration processes a ResolutionStep object and has the possibility
         # to enque more ResolutionStep objects as a result.
         #
-        name = sys.intern(name)
         cdef PyObject *names[1]
         names[0] = <PyObject *>name
 
@@ -548,14 +546,15 @@ cdef class Value:
     #
     cdef ValueClass _load_value_class(self, str string):
         cdef ValueClass ret
-        cdef str internal_string = sys.intern(string)
+
+        string = sys.intern(string)
 
         try:
-            ret = VALUE_CLASS_TABLE[internal_string]
+            ret = VALUE_CLASS_TABLE[string]
         except KeyError:
             ret = ValueClass()
-            ret.init(internal_string)
-            VALUE_CLASS_TABLE[internal_string] = ret
+            ret.init(string)
+            VALUE_CLASS_TABLE[string] = ret
 
         return ret
 
@@ -660,12 +659,6 @@ cdef class ValueClass:
             split = <str> split_object
             if split:
 
-                # Use an intern for the part, this will not only
-                # save memory but it will speed up lookups in the
-                # case that the part in question is used to lookup
-                # variable values.
-                split = <str> sys.intern(split)
-
                 if (idx % 2) == 0:
                     is_variable = False
                 else:


[buildstream] 27/27: _variables.pyx: Change ResolutionStep() to be an allocated struct

Posted by gi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch tristan/partial-variables-manual-string-join
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit 87added5fd19301896c774dc5f419de0953f21c9
Author: Tristan van Berkom <tr...@codethink.co.uk>
AuthorDate: Sat Jul 11 16:37:36 2020 +0900

    _variables.pyx: Change ResolutionStep() to be an allocated struct
---
 src/buildstream/_variables.pyx | 142 ++++++++++++++++++++++++-----------------
 1 file changed, 83 insertions(+), 59 deletions(-)

diff --git a/src/buildstream/_variables.pyx b/src/buildstream/_variables.pyx
index 785f861..1958c89 100644
--- a/src/buildstream/_variables.pyx
+++ b/src/buildstream/_variables.pyx
@@ -337,9 +337,10 @@ cdef class Variables:
 
     cdef str _resolve_value(self, str name, Value value):
         cdef Value iter_value
-        cdef ResolutionStep step
-        cdef ResolutionStep new_step
-        cdef ResolutionStep this_step
+        cdef ResolutionStep *step
+        cdef ResolutionStep *new_step
+        cdef ResolutionStep *this_step
+        cdef ResolutionStep *last_step
         cdef str resolved_value
         cdef Py_ssize_t idx = 0
 
@@ -347,8 +348,8 @@ cdef class Variables:
         object_array_reset(&(self._resolve_value_cache))
         object_array_append(&(self._resolve_value_cache), <PyObject *>value)
 
-        step = ResolutionStep()
-        step.init(name, value._value_class.parts, None)
+        step = new_resolution_step(<PyObject *>name, value._value_class.parts, NULL)
+        last_step = step
 
         while step:
             # Keep a hold of the current overall step
@@ -356,7 +357,7 @@ cdef class Variables:
             step = step.prev
 
             # Check for circular dependencies
-            this_step.check_circular(self._values)
+            check_resolution_step(this_step, self._values)
 
             part = this_step.parts
             while part:
@@ -367,7 +368,7 @@ cdef class Variables:
                     part = part.next_part
                     continue
 
-                iter_value = self._get_checked_value(<str> part.text, this_step.referee, None)
+                iter_value = self._get_checked_value(<str> part.text, <str> this_step.referee, None)
 
                 # Queue up this value.
                 #
@@ -378,8 +379,8 @@ cdef class Variables:
                 # Queue up the values dependencies.
                 #
                 if iter_value._resolved is None:
-                    new_step = ResolutionStep()
-                    new_step.init(<str> part.text, iter_value._value_class.parts, this_step)
+                    new_step = new_resolution_step(part.text, iter_value._value_class.parts, this_step)
+                    last_step = new_step
 
                     # Link it to the end of the stack
                     new_step.prev = step
@@ -417,6 +418,13 @@ cdef class Variables:
         iter_value = <Value>self._resolve_value_cache.array[0]
         resolved_value = iter_value.resolve(&(self._resolve_value_cache.array[1]))
 
+        # Free up the steps
+        step = last_step
+        while step:
+            this_step = step
+            step = step.prev
+            free_resolution_step(this_step)
+
         return resolved_value
 
     # _get_checked_value()
@@ -465,80 +473,96 @@ cdef class Variables:
             raise LoadError(error_message, LoadErrorReason.UNRESOLVED_VARIABLE) from e
 
 
-# ResolutionStep()
+# ResolutionStep
 #
 # The context for a single iteration in variable resolution.
 #
 # This only exists for better performance than constructing
 # and unpacking tuples.
 #
-cdef class ResolutionStep:
-    cdef str referee
-    cdef ResolutionStep parent
-    cdef ResolutionStep prev
-    cdef ValuePart *parts
+ctypedef struct ResolutionStep:
+    PyObject *referee
+    ResolutionStep *parent
+    ResolutionStep *prev
+    ValuePart *parts
 
-    # init()
-    #
-    # Initialize this ResolutionStep
-    #
-    # Args:
-    #    referee (str): The name of the referring variable
-    #    parts (ValuePart *): A link list of ValueParts which `referee` refers to
-    #    parent (ResolutionStep): The parent ResolutionStep
-    #
-    cdef init(self, str referee, ValuePart *parts, ResolutionStep parent):
-        self.referee = referee
-        self.parts = parts
-        self.parent = parent
-        self.prev = None
 
-    # check_circular()
-    #
-    # Check for circular references in this step.
-    #
-    # Args:
-    #    values (dict): The value dictionary for lookups
-    #
-    # Raises:
-    #    (LoadError): Will raise a user facing LoadError with
-    #                 LoadErrorReason.CIRCULAR_REFERENCE_VARIABLE in case
-    #                 circular references were encountered.
-    #
-    cdef check_circular(self, dict values):
-        cdef ResolutionStep step = self.parent
-        while step:
-            if self.referee is step.referee:
-                self._raise_circular_reference_error(step, values)
-            step = step.parent
+# new_resolution_step()
+#
+# Create a new ResolutionStep
+#
+# Args:
+#    referee (str): The name of the referring variable
+#    parts (ValuePart *): A link list of ValueParts which `referee` refers to
+#    parent (ResolutionStep): The parent ResolutionStep
+#
+cdef ResolutionStep *new_resolution_step(PyObject *referee, ValuePart *parts, ResolutionStep *parent):
+    cdef ResolutionStep *step = <ResolutionStep *>PyMem_Malloc(sizeof(ResolutionStep))
+    if not step:
+        raise MemoryError()
 
-    # _raise_circular_reference_error()
-    #
-    # Helper function to construct a full report and raise the circular reference error.
-    #
-    cdef _raise_circular_reference_error(self, ResolutionStep conflict, dict values):
+    step.referee = referee
+    step.parts = parts
+    step.parent = parent
+    step.prev = NULL
+
+    return step
+
+# free_resolution_step()
+#
+# Free a ResolutionStep
+#
+# Args:
+#    step (ResolutionStep): The step to free
+#
+cdef void free_resolution_step(ResolutionStep *step):
+    PyMem_Free(step)
+
+# check_resolution_step()
+#
+# Check for circular references in this step.
+#
+# Args:
+#    step (ResolutionStep): The step to check
+#    values (dict): The value dictionary for lookups
+#
+# Raises:
+#    (LoadError): Will raise a user facing LoadError with
+#                 LoadErrorReason.CIRCULAR_REFERENCE_VARIABLE in case
+#                 circular references were encountered.
+#
+cdef check_resolution_step(ResolutionStep *step, dict values):
+    cdef ResolutionStep *iter_step = step.parent
+    while iter_step:
+        if step.referee is iter_step.referee:
+            _raise_resolution_step_error(step, iter_step, values)
+        iter_step = iter_step.parent
+
+# _raise_resolution_step_error()
+#
+# Helper function to construct a full report and raise the circular reference error.
+#
+cdef _raise_resolution_step_error(ResolutionStep *step, ResolutionStep *conflict, dict values):
         cdef list error_lines = []
-        cdef ResolutionStep step = self
+        cdef ResolutionStep *this_step = step
         cdef Value value
         cdef str referee
 
         while step is not conflict:
             if step.parent:
-                referee = step.parent.referee
+                referee = <str> step.parent.referee
             else:
-                referee = self.referee
+                referee = <str> this_step.referee
             value = values[referee]
 
-            error_lines.append("{}: Variable '{}' refers to variable '{}'".format(value.get_provenance(), referee, step.referee))
+            error_lines.append("{}: Variable '{}' refers to variable '{}'".format(value.get_provenance(), referee, <str> step.referee))
             step = step.parent
 
-        raise LoadError("Circular dependency detected on variable '{}'".format(self.referee),
+        raise LoadError("Circular dependency detected on variable '{}'".format(<str> this_step.referee),
                         LoadErrorReason.CIRCULAR_REFERENCE_VARIABLE,
                         detail="\n".join(reversed(error_lines)))
 
 
-cdef EMPTY_SET = set()
-
 # Value():
 #
 # Represents a variable value