You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@buildstream.apache.org by no...@apache.org on 2020/12/29 12:42:49 UTC

[buildstream] 01/01: _loader: Optimize loading by keeping the dependencies sorted

This is an automated email from the ASF dual-hosted git repository.

not-in-ldap pushed a commit to branch bschubert/optimize-loader-sorting
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit b61eaf795fc20f3ddb2c18570b212da8794d08fe
Author: Benjamin Schubert <be...@gmail.com>
AuthorDate: Mon Aug 19 20:48:29 2019 +0100

    _loader: Optimize loading by keeping the dependencies sorted
    
    A sizeable amount of time is spent in sorting the dependencies when
    loading elements. We can instead keep the list sorted at all time,
    which reduces the number of time we need to call sort.
---
 src/buildstream/_loader/loadelement.pyx | 55 ++++++++++++++-------------------
 src/buildstream/_loader/loader.py       | 20 ++++--------
 src/buildstream/_profile.py             |  1 -
 3 files changed, 30 insertions(+), 46 deletions(-)

diff --git a/src/buildstream/_loader/loadelement.pyx b/src/buildstream/_loader/loadelement.pyx
index 867de6f..694380b 100644
--- a/src/buildstream/_loader/loadelement.pyx
+++ b/src/buildstream/_loader/loadelement.pyx
@@ -116,6 +116,30 @@ cdef class LoadElement:
     def junction(self):
         return self._loader.project.junction
 
+    # add_dependency()
+    #
+    # Add a dependency to the current element.
+    #
+    # This helper functions keeps the underlying list sorted at all times to remove the need for filtering.
+    #
+    # Args:
+    #   dep (Dependency): the dependency to add to the list
+    #
+    def add_dependency(self, Dependency dep not None):
+        cdef int low = 0
+        cdef int high = len(self.dependencies)
+        cdef int mid
+
+        while low < high:
+            mid = (low + high) // 2
+
+            if _dependency_cmp(<Dependency> self.dependencies[mid], dep) < 0:
+                low = mid + 1
+            else:
+                high = mid
+
+        self.dependencies.insert(low, dep)
+
     # depends():
     #
     # Checks if this element depends on another element, directly
@@ -195,34 +219,3 @@ def _dependency_cmp(Dependency dep_a, Dependency dep_b):
 
     # This wont ever happen
     return 0
-
-
-# sort_dependencies():
-#
-# Sort dependencies of each element by their dependencies,
-# so that direct dependencies which depend on other direct
-# dependencies (directly or indirectly) appear later in the
-# list.
-#
-# This avoids the need for performing multiple topological
-# sorts throughout the build process.
-#
-# Args:
-#    element (LoadElement): The element to sort
-#
-def sort_dependencies(LoadElement element):
-    cdef list working_elements = [element]
-    cdef set visited = set(working_elements)
-    cdef Dependency dep
-
-    # Now dependency sort, we ensure that if any direct dependency
-    # directly or indirectly depends on another direct dependency,
-    # it is found later in the list.
-    while working_elements:
-        element = working_elements.pop()
-        for dep in element.dependencies:
-            if dep.element not in visited:
-                visited.add(dep.element)
-                working_elements.append(dep.element)
-
-        element.dependencies.sort(key=cmp_to_key(_dependency_cmp))
diff --git a/src/buildstream/_loader/loader.py b/src/buildstream/_loader/loader.py
index 061d28b..9c7add0 100644
--- a/src/buildstream/_loader/loader.py
+++ b/src/buildstream/_loader/loader.py
@@ -131,18 +131,12 @@ class Loader():
         with PROFILER.profile(Topics.CIRCULAR_CHECK, "_".join(targets)):
             self._check_circular_deps(dummy_target)
 
-        ret = []
+        # Finally, wrap what we have into LoadElements and return the target
         #
-        # Sort direct dependencies of elements by their dependency ordering
-        #
-        for element in target_elements:
-            loader = element._loader
-            with PROFILER.profile(Topics.SORT_DEPENDENCIES, element.name):
-                loadelement.sort_dependencies(element)
-
-            # Finally, wrap what we have into LoadElements and return the target
-            #
-            ret.append(loader._collect_element(element, task))
+        ret = [
+            element._loader._collect_element(element, task)
+            for element in target_elements
+        ]
 
         self._clean_caches()
 
@@ -342,9 +336,7 @@ class Loader():
                                             LoadErrorReason.INVALID_DATA)
 
                 # All is well, push the dependency onto the LoadElement
-                # Pylint is not very happy with Cython and can't understand 'dependencies' is a list
-                current_element[0].dependencies.append(  # pylint: disable=no-member
-                    Dependency(dep_element, dep.dep_type))
+                current_element[0].add_dependency(Dependency(dep_element, dep.dep_type))
             else:
                 # We do not have any more dependencies to load for this
                 # element on the queue, report any invalid dep names
diff --git a/src/buildstream/_profile.py b/src/buildstream/_profile.py
index b17215d..e41cd77 100644
--- a/src/buildstream/_profile.py
+++ b/src/buildstream/_profile.py
@@ -41,7 +41,6 @@ import time
 # The special 'all' value will enable all profiles.
 class Topics():
     CIRCULAR_CHECK = 'circ-dep-check'
-    SORT_DEPENDENCIES = 'sort-deps'
     LOAD_CONTEXT = 'load-context'
     LOAD_PROJECT = 'load-project'
     LOAD_PIPELINE = 'load-pipeline'