You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@buildstream.apache.org by gi...@apache.org on 2020/12/29 13:10:19 UTC

[buildstream] branch jmac/cas_to_cas_oct created (now 711c984)

This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a change to branch jmac/cas_to_cas_oct
in repository https://gitbox.apache.org/repos/asf/buildstream.git.


      at 711c984  Rearrange comment

This branch includes the following new commits:

     new 789251a  Add code necessary to do cas-to-cas import
     new 010565d  Add a tool to show differences in two CAS directories
     new 957539a  Correct deleting and overwriting cases
     new e1b047e  Fix 'remove_item'->delete_entry
     new a8c7639  casbaseddirectory: Various fixes.
     new 74650bb  Virtual directory test: Expand random testing to 6 roots
     new 241f8cc  CASBasedDirectory: Do not sort the input file list!
     new 4a9924b  CASBasedDirectory: Import '.'
     new d5305d5  Don't forcibly create directories in _resolve in all cases
     new 6839348  CAS-to-CAS: Now passing all 20x20 tests
     new b320812  Separation of fixed/random tests in virtual_directory_import
     new b3ebec7  hack: remove files which previously blocked directory creation
     new 459f809  Detect infinite symlink loops in resolve()
     new b28b6c9  Make the duplication test optional in cas_based_directory
     new 052da4e  virtual_directory_test.py: More fixed examples and better test names
     new d0a11e7  CasBasedDirectory: Remove some prints
     new 4bb8d31  Make virtual_directory_test do the cas roundtrip test instead of _casbaseddirectory
     new 5878f0e  casbaseddirectory: Remove roundtrip checking code
     new a52da6e  _casbaseddirectory.py: Remove some unnecessary things, label others
     new a82e6d9  casbaseddirectory: Combine all the _resolve functions
     new 711c984  Rearrange comment

The 21 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



[buildstream] 14/21: Make the duplication test optional in cas_based_directory

Posted by gi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch jmac/cas_to_cas_oct
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit b28b6c9edea2c9171c0b8e7f83fcabe81ee93959
Author: Jim MacArthur <ji...@codethink.co.uk>
AuthorDate: Thu Oct 25 16:48:23 2018 +0100

    Make the duplication test optional in cas_based_directory
---
 buildstream/storage/_casbaseddirectory.py | 35 +++++++++++++++----------------
 1 file changed, 17 insertions(+), 18 deletions(-)

diff --git a/buildstream/storage/_casbaseddirectory.py b/buildstream/storage/_casbaseddirectory.py
index bef9acf..7b356a4 100644
--- a/buildstream/storage/_casbaseddirectory.py
+++ b/buildstream/storage/_casbaseddirectory.py
@@ -853,35 +853,34 @@ class CasBasedDirectory(Directory):
         if self.parent:
             self.parent._recalculate_recursing_up(self)
         
-        # Duplicate the current directory
-
+        duplicate_test = False
         
         print("Original CAS before CAS-based import: {}".format(self.show_files_recursive()))
         print("Original CAS hash: {}".format(self.ref.hash))
         duplicate_cas = None
         self._verify_unique()
         if isinstance(external_pathspec, CasBasedDirectory):
-            duplicate_cas = CasBasedDirectory(self.context, ref=copy.copy(self.ref))
-            duplicate_cas._verify_unique()
+            if duplicate_test:
+                duplicate_cas = CasBasedDirectory(self.context, ref=copy.copy(self.ref))
+                duplicate_cas._verify_unique()
+                print("Duplicated CAS before file-based import: {}".format(duplicate_cas.show_files_recursive()))
+                print("Duplicate CAS hash: {}".format(duplicate_cas.ref.hash))
             print("-"*80 + "Performing direct CAS-to-CAS import")
-            print("Duplicated CAS before file-based import: {}".format(duplicate_cas.show_files_recursive()))
-            print("Duplicate CAS hash: {}".format(duplicate_cas.ref.hash))
             result = self._import_cas_into_cas(external_pathspec, files=files)
             self._verify_unique()
             print("Result of cas-to-cas import: {}".format(self.show_files_recursive()))
             print("-"*80 + "Performing round-trip import via file system")
-            with tempfile.TemporaryDirectory(prefix="roundtrip") as tmpdir:
-                external_pathspec.export_files(tmpdir)
-                if files is None:
-                    files = list(list_relative_paths(tmpdir))
-                print("Importing from filesystem: filelist is: {}".format(files))
-                duplicate_cas._import_files_from_directory(tmpdir, files=files)
-                duplicate_cas._recalculate_recursing_down()
-                if duplicate_cas.parent:
-                    duplicate_cas.parent._recalculate_recursing_up(duplicate_cas)
-                print("Result of direct import: {}".format(duplicate_cas.show_files_recursive()))
-               
-
+            if duplicate_test:
+                with tempfile.TemporaryDirectory(prefix="roundtrip") as tmpdir:
+                    external_pathspec.export_files(tmpdir)
+                    if files is None:
+                        files = list(list_relative_paths(tmpdir))
+                    print("Importing from filesystem: filelist is: {}".format(files))
+                    duplicate_cas._import_files_from_directory(tmpdir, files=files)
+                    duplicate_cas._recalculate_recursing_down()
+                    if duplicate_cas.parent:
+                        duplicate_cas.parent._recalculate_recursing_up(duplicate_cas)
+                    print("Result of direct import: {}".format(duplicate_cas.show_files_recursive()))
         else:
             print("-"*80 + "Performing initial import")
             if isinstance(external_pathspec, FileBasedDirectory):


[buildstream] 03/21: Correct deleting and overwriting cases

Posted by gi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch jmac/cas_to_cas_oct
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit 957539a0efb4cc0e2b9917bec3f13c5b2ef823c5
Author: Jim MacArthur <ji...@codethink.co.uk>
AuthorDate: Tue Oct 23 16:16:18 2018 +0100

    Correct deleting and overwriting cases
---
 buildstream/storage/_casbaseddirectory.py | 187 +++++++++++++++++++++---------
 1 file changed, 134 insertions(+), 53 deletions(-)

diff --git a/buildstream/storage/_casbaseddirectory.py b/buildstream/storage/_casbaseddirectory.py
index 3a0364f..f293472 100644
--- a/buildstream/storage/_casbaseddirectory.py
+++ b/buildstream/storage/_casbaseddirectory.py
@@ -40,6 +40,8 @@ from ._filebaseddirectory import FileBasedDirectory
 from ..utils import FileListResult, safe_copy, list_relative_paths, _relative_symlink_target
 from .._artifactcache.cascache import CASCache
 
+import copy # Temporary
+import operator
 
 class IndexEntry():
     """ Used in our index of names to objects to store the 'modified' flag
@@ -84,7 +86,9 @@ class CasBasedDirectory(Directory):
         if ref:
             with open(self.cas_cache.objpath(ref), 'rb') as f:
                 self.pb2_directory.ParseFromString(f.read())
-
+                print("Opening ref {} and parsed into directory containing: {} {} {}.".format(ref.hash, [d.name for d in self.pb2_directory.directories],
+                                                                                        [d.name for d in self.pb2_directory.symlinks],
+                                                                                        [d.name for d in self.pb2_directory.files]))
         self.ref = ref
         self.index = OrderedDict()
         self.parent = parent
@@ -223,11 +227,27 @@ class CasBasedDirectory(Directory):
         symlinknode.target = os.readlink(os.path.join(basename, filename))
         self.index[filename] = IndexEntry(symlinknode, modified=(existing_link is not None))
 
+    def _add_new_link_direct(self, name, target):
+        existing_link = self._find_pb2_entry(name)
+        if existing_link:
+            symlinknode = existing_link
+        else:
+            symlinknode = self.pb2_directory.symlinks.add()
+        assert(isinstance(symlinknode, remote_execution_pb2.SymlinkNode))
+        symlinknode.name = name
+        # A symlink node has no digest.
+        symlinknode.target = target
+        self.index[name] = IndexEntry(symlinknode, modified=(existing_link is not None))
+
+        
     def delete_entry(self, name):
         for collection in [self.pb2_directory.files, self.pb2_directory.symlinks, self.pb2_directory.directories]:
-            if name in collection:
-                collection.remove(name)
+            for thing in collection:
+                if thing.name == name:
+                    print("Removing {} from PB2".format(name))
+                    collection.remove(thing)
         if name in self.index:
+            print("Removing {} from index".format(name))
             del self.index[name]
 
     def descend(self, subdirectory_spec, create=False):
@@ -431,17 +451,21 @@ class CasBasedDirectory(Directory):
             return True
         if (isinstance(existing_entry,
                        (remote_execution_pb2.FileNode, remote_execution_pb2.SymlinkNode))):
+            self.delete_entry(name)
+            print("Processing overwrite of file/symlink {}: Added to overwritten list and deleted".format(name))
             fileListResult.overwritten.append(relative_pathname)
             return True
         elif isinstance(existing_entry, remote_execution_pb2.DirectoryNode):
             # If 'name' maps to a DirectoryNode, then there must be an entry in index
             # pointing to another Directory.
             if self.index[name].buildstream_object.is_empty():
+                print("Processing overwrite of directory: Removing original")
                 self.delete_entry(name)
                 fileListResult.overwritten.append(relative_pathname)
                 return True
             else:
                 # We can't overwrite a non-empty directory, so we just ignore it.
+                print("Processing overwrite of non-empty directory: Ignoring overwrite")
                 fileListResult.ignored.append(relative_pathname)
                 return False
         assert False, ("Entry '{}' is not a recognised file/link/directory and not None; it is {}"
@@ -465,6 +489,9 @@ class CasBasedDirectory(Directory):
         """ Imports files from a traditional directory """
         result = FileListResult()
         for entry in sorted(files):
+            print("Importing {} from file system".format(entry))
+            print("...Order of elements was {}".format(", ".join(self.index.keys())))
+
             split_path = entry.split(os.path.sep)
             # The actual file on the FS we're importing
             import_file = os.path.join(source_directory, entry)
@@ -489,6 +516,8 @@ class CasBasedDirectory(Directory):
                 if self._check_replacement(entry, path_prefix, result):
                     self._add_new_file(source_directory, entry)
                     result.files_written.append(relative_pathname)
+            print("...Order of elements is now {}".format(", ".join(self.index.keys())))
+
         return result
 
 
@@ -545,6 +574,17 @@ class CasBasedDirectory(Directory):
         x = self._resolve_symlink(symlink_node)
         return isinstance(x, CasBasedDirectory)
 
+    def _verify_unique(self):
+        # Verifies that there are no duplicate names in this directory or subdirectories.
+        names = []
+        for entrylist in [self.pb2_directory.files, self.pb2_directory.directories, self.pb2_directory.symlinks]:
+            for e in entrylist:
+                if e.name in names:
+                    raise VirtualDirectoryError("Duplicate entry for name {} found".format(e.name))
+                names.append(e.name)
+        for d in self.pb2_directory.directories:
+            self.index[d.name].buildstream_object._verify_unique()
+    
     def _partial_import_cas_into_cas(self, source_directory, files, path_prefix="", file_list_required=True):
         """ Import only the files and symlinks listed in 'files' from source_directory to this one.
         Args:
@@ -553,7 +593,7 @@ class CasBasedDirectory(Directory):
            path_prefix (str): Prefix used to add entries to the file list result.
            file_list_required: Whether to update the file list while processing.
         """
-        print("Beginning partial import of {} into {}".format(source_directory, self))
+        print("Beginning partial import of {} into {}. Files are: >{}<".format(source_directory, self, ", ".join(files)))
         result = FileListResult()
         processed_directories = set()
         for f in files:
@@ -581,17 +621,24 @@ class CasBasedDirectory(Directory):
                 self.create_directory(f)
             else:
                 # We're importing a file or symlink - replace anything with the same name.
-                self._check_replacement(f, path_prefix, result)
-                item = source_directory.index[f].pb_object
-                if isinstance(item, remote_execution_pb2.FileNode):
-                    filenode = self.pb2_directory.files.add(digest=item.digest, name=f,
-                                                            is_executable=item.is_executable)
-                    self.index[f] = IndexEntry(filenode, modified=(fullname in result.overwritten))
-                else:
-                    assert(isinstance(item, remote_execution_pb2.SymlinkNode))
-                    symlinknode = self.pb2_directory.symlinks.add(name=f, target=item.target)
-                    # A symlink node has no digest.
-                    self.index[f] = IndexEntry(symlinknode, modified=(fullname in result.overwritten))
+                print("Import of file/symlink {} into this directory. Removing anything existing...".format(f))
+                print("   ... ordering of nodes in this dir was: {}".format(self.index.keys()))
+                print("   ... symlinks were {}".format([x.name for x in self.pb2_directory.symlinks]))
+                importable = self._check_replacement(f, path_prefix, result)
+                if importable:
+                    print("   ... after replacement of '{}', symlinks are now {}".format(f, [x.name for x in self.pb2_directory.symlinks]))
+                    item = source_directory.index[f].pb_object
+                    if isinstance(item, remote_execution_pb2.FileNode):
+                        print("   ... importing file")
+                        filenode = self.pb2_directory.files.add(digest=item.digest, name=f,
+                                                                is_executable=item.is_executable)
+                        self.index[f] = IndexEntry(filenode, modified=(fullname in result.overwritten))
+                    else:
+                        print("   ... importing symlink")
+                        assert(isinstance(item, remote_execution_pb2.SymlinkNode))
+                        self._add_new_link_direct(name=f, target=item.target)
+                        print("   ... symlinks are now {}".format([x.name for x in self.pb2_directory.symlinks]))
+                    print("   ... ordering of nodes in this dir is now: {}".format(self.index.keys()))
         return result
 
     def transfer_node_contents(destination, source):
@@ -637,47 +684,57 @@ class CasBasedDirectory(Directory):
         """
         if files is None:
             #return self._full_import_cas_into_cas(source_directory, can_hardlink=True)
-            files = source_directory.list_relative_paths()
+            files = list(source_directory.list_relative_paths())
             print("Extracted all files from source directory '{}': {}".format(source_directory, files))
-        return self._partial_import_cas_into_cas(source_directory, files)
+        return self._partial_import_cas_into_cas(source_directory, list(files))
 
     def showdiff(self, other):
         print("Diffing {} and {}:".format(self, other))
-        l1 = list(self.index.items())
-        l2 = list(other.index.items())
-        for (key, value) in l1:
-            if len(l2) == 0:
-                print("'Other' is short: no item to correspond to '{}' in first.".format(key))
-                return
-            (key2, value2) = l2.pop(0)
-            if key != key2:
-                print("Mismatch: item named {} in first, named {} in second".format(key, key2))
-                return
-            if type(value.pb_object) != type(value2.pb_object):
-                print("Mismatch: item named {}'s pb_object is a {} in first and a {} in second".format(key, type(value.pb_object), type(value2.pb_object)))
-                return
-            if type(value.buildstream_object) != type(value2.buildstream_object):
-                print("Mismatch: item named {}'s buildstream_object is a {} in first and a {} in second".format(key, type(value.buildstream_object), type(value2.buildstream_object)))
-                return
-            print("Inspecting {} of type {}".format(key, type(value.pb_object)))
-            if type(value.pb_object) == remote_execution_pb2.DirectoryNode:
-                # It's a directory, follow it
-                self.descend(key).showdiff(other.descend(key))
-            elif type(value.pb_object) == remote_execution_pb2.SymlinkNode:
-                target1 = value.pb_object.target
-                target2 = value2.pb_object.target
-                if target1 != target2:
-                    print("Symlink named {}: targets do not match. {} in the first, {} in the second".format(key, target1, target2))
-            elif type(value.pb_object) == remote_execution_pb2.FileNode:
-                if value.pb_object.digest != value2.pb_object.digest:
-                    print("File named {}: digests do not match. {} in the first, {} in the second".format(key, value.pb_object.digest, value2.pb_object.digest))
-        if len(l2) != 0:
-            print("'Other' is long: it contains extra items called: {}".format(", ".join([i[0] for i in l2])))
-            return
+
+        def compare_list(l1, l2):
+            item2 = None
+            index = 0
+            print("Comparing lists: {} vs {}".format([d.name for d in l1], [d.name for d in l2]))
+            for item1 in l1:
+                if index>=len(l2):
+                    print("l2 is short: no item to correspond to '{}' in l1.".format(item1.name))
+                    return False
+                item2 = l2[index]
+                if item1.name != item2.name:
+                    print("Items do not match: {} in l1, {} in l2".format(item1.name, item2.name))
+                    return False
+                index += 1
+            if index != len(l2):
+                print("l2 is long: Has extra items {}".format(l2[index:]))
+                return False
+            return True
+
+        def compare_pb2_directories(d1, d2):
+            result = (compare_list(d1.directories, d2.directories)
+                    and compare_list(d1.symlinks, d2.symlinks)
+                    and compare_list(d1.files, d2.files))
+            return result
+                        
+        if not compare_pb2_directories(self.pb2_directory, other.pb2_directory):
+            return False
+
+        for d in self.pb2_directory.directories:
+            self.index[d.name].buildstream_object.showdiff(other.index[d.name].buildstream_object)
         print("No differences found in {}".format(self))
               
+    def show_files_recursive(self):
+        elems = []
+        for (k,v) in self.index.items():
+            if type(v.pb_object) == remote_execution_pb2.DirectoryNode:
+                elems.append("{}=[{}]".format(k, v.buildstream_object.show_files_recursive()))
+            elif type(v.pb_object) == remote_execution_pb2.SymlinkNode:
+                elems.append("{}(s)".format(k))
+            elif type(v.pb_object) == remote_execution_pb2.FileNode:
+                elems.append("{}(f)".format(k))
+            else:
+                elems.append("{}(?)".format(k))
+        return " ".join(elems)
         
-    
     def import_files(self, external_pathspec, *, files=None,
                      report_written=True, update_utimes=False,
                      can_link=False):
@@ -700,12 +757,30 @@ class CasBasedDirectory(Directory):
         can_link (bool): Ignored, since hard links do not have any meaning within CAS.
         """
 
+        print("Directory before import: {}".format(self.show_files_recursive()))
+
+        # Sync self
+        self._recalculate_recursing_down()
+        if self.parent:
+            self.parent._recalculate_recursing_up(self)
+        
+        # Duplicate the current directory
+
+        
+        print("Original CAS before CAS-based import: {}".format(self.show_files_recursive()))
+        print("Original CAS hash: {}".format(self.ref.hash))
         duplicate_cas = None
+        self._verify_unique()
         if isinstance(external_pathspec, CasBasedDirectory):
+            duplicate_cas = CasBasedDirectory(self.context, ref=copy.copy(self.ref))
+            duplicate_cas._verify_unique()
+            print("-"*80 + "Performing direct CAS-to-CAS import")
+            print("Duplicated CAS before file-based import: {}".format(duplicate_cas.show_files_recursive()))
+            print("Duplicate CAS hash: {}".format(duplicate_cas.ref.hash))
             result = self._import_cas_into_cas(external_pathspec, files=files)
-
-            # Duplicate the current directory and do an import that way.
-            duplicate_cas = CasBasedDirectory(self.context, ref=self.ref)
+            self._verify_unique()
+            print("Result of cas-to-cas import: {}".format(self.show_files_recursive()))
+            print("-"*80 + "Performing round-trip import via file system")
             with tempfile.TemporaryDirectory(prefix="roundtrip") as tmpdir:
                 external_pathspec.export_files(tmpdir)
                 if files is None:
@@ -713,8 +788,12 @@ class CasBasedDirectory(Directory):
                 duplicate_cas._import_files_from_directory(tmpdir, files=files)
                 duplicate_cas._recalculate_recursing_down()
                 if duplicate_cas.parent:
-                    duplicate_cas.parent._recalculate_recursing_up(self)
+                    duplicate_cas.parent._recalculate_recursing_up(duplicate_cas)
+                print("Result of direct import: {}".format(duplicate_cas.show_files_recursive()))
+               
+
         else:
+            print("-"*80 + "Performing initial import")
             if isinstance(external_pathspec, FileBasedDirectory):
                 source_directory = external_pathspec.get_underlying_directory()
             else:
@@ -799,6 +878,7 @@ class CasBasedDirectory(Directory):
         for entry in self.pb2_directory.symlinks:
             src_name = os.path.join(to_directory, entry.name)
             target_name = entry.target
+            print("Exporting symlink named {}".format(src_name))
             try:
                 os.symlink(target_name, src_name)
             except FileExistsError as e:
@@ -899,6 +979,7 @@ class CasBasedDirectory(Directory):
         for (k, v) in sorted(directory_list):
             print("Yielding from subdirectory name {}".format(k))
             yield from v.buildstream_object.list_relative_paths(relpath=os.path.join(relpath, k))
+        print("List_relative_paths on {} complete".format(relpath))
 
     def recalculate_hash(self):
         """ Recalcuates the hash for this directory and store the results in


[buildstream] 05/21: casbaseddirectory: Various fixes.

Posted by gi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch jmac/cas_to_cas_oct
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit a8c76395365fa51ec0068fa1bc736554cd1bb3f6
Author: Jim MacArthur <ji...@codethink.co.uk>
AuthorDate: Tue Oct 23 17:57:16 2018 +0100

    casbaseddirectory: Various fixes.
---
 buildstream/storage/_casbaseddirectory.py | 44 ++++++++++++++++++++++++++++---
 1 file changed, 41 insertions(+), 3 deletions(-)

diff --git a/buildstream/storage/_casbaseddirectory.py b/buildstream/storage/_casbaseddirectory.py
index cc28fbd..1a078b2 100644
--- a/buildstream/storage/_casbaseddirectory.py
+++ b/buildstream/storage/_casbaseddirectory.py
@@ -288,9 +288,12 @@ class CasBasedDirectory(Directory):
                 return entry.descend(subdirectory_spec[1:], create)
             else:
                 # May be a symlink
+                target = self._resolve(subdirectory_spec[0])
+                if isinstance(target, CasBasedDirectory):
+                    return target
                 error = "Cannot descend into {}, which is a '{}' in the directory {}"
                 raise VirtualDirectoryError(error.format(subdirectory_spec[0],
-                                                         type(entry).__name__,
+                                                         type(self.index[subdirectory_spec[0]].pb_object).__name__,
                                                          self))
         else:
             if create:
@@ -328,6 +331,7 @@ class CasBasedDirectory(Directory):
             return self.index[name].buildstream_object
         # OK then, it's a symlink
         symlink = self._find_pb2_entry(name)
+        assert isinstance(symlink, remote_execution_pb2.SymlinkNode)
         absolute = symlink.target.startswith(CasBasedDirectory._pb2_absolute_path_prefix)
         if absolute:
             root = self.find_root()
@@ -344,6 +348,16 @@ class CasBasedDirectory(Directory):
                 directory = directory.descend(c, create=True)
         return directory
 
+    def _is_followable(self, name):
+        """ Returns true if this is a directory or symlink to a valid directory. """
+        if name not in self.index:
+            return False
+        if isinstance(self.index[name].buildstream_object, Directory):
+            return True
+        target = self._resolve(name)
+        print("Is {} followable? Resolved to {}".format(name, target))
+        return isinstance(target, CasBasedDirectory) or target is None
+
     def _resolve_symlink(self, node):
         """Same as _resolve_symlink_or_directory but takes a SymlinkNode.
         """
@@ -476,7 +490,13 @@ class CasBasedDirectory(Directory):
         """ _import_directory_recursively and _import_files_from_directory will be called alternately
         as a directory tree is descended. """
         if directory_name in self.index:
-            subdir = self._resolve_symlink_or_directory(directory_name)
+            if self._is_followable(directory_name): 
+                subdir = self._resolve_symlink_or_directory(directory_name)
+            else:
+                print("Overwriting unfollowable thing {}".format(directory_name))
+                self.delete_entry(directory_name)
+                subdir = self._add_directory(directory_name)
+                # TODO: Add this to the list of overwritten things.
         else:
             subdir = self._add_directory(directory_name)
         new_path_prefix = os.path.join(path_prefix, directory_name)
@@ -607,6 +627,12 @@ class CasBasedDirectory(Directory):
                 if dirname not in processed_directories:
                     # Now strip off the first directory name and import files recursively.
                     subcomponents = CasBasedDirectory.files_in_subdir(files, dirname)
+                    # We will fail at this point if there is a file or symlink to file called 'dirname'.
+                    if dirname in self.index:
+                        x = self._resolve(dirname)
+                        if isinstance(x, remote_execution_pb2.FileNode):
+                            self.delete_entry(dirname)
+                            result.overwritten.append(f)
                     self.create_directory(dirname)
                     print("Creating destination in {}: {}".format(self, dirname))
                     dest_subdir = self._resolve_symlink_or_directory(dirname)
@@ -688,6 +714,18 @@ class CasBasedDirectory(Directory):
             print("Extracted all files from source directory '{}': {}".format(source_directory, files))
         return self._partial_import_cas_into_cas(source_directory, list(files))
 
+    def _describe(self, thing):
+        # Describes protocol buffer objects
+        if isinstance(thing, remote_execution_pb2.DirectoryNode):
+            return "directory called {}".format(thing.name)
+        elif isinstance(thing, remote_execution_pb2.SymlinkNode):
+            return "symlink called {} pointing to {}".format(thing.name, thing.target)
+        elif isinstance(thing, remote_execution_pb2.FileNode):
+            return "file called {}".format(thing.name)
+        else:
+            return "strange thing"
+        
+    
     def showdiff(self, other):
         print("Diffing {} and {}:".format(self, other))
 
@@ -701,7 +739,7 @@ class CasBasedDirectory(Directory):
                     return False
                 item2 = l2[index]
                 if item1.name != item2.name:
-                    print("Items do not match: {} in l1, {} in l2".format(item1.name, item2.name))
+                    print("Items do not match: {}, a {} in l1, vs {}, a {} in l2".format(item1.name, self._describe(item1), item2.name, self._describe(item2)))
                     return False
                 index += 1
             if index != len(l2):


[buildstream] 10/21: CAS-to-CAS: Now passing all 20x20 tests

Posted by gi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch jmac/cas_to_cas_oct
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit 6839348293b62b17b059a3cd826df6ccf737902f
Author: Jim MacArthur <ji...@codethink.co.uk>
AuthorDate: Wed Oct 24 18:28:39 2018 +0100

    CAS-to-CAS: Now passing all 20x20 tests
---
 buildstream/storage/_casbaseddirectory.py | 70 +++++++++++++++++++++++--------
 tests/storage/virtual_directory_import.py | 14 ++++---
 2 files changed, 61 insertions(+), 23 deletions(-)

diff --git a/buildstream/storage/_casbaseddirectory.py b/buildstream/storage/_casbaseddirectory.py
index f04a5f5..761a55b 100644
--- a/buildstream/storage/_casbaseddirectory.py
+++ b/buildstream/storage/_casbaseddirectory.py
@@ -298,6 +298,9 @@ class CasBasedDirectory(Directory):
         else:
             if create:
                 newdir = self._add_directory(subdirectory_spec[0])
+                print("Created new directory called {} and descending into it".format(subdirectory_spec[0]))
+                #if subdirectory_spec[0] == "broken":
+                #    assert False
                 return newdir.descend(subdirectory_spec[1:], create)
             else:
                 error = "No entry called '{}' found in {}. There are directories called {}."
@@ -358,7 +361,7 @@ class CasBasedDirectory(Directory):
         print("Is {} followable? Resolved to {}".format(name, target))
         return isinstance(target, CasBasedDirectory) or target is None
 
-    def _resolve_symlink(self, node):
+    def _resolve_symlink(self, node, force_create=True):
         """Same as _resolve_symlink_or_directory but takes a SymlinkNode.
         """
 
@@ -377,7 +380,10 @@ class CasBasedDirectory(Directory):
             elif c == "..":
                 directory = directory.parent
             else:
-                directory = directory.descend(c, create=True)
+                if c in directory.index or force_create:
+                    directory = directory.descend(c, create=True)
+                else:
+                    return None
         return directory
 
     
@@ -400,6 +406,7 @@ class CasBasedDirectory(Directory):
             return index_entry.pb_object
         
         assert isinstance(index_entry.pb_object, remote_execution_pb2.SymlinkNode)
+        print("Resolving '{}': This is a symlink node in the current directory.".format(name))
         symlink = index_entry.pb_object
         components = symlink.target.split(CasBasedDirectory._pb2_path_sep)
 
@@ -443,7 +450,7 @@ class CasBasedDirectory(Directory):
                         print("  resolving {}: file/broken link".format(c))
                         if f is None and force_create:
                             print("Creating target of broken link {}".format(c))
-                            return directory.descend(c, create=True)
+                            directory = directory.descend(c, create=True)
                         elif components:
                             # Oh dear. We have components left to resolve, but the one we're trying to resolve points to a file.
                             raise VirtualDirectoryError("Reached a file called {} while trying to resolve a symlink; cannot proceed".format(c))
@@ -453,7 +460,7 @@ class CasBasedDirectory(Directory):
                     print("  resolving {}: Non-existent file; must be from a broken symlink.".format(c))
                     if force_create:
                         print("Creating target of broken link {} (2)".format(c))
-                        return directory.descend(c, create=True)
+                        directory = directory.descend(c, create=True)
                     else:
                         return None
 
@@ -528,6 +535,8 @@ class CasBasedDirectory(Directory):
                 directory_name = split_path[0]
                 # Hand this off to the importer for that subdir. This will only do one file -
                 # a better way would be to hand off all the files in this subdir at once.
+                # failed here because directory_name didn't point to a directory...
+                print("Attempting to import into {} from {}".format(directory_name, source_directory))
                 subdir_result = self._import_directory_recursively(directory_name, source_directory,
                                                                    split_path[1:], path_prefix)
                 result.combine(subdir_result)
@@ -598,7 +607,7 @@ class CasBasedDirectory(Directory):
         return [f[len(dirname):] for f in sorted_files if f.startswith(dirname)]
 
     def symlink_target_is_directory(self, symlink_node):
-        x = self._resolve_symlink(symlink_node)
+        x = self._resolve_symlink(symlink_node, force_create=False)
         return isinstance(x, CasBasedDirectory)
 
     def _verify_unique(self):
@@ -636,14 +645,20 @@ class CasBasedDirectory(Directory):
                     subcomponents = CasBasedDirectory.files_in_subdir(files, dirname)
                     # We will fail at this point if there is a file or symlink to file called 'dirname'.
                     if dirname in self.index:
-                        x = self._resolve(dirname)
+                        x = self._resolve(dirname, force_create=True)
                         if isinstance(x, remote_execution_pb2.FileNode):
                             self.delete_entry(dirname)
                             result.overwritten.append(f)
-                    self.create_directory(dirname)
-                    print("Creating destination in {}: {}".format(self, dirname))
-                    dest_subdir = self._resolve_symlink_or_directory(dirname)
+                            dest_subdir = self.descend(dirname, create=True)
+                        else:
+                            dest_subdir = x
+                    else:
+                        print("Importing {}: {} does not exist in {}, so it is created as a directory".format(f, dirname, self))
+                        
+                        self.create_directory(dirname)
+                        dest_subdir = self._resolve_symlink_or_directory(dirname)
                     src_subdir = source_directory.descend(dirname)
+                    print("Now recursing into {} to continue adding {}".format(src_subdir, f))
                     import_result = dest_subdir._partial_import_cas_into_cas(src_subdir, subcomponents,
                                                                              path_prefix=fullname, file_list_required=file_list_required)
                     result.combine(import_result)
@@ -651,7 +666,19 @@ class CasBasedDirectory(Directory):
             elif isinstance(source_directory.index[f].buildstream_object, CasBasedDirectory):
                 # The thing in the input file list is a directory on its own. In which case, replace any existing file, or symlink to file
                 # with the new, blank directory - if it's neither of those things, or doesn't exist, then just create the dir.
-                self.create_directory(f)
+                if f in self.index:
+                    x = self._resolve(f)
+                    if x is None:
+                        # If we're importing a blank directory, and the target has a broken symlink, then do nothing.
+                        pass
+                    elif isinstance(x, remote_execution_pb2.FileNode):
+                        # Files with the same name, or symlinks to files, get removed.
+                        pass
+                    else:
+                        # There's either a symlink (valid or not) or existing directory with this name, so do nothing.
+                        pass
+                else:
+                    self.create_directory(f)                    
             else:
                 # We're importing a file or symlink - replace anything with the same name.
                 print("Import of file/symlink {} into this directory. Removing anything existing...".format(f))
@@ -736,18 +763,22 @@ class CasBasedDirectory(Directory):
     def showdiff(self, other):
         print("Diffing {} and {}:".format(self, other))
 
-        def compare_list(l1, l2):
+        def compare_list(l1, l2, name):
             item2 = None
             index = 0
-            print("Comparing lists: {} vs {}".format([d.name for d in l1], [d.name for d in l2]))
+            print("Comparing {} lists: {} vs {}".format(name, [d.name for d in l1], [d.name for d in l2]))
             for item1 in l1:
                 if index>=len(l2):
                     print("l2 is short: no item to correspond to '{}' in l1.".format(item1.name))
                     return False
                 item2 = l2[index]
                 if item1.name != item2.name:
-                    print("Items do not match: {}, a {} in l1, vs {}, a {} in l2".format(item1.name, self._describe(item1), item2.name, self._describe(item2)))
+                    print("Items do not match in {} list: {}, a {} in l1, vs {}, a {} in l2".format(name, item1.name, self._describe(item1), item2.name, self._describe(item2)))
                     return False
+                if isinstance(item1, remote_execution_pb2.FileNode):
+                    if item1.is_executable != item2.is_executable:
+                        print("Executable flags do not match on file {}.".format(item1.name))
+                        return False
                 index += 1
             if index != len(l2):
                 print("l2 is long: Has extra items {}".format(l2[index:]))
@@ -755,17 +786,19 @@ class CasBasedDirectory(Directory):
             return True
 
         def compare_pb2_directories(d1, d2):
-            result = (compare_list(d1.directories, d2.directories)
-                    and compare_list(d1.symlinks, d2.symlinks)
-                    and compare_list(d1.files, d2.files))
+            result = (compare_list(d1.directories, d2.directories, "directory")
+                    and compare_list(d1.symlinks, d2.symlinks, "symlink")
+                    and compare_list(d1.files, d2.files, "file"))
             return result
                         
         if not compare_pb2_directories(self.pb2_directory, other.pb2_directory):
             return False
 
         for d in self.pb2_directory.directories:
-            self.index[d.name].buildstream_object.showdiff(other.index[d.name].buildstream_object)
+            if not self.index[d.name].buildstream_object.showdiff(other.index[d.name].buildstream_object):
+                return False
         print("No differences found in {}".format(self))
+        return True
               
     def show_files_recursive(self):
         elems = []
@@ -829,7 +862,8 @@ class CasBasedDirectory(Directory):
             with tempfile.TemporaryDirectory(prefix="roundtrip") as tmpdir:
                 external_pathspec.export_files(tmpdir)
                 if files is None:
-                    files = list_relative_paths(tmpdir)
+                    files = list(list_relative_paths(tmpdir))
+                print("Importing from filesystem: filelist is: {}".format(files))
                 duplicate_cas._import_files_from_directory(tmpdir, files=files)
                 duplicate_cas._recalculate_recursing_down()
                 if duplicate_cas.parent:
diff --git a/tests/storage/virtual_directory_import.py b/tests/storage/virtual_directory_import.py
index 24ef2e3..4754800 100644
--- a/tests/storage/virtual_directory_import.py
+++ b/tests/storage/virtual_directory_import.py
@@ -24,7 +24,7 @@ root_filesets = [
     [('a/b/c/textfile1', 'F', 'This is the replacement textfile 1\n')],
     [('a/b/d', 'D', '')],
     [('a/b/c', 'S', '/a/b/d')],
-    [('a/b/d', 'D', ''), ('a/b/c', 'S', '/a/b/d')],
+    [('a/b/d', 'D', ''), ('a/b/c', 'S', '/a/b/d')]
 ]
 
 empty_hash_ref = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
@@ -52,7 +52,7 @@ def generate_import_roots(directory):
 
 def generate_random_roots(directory):
     random.seed(RANDOM_SEED)
-    for rootno in range(6,13):
+    for rootno in range(6,21):
         rootname = "root{}".format(rootno)
         rootdir = os.path.join(directory, "content", rootname)
         things = []
@@ -63,6 +63,7 @@ def generate_random_roots(directory):
             thingname = "node{}".format(i)
             thing = random.choice(['dir', 'link', 'file'])
             target = os.path.join(rootdir, location, thingname)
+            description = thing
             if thing == 'dir':
                 os.makedirs(target)
                 locations.append(os.path.join(location, thingname))
@@ -73,10 +74,13 @@ def generate_random_roots(directory):
                 # TODO: Make some relative symlinks
                 if random.randint(1, 3) == 1 or len(things) == 0:
                     os.symlink("/broken", target)
+                    description = "symlink pointing to /broken"
                 else:
-                    os.symlink(random.choice(things), target)
+                    symlink_destination = random.choice(things)
+                    os.symlink(symlink_destination, target)
+                    description = "symlink pointing to {}".format(symlink_destination)
             things.append(os.path.join(location, thingname))
-            print("Generated {}/{} ".format(rootdir, things[-1]))
+            print("Generated {}/{}, a {}".format(rootdir, things[-1], description))
 
 
 def file_contents(path):
@@ -143,7 +147,7 @@ def directory_not_empty(path):
     return os.listdir(path)
 
 
-@pytest.mark.parametrize("original,overlay", combinations([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]))
+@pytest.mark.parametrize("original,overlay", combinations(range(1,21)))
 def test_cas_import(cli, tmpdir, original, overlay):
     fake_context = FakeContext()
     fake_context.artifactdir = tmpdir


[buildstream] 15/21: virtual_directory_test.py: More fixed examples and better test names

Posted by gi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch jmac/cas_to_cas_oct
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit 052da4e9c98d8bdfead81c16c6f197909fd88b22
Author: Jim MacArthur <ji...@codethink.co.uk>
AuthorDate: Thu Oct 25 16:48:50 2018 +0100

    virtual_directory_test.py: More fixed examples and better test names
---
 tests/storage/virtual_directory_import.py | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/tests/storage/virtual_directory_import.py b/tests/storage/virtual_directory_import.py
index dfe3580..9207193 100644
--- a/tests/storage/virtual_directory_import.py
+++ b/tests/storage/virtual_directory_import.py
@@ -20,11 +20,17 @@ class FakeContext():
 # 'F' (file), 'S' (symlink) or 'D' (directory) with content being the contents
 # for a file or the destination for a symlink.
 root_filesets = [
+    # Arbitrary test sets
     [('a/b/c/textfile1', 'F', 'This is textfile 1\n')],
     [('a/b/c/textfile1', 'F', 'This is the replacement textfile 1\n')],
     [('a/b/d', 'D', '')],
     [('a/b/c', 'S', '/a/b/d')],
-    [('a/b/d', 'D', ''), ('a/b/c', 'S', '/a/b/d')]
+    [('a/b/d', 'S', '/a/b/c')],
+    [('a/b/d', 'D', ''), ('a/b/c', 'S', '/a/b/d')], 
+    [('a/b/c', 'D', ''), ('a/b/d', 'S', '/a/b/c')], 
+    [('a/b', 'F', 'This is textfile 1\n')],
+    [('a/b/c', 'F', 'This is textfile 1\n')],
+    [('a/b/c', 'D', '')]
 ]
 
 empty_hash_ref = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
@@ -178,8 +184,9 @@ def _import_test(tmpdir, original, overlay, generator_function, verify_contents=
                     assert os.path.islink(realpath)
                     assert os.readlink(realpath) == content
             elif typename == 'D':
-                # Note that isdir accepts symlinks to dirs, so a symlink to a dir is acceptable.
-                assert os.path.isdir(realpath)
+                # We can't do any more tests than this because it depends on things present in the original. Blank directories
+                # here will be ignored and the original left in place.
+                assert os.path.lexists(realpath)
 
     # Now do the same thing with filebaseddirectories and check the contents match
     d3 = create_new_casdir(original, fake_context, tmpdir)
@@ -187,14 +194,15 @@ def _import_test(tmpdir, original, overlay, generator_function, verify_contents=
     d3.import_files(d2)
     assert d.ref.hash == d3.ref.hash
 
-@pytest.mark.parametrize("original,overlay", combinations(range(1,6)))
+@pytest.mark.parametrize("original,overlay", combinations(range(1,len(root_filesets)+1)))
 def test_fixed_cas_import(cli, tmpdir, original, overlay):
     _import_test(tmpdir, original, overlay, generate_import_roots, verify_contents=True)
 
 @pytest.mark.parametrize("original,overlay", combinations(range(1,11)))
-def test_random_cas_import(cli, tmpdir, original, overlay):
+def test_random_cas_import_fast(cli, tmpdir, original, overlay):
     _import_test(tmpdir, original, overlay, generate_random_root, verify_contents=False)
 
+    
 def _listing_test(tmpdir, root, generator_function):
     fake_context = FakeContext()
     fake_context.artifactdir = tmpdir


[buildstream] 09/21: Don't forcibly create directories in _resolve in all cases

Posted by gi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch jmac/cas_to_cas_oct
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit d5305d55f359dc02578bb0996871de681bac9b1a
Author: Jim MacArthur <ji...@codethink.co.uk>
AuthorDate: Wed Oct 24 14:41:21 2018 +0100

    Don't forcibly create directories in _resolve in all cases
---
 buildstream/storage/_casbaseddirectory.py | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/buildstream/storage/_casbaseddirectory.py b/buildstream/storage/_casbaseddirectory.py
index a418816..f04a5f5 100644
--- a/buildstream/storage/_casbaseddirectory.py
+++ b/buildstream/storage/_casbaseddirectory.py
@@ -288,7 +288,7 @@ class CasBasedDirectory(Directory):
                 return entry.descend(subdirectory_spec[1:], create)
             else:
                 # May be a symlink
-                target = self._resolve(subdirectory_spec[0])
+                target = self._resolve(subdirectory_spec[0], force_create=create)
                 if isinstance(target, CasBasedDirectory):
                     return target
                 error = "Cannot descend into {}, which is a '{}' in the directory {}"
@@ -381,7 +381,7 @@ class CasBasedDirectory(Directory):
         return directory
 
     
-    def _resolve(self, name, absolute_symlinks_resolve=True):
+    def _resolve(self, name, absolute_symlinks_resolve=True, force_create=False):
         """ Resolves any name to an object. If the name points to a symlink in
         this directory, it returns the thing it points to,
         recursively. Returns a CasBasedDirectory, FileNode or
@@ -441,14 +441,21 @@ class CasBasedDirectory(Directory):
                     else:
                         # This is a file or None (i.e. broken symlink)
                         print("  resolving {}: file/broken link".format(c))
-                        if components:
+                        if f is None and force_create:
+                            print("Creating target of broken link {}".format(c))
+                            return directory.descend(c, create=True)
+                        elif components:
                             # Oh dear. We have components left to resolve, but the one we're trying to resolve points to a file.
                             raise VirtualDirectoryError("Reached a file called {} while trying to resolve a symlink; cannot proceed".format(c))
                         else:
                             return f
                 else:
-                    print("  resolving {}: Broken symlink".format(c))
-                    return None
+                    print("  resolving {}: Non-existent file; must be from a broken symlink.".format(c))
+                    if force_create:
+                        print("Creating target of broken link {} (2)".format(c))
+                        return directory.descend(c, create=True)
+                    else:
+                        return None
 
         # Shouldn't get here.
         


[buildstream] 16/21: CasBasedDirectory: Remove some prints

Posted by gi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch jmac/cas_to_cas_oct
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit d0a11e7f7068553c3ae46cc087d5a3d2b88713d7
Author: Jim MacArthur <ji...@codethink.co.uk>
AuthorDate: Thu Oct 25 17:25:51 2018 +0100

    CasBasedDirectory: Remove some prints
---
 buildstream/storage/_casbaseddirectory.py | 14 --------------
 1 file changed, 14 deletions(-)

diff --git a/buildstream/storage/_casbaseddirectory.py b/buildstream/storage/_casbaseddirectory.py
index 7b356a4..c69777b 100644
--- a/buildstream/storage/_casbaseddirectory.py
+++ b/buildstream/storage/_casbaseddirectory.py
@@ -244,10 +244,8 @@ class CasBasedDirectory(Directory):
         for collection in [self.pb2_directory.files, self.pb2_directory.symlinks, self.pb2_directory.directories]:
             for thing in collection:
                 if thing.name == name:
-                    print("Removing {} from PB2".format(name))
                     collection.remove(thing)
         if name in self.index:
-            print("Removing {} from index".format(name))
             del self.index[name]
 
     def descend(self, subdirectory_spec, create=False):
@@ -534,9 +532,6 @@ class CasBasedDirectory(Directory):
         """ Imports files from a traditional directory """
         result = FileListResult()
         for entry in files:
-            print("Importing {} from file system".format(entry))
-            print("...Order of elements was {}".format(", ".join(self.index.keys())))
-
             split_path = entry.split(os.path.sep)
             # The actual file on the FS we're importing
             import_file = os.path.join(source_directory, entry)
@@ -547,7 +542,6 @@ class CasBasedDirectory(Directory):
                 # Hand this off to the importer for that subdir. This will only do one file -
                 # a better way would be to hand off all the files in this subdir at once.
                 # failed here because directory_name didn't point to a directory...
-                print("Attempting to import into {} from {}".format(directory_name, source_directory))
                 subdir_result = self._import_directory_recursively(directory_name, source_directory,
                                                                    split_path[1:], path_prefix)
                 result.combine(subdir_result)
@@ -563,8 +557,6 @@ class CasBasedDirectory(Directory):
                 if self._check_replacement(entry, path_prefix, result):
                     self._add_new_file(source_directory, entry)
                     result.files_written.append(relative_pathname)
-            print("...Order of elements is now {}".format(", ".join(self.index.keys())))
-
         return result
 
 
@@ -693,23 +685,17 @@ class CasBasedDirectory(Directory):
             else:
                 # We're importing a file or symlink - replace anything with the same name.
                 print("Import of file/symlink {} into this directory. Removing anything existing...".format(f))
-                print("   ... ordering of nodes in this dir was: {}".format(self.index.keys()))
-                print("   ... symlinks were {}".format([x.name for x in self.pb2_directory.symlinks]))
                 importable = self._check_replacement(f, path_prefix, result)
                 if importable:
                     print("   ... after replacement of '{}', symlinks are now {}".format(f, [x.name for x in self.pb2_directory.symlinks]))
                     item = source_directory.index[f].pb_object
                     if isinstance(item, remote_execution_pb2.FileNode):
-                        print("   ... importing file")
                         filenode = self.pb2_directory.files.add(digest=item.digest, name=f,
                                                                 is_executable=item.is_executable)
                         self.index[f] = IndexEntry(filenode, modified=(fullname in result.overwritten))
                     else:
-                        print("   ... importing symlink")
                         assert(isinstance(item, remote_execution_pb2.SymlinkNode))
                         self._add_new_link_direct(name=f, target=item.target)
-                        print("   ... symlinks are now {}".format([x.name for x in self.pb2_directory.symlinks]))
-                    print("   ... ordering of nodes in this dir is now: {}".format(self.index.keys()))
         return result
 
     def transfer_node_contents(destination, source):


[buildstream] 12/21: hack: remove files which previously blocked directory creation

Posted by gi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch jmac/cas_to_cas_oct
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit b3ebec7c37681884fdab78d9eed3a191a9f5d859
Author: Jim MacArthur <ji...@codethink.co.uk>
AuthorDate: Thu Oct 25 15:12:05 2018 +0100

    hack: remove files which previously blocked directory creation
---
 buildstream/storage/_casbaseddirectory.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/buildstream/storage/_casbaseddirectory.py b/buildstream/storage/_casbaseddirectory.py
index 761a55b..a5db96e 100644
--- a/buildstream/storage/_casbaseddirectory.py
+++ b/buildstream/storage/_casbaseddirectory.py
@@ -453,7 +453,10 @@ class CasBasedDirectory(Directory):
                             directory = directory.descend(c, create=True)
                         elif components:
                             # Oh dear. We have components left to resolve, but the one we're trying to resolve points to a file.
-                            raise VirtualDirectoryError("Reached a file called {} while trying to resolve a symlink; cannot proceed".format(c))
+                            print("Trying to resolve {}, but found {} was a file.".format(symlink.target, c))
+                            self.delete_entry(c)
+                            directory = directory.descend(c, create=True)
+                            #raise VirtualDirectoryError("Reached a file called {} while trying to resolve a symlink; cannot proceed".format(c))
                         else:
                             return f
                 else:


[buildstream] 08/21: CASBasedDirectory: Import '.'

Posted by gi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch jmac/cas_to_cas_oct
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit 4a9924b5f974751c3c18031068e8a4211b029683
Author: Jim MacArthur <ji...@codethink.co.uk>
AuthorDate: Tue Oct 23 17:58:07 2018 +0100

    CASBasedDirectory: Import '.'
---
 buildstream/storage/_casbaseddirectory.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/buildstream/storage/_casbaseddirectory.py b/buildstream/storage/_casbaseddirectory.py
index e44be6b..a418816 100644
--- a/buildstream/storage/_casbaseddirectory.py
+++ b/buildstream/storage/_casbaseddirectory.py
@@ -617,7 +617,7 @@ class CasBasedDirectory(Directory):
         result = FileListResult()
         processed_directories = set()
         for f in files:
-            if f == ".": continue
+            #if f == ".": continue
             fullname = os.path.join(path_prefix, f)
             components = f.split(os.path.sep)
             if len(components)>1:


[buildstream] 20/21: casbaseddirectory: Combine all the _resolve functions

Posted by gi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch jmac/cas_to_cas_oct
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit a82e6d9541c404366c2866c2f2ef57497a71a567
Author: Jim MacArthur <ji...@codethink.co.uk>
AuthorDate: Thu Oct 25 18:02:12 2018 +0100

    casbaseddirectory: Combine all the _resolve functions
---
 buildstream/storage/_casbaseddirectory.py | 58 ++++---------------------------
 1 file changed, 7 insertions(+), 51 deletions(-)

diff --git a/buildstream/storage/_casbaseddirectory.py b/buildstream/storage/_casbaseddirectory.py
index 6f526ea..a99b486 100644
--- a/buildstream/storage/_casbaseddirectory.py
+++ b/buildstream/storage/_casbaseddirectory.py
@@ -328,26 +328,7 @@ class CasBasedDirectory(Directory):
         as a directory as long as it's within this directory tree.
         """
 
-        if isinstance(self.index[name].buildstream_object, Directory):
-            return self.index[name].buildstream_object
-        # OK then, it's a symlink
-        symlink = self._find_pb2_entry(name)
-        assert isinstance(symlink, remote_execution_pb2.SymlinkNode)
-        absolute = symlink.target.startswith(CasBasedDirectory._pb2_absolute_path_prefix)
-        if absolute:
-            root = self.find_root()
-        else:
-            root = self
-        directory = root
-        components = symlink.target.split(CasBasedDirectory._pb2_path_sep)
-        for c in components:
-            if c == ".":
-                pass
-            elif c == "..":
-                directory = directory.parent
-            else:
-                directory = directory.descend(c, create=True)
-        return directory
+        return self._resolve(name, force_create=True)
 
     def _is_followable(self, name):
         """ Returns true if this is a directory or symlink to a valid directory. """
@@ -362,35 +343,16 @@ class CasBasedDirectory(Directory):
     def _resolve_symlink(self, node, force_create=True):
         """Same as _resolve_symlink_or_directory but takes a SymlinkNode.
         """
-
-        # OK then, it's a symlink
-        symlink = node
-        absolute = symlink.target.startswith(CasBasedDirectory._pb2_absolute_path_prefix)
-        if absolute:
-            root = self.find_root()
-        else:
-            root = self
-        directory = root
-        components = symlink.target.split(CasBasedDirectory._pb2_path_sep)
-        for c in components:
-            if c == ".":
-                pass
-            elif c == "..":
-                directory = directory.parent
-            else:
-                if c in directory.index or force_create:
-                    directory = directory.descend(c, create=True)
-                else:
-                    return None
-        return directory
-
+        return self._resolve(node.name, force_create=True)
     
     def _resolve(self, name, absolute_symlinks_resolve=True, force_create=False, first_seen_object = None):
         """ Resolves any name to an object. If the name points to a symlink in
         this directory, it returns the thing it points to,
         recursively. Returns a CasBasedDirectory, FileNode or
-        None. Never creates a directory or otherwise alters the
-        directory.
+        None.
+
+        If force_create is on, will attempt to create directories to make symlinks and directories resolve.
+        If force_create is off, this will never alter this directory.
 
         """
         # First check if it's a normal object and return that
@@ -437,7 +399,6 @@ class CasBasedDirectory(Directory):
             if c == ".":
                 pass
             elif c == "..":
-                print("  resolving {}: up-dir".format(c))
                 # If directory.parent *is* None, this is an attempt to access
                 # '..' from the root, which is valid under POSIX; it just
                 # returns the root.                
@@ -449,15 +410,12 @@ class CasBasedDirectory(Directory):
                     # Ultimately f must now be a file or directory
                     if isinstance(f, CasBasedDirectory):
                         directory = f
-                        print("  resolving {}: dir".format(c))
 
                     else:
                         # This is a file or None (i.e. broken symlink)
-                        print("  resolving {}: file/broken link".format(c))
                         if f is None and force_create:
-                            print("Creating target of broken link {}".format(c))
                             directory = directory.descend(c, create=True)
-                        elif components:
+                        elif components and force_create:
                             # Oh dear. We have components left to resolve, but the one we're trying to resolve points to a file.
                             print("Trying to resolve {}, but found {} was a file.".format(symlink.target, c))
                             self.delete_entry(c)
@@ -466,9 +424,7 @@ class CasBasedDirectory(Directory):
                         else:
                             return f
                 else:
-                    print("  resolving {}: Non-existent file; must be from a broken symlink.".format(c))
                     if force_create:
-                        print("Creating target of broken link {} (2)".format(c))
                         directory = directory.descend(c, create=True)
                     else:
                         return None


[buildstream] 11/21: Separation of fixed/random tests in virtual_directory_import

Posted by gi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch jmac/cas_to_cas_oct
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit b320812d67d0095586b2006043cdb25ff46f8b30
Author: Jim MacArthur <ji...@codethink.co.uk>
AuthorDate: Wed Oct 24 19:01:36 2018 +0100

    Separation of fixed/random tests in virtual_directory_import
---
 tests/storage/virtual_directory_import.py | 131 ++++++++++++++++--------------
 1 file changed, 72 insertions(+), 59 deletions(-)

diff --git a/tests/storage/virtual_directory_import.py b/tests/storage/virtual_directory_import.py
index 4754800..dfe3580 100644
--- a/tests/storage/virtual_directory_import.py
+++ b/tests/storage/virtual_directory_import.py
@@ -31,56 +31,54 @@ empty_hash_ref = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b8
 RANDOM_SEED = 69105
 
 
-def generate_import_roots(directory):
-    for fileset in range(1, len(root_filesets) + 1):
-        rootname = "root{}".format(fileset)
-        rootdir = os.path.join(directory, "content", rootname)
-
-        for (path, typesymbol, content) in root_filesets[fileset - 1]:
-            if typesymbol == 'F':
-                (dirnames, filename) = os.path.split(path)
-                os.makedirs(os.path.join(rootdir, dirnames), exist_ok=True)
-                with open(os.path.join(rootdir, dirnames, filename), "wt") as f:
-                    f.write(content)
-            elif typesymbol == 'D':
-                os.makedirs(os.path.join(rootdir, path), exist_ok=True)
-            elif typesymbol == 'S':
-                (dirnames, filename) = os.path.split(path)
-                os.makedirs(os.path.join(rootdir, dirnames), exist_ok=True)
-                os.symlink(content, os.path.join(rootdir, path))
-
-
-def generate_random_roots(directory):
-    random.seed(RANDOM_SEED)
-    for rootno in range(6,21):
-        rootname = "root{}".format(rootno)
-        rootdir = os.path.join(directory, "content", rootname)
-        things = []
-        locations = ['.']
-        os.makedirs(rootdir)
-        for i in range(0, 100):
-            location = random.choice(locations)
-            thingname = "node{}".format(i)
-            thing = random.choice(['dir', 'link', 'file'])
-            target = os.path.join(rootdir, location, thingname)
-            description = thing
-            if thing == 'dir':
-                os.makedirs(target)
-                locations.append(os.path.join(location, thingname))
-            elif thing == 'file':
-                with open(target, "wt") as f:
-                    f.write("This is node {}\n".format(i))
-            elif thing == 'link':
-                # TODO: Make some relative symlinks
-                if random.randint(1, 3) == 1 or len(things) == 0:
-                    os.symlink("/broken", target)
-                    description = "symlink pointing to /broken"
-                else:
-                    symlink_destination = random.choice(things)
-                    os.symlink(symlink_destination, target)
-                    description = "symlink pointing to {}".format(symlink_destination)
-            things.append(os.path.join(location, thingname))
-            print("Generated {}/{}, a {}".format(rootdir, things[-1], description))
+def generate_import_roots(rootno, directory):
+    rootname = "root{}".format(rootno)
+    rootdir = os.path.join(directory, "content", rootname)
+
+    for (path, typesymbol, content) in root_filesets[rootno - 1]:
+        if typesymbol == 'F':
+            (dirnames, filename) = os.path.split(path)
+            os.makedirs(os.path.join(rootdir, dirnames), exist_ok=True)
+            with open(os.path.join(rootdir, dirnames, filename), "wt") as f:
+                f.write(content)
+        elif typesymbol == 'D':
+            os.makedirs(os.path.join(rootdir, path), exist_ok=True)
+        elif typesymbol == 'S':
+            (dirnames, filename) = os.path.split(path)
+            os.makedirs(os.path.join(rootdir, dirnames), exist_ok=True)
+            os.symlink(content, os.path.join(rootdir, path))
+
+
+def generate_random_root(rootno, directory):
+    random.seed(RANDOM_SEED+rootno)
+    rootname = "root{}".format(rootno)
+    rootdir = os.path.join(directory, "content", rootname)
+    things = []
+    locations = ['.']
+    os.makedirs(rootdir)
+    for i in range(0, 100):
+        location = random.choice(locations)
+        thingname = "node{}".format(i)
+        thing = random.choice(['dir', 'link', 'file'])
+        target = os.path.join(rootdir, location, thingname)
+        description = thing
+        if thing == 'dir':
+            os.makedirs(target)
+            locations.append(os.path.join(location, thingname))
+        elif thing == 'file':
+            with open(target, "wt") as f:
+                f.write("This is node {}\n".format(i))
+        elif thing == 'link':
+            # TODO: Make some relative symlinks
+            if random.randint(1, 3) == 1 or len(things) == 0:
+                os.symlink("/broken", target)
+                description = "symlink pointing to /broken"
+            else:
+                symlink_destination = random.choice(things)
+                os.symlink(symlink_destination, target)
+                description = "symlink pointing to {}".format(symlink_destination)
+        things.append(os.path.join(location, thingname))
+        print("Generated {}/{}, a {}".format(rootdir, things[-1], description))
 
 
 def file_contents(path):
@@ -147,20 +145,21 @@ def directory_not_empty(path):
     return os.listdir(path)
 
 
-@pytest.mark.parametrize("original,overlay", combinations(range(1,21)))
-def test_cas_import(cli, tmpdir, original, overlay):
+def _import_test(tmpdir, original, overlay, generator_function, verify_contents=False):
     fake_context = FakeContext()
     fake_context.artifactdir = tmpdir
     # Create some fake content
-    generate_import_roots(tmpdir)
-    generate_random_roots(tmpdir)
+    generator_function(original, tmpdir)
+    if original != overlay:
+        generator_function(overlay, tmpdir)
+        
     d = create_new_casdir(original, fake_context, tmpdir)
     d2 = create_new_casdir(overlay, fake_context, tmpdir)
     print("Importing dir {} into {}".format(overlay, original))
     d.import_files(d2)
     d.export_files(os.path.join(tmpdir, "output"))
     
-    if overlay < 6:
+    if verify_contents:
         for item in root_filesets[overlay - 1]:
             (path, typename, content) = item
             realpath = resolve_symlinks(path, os.path.join(tmpdir, "output"))
@@ -188,14 +187,19 @@ def test_cas_import(cli, tmpdir, original, overlay):
     d3.import_files(d2)
     assert d.ref.hash == d3.ref.hash
 
+@pytest.mark.parametrize("original,overlay", combinations(range(1,6)))
+def test_fixed_cas_import(cli, tmpdir, original, overlay):
+    _import_test(tmpdir, original, overlay, generate_import_roots, verify_contents=True)
+
+@pytest.mark.parametrize("original,overlay", combinations(range(1,11)))
+def test_random_cas_import(cli, tmpdir, original, overlay):
+    _import_test(tmpdir, original, overlay, generate_random_root, verify_contents=False)
 
-@pytest.mark.parametrize("root", [1, 2, 3, 4, 5, 6])
-def test_directory_listing(cli, tmpdir, root):
+def _listing_test(tmpdir, root, generator_function):
     fake_context = FakeContext()
     fake_context.artifactdir = tmpdir
     # Create some fake content
-    generate_import_roots(tmpdir)
-    generate_random_roots(tmpdir)
+    generator_function(root, tmpdir)
 
     d = create_new_filedir(root, tmpdir)
     filelist = list(d.list_relative_paths())
@@ -208,3 +212,12 @@ def test_directory_listing(cli, tmpdir, root):
     print("filelist for root {} via CasBasedDirectory:".format(root))
     print("{}".format(filelist2))
     assert filelist == filelist2
+    
+
+@pytest.mark.parametrize("root", range(1,11))
+def test_random_directory_listing(cli, tmpdir, root):
+    _listing_test(tmpdir, root, generate_random_root)
+    
+@pytest.mark.parametrize("root", [1, 2, 3, 4, 5])
+def test_fixed_directory_listing(cli, tmpdir, root):
+    _listing_test(tmpdir, root, generate_import_roots)


[buildstream] 06/21: Virtual directory test: Expand random testing to 6 roots

Posted by gi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch jmac/cas_to_cas_oct
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit 74650bb17ba5a68815782777789a4eb0a2c84191
Author: Jim MacArthur <ji...@codethink.co.uk>
AuthorDate: Tue Oct 23 17:57:37 2018 +0100

    Virtual directory test: Expand random testing to 6 roots
---
 tests/storage/virtual_directory_import.py | 97 ++++++++++++++++---------------
 1 file changed, 50 insertions(+), 47 deletions(-)

diff --git a/tests/storage/virtual_directory_import.py b/tests/storage/virtual_directory_import.py
index 47b4935..24ef2e3 100644
--- a/tests/storage/virtual_directory_import.py
+++ b/tests/storage/virtual_directory_import.py
@@ -50,31 +50,33 @@ def generate_import_roots(directory):
                 os.symlink(content, os.path.join(rootdir, path))
 
 
-def generate_random_root(directory):
+def generate_random_roots(directory):
     random.seed(RANDOM_SEED)
-    rootname = "root6"
-    rootdir = os.path.join(directory, "content", rootname)
-    things = []
-    locations = ['.']
-    for i in range(0, 100):
-        location = random.choice(locations)
-        thingname = "node{}".format(i)
-        thing = random.choice(['dir', 'link', 'file'])
-        target = os.path.join(rootdir, location, thingname)
-        if thing == 'dir':
-            os.makedirs(target)
-            locations.append(os.path.join(location, thingname))
-        elif thing == 'file':
-            with open(target, "wt") as f:
-                f.write("This is node {}\n".format(i))
-        elif thing == 'link':
-            # TODO: Make some relative symlinks
-            if random.randint(1, 3) == 1 or len(things) == 0:
-                os.symlink("/broken", target)
-            else:
-                os.symlink(random.choice(things), target)
-        things.append(os.path.join(location, thingname))
-        print("Generated {}/{} ".format(rootdir, things[-1]))
+    for rootno in range(6,13):
+        rootname = "root{}".format(rootno)
+        rootdir = os.path.join(directory, "content", rootname)
+        things = []
+        locations = ['.']
+        os.makedirs(rootdir)
+        for i in range(0, 100):
+            location = random.choice(locations)
+            thingname = "node{}".format(i)
+            thing = random.choice(['dir', 'link', 'file'])
+            target = os.path.join(rootdir, location, thingname)
+            if thing == 'dir':
+                os.makedirs(target)
+                locations.append(os.path.join(location, thingname))
+            elif thing == 'file':
+                with open(target, "wt") as f:
+                    f.write("This is node {}\n".format(i))
+            elif thing == 'link':
+                # TODO: Make some relative symlinks
+                if random.randint(1, 3) == 1 or len(things) == 0:
+                    os.symlink("/broken", target)
+                else:
+                    os.symlink(random.choice(things), target)
+            things.append(os.path.join(location, thingname))
+            print("Generated {}/{} ".format(rootdir, things[-1]))
 
 
 def file_contents(path):
@@ -141,39 +143,40 @@ def directory_not_empty(path):
     return os.listdir(path)
 
 
-@pytest.mark.parametrize("original,overlay", combinations([1, 2, 3, 4, 5]))
+@pytest.mark.parametrize("original,overlay", combinations([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]))
 def test_cas_import(cli, tmpdir, original, overlay):
     fake_context = FakeContext()
     fake_context.artifactdir = tmpdir
     # Create some fake content
     generate_import_roots(tmpdir)
-    generate_random_root(tmpdir)
+    generate_random_roots(tmpdir)
     d = create_new_casdir(original, fake_context, tmpdir)
     d2 = create_new_casdir(overlay, fake_context, tmpdir)
     print("Importing dir {} into {}".format(overlay, original))
     d.import_files(d2)
     d.export_files(os.path.join(tmpdir, "output"))
     
-    for item in root_filesets[overlay - 1]:
-        (path, typename, content) = item
-        realpath = resolve_symlinks(path, os.path.join(tmpdir, "output"))
-        if typename == 'F':
-            if os.path.isdir(realpath) and directory_not_empty(realpath):
-                # The file should not have overwritten the directory in this case.
-                pass
-            else:
-                assert os.path.isfile(realpath), "{} did not exist in the combined virtual directory".format(path)
-                assert file_contents_are(realpath, content)
-        elif typename == 'S':
-            if os.path.isdir(realpath) and directory_not_empty(realpath):
-                # The symlink should not have overwritten the directory in this case.
-                pass
-            else:
-                assert os.path.islink(realpath)
-                assert os.readlink(realpath) == content
-        elif typename == 'D':
-            # Note that isdir accepts symlinks to dirs, so a symlink to a dir is acceptable.
-            assert os.path.isdir(realpath)
+    if overlay < 6:
+        for item in root_filesets[overlay - 1]:
+            (path, typename, content) = item
+            realpath = resolve_symlinks(path, os.path.join(tmpdir, "output"))
+            if typename == 'F':
+                if os.path.isdir(realpath) and directory_not_empty(realpath):
+                    # The file should not have overwritten the directory in this case.
+                    pass
+                else:
+                    assert os.path.isfile(realpath), "{} did not exist in the combined virtual directory".format(path)
+                    assert file_contents_are(realpath, content)
+            elif typename == 'S':
+                if os.path.isdir(realpath) and directory_not_empty(realpath):
+                    # The symlink should not have overwritten the directory in this case.
+                    pass
+                else:
+                    assert os.path.islink(realpath)
+                    assert os.readlink(realpath) == content
+            elif typename == 'D':
+                # Note that isdir accepts symlinks to dirs, so a symlink to a dir is acceptable.
+                assert os.path.isdir(realpath)
 
     # Now do the same thing with filebaseddirectories and check the contents match
     d3 = create_new_casdir(original, fake_context, tmpdir)
@@ -188,7 +191,7 @@ def test_directory_listing(cli, tmpdir, root):
     fake_context.artifactdir = tmpdir
     # Create some fake content
     generate_import_roots(tmpdir)
-    generate_random_root(tmpdir)
+    generate_random_roots(tmpdir)
 
     d = create_new_filedir(root, tmpdir)
     filelist = list(d.list_relative_paths())


[buildstream] 18/21: casbaseddirectory: Remove roundtrip checking code

Posted by gi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch jmac/cas_to_cas_oct
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit 5878f0ec3e9ea7f89cea55d3b8cc3ce31986b3e3
Author: Jim MacArthur <ji...@codethink.co.uk>
AuthorDate: Thu Oct 25 17:32:05 2018 +0100

    casbaseddirectory: Remove roundtrip checking code
---
 buildstream/storage/_casbaseddirectory.py | 28 +---------------------------
 1 file changed, 1 insertion(+), 27 deletions(-)

diff --git a/buildstream/storage/_casbaseddirectory.py b/buildstream/storage/_casbaseddirectory.py
index c69777b..d0776e4 100644
--- a/buildstream/storage/_casbaseddirectory.py
+++ b/buildstream/storage/_casbaseddirectory.py
@@ -834,39 +834,17 @@ class CasBasedDirectory(Directory):
 
         print("Directory before import: {}".format(self.show_files_recursive()))
 
-        # Sync self
+        # Sync self (necessary?)
         self._recalculate_recursing_down()
         if self.parent:
             self.parent._recalculate_recursing_up(self)
         
-        duplicate_test = False
-        
-        print("Original CAS before CAS-based import: {}".format(self.show_files_recursive()))
-        print("Original CAS hash: {}".format(self.ref.hash))
-        duplicate_cas = None
         self._verify_unique()
         if isinstance(external_pathspec, CasBasedDirectory):
-            if duplicate_test:
-                duplicate_cas = CasBasedDirectory(self.context, ref=copy.copy(self.ref))
-                duplicate_cas._verify_unique()
-                print("Duplicated CAS before file-based import: {}".format(duplicate_cas.show_files_recursive()))
-                print("Duplicate CAS hash: {}".format(duplicate_cas.ref.hash))
             print("-"*80 + "Performing direct CAS-to-CAS import")
             result = self._import_cas_into_cas(external_pathspec, files=files)
             self._verify_unique()
             print("Result of cas-to-cas import: {}".format(self.show_files_recursive()))
-            print("-"*80 + "Performing round-trip import via file system")
-            if duplicate_test:
-                with tempfile.TemporaryDirectory(prefix="roundtrip") as tmpdir:
-                    external_pathspec.export_files(tmpdir)
-                    if files is None:
-                        files = list(list_relative_paths(tmpdir))
-                    print("Importing from filesystem: filelist is: {}".format(files))
-                    duplicate_cas._import_files_from_directory(tmpdir, files=files)
-                    duplicate_cas._recalculate_recursing_down()
-                    if duplicate_cas.parent:
-                        duplicate_cas.parent._recalculate_recursing_up(duplicate_cas)
-                    print("Result of direct import: {}".format(duplicate_cas.show_files_recursive()))
         else:
             print("-"*80 + "Performing initial import")
             if isinstance(external_pathspec, FileBasedDirectory):
@@ -890,10 +868,6 @@ class CasBasedDirectory(Directory):
         self._recalculate_recursing_down()
         if self.parent:
             self.parent._recalculate_recursing_up(self)
-        if duplicate_cas:
-            if duplicate_cas.ref.hash != self.ref.hash:
-                self.showdiff(duplicate_cas)
-                raise VirtualDirectoryError("Mismatch between file-imported result {} and cas-to-cas imported result {}.".format(duplicate_cas.ref.hash,self.ref.hash))
 
         return result
 


[buildstream] 19/21: _casbaseddirectory.py: Remove some unnecessary things, label others

Posted by gi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch jmac/cas_to_cas_oct
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit a52da6e24885f93798211c06db36c8736151344b
Author: Jim MacArthur <ji...@codethink.co.uk>
AuthorDate: Thu Oct 25 17:49:45 2018 +0100

    _casbaseddirectory.py: Remove some unnecessary things, label others
---
 buildstream/storage/_casbaseddirectory.py | 33 ++++---------------------------
 1 file changed, 4 insertions(+), 29 deletions(-)

diff --git a/buildstream/storage/_casbaseddirectory.py b/buildstream/storage/_casbaseddirectory.py
index d0776e4..6f526ea 100644
--- a/buildstream/storage/_casbaseddirectory.py
+++ b/buildstream/storage/_casbaseddirectory.py
@@ -613,17 +613,6 @@ class CasBasedDirectory(Directory):
         x = self._resolve_symlink(symlink_node, force_create=False)
         return isinstance(x, CasBasedDirectory)
 
-    def _verify_unique(self):
-        # Verifies that there are no duplicate names in this directory or subdirectories.
-        names = []
-        for entrylist in [self.pb2_directory.files, self.pb2_directory.directories, self.pb2_directory.symlinks]:
-            for e in entrylist:
-                if e.name in names:
-                    raise VirtualDirectoryError("Duplicate entry for name {} found".format(e.name))
-                names.append(e.name)
-        for d in self.pb2_directory.directories:
-            self.index[d.name].buildstream_object._verify_unique()
-    
     def _partial_import_cas_into_cas(self, source_directory, files, path_prefix="", file_list_required=True):
         """ Import only the files and symlinks listed in 'files' from source_directory to this one.
         Args:
@@ -632,11 +621,9 @@ class CasBasedDirectory(Directory):
            path_prefix (str): Prefix used to add entries to the file list result.
            file_list_required: Whether to update the file list while processing.
         """
-        print("Beginning partial import of {} into {}. Files are: >{}<".format(source_directory, self, ", ".join(files)))
         result = FileListResult()
         processed_directories = set()
         for f in files:
-            #if f == ".": continue
             fullname = os.path.join(path_prefix, f)
             components = f.split(os.path.sep)
             if len(components)>1:
@@ -656,12 +643,9 @@ class CasBasedDirectory(Directory):
                         else:
                             dest_subdir = x
                     else:
-                        print("Importing {}: {} does not exist in {}, so it is created as a directory".format(f, dirname, self))
-                        
                         self.create_directory(dirname)
                         dest_subdir = self._resolve_symlink_or_directory(dirname)
                     src_subdir = source_directory.descend(dirname)
-                    print("Now recursing into {} to continue adding {}".format(src_subdir, f))
                     import_result = dest_subdir._partial_import_cas_into_cas(src_subdir, subcomponents,
                                                                              path_prefix=fullname, file_list_required=file_list_required)
                     result.combine(import_result)
@@ -740,12 +724,12 @@ class CasBasedDirectory(Directory):
         replace one directory with another's hash, without doing any recursion.
         """
         if files is None:
-            #return self._full_import_cas_into_cas(source_directory, can_hardlink=True)
-            files = list(source_directory.list_relative_paths())
-            print("Extracted all files from source directory '{}': {}".format(source_directory, files))
+            files = source_directory.list_relative_paths()
+        # You must pass a list into _partial_import (not a generator)
         return self._partial_import_cas_into_cas(source_directory, list(files))
 
     def _describe(self, thing):
+        """ Only used by showdiff, and as such, not called """
         # Describes protocol buffer objects
         if isinstance(thing, remote_execution_pb2.DirectoryNode):
             return "directory called {}".format(thing.name)
@@ -756,10 +740,8 @@ class CasBasedDirectory(Directory):
         else:
             return "strange thing"
         
-    
     def showdiff(self, other):
-        print("Diffing {} and {}:".format(self, other))
-
+        """ An old function used to show differences between two directories. No longer in use. """
         def compare_list(l1, l2, name):
             item2 = None
             index = 0
@@ -834,16 +816,9 @@ class CasBasedDirectory(Directory):
 
         print("Directory before import: {}".format(self.show_files_recursive()))
 
-        # Sync self (necessary?)
-        self._recalculate_recursing_down()
-        if self.parent:
-            self.parent._recalculate_recursing_up(self)
-        
-        self._verify_unique()
         if isinstance(external_pathspec, CasBasedDirectory):
             print("-"*80 + "Performing direct CAS-to-CAS import")
             result = self._import_cas_into_cas(external_pathspec, files=files)
-            self._verify_unique()
             print("Result of cas-to-cas import: {}".format(self.show_files_recursive()))
         else:
             print("-"*80 + "Performing initial import")


[buildstream] 07/21: CASBasedDirectory: Do not sort the input file list!

Posted by gi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch jmac/cas_to_cas_oct
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit 241f8cc0cb1b864a2c45a943607f073679680e83
Author: Jim MacArthur <ji...@codethink.co.uk>
AuthorDate: Tue Oct 23 17:57:54 2018 +0100

    CASBasedDirectory: Do not sort the input file list!
---
 buildstream/storage/_casbaseddirectory.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/buildstream/storage/_casbaseddirectory.py b/buildstream/storage/_casbaseddirectory.py
index 1a078b2..e44be6b 100644
--- a/buildstream/storage/_casbaseddirectory.py
+++ b/buildstream/storage/_casbaseddirectory.py
@@ -508,7 +508,7 @@ class CasBasedDirectory(Directory):
     def _import_files_from_directory(self, source_directory, files, path_prefix=""):
         """ Imports files from a traditional directory """
         result = FileListResult()
-        for entry in sorted(files):
+        for entry in files:
             print("Importing {} from file system".format(entry))
             print("...Order of elements was {}".format(", ".join(self.index.keys())))
 


[buildstream] 01/21: Add code necessary to do cas-to-cas import

Posted by gi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch jmac/cas_to_cas_oct
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit 789251ac026b58909e67fdbaacc0f10c964bf9b7
Author: Jim MacArthur <ji...@codethink.co.uk>
AuthorDate: Fri Oct 19 17:43:08 2018 +0100

    Add code necessary to do cas-to-cas import
---
 buildstream/storage/_casbaseddirectory.py | 246 ++++++++++++++++++++++++++++--
 tests/storage/virtual_directory_import.py |   3 +-
 2 files changed, 235 insertions(+), 14 deletions(-)

diff --git a/buildstream/storage/_casbaseddirectory.py b/buildstream/storage/_casbaseddirectory.py
index 640c90d..85c98cf 100644
--- a/buildstream/storage/_casbaseddirectory.py
+++ b/buildstream/storage/_casbaseddirectory.py
@@ -136,6 +136,41 @@ class CasBasedDirectory(Directory):
         # We don't need to do anything more than that; files were already added ealier, and symlinks are
         # part of the directory structure.
 
+    def _add_new_blank_directory(self, name) -> Directory:
+        bst_dir = CasBasedDirectory(self.context, parent=self, filename=name)
+        new_pb2_dirnode = self.pb2_directory.directories.add()
+        new_pb2_dirnode.name = name
+        # Calculate the hash for an empty directory
+        if name in self.index:
+            raise VirtualDirectoryError("Creating directory {} would overwrite an existing item in {}"
+                                        .format(name, str(self)))
+        new_pb2_directory = remote_execution_pb2.Directory()
+        self.cas_cache.add_object(digest=new_pb2_dirnode.digest, buffer=new_pb2_directory.SerializeToString())
+        self.index[name] = IndexEntry(new_pb2_dirnode, buildstream_object=bst_dir)
+        return bst_dir
+
+    def create_directory(self, name: str) -> Directory:
+        """Creates a directory if it does not already exist. This does not
+        cause an error if something exists; it will remove files and
+        symlinks to files which have the same name in this
+        directory. Symlinks to directories with the name 'name' are
+        unaltered; it's assumed that the target of that symlink will
+        be used.
+
+        """
+        existing_item = self._find_pb2_entry(name)
+        if isinstance(existing_item, remote_execution_pb2.FileNode):
+            # Directory imported over file with same name
+            self.remove_item(name)
+        elif isinstance(existing_item, remote_execution_pb2.SymlinkNode):
+            # Directory imported over symlink with same source name
+            if self.symlink_target_is_directory(existing_item):
+                return self._resolve_symlink_or_directory(name) # That's fine; any files in the source directory should end up at the target of the symlink.
+            else:
+                self.remove_item(name) # Symlinks to files get replaced
+        return self.descend(name, create=True) # Creates the directory if it doesn't already exist.
+
+
     def _find_pb2_entry(self, name):
         if name in self.index:
             return self.index[name].pb_object
@@ -232,6 +267,7 @@ class CasBasedDirectory(Directory):
             if isinstance(entry, CasBasedDirectory):
                 return entry.descend(subdirectory_spec[1:], create)
             else:
+                # May be a symlink
                 error = "Cannot descend into {}, which is a '{}' in the directory {}"
                 raise VirtualDirectoryError(error.format(subdirectory_spec[0],
                                                          type(entry).__name__,
@@ -288,6 +324,29 @@ class CasBasedDirectory(Directory):
                 directory = directory.descend(c, create=True)
         return directory
 
+    def _resolve_symlink(self, node):
+        """Same as _resolve_symlink_or_directory but takes a SymlinkNode.
+        """
+
+        # OK then, it's a symlink
+        symlink = node
+        absolute = symlink.target.startswith(CasBasedDirectory._pb2_absolute_path_prefix)
+        if absolute:
+            root = self.find_root()
+        else:
+            root = self
+        directory = root
+        components = symlink.target.split(CasBasedDirectory._pb2_path_sep)
+        for c in components:
+            if c == ".":
+                pass
+            elif c == "..":
+                directory = directory.parent
+            else:
+                directory = directory.descend(c, create=True)
+        return directory
+
+    
     def _resolve(self, name, absolute_symlinks_resolve=True):
         """ Resolves any name to an object. If the name points to a symlink in this 
         directory, it returns the thing it points to, recursively. Returns a CasBasedDirectory, FileNode or None. Never creates a directory or otherwise alters the directory. """
@@ -427,6 +486,157 @@ class CasBasedDirectory(Directory):
                     result.files_written.append(relative_pathname)
         return result
 
+
+    def _save(self, name):
+        """ Saves this directory into the content cache as a named ref. This function is not
+        currently in use, but may be useful later. """
+        self._recalculate_recursing_up()
+        self._recalculate_recursing_down()
+        (rel_refpath, refname) = os.path.split(name)
+        refdir = os.path.join(self.cas_directory, 'refs', 'heads', rel_refpath)
+        refname = os.path.join(refdir, refname)
+
+        if not os.path.exists(refdir):
+            os.makedirs(refdir)
+        with open(refname, "wb") as f:
+            f.write(self.ref.SerializeToString())
+
+    def find_updated_files(self, modified_directory, prefix=""):
+        """Find the list of written and overwritten files that would result
+        from importing 'modified_directory' into this one.  This does
+        not change either directory. The reason this exists is for
+        direct imports of cas directories into other ones, which can
+        be done by simply replacing a hash, but we still need the file
+        lists.
+
+        """
+        result = FileListResult()
+        for entry in modified_directory.pb2_directory.directories:
+            existing_dir = self._find_pb2_entry(entry.name)
+            if existing_dir:
+                updates_files = existing_dir.find_updated_files(modified_directory.descend(entry.name),
+                                                                os.path.join(prefix, entry.name))
+                result.combine(updated_files)
+            else:
+                for f in source_directory.descend(entry.name).list_relative_paths():
+                    result.files_written.append(os.path.join(prefix, f))
+                    # None of these can overwrite anything, since the original files don't exist
+        for entry in modified_directory.pb2_directory.files + modified_directory.pb2_directory.symlinks:
+            if self._find_pb2_entry(entry.name):
+                result.files_overwritten.apppend(os.path.join(prefix, entry.name))
+            result.file_written.apppend(os.path.join(prefix, entry.name))
+        return result
+
+    def files_in_subdir(sorted_files, dirname):
+        """Filters sorted_files and returns only the ones which have
+           'dirname' as a prefix, with that prefix removed.
+
+        """
+        if not dirname.endswith(os.path.sep):
+            dirname += os.path.sep
+        return [f[len(dirname):] for f in sorted_files if f.startswith(dirname)]
+
+    def symlink_target_is_directory(self, symlink_node):
+        x = self._resolve_symlink(symlink_node)
+        return isinstance(x, CasBasedDirectory)
+
+    def _partial_import_cas_into_cas(self, source_directory, files, path_prefix="", file_list_required=True):
+        """ Import only the files and symlinks listed in 'files' from source_directory to this one.
+        Args:
+           source_directory (:class:`.CasBasedDirectory`): The directory to import from
+           files ([str]): List of pathnames to import.
+           path_prefix (str): Prefix used to add entries to the file list result.
+           file_list_required: Whether to update the file list while processing.
+        """
+        print("Beginning partial import of {} into {}".format(source_directory, self))
+        result = FileListResult()
+        processed_directories = set()
+        for f in files:
+            if f == ".": continue
+            fullname = os.path.join(path_prefix, f)
+            components = f.split(os.path.sep)
+            if len(components)>1:
+                # We are importing a thing which is in a subdirectory. We may have already seen this dirname
+                # for a previous file.
+                dirname = components[0]
+                if dirname not in processed_directories:
+                    # Now strip off the first directory name and import files recursively.
+                    subcomponents = CasBasedDirectory.files_in_subdir(files, dirname)
+                    self.create_directory(dirname)
+                    print("Creating destination in {}: {}".format(self, dirname))
+                    dest_subdir = self._resolve_symlink_or_directory(dirname)
+                    src_subdir = source_directory.descend(dirname)
+                    import_result = dest_subdir._partial_import_cas_into_cas(src_subdir, subcomponents,
+                                                                             path_prefix=fullname, file_list_required=file_list_required)
+                    result.combine(import_result)
+                processed_directories.add(dirname)
+            elif isinstance(source_directory.index[f].buildstream_object, CasBasedDirectory):
+                # The thing in the input file list is a directory on its own. In which case, replace any existing file, or symlink to file
+                # with the new, blank directory - if it's neither of those things, or doesn't exist, then just create the dir.
+                self.create_directory(f)
+            else:
+                # We're importing a file or symlink - replace anything with the same name.
+                self._check_replacement(f, path_prefix, result)
+                item = source_directory.index[f].pb_object
+                if isinstance(item, remote_execution_pb2.FileNode):
+                    filenode = self.pb2_directory.files.add(digest=item.digest, name=f,
+                                                            is_executable=item.is_executable)
+                    self.index[f] = IndexEntry(filenode, modified=(fullname in result.overwritten))
+                else:
+                    assert(isinstance(item, remote_execution_pb2.SymlinkNode))
+                    symlinknode = self.pb2_directory.symlinks.add(name=f, target=item.target)
+                    # A symlink node has no digest.
+                    self.index[f] = IndexEntry(symlinknode, modified=(fullname in result.overwritten))
+        return result
+
+    def transfer_node_contents(destination, source):
+        """Transfers all fields from the source PB2 node into the
+        destination. Destination and source must be of the same type and must
+        be a FileNode, SymlinkNode or DirectoryNode.
+        """
+        assert(type(destination) == type(source))
+        destination.name = source.name
+        if isinstance(destination, remote_execution_pb2.FileNode):
+            destination.digest.hash = source.digest.hash
+            destination.digest.size_bytes = source.digest.size_bytes
+            destination.is_executable = source.is_executable
+        elif isinstance(destination, remote_execution_pb2.SymlinkNode):
+            destination.target = source.target
+        elif isinstance(destination, remote_execution_pb2.DirectoryNode):
+            destination.digest.hash = source.digest.hash
+            destination.digest.size_bytes = source.digest.size_bytes
+        else:
+            raise VirtualDirectoryError("Incompatible type '{}' used as destination for transfer_node_contents"
+                                        .format(destination.type))
+
+    def _add_directory_from_node(self, source_node, source_casdir, can_hardlink=False):
+        # Duplicate the given node and add it to our index with a CasBasedDirectory object.
+        # No existing entry with the source node's name can exist.
+        # source_casdir is only needed if can_hardlink is True.
+        assert(self._find_pb2_entry(source_node.name) is None)
+
+        if can_hardlink:
+            new_dir_node = self.pb2_directory.directories.add()
+            CasBasedDirectory.transfer_node_contents(new_dir_node, source_node)
+            self.index[source_node.name] = IndexEntry(source_node, buildstream_object=source_casdir, modified=True)
+        else:
+            new_dir_node = self.pb2_directory.directories.add()
+            CasBasedDirectory.transfer_node_contents(new_dir_node, source_node)
+            buildStreamDirectory = CasBasedDirectory(self.context, ref=source_node.digest,
+                                                     parent=self, filename=source_node.name)
+            self.index[source_node.name] = IndexEntry(source_node, buildstream_object=buildStreamDirectory, modified=True)
+
+    def _import_cas_into_cas(self, source_directory, files=None):
+        """ A full import is significantly quicker than a partial import, because we can just
+        replace one directory with another's hash, without doing any recursion.
+        """
+        if files is None:
+            #return self._full_import_cas_into_cas(source_directory, can_hardlink=True)
+            files = source_directory.list_relative_paths()
+            print("Extracted all files from source directory '{}': {}".format(source_directory, files))
+        return self._partial_import_cas_into_cas(source_directory, files)
+
+
     def import_files(self, external_pathspec, *, files=None,
                      report_written=True, update_utimes=False,
                      can_link=False):
@@ -448,28 +658,34 @@ class CasBasedDirectory(Directory):
 
         can_link (bool): Ignored, since hard links do not have any meaning within CAS.
         """
-        if isinstance(external_pathspec, FileBasedDirectory):
-            source_directory = external_pathspec._get_underlying_directory()
-        elif isinstance(external_pathspec, CasBasedDirectory):
-            # TODO: This transfers from one CAS to another via the
-            # filesystem, which is very inefficient. Alter this so it
-            # transfers refs across directly.
+
+        duplicate_cas = None
+        if isinstance(external_pathspec, CasBasedDirectory):
+            result = self._import_cas_into_cas(external_pathspec, files=files)
+
+            # Duplicate the current directory and do an import that way.
+            duplicate_cas = CasBasedDirectory(self.context, ref=self.ref)
             with tempfile.TemporaryDirectory(prefix="roundtrip") as tmpdir:
                 external_pathspec.export_files(tmpdir)
                 if files is None:
                     files = list_relative_paths(tmpdir)
-                result = self._import_files_from_directory(tmpdir, files=files)
-            return result
+                duplicate_cas._import_files_from_directory(tmpdir, files=files)
+                duplicate_cas._recalculate_recursing_down()
+                if duplicate_cas.parent:
+                    duplicate_cas.parent._recalculate_recursing_up(self)
         else:
-            source_directory = external_pathspec
-
-        if files is None:
-            files = list_relative_paths(source_directory)
+            if isinstance(external_pathspec, FileBasedDirectory):
+                source_directory = external_pathspec.get_underlying_directory()
+            else:
+                source_directory = external_pathspec
+            if files is None:
+                files = list_relative_paths(external_pathspec)
+            result = self._import_files_from_directory(source_directory, files=files)
 
         # TODO: No notice is taken of report_written, update_utimes or can_link.
         # Current behaviour is to fully populate the report, which is inefficient,
         # but still correct.
-        result = self._import_files_from_directory(source_directory, files=files)
+
 
         # We need to recalculate and store the hashes of all directories both
         # up and down the tree; we have changed our directory by importing files
@@ -479,6 +695,10 @@ class CasBasedDirectory(Directory):
         self._recalculate_recursing_down()
         if self.parent:
             self.parent._recalculate_recursing_up(self)
+        if duplicate_cas:
+            if duplicate_cas.ref.hash != self.ref.hash:
+                raise VirtualDirectoryError("Mismatch between file-imported result {} and cas-to-cas imported result {}.".format(duplicate_cas.ref.hash,self.ref.hash))
+
         return result
 
     def set_deterministic_mtime(self):
diff --git a/tests/storage/virtual_directory_import.py b/tests/storage/virtual_directory_import.py
index 1c78c1b..47b4935 100644
--- a/tests/storage/virtual_directory_import.py
+++ b/tests/storage/virtual_directory_import.py
@@ -150,9 +150,10 @@ def test_cas_import(cli, tmpdir, original, overlay):
     generate_random_root(tmpdir)
     d = create_new_casdir(original, fake_context, tmpdir)
     d2 = create_new_casdir(overlay, fake_context, tmpdir)
+    print("Importing dir {} into {}".format(overlay, original))
     d.import_files(d2)
     d.export_files(os.path.join(tmpdir, "output"))
-
+    
     for item in root_filesets[overlay - 1]:
         (path, typename, content) = item
         realpath = resolve_symlinks(path, os.path.join(tmpdir, "output"))


[buildstream] 04/21: Fix 'remove_item'->delete_entry

Posted by gi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch jmac/cas_to_cas_oct
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit e1b047ef61cd3727355915868f6cd546b6a4205f
Author: Jim MacArthur <ji...@codethink.co.uk>
AuthorDate: Tue Oct 23 17:56:12 2018 +0100

    Fix 'remove_item'->delete_entry
---
 buildstream/storage/_casbaseddirectory.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/buildstream/storage/_casbaseddirectory.py b/buildstream/storage/_casbaseddirectory.py
index f293472..cc28fbd 100644
--- a/buildstream/storage/_casbaseddirectory.py
+++ b/buildstream/storage/_casbaseddirectory.py
@@ -165,13 +165,13 @@ class CasBasedDirectory(Directory):
         existing_item = self._find_pb2_entry(name)
         if isinstance(existing_item, remote_execution_pb2.FileNode):
             # Directory imported over file with same name
-            self.remove_item(name)
+            self.delete_entry(name)
         elif isinstance(existing_item, remote_execution_pb2.SymlinkNode):
             # Directory imported over symlink with same source name
             if self.symlink_target_is_directory(existing_item):
                 return self._resolve_symlink_or_directory(name) # That's fine; any files in the source directory should end up at the target of the symlink.
             else:
-                self.remove_item(name) # Symlinks to files get replaced
+                self.delete_entry(name) # Symlinks to files get replaced
         return self.descend(name, create=True) # Creates the directory if it doesn't already exist.
 
 


[buildstream] 17/21: Make virtual_directory_test do the cas roundtrip test instead of _casbaseddirectory

Posted by gi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch jmac/cas_to_cas_oct
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit 4bb8d31143739749677aeb701a9c9e9b3af5064c
Author: Jim MacArthur <ji...@codethink.co.uk>
AuthorDate: Thu Oct 25 17:26:21 2018 +0100

    Make virtual_directory_test do the cas roundtrip test instead of _casbaseddirectory
---
 tests/storage/virtual_directory_import.py | 38 ++++++++++++++++++++++++-------
 1 file changed, 30 insertions(+), 8 deletions(-)

diff --git a/tests/storage/virtual_directory_import.py b/tests/storage/virtual_directory_import.py
index 9207193..70e3c9a 100644
--- a/tests/storage/virtual_directory_import.py
+++ b/tests/storage/virtual_directory_import.py
@@ -1,11 +1,14 @@
 import os
 import pytest
 import random
+import copy
+import tempfile
 from tests.testutils import cli
 
+
 from buildstream.storage import CasBasedDirectory
 from buildstream.storage import FileBasedDirectory
-
+from buildstream import utils
 
 class FakeContext():
     def __init__(self):
@@ -84,7 +87,6 @@ def generate_random_root(rootno, directory):
                 os.symlink(symlink_destination, target)
                 description = "symlink pointing to {}".format(symlink_destination)
         things.append(os.path.join(location, thingname))
-        print("Generated {}/{}, a {}".format(rootdir, things[-1], description))
 
 
 def file_contents(path):
@@ -160,15 +162,24 @@ def _import_test(tmpdir, original, overlay, generator_function, verify_contents=
         generator_function(overlay, tmpdir)
         
     d = create_new_casdir(original, fake_context, tmpdir)
+
+    #duplicate_cas = CasBasedDirectory(fake_context, ref=copy.copy(d.ref))
+    duplicate_cas = create_new_casdir(original, fake_context, tmpdir)
+
+    assert duplicate_cas.ref.hash == d.ref.hash
+
     d2 = create_new_casdir(overlay, fake_context, tmpdir)
     print("Importing dir {} into {}".format(overlay, original))
     d.import_files(d2)
-    d.export_files(os.path.join(tmpdir, "output"))
+    export_dir = os.path.join(tmpdir, "output")
+    roundtrip_dir = os.path.join(tmpdir, "roundtrip")
+    d2.export_files(roundtrip_dir)
+    d.export_files(export_dir)
     
     if verify_contents:
         for item in root_filesets[overlay - 1]:
             (path, typename, content) = item
-            realpath = resolve_symlinks(path, os.path.join(tmpdir, "output"))
+            realpath = resolve_symlinks(path, export_dir)
             if typename == 'F':
                 if os.path.isdir(realpath) and directory_not_empty(realpath):
                     # The file should not have overwritten the directory in this case.
@@ -189,10 +200,21 @@ def _import_test(tmpdir, original, overlay, generator_function, verify_contents=
                 assert os.path.lexists(realpath)
 
     # Now do the same thing with filebaseddirectories and check the contents match
-    d3 = create_new_casdir(original, fake_context, tmpdir)
-    d4 = create_new_filedir(overlay, tmpdir)
-    d3.import_files(d2)
-    assert d.ref.hash == d3.ref.hash
+
+    files = list(utils.list_relative_paths(roundtrip_dir))
+    print("Importing from filesystem: filelist is: {}".format(files))
+    duplicate_cas._import_files_from_directory(roundtrip_dir, files=files)
+    duplicate_cas._recalculate_recursing_down()
+    if duplicate_cas.parent:
+        duplicate_cas.parent._recalculate_recursing_up(duplicate_cas)
+        print("Result of direct import: {}".format(duplicate_cas.show_files_recursive()))
+
+    assert duplicate_cas.ref.hash == d.ref.hash
+
+    #d3 = create_new_casdir(original, fake_context, tmpdir)
+    #d4 = create_new_filedir(overlay, tmpdir)
+    #d3.import_files(d2)
+    #assert d.ref.hash == d3.ref.hash
 
 @pytest.mark.parametrize("original,overlay", combinations(range(1,len(root_filesets)+1)))
 def test_fixed_cas_import(cli, tmpdir, original, overlay):


[buildstream] 21/21: Rearrange comment

Posted by gi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch jmac/cas_to_cas_oct
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit 711c98472e024028e9392a791c850fc9ee326ede
Author: Jim MacArthur <ji...@codethink.co.uk>
AuthorDate: Fri Oct 26 14:11:37 2018 +0100

    Rearrange comment
---
 buildstream/storage/_casbaseddirectory.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/buildstream/storage/_casbaseddirectory.py b/buildstream/storage/_casbaseddirectory.py
index a99b486..53fdb03 100644
--- a/buildstream/storage/_casbaseddirectory.py
+++ b/buildstream/storage/_casbaseddirectory.py
@@ -399,11 +399,11 @@ class CasBasedDirectory(Directory):
             if c == ".":
                 pass
             elif c == "..":
+                if directory.parent is not None:
+                    directory = directory.parent
                 # If directory.parent *is* None, this is an attempt to access
                 # '..' from the root, which is valid under POSIX; it just
                 # returns the root.                
-                if directory.parent is not None:
-                    directory = directory.parent
             else:
                 if c in directory.index:
                     f = directory._resolve(c, absolute_symlinks_resolve, first_seen_object=first_seen_object)


[buildstream] 13/21: Detect infinite symlink loops in resolve()

Posted by gi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch jmac/cas_to_cas_oct
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit 459f809bd43b1c2fd6ba73a21de952bcf4792317
Author: Jim MacArthur <ji...@codethink.co.uk>
AuthorDate: Thu Oct 25 16:48:07 2018 +0100

    Detect infinite symlink loops in resolve()
---
 buildstream/storage/_casbaseddirectory.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/buildstream/storage/_casbaseddirectory.py b/buildstream/storage/_casbaseddirectory.py
index a5db96e..bef9acf 100644
--- a/buildstream/storage/_casbaseddirectory.py
+++ b/buildstream/storage/_casbaseddirectory.py
@@ -387,7 +387,7 @@ class CasBasedDirectory(Directory):
         return directory
 
     
-    def _resolve(self, name, absolute_symlinks_resolve=True, force_create=False):
+    def _resolve(self, name, absolute_symlinks_resolve=True, force_create=False, first_seen_object = None):
         """ Resolves any name to an object. If the name points to a symlink in
         this directory, it returns the thing it points to,
         recursively. Returns a CasBasedDirectory, FileNode or
@@ -406,6 +406,14 @@ class CasBasedDirectory(Directory):
             return index_entry.pb_object
         
         assert isinstance(index_entry.pb_object, remote_execution_pb2.SymlinkNode)
+
+        if first_seen_object is None:
+            first_seen_object = index_entry.pb_object
+        else:
+            if index_entry.pb_object == first_seen_object:
+                ### Infinite symlink loop detected ###
+                return None
+        
         print("Resolving '{}': This is a symlink node in the current directory.".format(name))
         symlink = index_entry.pb_object
         components = symlink.target.split(CasBasedDirectory._pb2_path_sep)
@@ -439,7 +447,7 @@ class CasBasedDirectory(Directory):
                     directory = directory.parent
             else:
                 if c in directory.index:
-                    f = directory._resolve(c, absolute_symlinks_resolve)
+                    f = directory._resolve(c, absolute_symlinks_resolve, first_seen_object=first_seen_object)
                     # Ultimately f must now be a file or directory
                     if isinstance(f, CasBasedDirectory):
                         directory = f


[buildstream] 02/21: Add a tool to show differences in two CAS directories

Posted by gi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch jmac/cas_to_cas_oct
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit 010565db93f8d543c16c36ffac476f0b3588acfa
Author: Jim MacArthur <ji...@codethink.co.uk>
AuthorDate: Fri Oct 19 18:28:45 2018 +0100

    Add a tool to show differences in two CAS directories
---
 buildstream/storage/_casbaseddirectory.py | 50 ++++++++++++++++++++++++++++---
 1 file changed, 46 insertions(+), 4 deletions(-)

diff --git a/buildstream/storage/_casbaseddirectory.py b/buildstream/storage/_casbaseddirectory.py
index 85c98cf..3a0364f 100644
--- a/buildstream/storage/_casbaseddirectory.py
+++ b/buildstream/storage/_casbaseddirectory.py
@@ -348,8 +348,13 @@ class CasBasedDirectory(Directory):
 
     
     def _resolve(self, name, absolute_symlinks_resolve=True):
-        """ Resolves any name to an object. If the name points to a symlink in this 
-        directory, it returns the thing it points to, recursively. Returns a CasBasedDirectory, FileNode or None. Never creates a directory or otherwise alters the directory. """
+        """ Resolves any name to an object. If the name points to a symlink in
+        this directory, it returns the thing it points to,
+        recursively. Returns a CasBasedDirectory, FileNode or
+        None. Never creates a directory or otherwise alters the
+        directory.
+
+        """
         # First check if it's a normal object and return that
 
         if name not in self.index:
@@ -408,7 +413,7 @@ class CasBasedDirectory(Directory):
                         else:
                             return f
                 else:
-                    print("  resolving {}: nonexistent!".format(c))
+                    print("  resolving {}: Broken symlink".format(c))
                     return None
 
         # Shouldn't get here.
@@ -636,7 +641,43 @@ class CasBasedDirectory(Directory):
             print("Extracted all files from source directory '{}': {}".format(source_directory, files))
         return self._partial_import_cas_into_cas(source_directory, files)
 
-
+    def showdiff(self, other):
+        print("Diffing {} and {}:".format(self, other))
+        l1 = list(self.index.items())
+        l2 = list(other.index.items())
+        for (key, value) in l1:
+            if len(l2) == 0:
+                print("'Other' is short: no item to correspond to '{}' in first.".format(key))
+                return
+            (key2, value2) = l2.pop(0)
+            if key != key2:
+                print("Mismatch: item named {} in first, named {} in second".format(key, key2))
+                return
+            if type(value.pb_object) != type(value2.pb_object):
+                print("Mismatch: item named {}'s pb_object is a {} in first and a {} in second".format(key, type(value.pb_object), type(value2.pb_object)))
+                return
+            if type(value.buildstream_object) != type(value2.buildstream_object):
+                print("Mismatch: item named {}'s buildstream_object is a {} in first and a {} in second".format(key, type(value.buildstream_object), type(value2.buildstream_object)))
+                return
+            print("Inspecting {} of type {}".format(key, type(value.pb_object)))
+            if type(value.pb_object) == remote_execution_pb2.DirectoryNode:
+                # It's a directory, follow it
+                self.descend(key).showdiff(other.descend(key))
+            elif type(value.pb_object) == remote_execution_pb2.SymlinkNode:
+                target1 = value.pb_object.target
+                target2 = value2.pb_object.target
+                if target1 != target2:
+                    print("Symlink named {}: targets do not match. {} in the first, {} in the second".format(key, target1, target2))
+            elif type(value.pb_object) == remote_execution_pb2.FileNode:
+                if value.pb_object.digest != value2.pb_object.digest:
+                    print("File named {}: digests do not match. {} in the first, {} in the second".format(key, value.pb_object.digest, value2.pb_object.digest))
+        if len(l2) != 0:
+            print("'Other' is long: it contains extra items called: {}".format(", ".join([i[0] for i in l2])))
+            return
+        print("No differences found in {}".format(self))
+              
+        
+    
     def import_files(self, external_pathspec, *, files=None,
                      report_written=True, update_utimes=False,
                      can_link=False):
@@ -697,6 +738,7 @@ class CasBasedDirectory(Directory):
             self.parent._recalculate_recursing_up(self)
         if duplicate_cas:
             if duplicate_cas.ref.hash != self.ref.hash:
+                self.showdiff(duplicate_cas)
                 raise VirtualDirectoryError("Mismatch between file-imported result {} and cas-to-cas imported result {}.".format(duplicate_cas.ref.hash,self.ref.hash))
 
         return result