You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@subversion.apache.org by ju...@apache.org on 2015/12/31 13:48:16 UTC

svn commit: r1722441 - /subversion/trunk/notes/move-tracking/path_pairs_to_eid_map.py

Author: julianfoad
Date: Thu Dec 31 12:48:16 2015
New Revision: 1722441

URL: http://svn.apache.org/viewvc?rev=1722441&view=rev
Log:
* notes/move-tracking/path_pairs_to_eid_map.py
  Rewrite with EID mapping classes for easier translation to C.

Modified:
    subversion/trunk/notes/move-tracking/path_pairs_to_eid_map.py

Modified: subversion/trunk/notes/move-tracking/path_pairs_to_eid_map.py
URL: http://svn.apache.org/viewvc/subversion/trunk/notes/move-tracking/path_pairs_to_eid_map.py?rev=1722441&r1=1722440&r2=1722441&view=diff
==============================================================================
--- subversion/trunk/notes/move-tracking/path_pairs_to_eid_map.py (original)
+++ subversion/trunk/notes/move-tracking/path_pairs_to_eid_map.py Thu Dec 31 12:48:16 2015
@@ -25,6 +25,7 @@
 
 import sys
 import posixpath
+import collections
 
 class ArgumentsError(Exception):
   pass
@@ -56,34 +57,7 @@ print("Input:")
 for e in input_path_pairs:
   print("  " + str(e))
 
-def split_path(path):
-  """Convert PATH to (parent-path, name), unless it is None.
-  """
-  return posixpath.split(path) if path is not None else None
-
-def split_paths(path_pairs):
-  """Convert each path in PATH_PAIRS to (parent-path, name), unless it is None.
-  """
-  return [[split_path(path) for path in path_pair]
-          for path_pair in path_pairs]
-
-# Convert to (initial, final) mappings by "split path",
-# where split-path is (parent-path, name).
-input_pairs = split_paths(input_path_pairs)
-
-# Assign an EID to each input pair of split-paths, starting at EID 1.
-out_map = dict(enumerate(input_pairs, 1))
-
-# ### Experimental section...
-# Convert to (initial, final) lists of split paths:
-# ([split-path, ...], [split-path, ...])
-input_as_two_lists = zip(*input_pairs)
-# Convert to (initial, final) eid-to-split-path mappings, starting at EID 1:
-# ({ eid:split-path, ... }, { eid:split-path, ... })
-split_path_maps = (dict(enumerate(input_as_two_lists[0], 1)),
-                   dict(enumerate(input_as_two_lists[1], 1)))
-
-next_eid = len(input_pairs) + 1
+next_eid = len(input_path_pairs) + 1
 
 def get_next_eid():
   global next_eid
@@ -91,114 +65,196 @@ def get_next_eid():
   next_eid += 1
   return new_eid
 
-print("Input, as (intial, final) mappings by (parent-path, name):")
-for e in out_map.items():
-  print("  " + str(e))
+def split_path(path):
+  """Convert PATH to (parent-path, name), unless it is None.
+  """
+  return posixpath.split(path) if path is not None else None
 
-def map_set(out_map, side, eid, loc):
-  """Set the mapping for EID to LOC. If no mapping for EID already exists,
-     set the other side to None.
-     LOC is either (parent-path, name) or (parent-eid, name).
-   """
-  #print("map_set(%d, %s)" % (eid, str(loc)))
-  entry = out_map.get(eid, [None, None])
-  entry[side] = loc
-  out_map[eid] = entry
-
-def find_eid_from_path(out_map, side, path):
-  """Return the EID for PATH, or -1 if the EID for PATH is not known.
-  """
-  if path == '':
-    return 0
-  for eid, entry in out_map.items():
-    loc = entry[side]
-    if loc:
-      parent_path, name = posixpath.split(path)
-      if loc[1] == name:
-        if (loc[0] == parent_path or
-            loc[0] == find_eid_from_path(out_map, side, parent_path)):
-          return eid
-
-  return -1
-
-def find_eid_from_loc(out_map, side, loc):
-  """Return the EID for LOC, or -1 if the EID for LOC is not known.
-  """
-  if loc is None:
-    return 0
-  for eid, entry in out_map.items():
-    if loc == entry[side]:
-      return eid
-  return -1
+class DictWithUniqueValues(dict):
+  # Overriding __setitem__ won't catch all possible ways to set a value
+  # (e.g. __init__, update, setdefault) but is good enough for us here.
+  def __setitem__(self, k, v):
+    """Ensure no duplicate value already exists."""
+    assert v not in self.values(), (k, v)
+    dict.__setitem__(self, k, v)
+
+class MappingEidToPpathName(DictWithUniqueValues):
+  """A mapping from EID to (parent_path, name).
+  """
+  def eid_from_relpath(self, relpath):
+    """Return the EID for RELPATH, or -1 if the EID for RELPATH is not known.
+    """
+    if relpath == '':
+      return 0
+    parent_path, name = posixpath.split(relpath)
+    for eid, loc in self.items():
+      if loc == (parent_path, name):
+        return eid
+    return -1
+
+class MappingEidToPeidName(DictWithUniqueValues):
+  """A mapping from EID to (parent_eid, name).
+  """
+  def eid_from_relpath(self, relpath):
+    """Return the EID for RELPATH, or -1 if the EID for RELPATH is not known.
+    """
+    if relpath == '':
+      return 0
+    parent_path, name = posixpath.split(relpath)
+    for eid, loc in self.items():
+      if loc[1] == name and loc[0] == self.eid_from_relpath(parent_path):
+        return eid
+    return -1
+
+  def eid_from_loc(self, loc):
+    """Return the EID for LOC, or -1 if the EID for LOC is not known.
+       LOC is (parent_eid, name).
+    """
+    if loc is None:
+      return 0
+    for eid, this_loc in self.items():
+      if loc == this_loc:
+        return eid
+    return -1
+
+  def relpath_from_eid(self, eid):
+    """Return the relpath of element EID in a mapping from EID to
+       (parent_eid, name).
+    """
+    if eid == 0:
+      return ''
+    element = self.get(eid)
+    if element is None:
+      return None
+    parent_eid, name = element
+    parent_path = self.relpath_from_eid(parent_eid)
+    if parent_path is None:
+      return None
+    return posixpath.join(parent_path, name)
+
+class InitialFinalConverter:
+  def __init__(self, path_pairs=[]):
+    assert all([path is None or type(path) is str
+                for pair in path_pairs for path in pair])
+    # Convert to a list of pairs of split-path:
+    # [[initial_split_path, final_split_path], ...],
+    # where split-path is (parent-path, name).
+    split_path_pairs = [[split_path(path) for path in path_pair]
+                        for path_pair in path_pairs]
+    # Convert to (initial, final) lists of split paths:
+    # ([split-path, ...], [split-path, ...])
+    two_lists = zip(*split_path_pairs)
+    # Convert to (initial, final) eid-to-split-path mappings, assigning
+    # EIDs starting at EID 1:
+    # ({ eid:split-path, ... }, { eid:split-path, ... })
+    self.path_maps = (MappingEidToPpathName(enumerate(two_lists[0], 1)),
+                      MappingEidToPpathName(enumerate(two_lists[1], 1)))
+    self.peid_maps = (MappingEidToPeidName(), MappingEidToPeidName())
+
+  def path_loc_pairs(self):
+    return { eid: [self.path_maps[0].get(eid), self.path_maps[1].get(eid)]
+             for eid in set(self.path_maps[0]) | set(self.path_maps[1]) }
+
+  def peid_loc_pairs(self):
+    return { eid: [self.peid_maps[0].get(eid), self.peid_maps[1].get(eid)]
+             for eid in set(self.peid_maps[0]) | set(self.peid_maps[1]) }
+
+  def path_locs_for_side(self, side):
+    return self.path_maps[side]
+
+  def peid_locs_for_side(self, side):
+    return self.peid_maps[side]
+
+  def has_peid_loc(self, side, loc):
+    assert loc and type(loc[0]) is int
+    return self.peid_locs_for_side(side).eid_from_loc(loc) >= 0
+
+  def set_peid_loc(self, side, eid, loc):
+    """Set the mapping for SIDE:EID to LOC. (If no mapping for EID already
+       exists, implicitly set the other side to None.)
+       LOC is (parent-eid, name).
+    """
+    assert type(loc[0]) is int
+    self.peid_maps[side][eid] = loc
+
+  def find_eid_from_relpath(self, side, relpath):
+    """Return the EID for SIDE:RELPATH, or -1 if not found.
+    """
+    eid = self.path_locs_for_side(side).eid_from_relpath(relpath)
+    if eid < 0:
+      eid = self.peid_locs_for_side(side).eid_from_relpath(relpath)
+    if eid < 0:
+      # Look up using a combined search in both (parent-eid, name) and
+      # (parent-relpath, name) maps, if necessary.
+      ### Not sure if this would ever be necessary.
+      pass
+    return eid
 
-def add_new(out_map, side, path):
-  """Add a new EID and (parent_eid, name) entry for PATH, and for each
-     of its parents that lacks one.
+# Assign an EID to each input pair of split-paths, starting at EID 1.
+converter = InitialFinalConverter(input_path_pairs)
+
+print("Input, as (initial, final) mappings by (parent-path, name):")
+for eid, locs in converter.path_loc_pairs().items():
+  print("  " + str(eid) + ": " + str(locs))
+
+def add_eid_mapping_and_make_parents(mapping, side, eid, parent_path, name):
+  """Add a (parent_eid, name) entry for SIDE:EID, and for each of its parent
+     paths that lacks an EID, up to a path that has an EID.
      Add this same mapping to the other side as well, but without caring
      whether the parent element exists on the other side. ### Is this right?
   """
-  new_eid = get_next_eid()
-  parent_path, name = posixpath.split(path)
-  parent_eid = find_or_add(out_map, side, parent_path)
+  parent_eid = mapping.find_eid_from_relpath(side, parent_path)
+  if parent_eid < 0:
+    # no eid assigned yet for this path: assign a new eid for it
+    parent_eid = add_new(mapping, side, parent_path)
   loc = (parent_eid, name)
-  map_set(out_map, side, new_eid, loc)
-  #if loc not in out_map[1 - side]:
-  if find_eid_from_loc(out_map, 1 - side, loc) < 0:
-    map_set(out_map, 1 - side, new_eid, loc)
-  return new_eid
+  mapping.set_peid_loc(side, eid, loc)
+  return loc
 
-def find_or_add(out_map, side, path):
-  """Find the EID for PATH, creating a new EID and (parent-eid, name) entry
-     for each of PATH and its parents that is not already present.
+def add_new(mapping, side, path):
+  """Add a new EID and (parent_eid, name) entry for PATH, and for each
+     of its parents that lacks an EID.
+
+     Add this same mapping to the other side as well, but without caring
+     whether the parent element exists on the other side.
+       ### Why is this right?
   """
-  eid = find_eid_from_path(out_map, side, path)
-  if eid < 0:
-    # no eid assigned yet for this path: assign a new eid for it
-    eid = add_new(out_map, side, path)
+  eid = get_next_eid()
+  parent_path, name = posixpath.split(path)
+  loc = add_eid_mapping_and_make_parents(mapping, side, eid, parent_path, name)
+  if not mapping.has_peid_loc(1 - side, loc):
+    mapping.set_peid_loc(1 - side, eid, loc)
   return eid
 
-def write_parent_eid(out_map, side, eid):
-  """Ensure the SIDE mapping for EID (if any) uses a parent_eid rather than
-     a path. (Also ensure that its parent has some sort of mapping.)
-  """
-  entry = out_map[eid]
-  loc = entry[side]
-  if loc:
-    parent, name = loc
-    if type(parent) is int:
-      print("# converting e%d: %s already has a parent-eid" % (eid, str(loc)))
-      return
-    parent_eid = find_or_add(out_map, side, parent)
-    new_loc = (parent_eid, name)
-    print("# converting e%d: %s -> %s" % (eid, str(loc), str(new_loc)))
-    map_set(out_map, side, eid, new_loc)
-
-# Change parent paths to parent eids (in place). For each location in OUT_MAP,
-# convert its (parent-path, name) to (parent-eid, name), adding new elements as
-# we go for any previously unlisted parent paths.
+def write_parent_eid(mapping, side, eid):
+  """Write a (parent_eid, name) mapping corresponding to the existing
+     (parent-path, name) mapping for SIDE:EID.
+
+     For each of its parent paths in SIDE that lacks an EID, up to a path
+     that has an EID, allocate an EID and write a (parent-eid, name) mapping
+     in BOTH sides.
+  """
+  path_loc = mapping.path_locs_for_side(side)[eid]
+  parent_path, name = path_loc
+  new_loc = add_eid_mapping_and_make_parents(mapping, side, eid, parent_path, name)
+  print("# converting e%d: %s -> %s" % (eid, str(path_loc), str(new_loc)))
+
+# Convert parent-path mappings to parent-EID mappings.
+# Add new elements as we go for any previously unlisted parent paths.
 for side in [0, 1]:
-  for eid, entry in out_map.items():
-    write_parent_eid(out_map, side, eid)
+  for eid in converter.path_locs_for_side(side):
+    # We don't write any of these entries twice, so it shouldn't already be
+    # here:
+    assert eid not in converter.peid_locs_for_side(side)
+    write_parent_eid(converter, side, eid)
 
 print("Output, as (initial, final) mappings by (parent-eid, name):")
-for e in out_map.items():
-  print("  " + str(e))
-
-def map_get_path(eid_map, side, eid):
-  if eid == 0:
-    return ''
-  element = eid_map[eid][side]
-  if element is None:
-    return None
-  parent_eid, name = element
-  parent_path = map_get_path(eid_map, side, parent_eid)
-  if parent_path is None:
-    return None
-  return posixpath.join(parent_path, name)
+for eid, locs in converter.peid_loc_pairs().items():
+  print("  " + str(eid) + ": " + str(locs))
 
 print("Output, as (initial, final) mappings by paths:")
-for eid, entry in out_map.items():
-  relpath0 = map_get_path(out_map, 0, eid)
-  relpath1 = map_get_path(out_map, 1, eid)
+for eid in converter.peid_loc_pairs():
+  relpath0 = converter.peid_locs_for_side(0).relpath_from_eid(eid)
+  relpath1 = converter.peid_locs_for_side(1).relpath_from_eid(eid)
   print "%3d %-12s %-12s" % (eid, relpath0, relpath1)