You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@subversion.apache.org by Erik Huelsmann <e....@gmx.net> on 2003/12/20 19:09:07 UTC

[PATCH] Resolve issue #1255: Non-utf8 encoded path names in cvs2svn dumpfile

The patch below fixes issue 1255. Karl asked me to extend the test suite for
cvs2svn. I have added a shell script to the issue, but I'm having some
difficulty translating it into portable, cross-platform code within the test
suite. Maybe it's not possible.

Anyway: I'd like to commit this to trunk one of these days.

bye,

Erik.

Log:
[[[
Fix issue #1255: cvs2svn.py generates non-utf8 paths on repositories with
non-ascii characters in filenames.

* subversion/tools/cvs2svn/cvs2svn.py:
  (Dumper): Use new utf8_path member for encoding path information written
  to the dump file.
  (Dumper.utf8_path): New. Encode path name to utf8 or stop cvs2svn.py
  on error.

]]]

Index: tools/cvs2svn/cvs2svn.py
===================================================================
--- tools/cvs2svn/cvs2svn.py	(revision 8044)
+++ tools/cvs2svn/cvs2svn.py	(working copy)
@@ -862,6 +862,7 @@
     self.target = ctx.target
     self.dump_only = ctx.dump_only
     self.dumpfile = None
+    self.path_encoding = ctx.encoding
     
     # If all we're doing here is dumping, we can go ahead and
     # initialize our single dumpfile.  Else, if we're suppose to
@@ -978,8 +979,22 @@
                         "\n"
                         "PROPS-END\n"
                         "\n"
-                        "\n" % path)
+                        "\n" % self.utf8_path(path))
 
+  def utf8_path(self, path):
+    """Return UTF-8 encoded 'path' based on ctx.path_encoding."""
+    try:
+      ### Log messages can be converted with 'replace' strategy.
+      ### We can't afford that here.
+      unicode_path = unicode(path, self.path_encoding, 'strict')
+      return unicode_path.encode('utf-8')
+    
+    except UnicodeError:
+      print "Unable to convert a path '%s' to internal encoding." % path
+      print "Try rerunning with (for example) '--encoding=latin1'"
+      sys.exit(1)
+
+
   def probe_path(self, path):
     """Return true if PATH exists in the youngest tree of the svn
     repository, else return None.  PATH does not start with '/'."""
@@ -1011,12 +1026,14 @@
                           'Node-copyfrom-rev: %d\n'
                           'Node-copyfrom-path: /%s\n'
                           '\n'
-                          % (svn_dst_path, change.copyfrom_rev, svn_src_path))
+                          % (self.utf8_path(svn_dst_path),
+                             change.copyfrom_rev,
+                             self.utf8_path(svn_src_path)))
 
       for ent in change.deleted_entries:
         self.dumpfile.write('Node-path: %s\n'
                             'Node-action: delete\n'
-                            '\n' % (svn_dst_path + '/' + ent))
+                            '\n' % (self.utf8_path(svn_dst_path + '/' + ent)))
 
   def prune_entries(self, path, expected):
     """Delete any entries in PATH that are not in list EXPECTED.
@@ -1030,7 +1047,7 @@
     for ent in change.deleted_entries:
       self.dumpfile.write('Node-path: %s\n'
                           'Node-action: delete\n'
-                          '\n' % (path + '/' + ent))
+                          '\n' % (self.utf8_path(path + '/' + ent)))
 
   def add_or_change_path(self, cvs_path, svn_path, cvs_rev, rcs_file,
                          tags, branches):
@@ -1079,7 +1096,7 @@
                         'Node-action: %s\n'
                         'Prop-content-length: %d\n'
                         'Text-content-length: '
-                        % (svn_path, action, props_len))
+                        % (self.utf8_path(svn_path), action, props_len))
 
     pos = self.dumpfile.tell()
 
@@ -1144,11 +1161,10 @@
     deleted_path, closed_tags, closed_branches \
                   = self.repos_mirror.delete_path(svn_path, tags,
                                                   branches, prune)
-    if deleted_path:
-      print '    (deleted %s)' % deleted_path
-      self.dumpfile.write('Node-path: %s\n'
-                          'Node-action: delete\n'
-                          '\n' % deleted_path)
+    
+    self.dumpfile.write('Node-path: %s\n'
+                        'Node-action: delete\n'
+                        '\n' % self.utf8_path(deleted_path))
     return deleted_path, closed_tags, closed_branches
 
   def close(self):
@@ -1844,7 +1860,7 @@
       print "  date:   '%s'" % date
       for rcs_file, cvs_rev, br, tags, branches in self.changes:
         print "    rev %s of '%s'" % (cvs_rev, rcs_file)
-      print 'Try rerunning with (for example) \"--encoding=latin1\".'
+      print "Try rerunning with (for example) '--encoding=latin1'."
       sys.exit(1)
 
     # Tells whether we actually wrote anything to the dumpfile.

-- 
+++ GMX - die erste Adresse für Mail, Message, More +++
Neu: Preissenkung für MMS und FreeMMS! http://www.gmx.net



---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@subversion.tigris.org
For additional commands, e-mail: dev-help@subversion.tigris.org