You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@subversion.apache.org by rh...@apache.org on 2010/04/20 10:47:23 UTC

svn commit: r935829 - in /subversion/trunk/subversion: include/svn_dirent_uri.h libsvn_subr/dirent_uri.c libsvn_subr/dirent_uri.h libsvn_subr/path.c libsvn_wc/old-and-busted.c tests/cmdline/svntest/main.py tests/libsvn_subr/dirent_uri-test.c

Author: rhuijben
Date: Tue Apr 20 08:47:23 2010
New Revision: 935829

URL: http://svn.apache.org/viewvc?rev=935829&view=rev
Log:
In a response to issue #3601 and as followup of the dirent/uri work
make svn_uri_canonicalize() normalize the escaping of characters in
uris that start with a schema (read: urls). This makes the uris a bit
'more canonical'.

I think some of this code might use a bit more optimization, but this at
least switches the functions to the intended behavior. The RFCs and our
current code use upper case characters when encoding characters so I
decided to make that the canonical format. (The mailinglist thread on
http://svn.haxx.se/dev/archive-2010-02/0564.shtml suggest converting to
lower case).

* subversion/include/svn_dirent_uri.h
  (svn_uri_canonicalize): Update documentation.

* subversion/libsvn_subr/dirent_uri.c
  (includes): Add apr_lib.h and dirent_uri.h.
  (canonicalize): Add a normalization of characters step.
  (svn_uri_is_canonical): Verify character normalization

* subversion/libsvn_subr/dirent_uri.h
  New file.
  (svn_uri__char_validity):
    Make uri_char_validity from path.c usable in other files in
    the same library.

* subversion/libsvn_subr/path.c
  (uri_char_validity): Rename to ...
  (svn_uri__char_validity): ... and remove static to make it usable
     from dirent_uri.c
  (svn_path_is_uri_safe, svn_path_uri_encode,
   svn_path_url_add_component2): Update users of uri_char_validity.

* subversion/libsvn_wc/old-and-busted.c
  (read_url): Always recanonicalize the uri; not just pre 1.6 versions.

* subversion/tests/cmdline/svntest/main.py
  (pathname2url): Make the url used for file:/// tests on Windows canonical
    following our new rules. (It used '%3A' instead of ':')

* subversion/tests/libsvn_subr/dirent_uri-test.c
  (test_uri_canonicalize,
   test_uri_is_canonical): Extend test with some canonicalization tests.

Added:
    subversion/trunk/subversion/libsvn_subr/dirent_uri.h   (with props)
Modified:
    subversion/trunk/subversion/include/svn_dirent_uri.h
    subversion/trunk/subversion/libsvn_subr/dirent_uri.c
    subversion/trunk/subversion/libsvn_subr/path.c
    subversion/trunk/subversion/libsvn_wc/old-and-busted.c
    subversion/trunk/subversion/tests/cmdline/svntest/main.py
    subversion/trunk/subversion/tests/libsvn_subr/dirent_uri-test.c

Modified: subversion/trunk/subversion/include/svn_dirent_uri.h
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/include/svn_dirent_uri.h?rev=935829&r1=935828&r2=935829&view=diff
==============================================================================
--- subversion/trunk/subversion/include/svn_dirent_uri.h (original)
+++ subversion/trunk/subversion/include/svn_dirent_uri.h Tue Apr 20 08:47:23 2010
@@ -465,6 +465,11 @@ svn_relpath_canonicalize(const char *uri
  * separator characters, and possibly other semantically inoperative
  * transformations.
  *
+ * If @a uri starts with a schema, this function also normalizes the
+ * escaping of the path component by unescaping characters that don't
+ * need escaping and escaping characters that do need escaping but
+ * weren't.
+ *
  * This functions supports URLs.
  *
  * The returned uri may be statically allocated or allocated from @a pool.

Modified: subversion/trunk/subversion/libsvn_subr/dirent_uri.c
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_subr/dirent_uri.c?rev=935829&r1=935828&r2=935829&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_subr/dirent_uri.c (original)
+++ subversion/trunk/subversion/libsvn_subr/dirent_uri.c Tue Apr 20 08:47:23 2010
@@ -28,12 +28,15 @@
 #include <ctype.h>
 
 #include <apr_uri.h>
+#include <apr_lib.h>
 
 #include "svn_private_config.h"
 #include "svn_string.h"
 #include "svn_dirent_uri.h"
 #include "svn_path.h"
 
+#include "dirent_uri.h"
+
 /* The canonical empty path.  Can this be changed?  Well, change the empty
    test below and the path library will work, not so sure about the fs/wc
    libraries. */
@@ -54,7 +57,7 @@ typedef enum {
   type_relpath
 } path_type_t;
 
-
+ 
 /**** Internal implementation functions *****/
 
 /* Return an internal-style new path based on PATH, allocated in POOL.
@@ -344,6 +347,7 @@ canonicalize(path_type_t type, const cha
   apr_size_t schemelen = 0;
   apr_size_t canon_segments = 0;
   svn_boolean_t url = FALSE;
+  char *schema_data = NULL;
 
   /* "" is already canonical, so just return it; note that later code
      depends on path not being zero-length.  */
@@ -408,6 +412,7 @@ canonicalize(path_type_t type, const cha
             {
               src++;
               dst++;
+              schema_data = dst;
             }
 
           canon_segments = 1;
@@ -527,6 +532,97 @@ canonicalize(path_type_t type, const cha
     }
 #endif /* WIN32 or Cygwin */
 
+  /* Check the normalization of characters in a uri */
+  if (schema_data)
+    {
+      int need_extra = 0;
+      src = schema_data;
+
+      while (*src)
+        {
+          switch (*src)
+            {
+              case '/':
+                break;
+              case '%':
+                if (!apr_isxdigit(*(src+1)) || !apr_isxdigit(*(src+2)))
+                  need_extra += 2;
+                else
+                  src += 2;
+                break;
+              default:
+                if (!svn_uri__char_validity[(unsigned char)*src])
+                  need_extra += 2;
+                break;
+            }
+          src++;
+        }
+
+      if (need_extra > 0)
+        {
+          apr_size_t pre_schema_size = (apr_uintptr_t)schema_data - (apr_uintptr_t)canon;
+
+          dst = apr_palloc(pool, (apr_uintptr_t)src - (apr_uintptr_t)canon + need_extra + 1);
+          memcpy(dst, canon, pre_schema_size);
+          canon = dst;
+
+          dst += pre_schema_size;
+        }
+      else
+        dst = schema_data;
+
+      src = schema_data;
+
+      while (*src)
+        {
+          switch (*src)
+            {
+              case '/':
+                *(dst++) = '/';
+                break;
+              case '%':
+                if (!apr_isxdigit(*(src+1)) || !apr_isxdigit(*(src+2)))
+                  {
+                    *(dst++) = '%';
+                    *(dst++) = '2';
+                    *(dst++) = '5';
+                  }
+                else
+                  {
+                    char digitz[3];
+                    int val;
+
+                    digitz[0] = *(++src);
+                    digitz[1] = *(++src);
+                    digitz[2] = 0;
+
+                    val = (int)strtol(digitz, NULL, 16);
+
+                    if (svn_uri__char_validity[(unsigned char)val])
+                      *(dst++) = (char)val;
+                    else
+                      {
+                        *(dst++) = '%';
+                        *(dst++) = canonicalize_to_upper(digitz[0]);
+                        *(dst++) = canonicalize_to_upper(digitz[1]);
+                      }
+                  }
+                break;
+              default:
+                if (!svn_uri__char_validity[(unsigned char)*src])
+                  {
+                    apr_snprintf(dst, 4, "%%%02X", (unsigned char)*src);
+                    dst += 3;
+                  }
+                else
+                  *(dst++) = *src;
+                break;
+            }
+          src++;
+        }
+      *dst = '\0';
+    }
+
   return canon;
 }
 
@@ -1687,6 +1783,7 @@ svn_boolean_t
 svn_uri_is_canonical(const char *uri, apr_pool_t *pool)
 {
   const char *ptr = uri, *seg = uri;
+  const char *schema_data = NULL;
 
   /* URI is canonical if it has:
    *  - no '.' segments
@@ -1737,6 +1834,8 @@ svn_uri_is_canonical(const char *uri, ap
                 return FALSE;
               ptr++;
             }
+
+          schema_data = ptr;
         }
       else
         {
@@ -1747,7 +1846,7 @@ svn_uri_is_canonical(const char *uri, ap
     }
 
 #if defined(WIN32) || defined(__CYGWIN__)
-  if (*ptr == '/')
+  if (schema_data && *ptr == '/')
     {
       /* If this is a file url, ptr now points to the third '/' in
          file:///C:/path. Check that if we have such a URL the drive
@@ -1780,10 +1879,45 @@ svn_uri_is_canonical(const char *uri, ap
         ptr++;
       seg = ptr;
 
+
       while (*ptr && (*ptr != '/'))
         ptr++;
     }
 
+  if (schema_data)
+    {
+      ptr = schema_data;
+
+      while (*ptr)
+        {
+          if (*ptr == '%')
+            {
+              char digitz[3];
+              int val;
+
+              /* Can't use apr_isxdigit() because lower case letters are
+                 not in our canonical format */
+              if (((*(ptr+1) < '0' || (*ptr+1) > '9')) 
+                  && (*(ptr+1) < 'A' || (*ptr+1) > 'F'))
+                return FALSE;
+              else if (((*(ptr+2) < '0' || (*ptr+2) > '9')) 
+                  && (*(ptr+2) < 'A' || (*ptr+2) > 'F'))
+                return FALSE;
+
+              digitz[0] = *(++ptr);
+              digitz[1] = *(++ptr);
+              digitz[2] = '\0';
+              val = (int)strtol(digitz, NULL, 16);
+
+              if (svn_uri__char_validity[val])
+                return FALSE; /* Should not have been escaped */
+            }
+          else if (*ptr != '/' && !svn_uri__char_validity[(unsigned char)*ptr])
+            return FALSE; /* Character should have been escaped */
+          ptr++;
+        }
+    }
+
   return TRUE;
 }
 

Added: subversion/trunk/subversion/libsvn_subr/dirent_uri.h
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_subr/dirent_uri.h?rev=935829&view=auto
==============================================================================
--- subversion/trunk/subversion/libsvn_subr/dirent_uri.h (added)
+++ subversion/trunk/subversion/libsvn_subr/dirent_uri.h Tue Apr 20 08:47:23 2010
@@ -0,0 +1,40 @@
+/*
+ * dirent_uri.h :  private header for the dirent, uri, relpath implementation.
+ *
+ * ====================================================================
+ *    Licensed to the Apache Software Foundation (ASF) under one
+ *    or more contributor license agreements.  See the NOTICE file
+ *    distributed with this work for additional information
+ *    regarding copyright ownership.  The ASF licenses this file
+ *    to you under the Apache License, Version 2.0 (the
+ *    "License"); you may not use this file except in compliance
+ *    with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *    Unless required by applicable law or agreed to in writing,
+ *    software distributed under the License is distributed on an
+ *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ *    KIND, either express or implied.  See the License for the
+ *    specific language governing permissions and limitations
+ *    under the License.
+ * ====================================================================
+ */
+
+
+#ifndef SVN_LIBSVN_SUBR_DIRENT_URI_H
+#define SVN_LIBSVN_SUBR_DIRENT_URI_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/* Character map used to check which characters need escaping when
+   used in a uri. See path.c for more details */
+extern const char svn_uri__char_validity[256];
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* SVN_LIBSVN_SUBR_DIRENT_URI_H */

Propchange: subversion/trunk/subversion/libsvn_subr/dirent_uri.h
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: subversion/trunk/subversion/libsvn_subr/path.c
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_subr/path.c?rev=935829&r1=935828&r2=935829&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_subr/path.c (original)
+++ subversion/trunk/subversion/libsvn_subr/path.c Tue Apr 20 08:47:23 2010
@@ -38,6 +38,8 @@
 #include "svn_io.h"                     /* for svn_io_stat() */
 #include "svn_ctype.h"
 
+#include "dirent_uri.h"
+
 
 /* The canonical empty path.  Can this be changed?  Well, change the empty
    test below and the path library will work, not so sure about the fs/wc
@@ -673,7 +675,7 @@ svn_path_is_url(const char *path)
 
       alphanum | mark | ":" | "@" | "&" | "=" | "+" | "$" | ","
 */
-static const char uri_char_validity[256] = {
+const char svn_uri__char_validity[256] = {
   0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,
   0, 1, 0, 0, 1, 0, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
@@ -732,7 +734,7 @@ svn_path_is_uri_safe(const char *path)
             }
           return FALSE;
         }
-      else if (! uri_char_validity[((unsigned char)path[i])])
+      else if (! svn_uri__char_validity[((unsigned char)path[i])])
         {
           return FALSE;
         }
@@ -802,7 +804,7 @@ svn_path_uri_encode(const char *path, ap
 {
   const char *ret;
 
-  ret = uri_escape(path, uri_char_validity, pool);
+  ret = uri_escape(path, svn_uri__char_validity, pool);
 
   /* Our interface guarantees a copy. */
   if (ret == path)
@@ -921,7 +923,7 @@ svn_path_url_add_component2(const char *
                             apr_pool_t *pool)
 {
   /* = svn_path_uri_encode() but without always copying */
-  component = uri_escape(component, uri_char_validity, pool);
+  component = uri_escape(component, svn_uri__char_validity, pool);
 
   return svn_path_join(url, component, pool);
 }

Modified: subversion/trunk/subversion/libsvn_wc/old-and-busted.c
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_wc/old-and-busted.c?rev=935829&r1=935828&r2=935829&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_wc/old-and-busted.c (original)
+++ subversion/trunk/subversion/libsvn_wc/old-and-busted.c Tue Apr 20 08:47:23 2010
@@ -189,16 +189,11 @@ read_url(const char **result,
 {
   SVN_ERR(read_str(result, buf, end, pool));
 
-  /* If the wc format is <10 canonicalize the url, */
+  /* Always canonicalize the url, as we have stricter canonicalization rules
+     in 1.7+ then before */
   if (*result && **result)
-    {
-      if (wc_format < SVN_WC__CHANGED_CANONICAL_URLS)
-        *result = svn_uri_canonicalize(*result, pool);
-      else if (! svn_uri_is_canonical(*result, pool))
-        return svn_error_createf(SVN_ERR_WC_CORRUPT, NULL,
-                                 _("Entry contains non-canonical path '%s'"),
-                                 *result);
-    }
+    *result = svn_uri_canonicalize(*result, pool);
+
   return SVN_NO_ERROR;
 }
 

Modified: subversion/trunk/subversion/tests/cmdline/svntest/main.py
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/tests/cmdline/svntest/main.py?rev=935829&r1=935828&r2=935829&view=diff
==============================================================================
--- subversion/trunk/subversion/tests/cmdline/svntest/main.py (original)
+++ subversion/trunk/subversion/tests/cmdline/svntest/main.py Tue Apr 20 08:47:23 2010
@@ -137,7 +137,10 @@ def pathname2url(path):
   """Convert the pathname PATH from the local syntax for a path to the form
   used in the path component of a URL. This does not produce a complete URL.
   The return value will already be quoted using the quote() function."""
-  return urllib_parse_quote(path.replace('\\', '/'))
+
+  # Don't leave ':' in file://C%3A/ escaped as our canonicalization
+  # rules will replace this with a ':' on input.
+  return urllib_parse_quote(path.replace('\\', '/')).replace('%3A', ':')
 
 # This function mimics the Python 2.3 urllib function of the same name.
 def url2pathname(path):

Modified: subversion/trunk/subversion/tests/libsvn_subr/dirent_uri-test.c
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/tests/libsvn_subr/dirent_uri-test.c?rev=935829&r1=935828&r2=935829&view=diff
==============================================================================
--- subversion/trunk/subversion/tests/libsvn_subr/dirent_uri-test.c (original)
+++ subversion/trunk/subversion/tests/libsvn_subr/dirent_uri-test.c Tue Apr 20 08:47:23 2010
@@ -958,6 +958,11 @@ test_uri_canonicalize(apr_pool_t *pool)
     { "http://server////",     "http://server" },
     { "http://server/file//",  "http://server/file" },
     { "http://server//.//f//", "http://server/f" },
+    { "s://d/%KK",             "s://d/%25KK" }, /* Make bad escapings safe */
+    { "s://d/c%3A",            "s://d/c:" },
+    { "s://d/c#",              "s://d/c%23" }, /* Escape schema separator */
+    { "s://d/c($) .+?",        "s://d/c($)%20.+%3F" }, /* Test special chars */
+    { "file:///C%3a/temp",     "file:///C:/temp" },
 #if defined(WIN32) || defined(__CYGWIN__)
     { "file:///c:/temp/repos", "file:///C:/temp/repos" },
     { "file:///c:/temp/REPOS", "file:///C:/temp/REPOS" },
@@ -1254,6 +1259,9 @@ test_uri_is_canonical(apr_pool_t *pool)
     { "//server/share",         FALSE }, /* Only valid as dirent */
     { "//server",               FALSE },
     { "//",                     FALSE },
+    { "file:///folder/c#",      FALSE }, /* # needs escaping */
+    { "file:///fld/with space", FALSE }, /* # needs escaping */
+    { "file:///fld/c%23",       TRUE }, /* Properly escaped C# */
 #if defined(WIN32) || defined(__CYGWIN__)
     { "file:///c:/temp/repos", FALSE },
     { "file:///c:/temp/REPOS", FALSE },