You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@subversion.apache.org by rh...@apache.org on 2010/04/20 10:47:23 UTC
svn commit: r935829 - in /subversion/trunk/subversion:
include/svn_dirent_uri.h libsvn_subr/dirent_uri.c libsvn_subr/dirent_uri.h
libsvn_subr/path.c libsvn_wc/old-and-busted.c tests/cmdline/svntest/main.py
tests/libsvn_subr/dirent_uri-test.c
Author: rhuijben
Date: Tue Apr 20 08:47:23 2010
New Revision: 935829
URL: http://svn.apache.org/viewvc?rev=935829&view=rev
Log:
In a response to issue #3601 and as followup of the dirent/uri work
make svn_uri_canonicalize() normalize the escaping of characters in
uris that start with a schema (read: urls). This makes the uris a bit
'more canonical'.
I think some of this code might use a bit more optimization, but this at
least switches the functions to the intended behavior. The RFCs and our
current code use upper case characters when encoding characters so I
decided to make that the canonical format. (The mailinglist thread on
http://svn.haxx.se/dev/archive-2010-02/0564.shtml suggest converting to
lower case).
* subversion/include/svn_dirent_uri.h
(svn_uri_canonicalize): Update documentation.
* subversion/libsvn_subr/dirent_uri.c
(includes): Add apr_lib.h and dirent_uri.h.
(canonicalize): Add a normalization of characters step.
(svn_uri_is_canonical): Verify character normalization
* subversion/libsvn_subr/dirent_uri.h
New file.
(svn_uri__char_validity):
Make uri_char_validity from path.c usable in other files in
the same library.
* subversion/libsvn_subr/path.c
(uri_char_validity): Rename to ...
(svn_uri__char_validity): ... and remove static to make it usable
from dirent_uri.c
(svn_path_is_uri_safe, svn_path_uri_encode,
svn_path_url_add_component2): Update users of uri_char_validity.
* subversion/libsvn_wc/old-and-busted.c
(read_url): Always recanonicalize the uri; not just pre 1.6 versions.
* subversion/tests/cmdline/svntest/main.py
(pathname2url): Make the url used for file:/// tests on Windows canonical
following our new rules. (It used '%3A' instead of ':')
* subversion/tests/libsvn_subr/dirent_uri-test.c
(test_uri_canonicalize,
test_uri_is_canonical): Extend test with some canonicalization tests.
Added:
subversion/trunk/subversion/libsvn_subr/dirent_uri.h (with props)
Modified:
subversion/trunk/subversion/include/svn_dirent_uri.h
subversion/trunk/subversion/libsvn_subr/dirent_uri.c
subversion/trunk/subversion/libsvn_subr/path.c
subversion/trunk/subversion/libsvn_wc/old-and-busted.c
subversion/trunk/subversion/tests/cmdline/svntest/main.py
subversion/trunk/subversion/tests/libsvn_subr/dirent_uri-test.c
Modified: subversion/trunk/subversion/include/svn_dirent_uri.h
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/include/svn_dirent_uri.h?rev=935829&r1=935828&r2=935829&view=diff
==============================================================================
--- subversion/trunk/subversion/include/svn_dirent_uri.h (original)
+++ subversion/trunk/subversion/include/svn_dirent_uri.h Tue Apr 20 08:47:23 2010
@@ -465,6 +465,11 @@ svn_relpath_canonicalize(const char *uri
* separator characters, and possibly other semantically inoperative
* transformations.
*
+ * If @a uri starts with a schema, this function also normalizes the
+ * escaping of the path component by unescaping characters that don't
+ * need escaping and escaping characters that do need escaping but
+ * weren't.
+ *
* This functions supports URLs.
*
* The returned uri may be statically allocated or allocated from @a pool.
Modified: subversion/trunk/subversion/libsvn_subr/dirent_uri.c
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_subr/dirent_uri.c?rev=935829&r1=935828&r2=935829&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_subr/dirent_uri.c (original)
+++ subversion/trunk/subversion/libsvn_subr/dirent_uri.c Tue Apr 20 08:47:23 2010
@@ -28,12 +28,15 @@
#include <ctype.h>
#include <apr_uri.h>
+#include <apr_lib.h>
#include "svn_private_config.h"
#include "svn_string.h"
#include "svn_dirent_uri.h"
#include "svn_path.h"
+#include "dirent_uri.h"
+
/* The canonical empty path. Can this be changed? Well, change the empty
test below and the path library will work, not so sure about the fs/wc
libraries. */
@@ -54,7 +57,7 @@ typedef enum {
type_relpath
} path_type_t;
-
+
/**** Internal implementation functions *****/
/* Return an internal-style new path based on PATH, allocated in POOL.
@@ -344,6 +347,7 @@ canonicalize(path_type_t type, const cha
apr_size_t schemelen = 0;
apr_size_t canon_segments = 0;
svn_boolean_t url = FALSE;
+ char *schema_data = NULL;
/* "" is already canonical, so just return it; note that later code
depends on path not being zero-length. */
@@ -408,6 +412,7 @@ canonicalize(path_type_t type, const cha
{
src++;
dst++;
+ schema_data = dst;
}
canon_segments = 1;
@@ -527,6 +532,97 @@ canonicalize(path_type_t type, const cha
}
#endif /* WIN32 or Cygwin */
+ /* Check the normalization of characters in a uri */
+ if (schema_data)
+ {
+ int need_extra = 0;
+ src = schema_data;
+
+ while (*src)
+ {
+ switch (*src)
+ {
+ case '/':
+ break;
+ case '%':
+ if (!apr_isxdigit(*(src+1)) || !apr_isxdigit(*(src+2)))
+ need_extra += 2;
+ else
+ src += 2;
+ break;
+ default:
+ if (!svn_uri__char_validity[(unsigned char)*src])
+ need_extra += 2;
+ break;
+ }
+ src++;
+ }
+
+ if (need_extra > 0)
+ {
+ apr_size_t pre_schema_size = (apr_uintptr_t)schema_data - (apr_uintptr_t)canon;
+
+ dst = apr_palloc(pool, (apr_uintptr_t)src - (apr_uintptr_t)canon + need_extra + 1);
+ memcpy(dst, canon, pre_schema_size);
+ canon = dst;
+
+ dst += pre_schema_size;
+ }
+ else
+ dst = schema_data;
+
+ src = schema_data;
+
+ while (*src)
+ {
+ switch (*src)
+ {
+ case '/':
+ *(dst++) = '/';
+ break;
+ case '%':
+ if (!apr_isxdigit(*(src+1)) || !apr_isxdigit(*(src+2)))
+ {
+ *(dst++) = '%';
+ *(dst++) = '2';
+ *(dst++) = '5';
+ }
+ else
+ {
+ char digitz[3];
+ int val;
+
+ digitz[0] = *(++src);
+ digitz[1] = *(++src);
+ digitz[2] = 0;
+
+ val = (int)strtol(digitz, NULL, 16);
+
+ if (svn_uri__char_validity[(unsigned char)val])
+ *(dst++) = (char)val;
+ else
+ {
+ *(dst++) = '%';
+ *(dst++) = canonicalize_to_upper(digitz[0]);
+ *(dst++) = canonicalize_to_upper(digitz[1]);
+ }
+ }
+ break;
+ default:
+ if (!svn_uri__char_validity[(unsigned char)*src])
+ {
+ apr_snprintf(dst, 4, "%%%02X", (unsigned char)*src);
+ dst += 3;
+ }
+ else
+ *(dst++) = *src;
+ break;
+ }
+ src++;
+ }
+ *dst = '\0';
+ }
+
return canon;
}
@@ -1687,6 +1783,7 @@ svn_boolean_t
svn_uri_is_canonical(const char *uri, apr_pool_t *pool)
{
const char *ptr = uri, *seg = uri;
+ const char *schema_data = NULL;
/* URI is canonical if it has:
* - no '.' segments
@@ -1737,6 +1834,8 @@ svn_uri_is_canonical(const char *uri, ap
return FALSE;
ptr++;
}
+
+ schema_data = ptr;
}
else
{
@@ -1747,7 +1846,7 @@ svn_uri_is_canonical(const char *uri, ap
}
#if defined(WIN32) || defined(__CYGWIN__)
- if (*ptr == '/')
+ if (schema_data && *ptr == '/')
{
/* If this is a file url, ptr now points to the third '/' in
file:///C:/path. Check that if we have such a URL the drive
@@ -1780,10 +1879,45 @@ svn_uri_is_canonical(const char *uri, ap
ptr++;
seg = ptr;
+
while (*ptr && (*ptr != '/'))
ptr++;
}
+ if (schema_data)
+ {
+ ptr = schema_data;
+
+ while (*ptr)
+ {
+ if (*ptr == '%')
+ {
+ char digitz[3];
+ int val;
+
+ /* Can't use apr_isxdigit() because lower case letters are
+ not in our canonical format */
+ if (((*(ptr+1) < '0' || (*ptr+1) > '9'))
+ && (*(ptr+1) < 'A' || (*ptr+1) > 'F'))
+ return FALSE;
+ else if (((*(ptr+2) < '0' || (*ptr+2) > '9'))
+ && (*(ptr+2) < 'A' || (*ptr+2) > 'F'))
+ return FALSE;
+
+ digitz[0] = *(++ptr);
+ digitz[1] = *(++ptr);
+ digitz[2] = '\0';
+ val = (int)strtol(digitz, NULL, 16);
+
+ if (svn_uri__char_validity[val])
+ return FALSE; /* Should not have been escaped */
+ }
+ else if (*ptr != '/' && !svn_uri__char_validity[(unsigned char)*ptr])
+ return FALSE; /* Character should have been escaped */
+ ptr++;
+ }
+ }
+
return TRUE;
}
Added: subversion/trunk/subversion/libsvn_subr/dirent_uri.h
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_subr/dirent_uri.h?rev=935829&view=auto
==============================================================================
--- subversion/trunk/subversion/libsvn_subr/dirent_uri.h (added)
+++ subversion/trunk/subversion/libsvn_subr/dirent_uri.h Tue Apr 20 08:47:23 2010
@@ -0,0 +1,40 @@
+/*
+ * dirent_uri.h : private header for the dirent, uri, relpath implementation.
+ *
+ * ====================================================================
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ * ====================================================================
+ */
+
+
+#ifndef SVN_LIBSVN_SUBR_DIRENT_URI_H
+#define SVN_LIBSVN_SUBR_DIRENT_URI_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/* Character map used to check which characters need escaping when
+ used in a uri. See path.c for more details */
+extern const char svn_uri__char_validity[256];
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* SVN_LIBSVN_SUBR_DIRENT_URI_H */
Propchange: subversion/trunk/subversion/libsvn_subr/dirent_uri.h
------------------------------------------------------------------------------
svn:eol-style = native
Modified: subversion/trunk/subversion/libsvn_subr/path.c
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_subr/path.c?rev=935829&r1=935828&r2=935829&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_subr/path.c (original)
+++ subversion/trunk/subversion/libsvn_subr/path.c Tue Apr 20 08:47:23 2010
@@ -38,6 +38,8 @@
#include "svn_io.h" /* for svn_io_stat() */
#include "svn_ctype.h"
+#include "dirent_uri.h"
+
/* The canonical empty path. Can this be changed? Well, change the empty
test below and the path library will work, not so sure about the fs/wc
@@ -673,7 +675,7 @@ svn_path_is_url(const char *path)
alphanum | mark | ":" | "@" | "&" | "=" | "+" | "$" | ","
*/
-static const char uri_char_validity[256] = {
+const char svn_uri__char_validity[256] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@@ -732,7 +734,7 @@ svn_path_is_uri_safe(const char *path)
}
return FALSE;
}
- else if (! uri_char_validity[((unsigned char)path[i])])
+ else if (! svn_uri__char_validity[((unsigned char)path[i])])
{
return FALSE;
}
@@ -802,7 +804,7 @@ svn_path_uri_encode(const char *path, ap
{
const char *ret;
- ret = uri_escape(path, uri_char_validity, pool);
+ ret = uri_escape(path, svn_uri__char_validity, pool);
/* Our interface guarantees a copy. */
if (ret == path)
@@ -921,7 +923,7 @@ svn_path_url_add_component2(const char *
apr_pool_t *pool)
{
/* = svn_path_uri_encode() but without always copying */
- component = uri_escape(component, uri_char_validity, pool);
+ component = uri_escape(component, svn_uri__char_validity, pool);
return svn_path_join(url, component, pool);
}
Modified: subversion/trunk/subversion/libsvn_wc/old-and-busted.c
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_wc/old-and-busted.c?rev=935829&r1=935828&r2=935829&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_wc/old-and-busted.c (original)
+++ subversion/trunk/subversion/libsvn_wc/old-and-busted.c Tue Apr 20 08:47:23 2010
@@ -189,16 +189,11 @@ read_url(const char **result,
{
SVN_ERR(read_str(result, buf, end, pool));
- /* If the wc format is <10 canonicalize the url, */
+ /* Always canonicalize the url, as we have stricter canonicalization rules
+ in 1.7+ then before */
if (*result && **result)
- {
- if (wc_format < SVN_WC__CHANGED_CANONICAL_URLS)
- *result = svn_uri_canonicalize(*result, pool);
- else if (! svn_uri_is_canonical(*result, pool))
- return svn_error_createf(SVN_ERR_WC_CORRUPT, NULL,
- _("Entry contains non-canonical path '%s'"),
- *result);
- }
+ *result = svn_uri_canonicalize(*result, pool);
+
return SVN_NO_ERROR;
}
Modified: subversion/trunk/subversion/tests/cmdline/svntest/main.py
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/tests/cmdline/svntest/main.py?rev=935829&r1=935828&r2=935829&view=diff
==============================================================================
--- subversion/trunk/subversion/tests/cmdline/svntest/main.py (original)
+++ subversion/trunk/subversion/tests/cmdline/svntest/main.py Tue Apr 20 08:47:23 2010
@@ -137,7 +137,10 @@ def pathname2url(path):
"""Convert the pathname PATH from the local syntax for a path to the form
used in the path component of a URL. This does not produce a complete URL.
The return value will already be quoted using the quote() function."""
- return urllib_parse_quote(path.replace('\\', '/'))
+
+ # Don't leave ':' in file://C%3A/ escaped as our canonicalization
+ # rules will replace this with a ':' on input.
+ return urllib_parse_quote(path.replace('\\', '/')).replace('%3A', ':')
# This function mimics the Python 2.3 urllib function of the same name.
def url2pathname(path):
Modified: subversion/trunk/subversion/tests/libsvn_subr/dirent_uri-test.c
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/tests/libsvn_subr/dirent_uri-test.c?rev=935829&r1=935828&r2=935829&view=diff
==============================================================================
--- subversion/trunk/subversion/tests/libsvn_subr/dirent_uri-test.c (original)
+++ subversion/trunk/subversion/tests/libsvn_subr/dirent_uri-test.c Tue Apr 20 08:47:23 2010
@@ -958,6 +958,11 @@ test_uri_canonicalize(apr_pool_t *pool)
{ "http://server////", "http://server" },
{ "http://server/file//", "http://server/file" },
{ "http://server//.//f//", "http://server/f" },
+ { "s://d/%KK", "s://d/%25KK" }, /* Make bad escapings safe */
+ { "s://d/c%3A", "s://d/c:" },
+ { "s://d/c#", "s://d/c%23" }, /* Escape schema separator */
+ { "s://d/c($) .+?", "s://d/c($)%20.+%3F" }, /* Test special chars */
+ { "file:///C%3a/temp", "file:///C:/temp" },
#if defined(WIN32) || defined(__CYGWIN__)
{ "file:///c:/temp/repos", "file:///C:/temp/repos" },
{ "file:///c:/temp/REPOS", "file:///C:/temp/REPOS" },
@@ -1254,6 +1259,9 @@ test_uri_is_canonical(apr_pool_t *pool)
{ "//server/share", FALSE }, /* Only valid as dirent */
{ "//server", FALSE },
{ "//", FALSE },
+ { "file:///folder/c#", FALSE }, /* # needs escaping */
+ { "file:///fld/with space", FALSE }, /* # needs escaping */
+ { "file:///fld/c%23", TRUE }, /* Properly escaped C# */
#if defined(WIN32) || defined(__CYGWIN__)
{ "file:///c:/temp/repos", FALSE },
{ "file:///c:/temp/REPOS", FALSE },