You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@subversion.apache.org by st...@apache.org on 2013/06/05 14:05:14 UTC
svn commit: r1489828 - /subversion/trunk/subversion/libsvn_subr/dirent_uri.c

Author: stefan2
Date: Wed Jun  5 12:05:14 2013
New Revision: 1489828

URL: http://svn.apache.org/r1489828
Log:
Another go at the relpath_is_canonical check.  As it turns out, it is
much faster to check for "." and "//" elements in separate runs due to
the simpler patterns that we check for.

* subversion/libsvn_subr/dirent_uri.c
  (relpath_is_canonical): rework the scanner part

Modified:
    subversion/trunk/subversion/libsvn_subr/dirent_uri.c

Modified: subversion/trunk/subversion/libsvn_subr/dirent_uri.c
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_subr/dirent_uri.c?rev=1489828&r1=1489827&r2=1489828&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_subr/dirent_uri.c (original)
+++ subversion/trunk/subversion/libsvn_subr/dirent_uri.c Wed Jun  5 12:05:14 2013
@@ -1688,8 +1688,9 @@ svn_dirent_is_canonical(const char *dire
 static svn_boolean_t
 relpath_is_canonical(const char *relpath)
 {
-  const char *ptr = relpath;
+  const char *dot_pos, *ptr = relpath;
   apr_size_t i, len;
+  unsigned pattern = 0;
 
   /* RELPATH is canonical if it has:
    *  - no '.' segments
@@ -1713,16 +1714,23 @@ relpath_is_canonical(const char *relpath
   if (ptr[len-1] == '/' || (ptr[len-1] == '.' && ptr[len-2] == '/'))
     return FALSE;
 
+  /* '.' are rare. So, search for them globally. There will often be no 
+   * more than one hit.  Also note that we already checked for invalid 
+   * starts and endings, i.e. we only need to check for "/./"
+   */
+  for (dot_pos = memchr(ptr, '.', len);
+       dot_pos;
+       dot_pos = strchr(dot_pos+1, '.'))
+    if (dot_pos > ptr && dot_pos[-1] == '/' && dot_pos[1] == '/')
+      return FALSE;
+
   /* Now validate the rest of the path. */
   for (i = 0; i < len - 1; ++i)
-    if (ptr[i] == '/' && ptr[i+1] <= '/') /* '.' and '/' have smaller UTF-8
-                                             codes than most other chars */
-      {
-        if (ptr[i+1] == '/')
-          return FALSE;  /*  //   */
-        if (ptr[i+1] == '.' && ptr[i+2] == '/')
-          return FALSE;  /*  /./  */
-      }
+    {
+      pattern = ((pattern & 0xff) << 8) + (unsigned char)ptr[i];
+      if (pattern == 0x101 * (unsigned char)('/'))
+        return FALSE;
+    }
 
   return TRUE;
 }