You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@subversion.apache.org by rh...@apache.org on 2012/05/23 02:03:07 UTC

svn commit: r1341684 - in /subversion/trunk: build/transform_sql.py subversion/libsvn_wc/wc-queries.sql subversion/tests/libsvn_wc/wc-queries-test.c

Author: rhuijben
Date: Wed May 23 00:03:06 2012
New Revision: 1341684

URL: http://svn.apache.org/viewvc?rev=1341684&view=rev
Log:
Redefine our Sqlite IS_STRICT_DESCENDANT_OF() macro to allow operating on the
"" relpath. This removes the need of duplicating queries for the root and
subtrees, without losing performance on the subtree case.

The full tree would use an index to select on wc_id anyway, so adding
 an extra key doesn't slow it down.

* build/transform_sql.py
  (process_file): Redefine IS_STRICT_DESCENDANT_OF().

* subversion/libsvn_wc/wc-queries.sql
  (STMT_CLEAR_BASE_NODE_RECURSIVE_DAV_CACHE,
   STMT_RECURSIVE_UPDATE_NODE_REPO,
   STMT_INSERT_TARGET_DEPTH_INFINITY,
   STMT_INSERT_TARGET_WITH_CHANGELIST_DEPTH_INFINITY,
   STMT_DELETE_WC_LOCK_ORPHAN_RECURSIVE,
   STMT_SELECT_COMMITTABLE_EXTERNALS_BELOW,
   STMT_SELECT_EXTERNALS_DEFINED,
   STMT_SELECT_EXTERNAL_PROPERTIES,
   STMT_SELECT_MIN_MAX_REVISIONS,
   STMT_HAS_SPARSE_NODES,
   STMT_SUBTREE_HAS_TREE_MODIFICATIONS,
   STMT_SUBTREE_HAS_PROP_MODIFICATIONS,
   STMT_SELECT_BASE_FILES_RECURSIVE):
      Remove now unneeded ?X = '' subexpressions.

* subversion/tests/libsvn_wc/wc-queries-test.c
  (slow_statements):
     Remove statements that started using indexes by this change.

Modified:
    subversion/trunk/build/transform_sql.py
    subversion/trunk/subversion/libsvn_wc/wc-queries.sql
    subversion/trunk/subversion/tests/libsvn_wc/wc-queries-test.c

Modified: subversion/trunk/build/transform_sql.py
URL: http://svn.apache.org/viewvc/subversion/trunk/build/transform_sql.py?rev=1341684&r1=1341683&r2=1341684&view=diff
==============================================================================
--- subversion/trunk/build/transform_sql.py (original)
+++ subversion/trunk/build/transform_sql.py Wed May 23 00:03:06 2012
@@ -110,10 +110,35 @@ class Processor(object):
     for line in input.split('\n'):
       line = line.replace('"', '\\"')
 
+      # IS_STRICT_DESCENDANT_OF()
+
+      # A common operation in the working copy is determining descendants of
+      # a node. To allow Sqlite to use its indexes to provide the answer we
+      # must provide simple less than and greater than operations.
+      #
+      # For relative paths that consist of one or more components like 'subdir'
+      # we can accomplish this by comparing local_relpath with 'subdir/' and
+      # 'subdir0' ('/'+1 = '0')
+      #
+      # For the working copy root this case is less simple and not strictly
+      # valid utf-8/16 (but luckily Sqlite doesn't validate utf-8 nor utf-16).
+      # The binary blob x'FFFF' is higher than any valid utf-8 and utf-16
+      # sequence.
+      #
+      # So for the root we can compare with > '' and < x'FFFF'. (This skips the
+      # root itself and selects all descendants)
+      #
+      ### RH: I implemented this first with a user defined Sqlite function. But
+      ### when I wrote the documentation for it, I found out I could just
+      ### define it this way, without losing the option of just dropping the
+      ### query in a plain sqlite3.
+
       # '/'+1 == '0'
-      line = re.sub(r'IS_STRICT_DESCENDANT_OF[(]([A-Za-z_.]+), ([?][0-9]+)[)]',
-                    r"((\2) != '' AND ((\1) > (\2) || '/') AND ((\1) < (\2) || '0')) ",
-                    line)
+      line = re.sub(
+            r'IS_STRICT_DESCENDANT_OF[(]([A-Za-z_.]+), ([?][0-9]+)[)]',
+            r"(((\1) > (CASE (\2) WHEN '' THEN '' ELSE (\2) || '/' END))" +
+            r" AND ((\1) < CASE (\2) WHEN '' THEN X'FFFF' ELSE (\2) || '0' END))",
+            line)
 
       if line.strip():
         handled = False

Modified: subversion/trunk/subversion/libsvn_wc/wc-queries.sql
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_wc/wc-queries.sql?rev=1341684&r1=1341683&r2=1341684&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_wc/wc-queries.sql (original)
+++ subversion/trunk/subversion/libsvn_wc/wc-queries.sql Wed May 23 00:03:06 2012
@@ -323,16 +323,14 @@ WHERE repos_id = ?1 AND repos_relpath = 
 -- STMT_CLEAR_BASE_NODE_RECURSIVE_DAV_CACHE
 UPDATE nodes SET dav_cache = NULL
 WHERE dav_cache IS NOT NULL AND wc_id = ?1 AND op_depth = 0
-  AND (?2 = ''
-       OR local_relpath = ?2
+  AND (local_relpath = ?2
        OR IS_STRICT_DESCENDANT_OF(local_relpath, ?2))
 
 -- STMT_RECURSIVE_UPDATE_NODE_REPO
 UPDATE nodes SET repos_id = ?4, dav_cache = NULL
 WHERE wc_id = ?1
   AND repos_id = ?3
-  AND (?2 = ''
-       OR local_relpath = ?2
+  AND (local_relpath = ?2
        OR IS_STRICT_DESCENDANT_OF(local_relpath, ?2))
 
 -- STMT_UPDATE_LOCK_REPOS_ID
@@ -501,8 +499,7 @@ INSERT INTO targets_list(wc_id, local_re
 SELECT wc_id, local_relpath, parent_relpath, kind
 FROM nodes_current
 WHERE wc_id = ?1
-       AND (?2 = ''
-            OR local_relpath = ?2 
+       AND (local_relpath = ?2 
             OR IS_STRICT_DESCENDANT_OF(local_relpath, ?2))
 
 -- STMT_INSERT_TARGET_WITH_CHANGELIST
@@ -534,8 +531,7 @@ SELECT N.wc_id, N.local_relpath, N.paren
   FROM actual_node AS A JOIN nodes_current AS N
     ON A.wc_id = N.wc_id AND A.local_relpath = N.local_relpath
  WHERE N.wc_id = ?1
-       AND (?2 = ''
-            OR N.local_relpath = ?2 
+       AND (N.local_relpath = ?2 
             OR IS_STRICT_DESCENDANT_OF(N.local_relpath, ?2))
        AND A.changelist = ?3
 
@@ -833,8 +829,7 @@ AND NOT EXISTS (SELECT 1 FROM nodes
 -- STMT_DELETE_WC_LOCK_ORPHAN_RECURSIVE
 DELETE FROM wc_lock
 WHERE wc_id = ?1
-  AND (?2 = ''
-       OR local_dir_relpath = ?2
+  AND (local_dir_relpath = ?2
        OR IS_STRICT_DESCENDANT_OF(local_dir_relpath, ?2))
   AND NOT EXISTS (SELECT 1 FROM nodes
                    WHERE nodes.wc_id = ?1
@@ -1034,10 +1029,10 @@ WHERE wc_id = ?1
   AND repos_id = (SELECT repos_id FROM nodes
                   WHERE nodes.local_relpath = ?2)
   AND ( ((NOT ?3)
-         AND (?2 = ''
+         AND (
               /* Want only the cildren of e.local_relpath;
                * externals can't have a local_relpath = ''. */
-              OR IS_STRICT_DESCENDANT_OF(local_relpath, ?2)))
+              IS_STRICT_DESCENDANT_OF(local_relpath, ?2)))
         OR
         ((?3)
          AND parent_relpath = ?2) )
@@ -1050,8 +1045,7 @@ WHERE wc_id = ?1
 SELECT local_relpath, def_local_relpath
 FROM externals
 WHERE wc_id = ?1 
-  AND (?2 = ''
-       OR def_local_relpath = ?2
+  AND (def_local_relpath = ?2
        OR IS_STRICT_DESCENDANT_OF(def_local_relpath, ?2))
 
 -- STMT_DELETE_EXTERNAL
@@ -1065,8 +1059,7 @@ SELECT IFNULL((SELECT properties FROM ac
        local_relpath, depth
 FROM nodes n
 WHERE wc_id = ?1
-  AND (?2 = ''
-       OR local_relpath = ?2
+  AND (local_relpath = ?2
        OR IS_STRICT_DESCENDANT_OF(local_relpath, ?2))
   AND kind = 'dir' AND presence='normal'
   AND op_depth=(SELECT MAX(op_depth) FROM nodes o
@@ -1312,9 +1305,8 @@ DROP TABLE IF EXISTS delete_list
 SELECT MIN(revision), MAX(revision),
        MIN(changed_revision), MAX(changed_revision) FROM nodes
   WHERE wc_id = ?1
-    AND (?2 = ''
-       OR local_relpath = ?2
-       OR IS_STRICT_DESCENDANT_OF(local_relpath, ?2))
+    AND (local_relpath = ?2
+         OR IS_STRICT_DESCENDANT_OF(local_relpath, ?2))
     AND presence IN ('normal', 'incomplete')
     AND file_external IS NULL
     AND op_depth = 0
@@ -1322,8 +1314,7 @@ SELECT MIN(revision), MAX(revision),
 -- STMT_HAS_SPARSE_NODES
 SELECT 1 FROM nodes
 WHERE wc_id = ?1
-  AND (?2 = ''
-       OR local_relpath = ?2
+  AND (local_relpath = ?2
        OR IS_STRICT_DESCENDANT_OF(local_relpath, ?2))
   AND op_depth = 0
   AND (presence IN ('absent', 'excluded')
@@ -1334,8 +1325,7 @@ LIMIT 1
 -- STMT_SUBTREE_HAS_TREE_MODIFICATIONS
 SELECT 1 FROM nodes
 WHERE wc_id = ?1
-  AND (?2 = ''
-       OR local_relpath = ?2
+  AND (local_relpath = ?2
        OR IS_STRICT_DESCENDANT_OF(local_relpath, ?2))
   AND op_depth > 0
 LIMIT 1
@@ -1343,8 +1333,7 @@ LIMIT 1
 -- STMT_SUBTREE_HAS_PROP_MODIFICATIONS
 SELECT 1 FROM actual_node
 WHERE wc_id = ?1
-  AND (?2 = ''
-       OR local_relpath = ?2
+  AND (local_relpath = ?2
        OR IS_STRICT_DESCENDANT_OF(local_relpath, ?2))
   AND properties IS NOT NULL
 LIMIT 1
@@ -1421,8 +1410,7 @@ LIMIT 1
 -- STMT_SELECT_BASE_FILES_RECURSIVE
 SELECT local_relpath, translated_size, last_mod_time FROM nodes AS n
 WHERE wc_id = ?1
-  AND (?2 = ''
-       OR local_relpath = ?2
+  AND (local_relpath = ?2
        OR IS_STRICT_DESCENDANT_OF(local_relpath, ?2))
   AND op_depth = 0
   AND kind='file'

Modified: subversion/trunk/subversion/tests/libsvn_wc/wc-queries-test.c
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/tests/libsvn_wc/wc-queries-test.c?rev=1341684&r1=1341683&r2=1341684&view=diff
==============================================================================
--- subversion/trunk/subversion/tests/libsvn_wc/wc-queries-test.c (original)
+++ subversion/trunk/subversion/tests/libsvn_wc/wc-queries-test.c Wed May 23 00:03:06 2012
@@ -79,7 +79,6 @@ static const int schema_statements[] =
 static const int slow_statements[] =
 {
   /* Operate on the entire WC */
-  STMT_CLEAR_BASE_NODE_RECURSIVE_DAV_CACHE,
   STMT_RECURSIVE_UPDATE_NODE_REPO,
   STMT_DELETE_ALL_NODES_ABOVE_DEPTH,
   STMT_WC_HAS_SERVER_EXCLUDED,
@@ -108,11 +107,6 @@ static const int slow_statements[] =
   STMT_SELECT_REVERT_LIST_COPIED_CHILDREN,
   STMT_SELECT_REVERT_LIST_RECURSIVE,
   STMT_DELETE_REVERT_LIST_RECURSIVE,
-  STMT_SELECT_MIN_MAX_REVISIONS,
-  STMT_HAS_SPARSE_NODES,
-  STMT_SUBTREE_HAS_TREE_MODIFICATIONS,
-  STMT_SUBTREE_HAS_PROP_MODIFICATIONS,
-  STMT_SELECT_BASE_FILES_RECURSIVE,
   STMT_SELECT_CONFLICT_MARKER_FILES,
   STMT_CLEAR_ALL_ACTUAL_NODE_LEAVING_CHANGELIST,
   STMT_DELETE_ACTUAL_EMPTIES,



RE: svn commit: r1341684 - in /subversion/trunk: build/transform_sql.py subversion/libsvn_wc/wc-queries.sql subversion/tests/libsvn_wc/wc-queries-test.c

Posted by Bert Huijben <be...@qqmail.nl>.

> -----Original Message-----
> From: Greg Stein [mailto:gstein@gmail.com]
> Sent: woensdag 23 mei 2012 4:56
> To: dev@subversion.apache.org
> Subject: Re: svn commit: r1341684 - in /subversion/trunk:
build/transform_sql.py
> subversion/libsvn_wc/wc-queries.sql subversion/tests/libsvn_wc/wc-queries-
> test.c
> 
> On Tue, May 22, 2012 at 8:03 PM,  <rh...@apache.org> wrote:
> >...
> > +++ subversion/trunk/build/transform_sql.py Wed May 23 00:03:06 2012
> > @@ -110,10 +110,35 @@ class Processor(object):
> >     for line in input.split('\n'):
> >       line = line.replace('"', '\\"')
> >
> > +      # IS_STRICT_DESCENDANT_OF()
> > +
> > +      # A common operation in the working copy is determining
descendants of
> > +      # a node. To allow Sqlite to use its indexes to provide the
answer we
> > +      # must provide simple less than and greater than operations.
> > +      #
> > +      # For relative paths that consist of one or more components like
'subdir'
> > +      # we can accomplish this by comparing local_relpath with
'subdir/' and
> > +      # 'subdir0' ('/'+1 = '0')
> > +      #
> > +      # For the working copy root this case is less simple and not
strictly
> > +      # valid utf-8/16 (but luckily Sqlite doesn't validate utf-8 nor
utf-16).
> > +      # The binary blob x'FFFF' is higher than any valid utf-8 and
utf-16
> > +      # sequence.
> > +      #
> > +      # So for the root we can compare with > '' and < x'FFFF'. (This
skips the
> > +      # root itself and selects all descendants)
> > +      #
> > +      ### RH: I implemented this first with a user defined Sqlite
function. But
> > +      ### when I wrote the documentation for it, I found out I could
just
> > +      ### define it this way, without losing the option of just
dropping the
> > +      ### query in a plain sqlite3.
> > +
> >       # '/'+1 == '0'
> > -      line = re.sub(r'IS_STRICT_DESCENDANT_OF[(]([A-Za-z_.]+),
([?][0-9]+)[)]',
> > -                    r"((\2) != '' AND ((\1) > (\2) || '/') AND ((\1) <
(\2) || '0')) ",
> > -                    line)
> > +      line = re.sub(
> > +            r'IS_STRICT_DESCENDANT_OF[(]([A-Za-z_.]+), ([?][0-9]+)[)]',
> > +            r"(((\1) > (CASE (\2) WHEN '' THEN '' ELSE (\2) || '/'
END))" +
> > +            r" AND ((\1) < CASE (\2) WHEN '' THEN X'FFFF' ELSE (\2) ||
'0' END))",
> > +            line)
> 
> Rather than using the CASE logic, couldn't you just do something like:
> 
> r"(((\2 = '') AND (\1 != '')) OR (... old condition ...))"
> 
> Seems simpler to me. Does it somehow invalidate the usage of the index?

Yes, this breaks the index usage :(

Sqlite determines which parts of the query are used for the indexing part
during statement preparation. So it doesn't know the actual values of ?1 and
?2 yet, and can only assume that they have the worst possible outcome in
every case.

The index is then used to create a result set, and the unevaluated portions
are then evaluated per row of the result.


In most cases Sqlite determines that cases like (local_relpath = ?2 OR
IS_STRICT_DESCENDANT_OF(local_relpath, ?2)) can be evaluated as two queries
after each other, both using the index. (As it statically knows these result
sets don't overlap)

In a few other cases, like STMT_RECURSIVE_UPDATE_NODE_REPO, the optimizer
isn't smart enough yet. It appears that it is smarter during simple
SELECT-s, then when updating or inserting.

	Bert


Re: svn commit: r1341684 - in /subversion/trunk: build/transform_sql.py subversion/libsvn_wc/wc-queries.sql subversion/tests/libsvn_wc/wc-queries-test.c

Posted by Greg Stein <gs...@gmail.com>.
On Tue, May 22, 2012 at 8:03 PM,  <rh...@apache.org> wrote:
>...
> +++ subversion/trunk/build/transform_sql.py Wed May 23 00:03:06 2012
> @@ -110,10 +110,35 @@ class Processor(object):
>     for line in input.split('\n'):
>       line = line.replace('"', '\\"')
>
> +      # IS_STRICT_DESCENDANT_OF()
> +
> +      # A common operation in the working copy is determining descendants of
> +      # a node. To allow Sqlite to use its indexes to provide the answer we
> +      # must provide simple less than and greater than operations.
> +      #
> +      # For relative paths that consist of one or more components like 'subdir'
> +      # we can accomplish this by comparing local_relpath with 'subdir/' and
> +      # 'subdir0' ('/'+1 = '0')
> +      #
> +      # For the working copy root this case is less simple and not strictly
> +      # valid utf-8/16 (but luckily Sqlite doesn't validate utf-8 nor utf-16).
> +      # The binary blob x'FFFF' is higher than any valid utf-8 and utf-16
> +      # sequence.
> +      #
> +      # So for the root we can compare with > '' and < x'FFFF'. (This skips the
> +      # root itself and selects all descendants)
> +      #
> +      ### RH: I implemented this first with a user defined Sqlite function. But
> +      ### when I wrote the documentation for it, I found out I could just
> +      ### define it this way, without losing the option of just dropping the
> +      ### query in a plain sqlite3.
> +
>       # '/'+1 == '0'
> -      line = re.sub(r'IS_STRICT_DESCENDANT_OF[(]([A-Za-z_.]+), ([?][0-9]+)[)]',
> -                    r"((\2) != '' AND ((\1) > (\2) || '/') AND ((\1) < (\2) || '0')) ",
> -                    line)
> +      line = re.sub(
> +            r'IS_STRICT_DESCENDANT_OF[(]([A-Za-z_.]+), ([?][0-9]+)[)]',
> +            r"(((\1) > (CASE (\2) WHEN '' THEN '' ELSE (\2) || '/' END))" +
> +            r" AND ((\1) < CASE (\2) WHEN '' THEN X'FFFF' ELSE (\2) || '0' END))",
> +            line)

Rather than using the CASE logic, couldn't you just do something like:

r"(((\2 = '') AND (\1 != '')) OR (... old condition ...))"

Seems simpler to me. Does it somehow invalidate the usage of the index?

Cheers,
-g