You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@subversion.apache.org by rh...@apache.org on 2013/11/17 18:19:08 UTC

svn commit: r1542765 - in /subversion/trunk/subversion: libsvn_wc/upgrade.c libsvn_wc/wc-metadata.sql libsvn_wc/wc_db.c libsvn_wc/wc_db.h tests/libsvn_wc/wc-queries-test.c

Author: rhuijben
Date: Sun Nov 17 17:19:07 2013
New Revision: 1542765

URL: http://svn.apache.org/r1542765
Log:
Teach Sqlite a bit of how we use our working copy database files, like that
we only have one true value in wc_id.

By adding this tiny bit of information Sqlite 3.8.0, 3.8.1 and the preview
of 3.8.2 use the intended query plans for our working copy queries, just
like Sqlite 3.7.12-3.7.19 already did without these hints.
(The older versions do use this information though!)

For more details on why this is important and how this works see
- the wc-metadata.sql patch
- http://www.sqlite.org/queryplanner-ng.html
- http://www.sqlite.org/fileformat2.html#stat1tab

And for reference also check the users@sqlite archive where Richard Hipp
recommends this approach for our current use of Sqlite.

* subversion/libsvn_wc/upgrade.c
  (svn_wc__upgrade_sdb): Call svn_wc__db_install_schema_statistics() after the
    final format bump.

* subversion/libsvn_wc/wc-metadata.sql
  (STMT_INSTALL_SCHEMA_STATISTICS): New statement.

* subversion/libsvn_wc/wc_db.c
  (svn_wc__db_install_schema_statistics): New function.

* subversion/libsvn_wc/wc_db.h
  (svn_wc__db_install_schema_statistics): New function.

* subversion/tests/libsvn_wc/wc-queries-test.c
  (schema_statements): Install the statistics.
  (parse_stat_data): New helper function.
  (test_schema_statistics): New test.
  (test_funcs): Add test_schema_statistics.

Modified:
    subversion/trunk/subversion/libsvn_wc/upgrade.c
    subversion/trunk/subversion/libsvn_wc/wc-metadata.sql
    subversion/trunk/subversion/libsvn_wc/wc_db.c
    subversion/trunk/subversion/libsvn_wc/wc_db.h
    subversion/trunk/subversion/tests/libsvn_wc/wc-queries-test.c

Modified: subversion/trunk/subversion/libsvn_wc/upgrade.c
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_wc/upgrade.c?rev=1542765&r1=1542764&r2=1542765&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_wc/upgrade.c (original)
+++ subversion/trunk/subversion/libsvn_wc/upgrade.c Sun Nov 17 17:19:07 2013
@@ -1959,6 +1959,10 @@ svn_wc__upgrade_sdb(int *result_format,
       case SVN_WC__VERSION:
         /* already upgraded */
         *result_format = SVN_WC__VERSION;
+
+        SVN_SQLITE__WITH_LOCK(
+            svn_wc__db_install_schema_statistics(sdb, scratch_pool),
+            sdb);
     }
 
 #ifdef SVN_DEBUG

Modified: subversion/trunk/subversion/libsvn_wc/wc-metadata.sql
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_wc/wc-metadata.sql?rev=1542765&r1=1542764&r2=1542765&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_wc/wc-metadata.sql (original)
+++ subversion/trunk/subversion/libsvn_wc/wc-metadata.sql Sun Nov 17 17:19:07 2013
@@ -573,6 +573,54 @@ CREATE UNIQUE INDEX I_EXTERNALS_DEFINED 
                                                       local_relpath);
 
 /* ------------------------------------------------------------------------- */
+/* This statement provides SQLite with the necessary information about our
+   indexes to make better decisions in the query planner.
+
+   For every interesting index this contains a number of rows where the
+   statistics ar calculated for and then for every column in the index the
+   average number of rows with the same value in all columns left of this
+   column including the column itself.
+
+   See http://www.sqlite.org/fileformat2.html#stat1tab for more details.
+
+   The important thing here is that this tells Sqlite that the wc_id column
+   of the NODES and ACTUAL_NODE table is usually a single value, so queries
+   should use more than one column for index usage.
+
+   The current hints describe NODES+ACTUAL_NODE as a working copy with
+   8000 nodes in 1 a single working copy(=wc_id), 10 nodes per directory
+   and an average of 2 op-depth layers per node.
+
+   The number of integers must be number of index columns + 1, which is
+   verified via the test_schema_statistics() test.
+ */
+-- STMT_INSTALL_SCHEMA_STATISTICS
+ANALYZE sqlite_master; /* Creates empty sqlite_stat1 if necessary */
+
+INSERT OR REPLACE INTO sqlite_stat1(tbl, idx, stat) VALUES
+    ('NODES', 'sqlite_autoindex_NODES_1',               '8000 8000 2 1');
+INSERT OR REPLACE INTO sqlite_stat1(tbl, idx, stat) VALUES
+    ('NODES', 'I_NODES_PARENT',                         '8000 8000 10 2 1');
+/* Tell a lie: We ignore that 99.9% of all moved_to values are NULL */
+INSERT OR REPLACE INTO sqlite_stat1(tbl, idx, stat) VALUES
+    ('NODES', 'I_NODES_MOVED',                          '8000 8000 1 1');
+
+INSERT OR REPLACE INTO sqlite_stat1(tbl, idx, stat) VALUES
+    ('ACTUAL_NODE', 'sqlite_autoindex_ACTUAL_NODE_1',   '8000 8000 1');
+INSERT OR REPLACE INTO sqlite_stat1(tbl, idx, stat) VALUES
+    ('ACTUAL_NODE', 'I_ACTUAL_PARENT',                  '8000 8000 10 1');
+
+INSERT OR REPLACE INTO sqlite_stat1(tbl, idx, stat) VALUES
+    ('LOCK', 'sqlite_autoindex_LOCK_1',                 '100 100 1');
+
+INSERT OR REPLACE INTO sqlite_stat1(tbl, idx, stat) VALUES
+    ('WC_LOCK', 'sqlite_autoindex_WC_LOCK_1',           '100 100 1');
+
+/* sqlite_autoindex_WORK_QUEUE_1 doesn't exist because WORK_QUEUE is
+   a INTEGER PRIMARY KEY AUTOINCREMENT table */
+
+ANALYZE sqlite_master; /* Loads sqlite_stat1 data for query optimizer */
+/* ------------------------------------------------------------------------- */
 
 /* Format 20 introduces NODES and removes BASE_NODE and WORKING_NODE */
 

Modified: subversion/trunk/subversion/libsvn_wc/wc_db.c
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_wc/wc_db.c?rev=1542765&r1=1542764&r2=1542765&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_wc/wc_db.c (original)
+++ subversion/trunk/subversion/libsvn_wc/wc_db.c Sun Nov 17 17:19:07 2013
@@ -1440,6 +1440,15 @@ does_node_exist(svn_boolean_t *exists,
   return svn_error_trace(svn_sqlite__reset(stmt));
 }
 
+svn_error_t *
+svn_wc__db_install_schema_statistics(svn_sqlite__db_t *sdb,
+                                     apr_pool_t *scratch_pool)
+{
+  SVN_ERR(svn_sqlite__exec_statements(sdb, STMT_INSTALL_SCHEMA_STATISTICS));
+
+  return SVN_NO_ERROR;
+}
+
 /* Helper for create_db(). Initializes our wc.db schema.
  */
 static svn_error_t *
@@ -1467,6 +1476,8 @@ init_db(/* output values */
   SVN_ERR(create_repos_id(repos_id, repos_root_url, repos_uuid,
                           db, scratch_pool));
 
+  SVN_ERR(svn_wc__db_install_schema_statistics(db, scratch_pool));
+
   /* Insert the wcroot. */
   /* ### Right now, this just assumes wc metadata is being stored locally. */
   SVN_ERR(svn_sqlite__get_statement(&stmt, db, STMT_INSERT_WCROOT));

Modified: subversion/trunk/subversion/libsvn_wc/wc_db.h
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_wc/wc_db.h?rev=1542765&r1=1542764&r2=1542765&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_wc/wc_db.h (original)
+++ subversion/trunk/subversion/libsvn_wc/wc_db.h Sun Nov 17 17:19:07 2013
@@ -2831,6 +2831,16 @@ svn_wc__db_scan_deletion(const char **ba
    @{
 */
 
+/* Installs or updates Sqlite schema statistics for the current (aka latest)
+   working copy schema.
+
+   This function should be called once on initializing the database and after
+   an schema update completes */
+svn_error_t *
+svn_wc__db_install_schema_statistics(svn_sqlite__db_t *sdb,
+                                     apr_pool_t *scratch_pool);
+
+
 /* Create a new wc.db file for LOCAL_DIR_ABSPATH, which is going to be a
    working copy for the repository REPOS_ROOT_URL with uuid REPOS_UUID.
    Return the raw sqlite handle, repository id and working copy id

Modified: subversion/trunk/subversion/tests/libsvn_wc/wc-queries-test.c
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/tests/libsvn_wc/wc-queries-test.c?rev=1542765&r1=1542764&r2=1542765&view=diff
==============================================================================
--- subversion/trunk/subversion/tests/libsvn_wc/wc-queries-test.c (original)
+++ subversion/trunk/subversion/tests/libsvn_wc/wc-queries-test.c Sun Nov 17 17:19:07 2013
@@ -73,6 +73,7 @@ static const int schema_statements[] =
   STMT_CREATE_NODES,
   STMT_CREATE_NODES_TRIGGERS,
   STMT_CREATE_EXTERNALS,
+  STMT_INSTALL_SCHEMA_STATISTICS,
   /* Memory tables */
   STMT_CREATE_TARGETS_LIST,
   STMT_CREATE_CHANGELIST_LIST,
@@ -813,6 +814,147 @@ test_query_duplicates(apr_pool_t *scratc
   return warnings;
 }
 
+/* Helper to verify a bit of data in the sqlite3 statistics */
+static int
+parse_stat_data(const char *stat)
+{
+  int n = 0;
+  apr_int64_t last = APR_INT64_MAX;
+  while (*stat)
+    {
+      apr_int64_t v;
+      char *next;
+
+      if (*stat < '0' || *stat > '9')
+        return -2;
+
+      errno = 0;
+      v = apr_strtoi64(stat, &next, 10);
+
+      /* All numbers specify the average number of rows
+         with the same values in all columns left of it,
+         so the value must be >= 1 and lower than or equal
+         to all previous seen numbers */
+      if (v <= 0 || (v > last) || (errno != 0))
+        return -1;
+
+      last = v;
+
+      n++;
+      stat = next;
+
+      if (*stat == ' ')
+        stat++;
+    }
+
+  return n;
+}
+
+static svn_error_t *
+test_schema_statistics(apr_pool_t *scratch_pool)
+{
+  sqlite3 *sdb;
+  int r;
+  apr_pool_t *iterpool = svn_pool_create(scratch_pool);
+  svn_error_t *warnings = NULL;
+  sqlite3_stmt *stmt;
+
+  SVN_ERR(create_memory_db(&sdb, scratch_pool));
+
+  SQLITE_ERR(
+      sqlite3_exec(sdb,
+                   "CREATE TABLE shadow_stat1(tbl TEXT, idx TEXT, stat TEXT)",
+                   NULL, NULL, NULL));
+
+  SQLITE_ERR(
+      sqlite3_exec(sdb,
+                   "INSERT INTO shadow_stat1 (tbl, idx, stat) "
+                   "SELECT tbl, idx, stat FROM sqlite_stat1",
+                   NULL, NULL, NULL));
+
+  SQLITE_ERR(
+      sqlite3_exec(sdb,
+                   "DROP TABLE sqlite_stat1",
+                   NULL, NULL, NULL));
+
+  /* Insert statement to give index at least 1 record */
+  SQLITE_ERR(
+      sqlite3_exec(sdb,
+                   "INSERT INTO nodes (wc_id, local_relpath, op_depth,"
+                   "                   presence, kind) "
+                   "VALUES (1, '', 0, 'normal', 'dir')",
+                   NULL, NULL, NULL));
+
+  SQLITE_ERR(
+      sqlite3_exec(sdb,
+                   "INSERT INTO actual_node (wc_id, local_relpath) "
+                   "VALUES (1, '')",
+                   NULL, NULL, NULL));
+
+  SQLITE_ERR(
+      sqlite3_exec(sdb,
+                   "INSERT INTO lock (repos_id, repos_relpath, lock_token) "
+                   "VALUES (1, '', '')",
+                   NULL, NULL, NULL));
+
+  /* These are currently not necessary for query optimization, but it's better
+     to tell Sqlite how we intend to use this table anyway */
+  SQLITE_ERR(
+      sqlite3_exec(sdb,
+                   "INSERT INTO wc_lock (wc_id, local_dir_relpath) "
+                   "VALUES (1, '')",
+                   NULL, NULL, NULL));
+
+  SQLITE_ERR(
+      sqlite3_exec(sdb,
+                   "INSERT INTO WORK_QUEUE (work) "
+                   "VALUES ('')",
+                   NULL, NULL, NULL));
+
+  SQLITE_ERR(
+      sqlite3_exec(sdb,
+                   "ANALYZE",
+                   NULL, NULL, NULL));
+
+  SQLITE_ERR(
+      sqlite3_prepare(sdb, "SELECT s.tbl, s.idx, s.stat, r.stat "
+                           "FROM shadow_stat1 s "
+                           "LEFT JOIN sqlite_stat1 r ON "
+                                "s.tbl=r.tbl and s.idx=r.idx",
+                      -1, &stmt, NULL));
+
+  while (sqlite3_step(stmt) == SQLITE_ROW)
+    {
+      const char *wc_stat       = (const char*)sqlite3_column_text(stmt, 2);
+      const char *sqlite_stat   = (const char*)sqlite3_column_text(stmt, 3);
+
+      if (! sqlite_stat)
+        {
+          return svn_error_createf(SVN_ERR_TEST_FAILED, NULL,
+                                   "Schema statistic failure:"
+                                   " Refering to unknown index '%s' on '%s'",
+                                   sqlite3_column_text(stmt, 1),
+                                   sqlite3_column_text(stmt, 0));
+        }
+
+      if (parse_stat_data(wc_stat) != parse_stat_data(sqlite_stat))
+        {
+          return svn_error_createf(SVN_ERR_TEST_FAILED, NULL,
+                                   "Schema statistic failure:"
+                                   " Column mismatch for '%s' on '%s'",
+                                   sqlite3_column_text(stmt, 1),
+                                   sqlite3_column_text(stmt, 0));
+        }
+    }
+
+  SQLITE_ERR(sqlite3_reset(stmt));
+  SQLITE_ERR(sqlite3_finalize(stmt));
+
+  SQLITE_ERR(sqlite3_close(sdb)); /* Close the DB if ok; otherwise leaked */
+
+  return SVN_NO_ERROR;
+}
+
 struct svn_test_descriptor_t test_funcs[] =
   {
     SVN_TEST_NULL,
@@ -824,5 +966,7 @@ struct svn_test_descriptor_t test_funcs[
                    "test query expectations"),
     SVN_TEST_PASS2(test_query_duplicates,
                    "test query duplicates"),
+    SVN_TEST_PASS2(test_schema_statistics,
+                   "test schema statistics"),
     SVN_TEST_NULL
   };