You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@subversion.apache.org by rh...@apache.org on 2015/12/01 12:09:14 UTC

svn commit: r1717400 - in /subversion/branches/ra-git/subversion/libsvn_fs_git: fs_git.h fsgit-metadata.sql fsgit-queries.sql git-revroot.c gitdb.c util.c

Author: rhuijben
Date: Tue Dec  1 11:09:14 2015
New Revision: 1717400

URL: http://svn.apache.org/viewvc?rev=1717400&view=rev
Log:
On the ra-git branch: Produce and cache checksum values for git files.
This improves consistency checking, both inside libsvn_fs_git and on
clients. Fix some fsid calculations to avoid some deletes+adds during
update.

* subversion/libsvn_fs_git/fsgit-metadata.sql
  (REVMAP): Declare commit_id binary.
  (CHECKSUMMAP): New table.

* subversion/libsvn_fs_git/fsgit-queries.sql
  (STMT_SELECT_CHECKSUM,
   STMT_INSERT_CHECKSUM): New statements.

* subversion/libsvn_fs_git/fs_git.h
  (svn_fs_git__get_blob_stream): New function.
  (svn_fs_git__db_fetch_oid): Constify argument.
  (svn_fs_git__db_fetch_checksum): New function.

* subversion/libsvn_fs_git/git-revroot.c
  (fs_git_paths_changed): Remove initial '/'.
  (fs_git_node_id): Remove '/' like in other places.
  (fs_git_node_origin_rev): Return current revision to allow
    switching.
  (fs_git_dir_entries): Store full path in id.
  (fs_git_file_length): Document limitation.
  (fs_git_file_checksum): Document limitation. Fetch checksums.
  (fs_git_file_contents): Document limitation. Use new stream.

* subversion/libsvn_fs_git/gitdb.c
  (svn_fs_git__db_fetch_oid): Constify argument.
  (db_fetch_checksum,
   svn_fs_git__db_fetch_checksum): New functions.

* subversion/libsvn_fs_git/util.c
  (includes): Add svn_sorts.h and fs-loader.h
  (git_fs_blob_stream_t): New struct.
  (blob_stream_cleanup,
   blob_stream_read,
   blob_stream_close,
   svn_fs_git__get_blob_stream): New functions.

Modified:
    subversion/branches/ra-git/subversion/libsvn_fs_git/fs_git.h
    subversion/branches/ra-git/subversion/libsvn_fs_git/fsgit-metadata.sql
    subversion/branches/ra-git/subversion/libsvn_fs_git/fsgit-queries.sql
    subversion/branches/ra-git/subversion/libsvn_fs_git/git-revroot.c
    subversion/branches/ra-git/subversion/libsvn_fs_git/gitdb.c
    subversion/branches/ra-git/subversion/libsvn_fs_git/util.c

Modified: subversion/branches/ra-git/subversion/libsvn_fs_git/fs_git.h
URL: http://svn.apache.org/viewvc/subversion/branches/ra-git/subversion/libsvn_fs_git/fs_git.h?rev=1717400&r1=1717399&r2=1717400&view=diff
==============================================================================
--- subversion/branches/ra-git/subversion/libsvn_fs_git/fs_git.h (original)
+++ subversion/branches/ra-git/subversion/libsvn_fs_git/fs_git.h Tue Dec  1 11:09:14 2015
@@ -69,6 +69,15 @@ svn_fs_git__open(svn_fs_t *fs,
                  apr_pool_t *scratch_pool);
 
 
+/* From util.c */
+/* Gets a stream to read the file with the specified OID. */
+svn_error_t *
+svn_fs_git__get_blob_stream(svn_stream_t **stream,
+                            svn_fs_t *fs,
+                            const git_oid *oid,
+                            apr_pool_t *result_pool);
+
+
 /* From gitdb.c */
 svn_error_t *
 svn_fs_git__db_open(svn_fs_t *fs,
@@ -91,13 +100,21 @@ svn_fs_git__db_ensure_commit(svn_fs_t *f
 
 svn_error_t *
 svn_fs_git__db_fetch_oid(svn_boolean_t *found,
-                         git_oid **oid,
+                         const git_oid **oid,
                          const char **path,
                          svn_fs_t *fs,
                          svn_revnum_t revnum,
                          apr_pool_t *result_pool,
                          apr_pool_t *scratch_pool);
 
+svn_error_t *
+svn_fs_git__db_fetch_checksum(svn_checksum_t **checksum,
+                              svn_fs_t *fs,
+                              const git_oid *oid,
+                              svn_checksum_kind_t kind,
+                              apr_pool_t *result_pool,
+                              apr_pool_t *scratch_pool);
+
 /* */
 svn_error_t *
 svn_fs_git__revision_root(svn_fs_root_t **root_p,

Modified: subversion/branches/ra-git/subversion/libsvn_fs_git/fsgit-metadata.sql
URL: http://svn.apache.org/viewvc/subversion/branches/ra-git/subversion/libsvn_fs_git/fsgit-metadata.sql?rev=1717400&r1=1717399&r2=1717400&view=diff
==============================================================================
--- subversion/branches/ra-git/subversion/libsvn_fs_git/fsgit-metadata.sql (original)
+++ subversion/branches/ra-git/subversion/libsvn_fs_git/fsgit-metadata.sql Tue Dec  1 11:09:14 2015
@@ -36,7 +36,7 @@ CREATE TABLE REVMAP (
   revnum INTEGER PRIMARY KEY AUTOINCREMENT,
 
   /* The git commit mapped to the revision */
-  commit_id TEXT NOT NULL,
+  commit_id BINARY NOT NULL,
 
   /* The relpath below which we express this commit (E.g. 'trunk') */
   relpath TEXT NOT NULL
@@ -67,6 +67,13 @@ CREATE TABLE BRANCHMAP (
 CREATE UNIQUE INDEX I_BRANCHMAP_RELPATH ON BRANCHMAP (relpath, from_rev);
 CREATE UNIQUE INDEX I_BRANCHMAP_FROM_REV ON BRANCHMAP (from_rev, relpath);
 
+CREATE TABLE CHECKSUMMAP (
+  blob_id BINARY NOT NULL PRIMARY KEY, /* The git blob's object id */
+  /* Cached checksums of the blob's content */
+  md5_checksum TEXT NOT NULL,
+  sha1_checksum TEXT NOT NULL
+);
+
 PRAGMA user_version =
 -- define: SVN_FS_GIT__VERSION
 ;

Modified: subversion/branches/ra-git/subversion/libsvn_fs_git/fsgit-queries.sql
URL: http://svn.apache.org/viewvc/subversion/branches/ra-git/subversion/libsvn_fs_git/fsgit-queries.sql?rev=1717400&r1=1717399&r2=1717400&view=diff
==============================================================================
--- subversion/branches/ra-git/subversion/libsvn_fs_git/fsgit-queries.sql (original)
+++ subversion/branches/ra-git/subversion/libsvn_fs_git/fsgit-queries.sql Tue Dec  1 11:09:14 2015
@@ -51,6 +51,14 @@ LIMIT 1
 -- STMT_INSERT_COMMIT
 INSERT INTO REVMAP (revnum, commit_id, relpath) VALUES (?1, ?2, ?3)
 
+-- STMT_SELECT_CHECKSUM
+SELECT md5_checksum, sha1_checksum
+FROM CHECKSUMMAP
+WHERE blob_id = ?1
+
+-- STMT_INSERT_CHECKSUM
+INSERT INTO CHECKSUMMAP (blob_id, md5_checksum, sha1_checksum)
+VALUES (?1, ?2, ?3)
 
 /* Grab all the statements related to the schema.  */
 

Modified: subversion/branches/ra-git/subversion/libsvn_fs_git/git-revroot.c
URL: http://svn.apache.org/viewvc/subversion/branches/ra-git/subversion/libsvn_fs_git/git-revroot.c?rev=1717400&r1=1717399&r2=1717400&view=diff
==============================================================================
--- subversion/branches/ra-git/subversion/libsvn_fs_git/git-revroot.c (original)
+++ subversion/branches/ra-git/subversion/libsvn_fs_git/git-revroot.c Tue Dec  1 11:09:14 2015
@@ -274,13 +274,13 @@ fs_git_paths_changed(apr_hash_t **change
     {
       svn_fs_path_change2_t *ch;
 
-      ch = svn_fs__path_change_create_internal(make_id(root, "/trunk", pool),
+      ch = svn_fs__path_change_create_internal(make_id(root, "trunk", pool),
                                                svn_fs_path_change_add,
                                                pool);
       ch->node_kind = svn_node_dir;
       svn_hash_sets(changed_paths, "/trunk", ch);
 
-      ch = svn_fs__path_change_create_internal(make_id(root, "/branches",
+      ch = svn_fs__path_change_create_internal(make_id(root, "branches",
                                                        pool),
                                                svn_fs_path_change_add,
                                                pool);
@@ -365,6 +365,9 @@ static svn_error_t *
 fs_git_node_id(const svn_fs_id_t **id_p, svn_fs_root_t *root,
                const char *path, apr_pool_t *pool)
 {
+  if (*path == '/')
+    path++;
+
   *id_p = make_id(root, path, pool);
   return SVN_NO_ERROR;
 }
@@ -472,7 +475,8 @@ fs_git_node_origin_rev(svn_revnum_t *rev
                        svn_fs_root_t *root, const char *path,
                        apr_pool_t *pool)
 {
-  return svn_error_create(APR_ENOTIMPL, NULL, NULL);
+  *revision = root->rev; /* No common ancestry */
+  return SVN_NO_ERROR;
 }
 
 static svn_error_t *
@@ -642,8 +646,9 @@ fs_git_dir_entries(apr_hash_t **entries_
       const git_tree_entry *e = git_tree_entry_byindex(tree, idx);
 
       de = apr_pcalloc(pool, sizeof(*de));
-      de->id = make_id(root, path, pool);
       de->name = git_tree_entry_name(e);
+      de->id = make_id(root, svn_relpath_join(path, de->name, pool),
+                       pool);
 
       if (git_tree_entry_type(e) == GIT_OBJ_TREE)
         de->kind = svn_node_dir;
@@ -706,6 +711,12 @@ fs_git_file_length(svn_filesize_t *lengt
   if (!entry || git_tree_entry_type(entry) != GIT_OBJ_BLOB)
     return SVN_FS__ERR_NOT_FILE(root->fs, path);
 
+  if ((git_tree_entry_filemode(entry) & GIT_FILEMODE_LINK)
+      == GIT_FILEMODE_LINK)
+    {
+      /* ### TODO */
+    }
+
   SVN_ERR(get_entry_object(&obj, tree, entry, pool));
 
   blob = (git_blob*)obj;
@@ -722,8 +733,6 @@ fs_git_file_checksum(svn_checksum_t **ch
   const git_commit *commit;
   git_tree *tree;
   git_tree_entry *entry;
-  git_object *obj;
-  git_blob *blob;
   const char *relpath;
 
   SVN_ERR(find_branch(&commit, &relpath, root, path, pool));
@@ -738,11 +747,15 @@ fs_git_file_checksum(svn_checksum_t **ch
   if (!entry || git_tree_entry_type(entry) != GIT_OBJ_BLOB)
     return SVN_FS__ERR_NOT_FILE(root->fs, path);
 
-  SVN_ERR(get_entry_object(&obj, tree, entry, pool));
-
-  blob = (git_blob*)obj;
+  if ((git_tree_entry_filemode(entry) & GIT_FILEMODE_LINK)
+      == GIT_FILEMODE_LINK)
+    {
+      /* ### TODO */
+    }
 
-  *checksum = NULL; /* ### TODO: Get via DB cache */
+  SVN_ERR(svn_fs_git__db_fetch_checksum(checksum, root->fs,
+                                        git_tree_entry_id(entry),
+                                        kind, pool, pool));
   return SVN_NO_ERROR;
 }
 
@@ -754,10 +767,7 @@ fs_git_file_contents(svn_stream_t **cont
   const git_commit *commit;
   git_tree *tree;
   git_tree_entry *entry;
-  git_object *obj;
-  git_blob *blob;
   const char *relpath;
-  svn_filesize_t sz;
 
   SVN_ERR(find_branch(&commit, &relpath, root, path, pool));
 
@@ -771,22 +781,15 @@ fs_git_file_contents(svn_stream_t **cont
   if (!entry || git_tree_entry_type(entry) != GIT_OBJ_BLOB)
     return SVN_FS__ERR_NOT_FILE(root->fs, path);
 
-  SVN_ERR(get_entry_object(&obj, tree, entry, pool));
-
-  blob = (git_blob*)obj;
-
-  sz = git_blob_rawsize(blob);
-
-  /* For now use the github 10 MB limit */
-  if (sz < (10 * 1024 * 1024))
+  if ((git_tree_entry_filemode(entry) & GIT_FILEMODE_LINK)
+      == GIT_FILEMODE_LINK)
     {
-      svn_string_t *s = svn_string_ncreate(
-        git_blob_rawcontent(blob), (apr_size_t)sz, pool);
-
-      *contents = svn_stream_from_string(s, pool);
+      /* ### TODO */
     }
-  else
-    *contents = svn_stream_empty(pool);
+
+  SVN_ERR(svn_fs_git__get_blob_stream(contents, root->fs,
+                                      git_tree_entry_id(entry),
+                                      pool));
 
   return SVN_NO_ERROR;
 }

Modified: subversion/branches/ra-git/subversion/libsvn_fs_git/gitdb.c
URL: http://svn.apache.org/viewvc/subversion/branches/ra-git/subversion/libsvn_fs_git/gitdb.c?rev=1717400&r1=1717399&r2=1717400&view=diff
==============================================================================
--- subversion/branches/ra-git/subversion/libsvn_fs_git/gitdb.c (original)
+++ subversion/branches/ra-git/subversion/libsvn_fs_git/gitdb.c Tue Dec  1 11:09:14 2015
@@ -95,7 +95,7 @@ svn_fs_git__db_ensure_commit(svn_fs_t *f
 
 svn_error_t *
 svn_fs_git__db_fetch_oid(svn_boolean_t *found,
-                         git_oid **oid,
+                         const git_oid **oid,
                          const char **path,
                          svn_fs_t *fs,
                          svn_revnum_t revnum,
@@ -105,7 +105,6 @@ svn_fs_git__db_fetch_oid(svn_boolean_t *
   svn_fs_git_fs_t *fgf = fs->fsap_data;
   svn_sqlite__stmt_t *stmt;
   svn_boolean_t got_row;
-  svn_revnum_t new_rev;
 
   SVN_ERR(svn_sqlite__get_statement(&stmt, fgf->sdb,
                                     STMT_SELECT_COMMIT_BY_REV));
@@ -139,6 +138,89 @@ svn_fs_git__db_fetch_oid(svn_boolean_t *
   return SVN_NO_ERROR;
 }
 
+
+/* Set *CHECKSUM to the IDX'th (1: MD5, 2: SHA-1) checksum of blob OID in FS,
+   from the CHECKSUMMAP cache if present, else calculated and then cached. */
+static svn_error_t *
+db_fetch_checksum(svn_checksum_t **checksum,
+                  svn_fs_t *fs,
+                  const git_oid *oid,
+                  int idx,
+                  apr_pool_t *result_pool,
+                  apr_pool_t *scratch_pool)
+{
+  svn_fs_git_fs_t *fgf = fs->fsap_data;
+  svn_sqlite__stmt_t *stmt;
+  svn_boolean_t got_row;
+  svn_stream_t *stream;
+  svn_checksum_t *sha1_checksum, *md5_checksum;
+
+  SVN_ERR(svn_sqlite__get_statement(&stmt, fgf->sdb,
+                                    STMT_SELECT_CHECKSUM));
+  SVN_ERR(svn_sqlite__bind_blob(stmt, 1, oid, sizeof(*oid)));
+  SVN_ERR(svn_sqlite__step(&got_row, stmt));
+
+  /* Result columns are numbered from 0, so IDX 1 (MD5) is column 0. */
+  if (got_row)
+    SVN_ERR(svn_sqlite__column_checksum(checksum, stmt, idx - 1,
+                                        result_pool));
+  else
+    *checksum = NULL;
+  SVN_ERR(svn_sqlite__reset(stmt));
+
+  if (got_row)
+    return SVN_NO_ERROR;
+  /* Not cached yet: read the whole blob through two checksumming streams. */
+  SVN_ERR(svn_fs_git__get_blob_stream(&stream, fs, oid, scratch_pool));
+  stream = svn_stream_checksummed2(stream, &sha1_checksum, NULL,
+                                   svn_checksum_sha1, TRUE, scratch_pool);
+  stream = svn_stream_checksummed2(stream, &md5_checksum, NULL,
+                                   svn_checksum_md5, TRUE, scratch_pool);
+  SVN_ERR(svn_stream_copy3(stream, svn_stream_empty(scratch_pool),
+                           NULL, NULL, scratch_pool));
+
+  /* Remember both checksums for the next caller. */
+  SVN_ERR(svn_sqlite__get_statement(&stmt, fgf->sdb,
+                                    STMT_INSERT_CHECKSUM));
+  SVN_ERR(svn_sqlite__bind_blob(stmt, 1, oid, sizeof(*oid)));
+  SVN_ERR(svn_sqlite__bind_checksum(stmt, 2, md5_checksum, scratch_pool));
+  SVN_ERR(svn_sqlite__bind_checksum(stmt, 3, sha1_checksum, scratch_pool));
+  SVN_ERR(svn_sqlite__update(NULL, stmt));
+
+  if (idx == 1)
+    *checksum = svn_checksum_dup(md5_checksum, result_pool);
+  else
+    *checksum = svn_checksum_dup(sha1_checksum, result_pool);
+  return SVN_NO_ERROR;
+}
+
+svn_error_t *
+svn_fs_git__db_fetch_checksum(svn_checksum_t **checksum,
+                              svn_fs_t *fs,
+                              const git_oid *oid,
+                              svn_checksum_kind_t kind,
+                              apr_pool_t *result_pool,
+                              apr_pool_t *scratch_pool)
+{
+  svn_fs_git_fs_t *fgf = fs->fsap_data;
+  int idx;
+  /* Fetch the KIND checksum of blob OID: first map KIND to a selector. */
+  if (kind == svn_checksum_md5)
+    idx = 1;
+  else if (kind == svn_checksum_sha1)
+    idx = 2;
+  else
+    {
+      *checksum = NULL; /* Only MD5 and SHA-1 are handled here */
+      return SVN_NO_ERROR;
+    }
+  /* Do the lookup (and possible insert) under the sqlite lock of the db. */
+  SVN_SQLITE__WITH_LOCK(db_fetch_checksum(checksum, fs, oid, idx,
+                                          result_pool, scratch_pool),
+                        fgf->sdb);
+
+  return SVN_NO_ERROR;
+}
+
 
 svn_error_t *
 svn_fs_git__db_open(svn_fs_t *fs,

Modified: subversion/branches/ra-git/subversion/libsvn_fs_git/util.c
URL: http://svn.apache.org/viewvc/subversion/branches/ra-git/subversion/libsvn_fs_git/util.c?rev=1717400&r1=1717399&r2=1717400&view=diff
==============================================================================
--- subversion/branches/ra-git/subversion/libsvn_fs_git/util.c (original)
+++ subversion/branches/ra-git/subversion/libsvn_fs_git/util.c Tue Dec  1 11:09:14 2015
@@ -21,12 +21,164 @@
  */
 
 #include "svn_fs.h"
+#include "svn_sorts.h"
 #include "svn_version.h"
 #include "svn_pools.h"
 
 #include "svn_private_config.h"
+
+#include "../libsvn_fs/fs-loader.h"
+
 #include "fs_git.h"
 
+typedef struct git_fs_blob_stream_t /* Baton of the blob read stream */
+{
+  apr_pool_t *cleanup_pool;   /* Pool used to release resources; NULL once done */
+  git_odb *odb;               /* Only set by the (disabled) ODB streaming path */
+  git_odb_stream *odb_stream; /* Only set by the (disabled) ODB streaming path */
+  const char *data;           /* Next unread byte of the in-memory blob copy */
+  apr_size_t data_left;       /* Number of bytes of DATA not yet read */
+} git_fs_blob_stream_t;
+
+/* Pool cleanup handler: frees the ODB stream and ODB held by BATON. */
+static apr_status_t
+blob_stream_cleanup(void *baton)
+{
+  git_fs_blob_stream_t *bs = baton;
+  git_odb_stream_free(bs->odb_stream);
+  git_odb_free(bs->odb);
+  return APR_SUCCESS; /* an apr_status_t, not an svn_error_t * */
+}
+
+static svn_error_t *blob_stream_read(void *baton,
+                                     char *buffer,
+                                     apr_size_t *len)
+{
+  git_fs_blob_stream_t *bs = baton;
+  /* Read handler: copy up to *LEN bytes of the blob into BUFFER. */
+  if (bs->data)
+    {
+      if (bs->data_left)
+        {
+          *len = MIN(*len, bs->data_left);
+          memcpy(buffer, bs->data, *len);
+          bs->data_left -= *len;
+          bs->data += *len;
+          /* Everything was handed out: drop the in-memory copy early. */
+          if (!bs->data_left)
+            {
+              /* Releases file data! */
+              svn_pool_destroy(bs->cleanup_pool);
+              bs->cleanup_pool = NULL;
+            }
+        }
+      else
+        *len = 0; /* Nothing left: report end of stream */
+
+      return SVN_NO_ERROR;
+    }
+  else /* No in-memory copy: read from the ODB stream */
+  { /* NOTE(review): *LEN is left untouched here -- confirm intended */
+    GIT2_ERR(
+      git_odb_stream_read(bs->odb_stream, buffer, *len));
+  }
+
+  return SVN_NO_ERROR;
+}
+
+static svn_error_t *blob_stream_close(void *baton)
+{
+  git_fs_blob_stream_t *bs = baton;
+  /* Close handler: release what the stream still holds, at most once. */
+  if (bs->cleanup_pool)
+    {
+      if (bs->data) /* in-memory copy: it lives in CLEANUP_POOL */
+        apr_pool_destroy(bs->cleanup_pool);
+      else /* ODB streaming: run the handle cleanup right away */
+        apr_pool_cleanup_run(bs->cleanup_pool, bs, blob_stream_cleanup);
+      bs->cleanup_pool = NULL; /* marks the stream as released */
+    }
+  return SVN_NO_ERROR;
+}
+
+/* Set *STREAM to a readable stream, allocated in RESULT_POOL, that yields
+   the content of the git object OID in the repository of FS. */
+svn_error_t *
+svn_fs_git__get_blob_stream(svn_stream_t **stream,
+                            svn_fs_t *fs,
+                            const git_oid *oid,
+                            apr_pool_t *result_pool)
+{
+  svn_fs_git_fs_t *fgf = fs->fsap_data;
+  git_odb *odb;
+#if 0
+  git_odb_stream *odb_stream;
+#endif
+  git_fs_blob_stream_t *blob_stream;
+  int git_err;
+
+  GIT2_ERR(git_repository_odb(&odb, fgf->repos));
+
+  /* ### Somehow libgit2 assumes that we should just keep everything
+     in RAM. There is not a single ODB backend in the libgit2 source that
+     supports streaming reads (yet). */
+#if 0
+  git_err = git_odb_open_rstream(&odb_stream, odb, oid);
+  if (git_err)
+#endif
+    {
+      git_odb_object *ob;
+      apr_pool_t *subpool;
+      char *data;
+      apr_size_t sz; /* declared up front: no mixed declarations in C90 */
+
+#if 0
+      giterr_clear();
+#endif
+      /* libgit2 doesn't promise that this works :(
+         (Somehow they don't want to support files that don't
+          fit in memory) */
+      git_err = git_odb_read(&ob, odb, oid);
+      if (git_err)
+        {
+          git_odb_free(odb);
+          return svn_fs_git__wrap_git_error();
+        }
+
+      /* Copy the content into a subpool that the stream can destroy early. */
+      subpool = svn_pool_create(result_pool);
+      sz = git_odb_object_size(ob);
+      data = apr_pmemdup(subpool, git_odb_object_data(ob), sz);
+      git_odb_object_free(ob);
+
+      blob_stream = apr_pcalloc(result_pool, sizeof(*blob_stream));
+      blob_stream->cleanup_pool = subpool;
+      blob_stream->data = data;
+      blob_stream->data_left = sz;
+      git_odb_free(odb);
+      odb = NULL;
+    }
+#if 0
+  else
+    {
+      blob_stream = apr_pcalloc(result_pool, sizeof(*blob_stream));
+      blob_stream->cleanup_pool = result_pool;
+      blob_stream->odb = odb;
+      blob_stream->odb_stream = odb_stream;
+    }
+#endif
+
+  *stream = svn_stream_create(blob_stream, result_pool);
+  svn_stream_set_read2(*stream, blob_stream_read, blob_stream_read);
+  svn_stream_set_close(*stream, blob_stream_close);
+
+  apr_pool_cleanup_register(result_pool, blob_stream, blob_stream_cleanup,
+                            apr_pool_cleanup_null);
+
+  return SVN_NO_ERROR;
+}
+
+
 
 #undef svn_fs_git__wrap_git_error
 svn_error_t *