You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@subversion.apache.org by st...@apache.org on 2012/12/23 16:17:47 UTC

svn commit: r1425472 [4/6] - /subversion/branches/fsfs-format7/subversion/libsvn_fs_fs/

Added: subversion/branches/fsfs-format7/subversion/libsvn_fs_fs/recovery.c
URL: http://svn.apache.org/viewvc/subversion/branches/fsfs-format7/subversion/libsvn_fs_fs/recovery.c?rev=1425472&view=auto
==============================================================================
--- subversion/branches/fsfs-format7/subversion/libsvn_fs_fs/recovery.c (added)
+++ subversion/branches/fsfs-format7/subversion/libsvn_fs_fs/recovery.c Sun Dec 23 15:17:47 2012
@@ -0,0 +1,480 @@
+/* recovery.c --- FSFS recovery functionality
+ *
+ * ====================================================================
+ *    Licensed to the Apache Software Foundation (ASF) under one
+ *    or more contributor license agreements.  See the NOTICE file
+ *    distributed with this work for additional information
+ *    regarding copyright ownership.  The ASF licenses this file
+ *    to you under the Apache License, Version 2.0 (the
+ *    "License"); you may not use this file except in compliance
+ *    with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *    Unless required by applicable law or agreed to in writing,
+ *    software distributed under the License is distributed on an
+ *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ *    KIND, either express or implied.  See the License for the
+ *    specific language governing permissions and limitations
+ *    under the License.
+ * ====================================================================
+ */
+
+#include "recovery.h"
+
+#include "svn_hash.h"
+#include "svn_pools.h"
+
+#include "key-gen.h"
+#include "low_level.h"
+#include "rep-cache.h"
+#include "revprops.h"
+#include "transaction.h"
+#include "util.h"
+
+#include "../libsvn_fs/fs-loader.h"
+
+#include "svn_private_config.h"
+
+/* Part of the recovery procedure.  Set *REV to the largest revision in
+   filesystem FS.  Use POOL for temporary allocation. */
+static svn_error_t *
+recover_get_largest_revision(svn_fs_t *fs, svn_revnum_t *rev, apr_pool_t *pool)
+{
+  /* A readdir() or a linear scan over all revisions would be expensive,
+     so probe instead: gallop upwards to bracket the answer, then bisect.
+     Throughout, LOWER is a revision treated as existing and UPPER is one
+     that could not be opened. */
+  svn_revnum_t lower, upper = 1;
+  apr_pool_t *subpool = svn_pool_create(pool);
+
+  /* Phase 1: keep doubling UPPER until that revision is missing. */
+  for (;;)
+    {
+      apr_file_t *probe_file;
+      svn_error_t *err;
+
+      err = svn_fs_fs__open_pack_or_rev_file(&probe_file, fs, upper, subpool);
+      svn_pool_clear(subpool);
+
+      if (err == SVN_NO_ERROR)
+        {
+          upper *= 2;
+          continue;
+        }
+
+      /* Anything other than "no such revision" is a genuine failure. */
+      if (err->apr_err != SVN_ERR_FS_NO_SUCH_REVISION)
+        SVN_ERR(err);
+
+      svn_error_clear(err);
+      break;
+    }
+
+  /* Phase 2: bisect between LOWER (exists) and UPPER (does not). */
+  lower = upper / 2;
+  while (upper - lower > 1)
+    {
+      apr_file_t *probe_file;
+      svn_error_t *err;
+      svn_revnum_t mid = lower + (upper - lower) / 2;
+
+      err = svn_fs_fs__open_pack_or_rev_file(&probe_file, fs, mid, subpool);
+      svn_pool_clear(subpool);
+
+      if (err == SVN_NO_ERROR)
+        {
+          lower = mid;
+        }
+      else if (err->apr_err == SVN_ERR_FS_NO_SUCH_REVISION)
+        {
+          svn_error_clear(err);
+          upper = mid;
+        }
+      else
+        SVN_ERR(err);
+    }
+
+  svn_pool_destroy(subpool);
+
+  *rev = lower;
+  return SVN_NO_ERROR;
+}
+
+/* A baton for read_handler_recover(): lets recover_find_max_ids() below
+   read a fixed amount of data from a stream and then see EOF. */
+struct recover_read_from_file_baton
+{
+  svn_stream_t *stream;   /* the stream the handler reads from */
+  apr_pool_t *pool;       /* set by recover_find_max_ids(); unused by the handler */
+  apr_size_t remaining;   /* number of bytes the handler may still return */
+};
+
+/* A stream read handler used by recover_find_max_ids() below.  Read at
+   most BATON->REMAINING bytes into BUFFER and set *LEN to the number of
+   bytes actually read; after that, return nothing to indicate EOF. */
+static svn_error_t *
+read_handler_recover(void *baton, char *buffer, apr_size_t *len)
+{
+  struct recover_read_from_file_baton *b = baton;
+
+  /* Never hand out more than is left of the representation. */
+  if (*len > b->remaining)
+    *len = b->remaining;
+
+  /* A successful read of zero bytes signals EOF to the caller. */
+  if (*len == 0)
+    return SVN_NO_ERROR;
+
+  /* Read through LEN itself so the caller learns the real byte count,
+     and charge only what was actually read against the budget. */
+  SVN_ERR(svn_stream_read(b->stream, buffer, len));
+  b->remaining -= *len;
+  return SVN_NO_ERROR;
+}
+
+/* Part of the recovery procedure.  Read the directory noderev at offset
+   OFFSET of file REV_FILE (the revision file of revision REV of
+   filesystem FS).  For every entry whose id belongs to REV, raise
+   MAX_NODE_ID and MAX_COPY_ID to the entry's node-id and copy-id if
+   those are greater than the values currently stored, and recurse into
+   such entries that are directories.  Return SVN_ERR_FS_CORRUPT if the
+   noderev is no directory, is deltified, or has an unparsable entry.
+
+   MAX_NODE_ID and MAX_COPY_ID must be arrays of at least MAX_KEY_SIZE.
+   Perform temporary allocation in POOL. */
+static svn_error_t *
+recover_find_max_ids(svn_fs_t *fs, svn_revnum_t rev,
+                     apr_file_t *rev_file, apr_off_t offset,
+                     char *max_node_id, char *max_copy_id,
+                     apr_pool_t *pool)
+{
+  apr_hash_t *headers;
+  char *value;
+  representation_t *data_rep;
+  rep_args_t *ra;
+  struct recover_read_from_file_baton baton;
+  svn_stream_t *stream;
+  apr_hash_t *entries;
+  apr_hash_index_t *hi;
+  apr_pool_t *iterpool;
+
+  SVN_ERR(svn_io_file_seek(rev_file, APR_SET, &offset, pool));
+  SVN_ERR(read_header_block(&headers, svn_stream_from_aprfile2(rev_file, TRUE,
+                                                               pool),
+                            pool));
+
+  /* Check that this is a directory.  It should be. */
+  value = apr_hash_get(headers, HEADER_TYPE, APR_HASH_KEY_STRING);
+  if (value == NULL || strcmp(value, KIND_DIR) != 0)
+    return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
+                            _("Recovery encountered a non-directory node"));
+
+  /* Get the data location.  No data location indicates an empty directory. */
+  value = apr_hash_get(headers, HEADER_TEXT, APR_HASH_KEY_STRING);
+  if (!value)
+    return SVN_NO_ERROR;
+  SVN_ERR(read_rep_offsets(&data_rep, value, NULL, FALSE, pool));
+
+  /* If the directory's data representation wasn't changed in this revision,
+     we've already scanned the directory's contents for noderevs, so we don't
+     need to again.  This will occur if a property is changed on a directory
+     without changing the directory's contents. */
+  if (data_rep->revision != rev)
+    return SVN_NO_ERROR;
+
+  /* We could use get_dir_contents(), but this is much cheaper.  It does
+     rely on directory entries being stored as PLAIN reps, though. */
+  offset = data_rep->offset;
+  SVN_ERR(svn_io_file_seek(rev_file, APR_SET, &offset, pool));
+
+  baton.stream = svn_stream_from_aprfile2(rev_file, TRUE, pool);
+  SVN_ERR(read_rep_line(&ra, baton.stream, pool));
+  if (ra->is_delta)
+    return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
+                            _("Recovery encountered a deltified directory "
+                              "representation"));
+
+  /* Now create a stream that's allowed to read only as much data as is
+     stored in the representation. */
+  baton.pool = pool;
+  baton.remaining = (apr_size_t) data_rep->expanded_size;
+  stream = svn_stream_create(&baton, pool);
+  svn_stream_set_read(stream, read_handler_recover);
+
+  /* Now read the entries from that stream. */
+  entries = apr_hash_make(pool);
+  SVN_ERR(svn_hash_read2(entries, stream, SVN_HASH_TERMINATOR, pool));
+  SVN_ERR(svn_stream_close(stream));
+
+  /* Now check each of the entries in our directory to find new node and
+     copy ids, and recurse into new subdirectories. */
+  iterpool = svn_pool_create(pool);
+  for (hi = apr_hash_first(pool, entries); hi; hi = apr_hash_next(hi))
+    {
+      char *str_val;
+      char *str;
+      svn_node_kind_t kind;
+      svn_fs_id_t *id;
+      const char *node_id, *copy_id;
+      apr_off_t child_dir_offset;
+      const svn_string_t *entry = svn__apr_hash_index_val(hi);
+
+      svn_pool_clear(iterpool);
+
+      str_val = apr_pstrdup(iterpool, entry->data);
+
+      str = svn_cstring_tokenize(" ", &str_val);
+      if (str == NULL)
+        return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
+                                _("Directory entry corrupt"));
+
+      if (strcmp(str, KIND_FILE) == 0)
+        kind = svn_node_file;
+      else if (strcmp(str, KIND_DIR) == 0)
+        kind = svn_node_dir;
+      else
+        return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
+                                _("Directory entry corrupt"));
+
+      str = svn_cstring_tokenize(" ", &str_val);
+      if (str == NULL)
+        return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
+                                _("Directory entry corrupt"));
+
+      /* svn_fs_fs__id_parse() yields NULL for a malformed id string. */
+      id = svn_fs_fs__id_parse(str, strlen(str), iterpool);
+      if (id == NULL)
+        return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
+                                _("Directory entry corrupt"));
+
+      /* Nodes not modified in this revision were checked earlier. */
+      if (svn_fs_fs__id_rev(id) != rev)
+        continue;
+
+      node_id = svn_fs_fs__id_node_id(id);
+      copy_id = svn_fs_fs__id_copy_id(id);
+
+      if (svn_fs_fs__key_compare(node_id, max_node_id) > 0)
+        {
+          SVN_ERR_ASSERT(strlen(node_id) < MAX_KEY_SIZE);
+          apr_cpystrn(max_node_id, node_id, MAX_KEY_SIZE);
+        }
+      if (svn_fs_fs__key_compare(copy_id, max_copy_id) > 0)
+        {
+          SVN_ERR_ASSERT(strlen(copy_id) < MAX_KEY_SIZE);
+          apr_cpystrn(max_copy_id, copy_id, MAX_KEY_SIZE);
+        }
+
+      /* Only directories have children to descend into. */
+      if (kind == svn_node_file)
+        continue;
+
+      child_dir_offset = svn_fs_fs__id_offset(id);
+      SVN_ERR(recover_find_max_ids(fs, rev, rev_file, child_dir_offset,
+                                   max_node_id, max_copy_id, iterpool));
+    }
+  svn_pool_destroy(iterpool);
+
+  return SVN_NO_ERROR;
+}
+
+svn_error_t *
+svn_fs_fs__find_max_ids(svn_fs_t *fs, svn_revnum_t youngest,
+                        char *max_node_id, char *max_copy_id,
+                        apr_pool_t *pool)
+{
+  apr_file_t *file;
+  apr_off_t root_pos;
+  fs_fs_data_t *fs_data = fs->fsap_data;
+
+  /* This function must only be called for old repository formats. */
+  SVN_ERR_ASSERT(fs_data->format < SVN_FS_FS__MIN_NO_GLOBAL_IDS_FORMAT);
+
+  /* Open YOUNGEST, find its root directory and scan the tree below it. */
+  SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&file, fs, youngest, pool));
+  SVN_ERR(get_root_changes_offset(&root_pos, NULL, file, fs, youngest, pool));
+  SVN_ERR(recover_find_max_ids(fs, youngest, file, root_pos, max_node_id,
+                               max_copy_id, pool));
+  SVN_ERR(svn_io_file_close(file, pool));
+
+  return SVN_NO_ERROR;
+}
+
+/* Baton used for recover_body below; filled in by svn_fs_fs__recover. */
+struct recover_baton {
+  svn_fs_t *fs;                   /* the filesystem being recovered */
+  svn_cancel_func_t cancel_func;  /* optional; recover_body skips it if NULL */
+  void *cancel_baton;             /* argument passed to CANCEL_FUNC */
+};
+
+/* The work-horse for svn_fs_fs__recover, called with the FS write lock
+   held.  Implements the svn_fs_fs__with_write_lock() 'body' callback
+   type and rewrites 'current'.  BATON is a 'struct recover_baton *'. */
+static svn_error_t *
+recover_body(void *baton, apr_pool_t *pool)
+{
+  struct recover_baton *b = baton;
+  svn_fs_t *fs = b->fs;
+  fs_fs_data_t *ffd = fs->fsap_data;
+  svn_revnum_t max_rev;
+  char next_node_id_buf[MAX_KEY_SIZE], next_copy_id_buf[MAX_KEY_SIZE];
+  char *next_node_id = NULL, *next_copy_id = NULL;
+  svn_revnum_t youngest_rev;
+  svn_node_kind_t youngest_revprops_kind;
+
+  /* Lose potentially corrupted data in temp files */
+  SVN_ERR(cleanup_revprop_namespace(fs));
+
+  /* We need to know the largest revision in the filesystem. */
+  SVN_ERR(recover_get_largest_revision(fs, &max_rev, pool));
+
+  /* Get the expected youngest revision */
+  SVN_ERR(svn_fs_fs__youngest_rev(&youngest_rev, fs, pool));
+
+  /* Policy note:
+
+     Since the revprops file is written after the revs file, the true
+     maximum available revision is the youngest one for which both are
+     present.  That's probably the same as the max_rev we just found,
+     but if it's not, we could, in theory, repeatedly decrement
+     max_rev until we find a revision that has both a revs and
+     revprops file, then write db/current with that.
+
+     But we choose not to.  If a repository is so corrupt that it's
+     missing at least one revprops file, we shouldn't assume that the
+     youngest revision for which both the revs and revprops files are
+     present is healthy.  In other words, we're willing to recover
+     from a missing or out-of-date db/current file, because db/current
+     is truly redundant -- it's basically a cache so we don't have to
+     find max_rev each time, albeit a cache with unusual semantics,
+     since it also officially defines when a revision goes live.  But
+     if we're missing more than the cache, it's time to back out and
+     let the admin reconstruct things by hand: correctness at that
+     point may depend on external things like checking a commit email
+     list, looking in particular working copies, etc.
+
+     This policy matches well with a typical naive backup scenario.
+     Say you're rsyncing your FSFS repository nightly to the same
+     location.  Once revs and revprops are written, you've got the
+     maximum rev; if the backup should bomb before db/current is
+     written, then db/current could stay arbitrarily out-of-date, but
+     we can still recover.  It's a small window, but we might as well
+     do what we can. */
+
+  /* Even if db/current were missing, it would be created with 0 by
+     get_youngest(), so this conditional remains valid. */
+  if (youngest_rev > max_rev)
+    return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
+                             _("Expected current rev to be <= %ld "
+                               "but found %ld"), max_rev, youngest_rev);
+
+  /* We only need to search for maximum IDs for old FS formats which
+     use global ID counters. */
+  if (ffd->format < SVN_FS_FS__MIN_NO_GLOBAL_IDS_FORMAT)
+    {
+      /* Next we need to find the maximum node id and copy id in use across the
+         filesystem.  Unfortunately, the only way we can get this information
+         is to scan all the noderevs of all the revisions and keep track as
+         we go along. */
+      svn_revnum_t rev;
+      apr_pool_t *iterpool = svn_pool_create(pool);
+      char max_node_id[MAX_KEY_SIZE] = "0", max_copy_id[MAX_KEY_SIZE] = "0";
+      apr_size_t len;
+
+      for (rev = 0; rev <= max_rev; rev++)
+        {
+          apr_file_t *rev_file;
+          apr_off_t root_offset;
+
+          svn_pool_clear(iterpool);
+
+          if (b->cancel_func)
+            SVN_ERR(b->cancel_func(b->cancel_baton));
+
+          SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, fs, rev, iterpool));
+          SVN_ERR(get_root_changes_offset(&root_offset, NULL, rev_file, fs, rev,
+                                          iterpool));
+          SVN_ERR(recover_find_max_ids(fs, rev, rev_file, root_offset,
+                                       max_node_id, max_copy_id, iterpool));
+          SVN_ERR(svn_io_file_close(rev_file, iterpool));
+        }
+      svn_pool_destroy(iterpool);
+
+      /* Now that we finally have the maximum revision, node-id and copy-id, we
+         can bump the two ids to get the next of each. */
+      len = strlen(max_node_id);
+      svn_fs_fs__next_key(max_node_id, &len, next_node_id_buf);
+      next_node_id = next_node_id_buf;
+      len = strlen(max_copy_id);
+      svn_fs_fs__next_key(max_copy_id, &len, next_copy_id_buf);
+      next_copy_id = next_copy_id_buf;
+    }
+
+  /* Before setting current, verify that there is a revprops file
+     for the youngest revision.  (Issue #2992) */
+  SVN_ERR(svn_io_check_path(path_revprops(fs, max_rev, pool),
+                            &youngest_revprops_kind, pool));
+  if (youngest_revprops_kind == svn_node_none)
+    {
+      svn_boolean_t missing = TRUE;
+      if (!packed_revprop_available(&missing, fs, max_rev, pool))
+        {
+          if (missing)
+            {
+              return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
+                                      _("Revision %ld has a revs file but no "
+                                        "revprops file"),
+                                      max_rev);
+            }
+          else
+            {
+              return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
+                                      _("Revision %ld has a revs file but the "
+                                        "revprops file is inaccessible"),
+                                      max_rev);
+            }
+          }
+    }
+  else if (youngest_revprops_kind != svn_node_file)
+    {
+      return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
+                               _("Revision %ld has a non-file where its "
+                                 "revprops file should be"),
+                               max_rev);
+    }
+
+  /* Prune younger-than-(newfound-youngest) revisions from the rep
+     cache if sharing is enabled taking care not to create the cache
+     if it does not exist. */
+  if (ffd->rep_sharing_allowed)
+    {
+      svn_boolean_t rep_cache_exists;
+
+      SVN_ERR(svn_fs_fs__exists_rep_cache(&rep_cache_exists, fs, pool));
+      if (rep_cache_exists)
+        SVN_ERR(svn_fs_fs__del_rep_reference(fs, max_rev, pool));
+    }
+
+  /* Now store the discovered youngest revision, and the next IDs if
+     relevant, in a new 'current' file. */
+  return write_current(fs, max_rev, next_node_id, next_copy_id, pool);
+}
+
+/* This implements the fs_library_vtable_t.recover() API. */
+svn_error_t *
+svn_fs_fs__recover(svn_fs_t *fs,
+                   svn_cancel_func_t cancel_func, void *cancel_baton,
+                   apr_pool_t *pool)
+{
+  struct recover_baton rb;
+
+  rb.cancel_baton = cancel_baton;
+  rb.cancel_func = cancel_func;
+  rb.fs = fs;
+
+  /* FSFS offers no exclusive lock, which limits what recovery can do.
+     Recreating the 'current' file only requires keeping other writers
+     out, and the write lock is enough for that. */
+  return svn_fs_fs__with_write_lock(fs, recover_body, &rb, pool);
+}

Added: subversion/branches/fsfs-format7/subversion/libsvn_fs_fs/recovery.h
URL: http://svn.apache.org/viewvc/subversion/branches/fsfs-format7/subversion/libsvn_fs_fs/recovery.h?rev=1425472&view=auto
==============================================================================
--- subversion/branches/fsfs-format7/subversion/libsvn_fs_fs/recovery.h (added)
+++ subversion/branches/fsfs-format7/subversion/libsvn_fs_fs/recovery.h Sun Dec 23 15:17:47 2012
@@ -0,0 +1,46 @@
+/* recovery.h : interface to the FSFS recovery functionality
+ *
+ * ====================================================================
+ *    Licensed to the Apache Software Foundation (ASF) under one
+ *    or more contributor license agreements.  See the NOTICE file
+ *    distributed with this work for additional information
+ *    regarding copyright ownership.  The ASF licenses this file
+ *    to you under the Apache License, Version 2.0 (the
+ *    "License"); you may not use this file except in compliance
+ *    with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *    Unless required by applicable law or agreed to in writing,
+ *    software distributed under the License is distributed on an
+ *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ *    KIND, either express or implied.  See the License for the
+ *    specific language governing permissions and limitations
+ *    under the License.
+ * ====================================================================
+ */
+
+#ifndef SVN_LIBSVN_FS__RECOVERY_H
+#define SVN_LIBSVN_FS__RECOVERY_H
+
+#include "fs.h"
+
+/* Find the largest node ID and copy ID among the nodes modified in revision
+   YOUNGEST of FS.  Store them in the caller-initialized MAX_NODE_ID and
+   MAX_COPY_ID buffers if greater than what is there.  Use POOL for allocations.  */
+svn_error_t *
+svn_fs_fs__find_max_ids(svn_fs_t *fs,
+                        svn_revnum_t youngest,
+                        char *max_node_id,
+                        char *max_copy_id,
+                        apr_pool_t *pool);
+
+/* Recover the fsfs associated with filesystem FS: take the write lock
+   and rebuild the 'current' file.  CANCEL_FUNC/CANCEL_BATON optionally
+   support cancellation.  Use POOL for temporary allocations. */
+svn_error_t *svn_fs_fs__recover(svn_fs_t *fs,
+                                svn_cancel_func_t cancel_func,
+                                void *cancel_baton,
+                                apr_pool_t *pool);
+
+#endif

Modified: subversion/branches/fsfs-format7/subversion/libsvn_fs_fs/revprops.c
URL: http://svn.apache.org/viewvc/subversion/branches/fsfs-format7/subversion/libsvn_fs_fs/revprops.c?rev=1425472&r1=1425471&r2=1425472&view=diff
==============================================================================
--- subversion/branches/fsfs-format7/subversion/libsvn_fs_fs/revprops.c (original)
+++ subversion/branches/fsfs-format7/subversion/libsvn_fs_fs/revprops.c Sun Dec 23 15:17:47 2012
@@ -29,6 +29,7 @@
 #include "fs_fs.h"
 #include "revprops.h"
 #include "util.h"
+#include "transaction.h"
 
 #include "private/svn_delta_private.h"
 #include "private/svn_string_private.h"