You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@subversion.apache.org by st...@apache.org on 2012/12/25 23:22:07 UTC

svn commit: r1425778 - in /subversion/trunk: build.conf tools/dev/fsfs-access-map.c

Author: stefan2
Date: Tue Dec 25 22:22:07 2012
New Revision: 1425778

URL: http://svn.apache.org/viewvc?rev=1425778&view=rev
Log:
A new developer tool: take an strace from some fsfs-based operation
and create a read access heat map for it.  The latter gets written
as BMP file along with a few stats.  This allows to visualize the
efficiency (or lack thereof) of our FS usage.

* build.conf
  (libs): add new dependency
  (fsfs-access-map): new tool

* tools/dev/fsfs-access-map.c
  (): new tool

Added:
    subversion/trunk/tools/dev/fsfs-access-map.c
Modified:
    subversion/trunk/build.conf

Modified: subversion/trunk/build.conf
URL: http://svn.apache.org/viewvc/subversion/trunk/build.conf?rev=1425778&r1=1425777&r2=1425778&view=diff
==============================================================================
--- subversion/trunk/build.conf (original)
+++ subversion/trunk/build.conf Tue Dec 25 22:22:07 2012
@@ -1224,7 +1224,7 @@ libs = __ALL__
        ra-local-test
        svndiff-test vdelta-test
        entries-dump atomic-ra-revprop-change wc-lock-tester wc-incomplete-tester
-       diff diff3 diff4 fsfs-reorg fsfs-stats svn-bench
+       diff diff3 diff4 fsfs-reorg fsfs-stats fsfs-access-map svn-bench
        client-test
        conflict-data-test db-test pristine-store-test entries-compat-test
        op-depth-test dirent_uri-test wc-queries-test wc-test
@@ -1287,6 +1287,13 @@ sources = fsfs-stats.c
 install = tools
 libs = libsvn_delta libsvn_subr apr
 
+[fsfs-access-map]
+type = exe
+path = tools/dev
+sources = fsfs-access-map.c
+install = tools
+libs = libsvn_subr apr
+
 [diff]
 type = exe
 path = tools/diff

Added: subversion/trunk/tools/dev/fsfs-access-map.c
URL: http://svn.apache.org/viewvc/subversion/trunk/tools/dev/fsfs-access-map.c?rev=1425778&view=auto
==============================================================================
--- subversion/trunk/tools/dev/fsfs-access-map.c (added)
+++ subversion/trunk/tools/dev/fsfs-access-map.c Tue Dec 25 22:22:07 2012
@@ -0,0 +1,604 @@
+/* fsfs-access-map.c -- convert strace output into FSFS access bitmap
+ *
+ * ====================================================================
+ *    Licensed to the Apache Software Foundation (ASF) under one
+ *    or more contributor license agreements.  See the NOTICE file
+ *    distributed with this work for additional information
+ *    regarding copyright ownership.  The ASF licenses this file
+ *    to you under the Apache License, Version 2.0 (the
+ *    "License"); you may not use this file except in compliance
+ *    with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *    Unless required by applicable law or agreed to in writing,
+ *    software distributed under the License is distributed on an
+ *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ *    KIND, either express or implied.  See the License for the
+ *    specific language governing permissions and limitations
+ *    under the License.
+ * ====================================================================
+ */
+
+#include "svn_pools.h"
+#include "svn_string.h"
+#include "svn_io.h"
+
+#include "private/svn_string_private.h"
+
+/* The information we gather for each file.  There will be one instance
+ * per file name - even if the file got deleted and re-created.
+ */
+typedef struct file_stats_t
+{
+  /* file name as found in the open() call */
+  const char *name;
+
+  /* file size as determined during the this tool run.  Will be 0 for
+   * files that no longer exist.  However, there may still be entries
+   * in the read_map. */
+  apr_int64_t size;
+
+  /* for rev files (packed or non-packed), this will be the first revision
+   * that file. -1 for non-rev files. */
+  apr_int64_t rev_num;
+
+  /* number of times this file got opened */
+  apr_int64_t open_count;
+
+  /* number of lseek counts */
+  apr_int64_t seek_count;
+
+  /* number of lseek calls to clusters not previously read */
+  apr_int64_t uncached_seek_count;
+
+  /* number of read() calls */
+  apr_int64_t read_count;
+
+  /* total number of bytes returned by those reads */
+  apr_int64_t read_size;
+
+  /* number of clusters read */
+  apr_int64_t clusters_read;
+
+  /* number of different clusters read
+   * (i.e. number of non-zero entries in read_map). */
+  apr_int64_t unique_clusters_read;
+
+  /* cluster -> read count mapping (1 byte per cluster, saturated at 255) */
+  apr_array_header_t *read_map;
+
+} file_stats_t;
+
+/* Represents an open file handle.  It refers to a file and concatenates
+ * consecutive reads such that we don't artificially hit the same cluster
+ * multiple times.  Instances of this type will be reused to limit the
+ * allocation load on the lookup map.
+ */
+typedef struct handle_info_t
+{
+  /* the open file */
+  file_stats_t *file;
+
+  /* file offset at which the current series of reads started (default: 0) */
+  apr_int64_t last_read_start;
+
+  /* bytes read so far in the current series of reads started (default: 0) */
+  apr_int64_t last_read_size;
+
+} handle_info_t;
+
+/* useful typedef */
+typedef unsigned char byte;
+
+/* global const char * file name -> *file_info_t map */
+static apr_hash_t *files = NULL;
+
+/* global int handle -> *handle_info_t map.  Entries don't get removed
+ * by close().  Instead, we simply recycle (and re-initilize) existing
+ * instances. */
+static apr_hash_t *handles = NULL;
+
+/* assume cluster size.  64 and 128kB are typical values for RAIDs. */
+static apr_int64_t cluster_size = 64 * 1024;
+
+/* Call this after a sequence of reads has been ended by either close()
+ * or lseek() for this HANDLE_INFO.  This will update the read_map and
+ * unique_clusters_read members of the underlying file_info_t structure.
+ */
+static void
+store_read_info(handle_info_t *handle_info)
+{
+  if (handle_info->last_read_size)
+    {
+      apr_size_t i;
+      apr_size_t first_cluster
+         = (apr_size_t)(handle_info->last_read_start / cluster_size);
+      apr_size_t last_cluster
+         = (apr_size_t)((  handle_info->last_read_start
+                         + handle_info->last_read_size
+                         - 1) / cluster_size);
+
+      /* auto-expand access map in case the file later shrunk or got deleted */
+      while (handle_info->file->read_map->nelts <= last_cluster)
+        APR_ARRAY_PUSH(handle_info->file->read_map, byte) = 0;
+
+      /* accumulate the accesses per cluster. Saturate and count first
+       * (i.e. disjoint) accesses clusters */
+      handle_info->file->clusters_read += last_cluster - first_cluster + 1;
+      for (i = first_cluster; i <= last_cluster; ++i)
+        {
+          byte *count = &APR_ARRAY_IDX(handle_info->file->read_map, i, byte);
+          if (*count == 0)
+            handle_info->file->unique_clusters_read++;
+          if (*count < 255)
+            ++*count;
+        }
+    }
+}
+
+/* Handle a open() call.  Ensures that a file_info_t for the given NAME
+ * exists.  Auto-create and initialize a handle_info_t for it linked to
+ * HANDLE.
+ */
+static void
+open_file(const char *name, int handle)
+{
+  file_stats_t *file = apr_hash_get(files, name, APR_HASH_KEY_STRING);
+  handle_info_t *handle_info = apr_hash_get(handles, &handle, sizeof(handle));
+
+  /* auto-create file info */
+  if (!file)
+    {
+      apr_pool_t *pool = apr_hash_pool_get(files);
+      apr_pool_t *sub_pool = svn_pool_create(pool);
+
+      apr_file_t *apr_file = NULL;
+      apr_finfo_t finfo = { 0 };
+      apr_size_t cluster_count = 0;
+
+      /* determine file size (if file still exists) */
+      apr_file_open(&apr_file, name,
+                    APR_READ | APR_BUFFERED, APR_OS_DEFAULT, sub_pool);
+      if (apr_file)
+        apr_file_info_get(&finfo, APR_FINFO_SIZE, apr_file);
+      apr_pool_destroy(sub_pool);
+
+      file = apr_pcalloc(pool, sizeof(*file));
+      file->name = apr_pstrdup(pool, name);
+      file->size = finfo.size;
+
+      /* pre-allocate cluster map accordingly
+       * (will be auto-expanded later if necessary) */
+      cluster_count = (apr_size_t)(1 + (file->size - 1) / cluster_size);
+      file->read_map = apr_array_make(pool, file->size
+                                          ? cluster_count
+                                          : 1, sizeof(byte));
+
+      while (file->read_map->nelts < cluster_count)
+        APR_ARRAY_PUSH(file->read_map, byte) = 0;
+
+      /* determine first revision of rev / packed rev files */
+      if (strstr(name, "/db/revs/") != NULL && strstr(name, "manifest") == NULL)
+        if (strstr(name, ".pack/pack") != NULL)
+          file->rev_num = atoi(strstr(name, "/db/revs/") + 9);
+        else
+          file->rev_num = atoi(strrchr(name, '/') + 1);
+      else
+        file->rev_num = -1;
+
+      apr_hash_set(files, file->name, APR_HASH_KEY_STRING, file);
+    }
+
+  file->open_count++;
+
+  /* auto-create handle instance */
+  if (!handle_info)
+    {
+      apr_pool_t *pool = apr_hash_pool_get(handles);
+      int *key = apr_palloc(pool, sizeof(*key));
+      *key = handle;
+
+      handle_info = apr_pcalloc(pool, sizeof(*handle_info));
+      apr_hash_set(handles, key, sizeof(handle), handle_info);
+    }
+
+  /* link handle to file */
+  handle_info->file = file;
+  handle_info->last_read_start = 0;
+  handle_info->last_read_size = 0;
+}
+
+/* COUNT bytes have been read from file with the given HANDLE.
+ */
+static void
+read_file(int handle, apr_int64_t count)
+{
+  handle_info_t *handle_info = apr_hash_get(handles, &handle, sizeof(handle));
+  if (handle_info)
+    {
+      /* known file handle -> expand current read sequence */
+      
+      handle_info->last_read_size += count;
+      handle_info->file->read_count++;
+      handle_info->file->read_size += count;
+    }
+}
+
+/* Seek to offset LOCATION in file given by HANDLE.
+ */
+static void
+seek_file(int handle, apr_int64_t location)
+{
+  handle_info_t *handle_info = apr_hash_get(handles, &handle, sizeof(handle));
+  if (handle_info)
+    {
+      /* known file handle -> end current read sequence and start a new one */
+
+      apr_size_t cluster = (apr_size_t)(location / cluster_size);
+
+      store_read_info(handle_info);
+      
+      handle_info->last_read_size = 0;
+      handle_info->last_read_start = location;
+      handle_info->file->seek_count++;
+
+      /* if we seek to a location that had not been read from before,
+       * there will probably be a real I/O seek on the following read.
+       */
+      if (   handle_info->file->read_map->nelts <= cluster
+          || APR_ARRAY_IDX(handle_info->file->read_map, cluster, byte) == 0)
+        handle_info->file->uncached_seek_count++;
+    }
+}
+
+/* The given file HANDLE has been closed.
+ */
+static void
+close_file(int handle)
+{
+  /* for known file handles, end current read sequence */
+
+  handle_info_t *handle_info = apr_hash_get(handles, &handle, sizeof(handle));
+  if (handle_info)
+    store_read_info(handle_info);
+}
+
+/* Parse / process non-empty the LINE from an strace output.
+ */
+static void
+parse_line(svn_stringbuf_t *line)
+{
+  /* determine function name, first parameter and return value */
+  char *func_end = strchr(line->data, '(');
+  char *return_value = strrchr(line->data, ' ');
+
+  char *first_param_end = strchr(func_end, ',');
+  if (first_param_end == NULL)
+    first_param_end = strchr(func_end, ')');
+
+  *func_end++ = 0;
+  *first_param_end = 0;
+  ++return_value;
+
+  /* process those operations that we care about */
+  if (strcmp(line->data, "open") == 0)
+    {
+      /* remove double quotes from file name parameter */
+      *func_end++ = 0;
+      *--first_param_end = 0;
+      
+      open_file(func_end, atoi(return_value));
+    }
+  else if (strcmp(line->data, "read") == 0)
+    read_file(atoi(func_end), atoi(return_value));
+  else if (strcmp(line->data, "lseek") == 0)
+    seek_file(atoi(func_end), atoi(return_value));
+  else if (strcmp(line->data, "close") == 0)
+    close_file(atoi(func_end));
+}
+
+/* Process the strace output stored in FILE.
+ */
+static void
+parse_file(apr_file_t *file)
+{
+  apr_pool_t *pool = svn_pool_create(NULL);
+  apr_pool_t *iter_pool = svn_pool_create(pool);
+
+  /* limit lines to 4k (usually, we need less than 200 bytes) */
+  svn_stringbuf_t *line = svn_stringbuf_create_ensure(4096, pool);
+
+  do
+    {
+      svn_error_t *err = NULL;
+
+      line->len = line->blocksize-1;
+      err = svn_io_read_length_line(file, line->data, &line->len, iter_pool);
+      svn_error_clear(err);
+      if (err)
+        break;
+
+      parse_line(line);
+      apr_pool_clear(iter_pool);
+    }
+  while (line->len > 0);
+}
+
+/* qsort() callback.  Sort files by revision number.
+ */
+static int
+compare_files(file_stats_t **lhs, file_stats_t **rhs)
+{
+  return (*lhs)->rev_num < (*rhs)->rev_num;
+}
+
+/* Return all rev (and packed rev) files sorted by revision number.
+ * Allocate the result in POOL.
+ */
+static apr_array_header_t *
+get_rev_files(apr_pool_t *pool)
+{
+  apr_hash_index_t *hi;
+  apr_array_header_t *result = apr_array_make(pool,
+                                              apr_hash_count(files),
+                                              sizeof(file_stats_t *));
+
+  /* select all files that have a rev number */
+  for (hi = apr_hash_first(pool, files); hi; hi = apr_hash_next(hi))
+    {
+      const char *name = NULL;
+      apr_ssize_t len = 0;
+      file_stats_t *file = NULL;
+
+      apr_hash_this(hi, (const void **)&name, &len, (void**)&file);
+      if (file->rev_num >= 0)
+        APR_ARRAY_PUSH(result, file_stats_t *) = file;
+    }
+
+  /* sort them */
+  qsort(result->elts, result->nelts, result->elt_size,
+        (int (*)(const void *, const void *))compare_files);
+
+  /* return the result */
+  return result;
+}
+
+/* store VALUE to DEST in little-endian format.  Assume that the target
+ * buffer is filled with 0.
+ */
+static void
+write_number(byte *dest, int value)
+{
+  while (value)
+    {
+      *dest = (byte)(value % 256);
+      value /= 256;
+      ++dest;
+    }
+}
+
+/* Return a linearly interpolated y value for X with X0 <= X <= X1 and
+ * the corresponding Y0 and Y1 values.
+ */
+static int
+interpolate(int y0, int x0, int y1, int x1, int x)
+{
+  return y0 + ((y1 - y0) * (x - x0)) / (x1 - x0);
+}
+
+/* Return the BMP-encoded 24 bit COLOR for the given value.
+ */
+static void
+select_color(byte color[3], byte value)
+{
+  /* value -> color table. Missing values get interpolated.
+   * { count, B - G - R } */
+  byte table[7][4] =
+    {
+      {   0, 255, 255, 255 },   /* unread -> white */
+      {   1, 128, 128,   0 },   /* read once -> turquoise  */
+      {   2,   0, 128,   0 },   /* twice  -> green */
+      {   4,   0, 192, 192 },   /*    4x  -> yellow */
+      {  16,   0,   0, 192 },   /*   16x  -> red */
+      {  64, 192,   0, 128 },   /*   64x  -> purple */
+      { 255,   0,   0,   0 }    /*   max  -> black */
+    };
+
+  /* find upper limit entry for value */
+  int i;
+  for (i = 0; i < 7; ++i)
+    if (table[i][0] >= value)
+      break;
+
+  /* exact match? */
+  if (table[i][0] == value)
+    {
+      color[0] = table[i][1];
+      color[1] = table[i][2];
+      color[2] = table[i][3];
+    }
+  else
+    {
+      /* interpolate */
+      color[0] = (byte)interpolate(table[i-1][1], table[i-1][0],
+                                   table[i][1], table[i][0],
+                                   value);
+      color[1] = (byte)interpolate(table[i-1][2], table[i-1][0],
+                                   table[i][2], table[i][0],
+                                   value);
+      color[2] = (byte)interpolate(table[i-1][3], table[i-1][0],
+                                   table[i][3], table[i][0],
+                                   value);
+    }
+}
+
+/* write the cluster read map for all files in INFO as BMP image to FILE.
+ */
+static void
+write_bitmap(apr_array_header_t *info, apr_file_t *file)
+{
+  /* BMP file header (some values need to filled in later)*/
+  byte header[54] =
+    {
+      'B', 'M',        /* magic */
+      0, 0, 0, 0,      /* file size (to be written later) */
+      0, 0, 0, 0,      /* reserved, unused */
+      54, 0, 0, 0,     /* pixel map starts at offset 54dec */
+
+      40, 0, 0, 0,     /* DIB header has 40 bytes */
+      0, 0, 0, 0,      /* x size in pixel */
+      0, 0, 0, 0,      /* y size in pixel */
+      1, 0,            /* 1 color plane */
+      24, 0,           /* 24 bits / pixel */
+      0, 0, 0, 0,      /* no pixel compression used */
+      0, 0, 0, 0,      /* size of pixel array (to be written later) */
+      0xe8, 3, 0, 0,   /* 1 pixel / mm */
+      0xe8, 3, 0, 0,   /* 1 pixel / mm */
+      0, 0, 0, 0,      /* no colors in palette */
+      0, 0, 0, 0       /* no colors to import */
+    };
+  
+  int ysize = info->nelts;
+  int xsize = 0;
+  int x, y;
+  int row_size;
+  int padding;
+  apr_size_t written;
+
+  /* xsize = max cluster number */
+  for (y = 0; y < ysize; ++y)
+    if (xsize < APR_ARRAY_IDX(info, y, file_stats_t *)->read_map->nelts)
+      xsize = APR_ARRAY_IDX(info, y, file_stats_t *)->read_map->nelts;
+
+  /* rows in BMP files must be aligned to 4 bytes */
+  row_size = APR_ALIGN(xsize * 3, 4);
+  padding = row_size - xsize * 3;
+
+  /* write numbers to header */
+  write_number(header + 2, ysize * row_size + 54);
+  write_number(header + 18, xsize);
+  write_number(header + 22, ysize);
+  write_number(header + 38, ysize * row_size);
+
+  /* write header to file */
+  written = sizeof(header);
+  apr_file_write(file, header, &written);
+
+  /* write all rows */
+  for (y = 0; y < ysize; ++y)
+    {
+      file_stats_t *file_info = APR_ARRAY_IDX(info, y, file_stats_t *);
+      for (x = 0; x < xsize; ++x)
+        {
+          byte color[3] = { 128, 128, 128 };
+          if (x < file_info->read_map->nelts)
+            {
+              byte count = APR_ARRAY_IDX(file_info->read_map, x, byte);
+              select_color(color, count);
+            }
+
+          written = sizeof(color);
+          apr_file_write(file, color, &written);
+        }
+
+      if (padding)
+        {
+          char pad[3] = { 0 };
+          written = padding;
+          apr_file_write(file, pad, &written);
+        }
+    }
+}
+
+/* Write a summary of the I/O ops to stdout.
+ * Use POOL for temporaries.
+ */
+static void
+print_stats(apr_pool_t *pool)
+{
+  apr_int64_t open_count = 0;
+  apr_int64_t seek_count = 0;
+  apr_int64_t read_count = 0;
+  apr_int64_t read_size = 0;
+  apr_int64_t clusters_read = 0;
+  apr_int64_t unique_clusters_read = 0;
+  apr_int64_t uncached_seek_count = 0;
+  
+  apr_hash_index_t *hi;
+  for (hi = apr_hash_first(pool, files); hi; hi = apr_hash_next(hi))
+    {
+      const char *name = NULL;
+      apr_ssize_t len = 0;
+      file_stats_t *file = NULL;
+      
+      apr_hash_this(hi, (const void **)&name, &len, (void**)&file);
+
+      open_count += file->open_count;
+      seek_count += file->seek_count;
+      read_count += file->read_count;
+      read_size += file->read_size;
+      clusters_read += file->clusters_read;
+      unique_clusters_read += file->unique_clusters_read;
+      uncached_seek_count += file->uncached_seek_count;
+    }
+
+  printf("%20s files\n", svn__i64toa_sep(apr_hash_count(files), ',', pool));
+  printf("%20s files opened\n", svn__i64toa_sep(open_count, ',', pool));
+  printf("%20s seeks\n", svn__i64toa_sep(seek_count, ',', pool));
+  printf("%20s uncached seeks\n", svn__i64toa_sep(uncached_seek_count, ',', pool));
+  printf("%20s reads\n", svn__i64toa_sep(read_count, ',', pool));
+  printf("%20s unique clusters read\n", svn__i64toa_sep(unique_clusters_read, ',', pool));
+  printf("%20s clusters read\n", svn__i64toa_sep(clusters_read, ',', pool));
+  printf("%20s bytes read\n", svn__i64toa_sep(read_size, ',', pool));
+}
+
+/* Some help output. */
+static void
+print_usage()
+{
+  printf("fsfs-access-map <file>\n\n");
+  printf("Reads strace of some FSFS-based tool from <file>, prints some stats\n");
+  printf("and writes a cluster access map to 'access.bmp' the current folder.\n");
+  printf("Each pixel corresponds to one 64kB cluster and every line to a rev\n");
+  printf("or packed rev file in the repository.  Turquoise and greed indicate\n");
+  printf("1 and 2 hits, yellow to read-ish colors for up to 20, shares of\n");
+  printf("for up to 100 and black for > 200 hits.\n\n");
+  printf("A typical strace invocation looks like this:\n");
+  printf("strace -e trace=open,close,read,lseek -o strace.txt svn log ...\n");
+}
+
+/* linear control flow */
+int main(int argc, const char *argv[])
+{
+  apr_pool_t *pool = NULL;
+  apr_file_t *file = NULL;
+  
+  apr_initialize();
+  atexit(apr_terminate);
+
+  pool = svn_pool_create(NULL);
+  files = apr_hash_make(pool);
+  handles = apr_hash_make(pool);
+
+  if (argc == 1)
+    apr_file_open(&file, argv[1], APR_READ | APR_BUFFERED, APR_OS_DEFAULT,
+                  pool);
+  if (file == NULL)
+    {
+      print_usage();
+      return 0;
+    }
+  parse_file(file);
+  apr_file_close(file);
+
+  print_stats(pool);
+  
+  apr_file_open(&file, "access.bmp",
+                APR_WRITE | APR_CREATE | APR_TRUNCATE | APR_BUFFERED,
+                APR_OS_DEFAULT, pool);
+  write_bitmap(get_rev_files(pool), file);
+  apr_file_close(file);
+
+  return 0;
+}
\ No newline at end of file