You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@subversion.apache.org by st...@apache.org on 2012/12/25 23:22:07 UTC
svn commit: r1425778 - in /subversion/trunk: build.conf
tools/dev/fsfs-access-map.c
Author: stefan2
Date: Tue Dec 25 22:22:07 2012
New Revision: 1425778
URL: http://svn.apache.org/viewvc?rev=1425778&view=rev
Log:
A new developer tool: take an strace from some fsfs-based operation
and create a read access heat map for it. The latter gets written
as BMP file along with a few stats. This allows to visualize the
efficiency (or lack thereof) of our FS usage.
* build.conf
(libs): add new dependency
(fsfs-access-map): new tool
* tools/dev/fsfs-access-map.c
(): new tool
Added:
subversion/trunk/tools/dev/fsfs-access-map.c
Modified:
subversion/trunk/build.conf
Modified: subversion/trunk/build.conf
URL: http://svn.apache.org/viewvc/subversion/trunk/build.conf?rev=1425778&r1=1425777&r2=1425778&view=diff
==============================================================================
--- subversion/trunk/build.conf (original)
+++ subversion/trunk/build.conf Tue Dec 25 22:22:07 2012
@@ -1224,7 +1224,7 @@ libs = __ALL__
ra-local-test
svndiff-test vdelta-test
entries-dump atomic-ra-revprop-change wc-lock-tester wc-incomplete-tester
- diff diff3 diff4 fsfs-reorg fsfs-stats svn-bench
+ diff diff3 diff4 fsfs-reorg fsfs-stats fsfs-access-map svn-bench
client-test
conflict-data-test db-test pristine-store-test entries-compat-test
op-depth-test dirent_uri-test wc-queries-test wc-test
@@ -1287,6 +1287,13 @@ sources = fsfs-stats.c
install = tools
libs = libsvn_delta libsvn_subr apr
+[fsfs-access-map]
+type = exe
+path = tools/dev
+sources = fsfs-access-map.c
+install = tools
+libs = libsvn_subr apr
+
[diff]
type = exe
path = tools/diff
Added: subversion/trunk/tools/dev/fsfs-access-map.c
URL: http://svn.apache.org/viewvc/subversion/trunk/tools/dev/fsfs-access-map.c?rev=1425778&view=auto
==============================================================================
--- subversion/trunk/tools/dev/fsfs-access-map.c (added)
+++ subversion/trunk/tools/dev/fsfs-access-map.c Tue Dec 25 22:22:07 2012
@@ -0,0 +1,604 @@
+/* fsfs-access-map.c -- convert strace output into FSFS access bitmap
+ *
+ * ====================================================================
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ * ====================================================================
+ */
+
+#include "svn_pools.h"
+#include "svn_string.h"
+#include "svn_io.h"
+
+#include "private/svn_string_private.h"
+
+/* The information we gather for each file. There will be one instance
+ * per file name - even if the file got deleted and re-created.
+ */
+typedef struct file_stats_t
+{
+ /* file name as found in the open() call */
+ const char *name;
+
+ /* file size as determined during the this tool run. Will be 0 for
+ * files that no longer exist. However, there may still be entries
+ * in the read_map. */
+ apr_int64_t size;
+
+ /* for rev files (packed or non-packed), this will be the first revision
+ * that file. -1 for non-rev files. */
+ apr_int64_t rev_num;
+
+ /* number of times this file got opened */
+ apr_int64_t open_count;
+
+ /* number of lseek counts */
+ apr_int64_t seek_count;
+
+ /* number of lseek calls to clusters not previously read */
+ apr_int64_t uncached_seek_count;
+
+ /* number of read() calls */
+ apr_int64_t read_count;
+
+ /* total number of bytes returned by those reads */
+ apr_int64_t read_size;
+
+ /* number of clusters read */
+ apr_int64_t clusters_read;
+
+ /* number of different clusters read
+ * (i.e. number of non-zero entries in read_map). */
+ apr_int64_t unique_clusters_read;
+
+ /* cluster -> read count mapping (1 byte per cluster, saturated at 255) */
+ apr_array_header_t *read_map;
+
+} file_stats_t;
+
+/* Represents an open file handle. It refers to a file and concatenates
+ * consecutive reads such that we don't artificially hit the same cluster
+ * multiple times. Instances of this type will be reused to limit the
+ * allocation load on the lookup map.
+ */
+typedef struct handle_info_t
+{
+ /* the open file */
+ file_stats_t *file;
+
+ /* file offset at which the current series of reads started (default: 0) */
+ apr_int64_t last_read_start;
+
+ /* bytes read so far in the current series of reads started (default: 0) */
+ apr_int64_t last_read_size;
+
+} handle_info_t;
+
+/* useful typedef */
+typedef unsigned char byte;
+
+/* global const char * file name -> *file_info_t map */
+static apr_hash_t *files = NULL;
+
+/* global int handle -> *handle_info_t map. Entries don't get removed
+ * by close(). Instead, we simply recycle (and re-initilize) existing
+ * instances. */
+static apr_hash_t *handles = NULL;
+
+/* assume cluster size. 64 and 128kB are typical values for RAIDs. */
+static apr_int64_t cluster_size = 64 * 1024;
+
+/* Call this after a sequence of reads has been ended by either close()
+ * or lseek() for this HANDLE_INFO. This will update the read_map and
+ * unique_clusters_read members of the underlying file_info_t structure.
+ */
+static void
+store_read_info(handle_info_t *handle_info)
+{
+ if (handle_info->last_read_size)
+ {
+ apr_size_t i;
+ apr_size_t first_cluster
+ = (apr_size_t)(handle_info->last_read_start / cluster_size);
+ apr_size_t last_cluster
+ = (apr_size_t)(( handle_info->last_read_start
+ + handle_info->last_read_size
+ - 1) / cluster_size);
+
+ /* auto-expand access map in case the file later shrunk or got deleted */
+ while (handle_info->file->read_map->nelts <= last_cluster)
+ APR_ARRAY_PUSH(handle_info->file->read_map, byte) = 0;
+
+ /* accumulate the accesses per cluster. Saturate and count first
+ * (i.e. disjoint) accesses clusters */
+ handle_info->file->clusters_read += last_cluster - first_cluster + 1;
+ for (i = first_cluster; i <= last_cluster; ++i)
+ {
+ byte *count = &APR_ARRAY_IDX(handle_info->file->read_map, i, byte);
+ if (*count == 0)
+ handle_info->file->unique_clusters_read++;
+ if (*count < 255)
+ ++*count;
+ }
+ }
+}
+
+/* Handle a open() call. Ensures that a file_info_t for the given NAME
+ * exists. Auto-create and initialize a handle_info_t for it linked to
+ * HANDLE.
+ */
+static void
+open_file(const char *name, int handle)
+{
+ file_stats_t *file = apr_hash_get(files, name, APR_HASH_KEY_STRING);
+ handle_info_t *handle_info = apr_hash_get(handles, &handle, sizeof(handle));
+
+ /* auto-create file info */
+ if (!file)
+ {
+ apr_pool_t *pool = apr_hash_pool_get(files);
+ apr_pool_t *sub_pool = svn_pool_create(pool);
+
+ apr_file_t *apr_file = NULL;
+ apr_finfo_t finfo = { 0 };
+ apr_size_t cluster_count = 0;
+
+ /* determine file size (if file still exists) */
+ apr_file_open(&apr_file, name,
+ APR_READ | APR_BUFFERED, APR_OS_DEFAULT, sub_pool);
+ if (apr_file)
+ apr_file_info_get(&finfo, APR_FINFO_SIZE, apr_file);
+ apr_pool_destroy(sub_pool);
+
+ file = apr_pcalloc(pool, sizeof(*file));
+ file->name = apr_pstrdup(pool, name);
+ file->size = finfo.size;
+
+ /* pre-allocate cluster map accordingly
+ * (will be auto-expanded later if necessary) */
+ cluster_count = (apr_size_t)(1 + (file->size - 1) / cluster_size);
+ file->read_map = apr_array_make(pool, file->size
+ ? cluster_count
+ : 1, sizeof(byte));
+
+ while (file->read_map->nelts < cluster_count)
+ APR_ARRAY_PUSH(file->read_map, byte) = 0;
+
+ /* determine first revision of rev / packed rev files */
+ if (strstr(name, "/db/revs/") != NULL && strstr(name, "manifest") == NULL)
+ if (strstr(name, ".pack/pack") != NULL)
+ file->rev_num = atoi(strstr(name, "/db/revs/") + 9);
+ else
+ file->rev_num = atoi(strrchr(name, '/') + 1);
+ else
+ file->rev_num = -1;
+
+ apr_hash_set(files, file->name, APR_HASH_KEY_STRING, file);
+ }
+
+ file->open_count++;
+
+ /* auto-create handle instance */
+ if (!handle_info)
+ {
+ apr_pool_t *pool = apr_hash_pool_get(handles);
+ int *key = apr_palloc(pool, sizeof(*key));
+ *key = handle;
+
+ handle_info = apr_pcalloc(pool, sizeof(*handle_info));
+ apr_hash_set(handles, key, sizeof(handle), handle_info);
+ }
+
+ /* link handle to file */
+ handle_info->file = file;
+ handle_info->last_read_start = 0;
+ handle_info->last_read_size = 0;
+}
+
+/* COUNT bytes have been read from file with the given HANDLE.
+ */
+static void
+read_file(int handle, apr_int64_t count)
+{
+ handle_info_t *handle_info = apr_hash_get(handles, &handle, sizeof(handle));
+ if (handle_info)
+ {
+ /* known file handle -> expand current read sequence */
+
+ handle_info->last_read_size += count;
+ handle_info->file->read_count++;
+ handle_info->file->read_size += count;
+ }
+}
+
+/* Seek to offset LOCATION in file given by HANDLE.
+ */
+static void
+seek_file(int handle, apr_int64_t location)
+{
+ handle_info_t *handle_info = apr_hash_get(handles, &handle, sizeof(handle));
+ if (handle_info)
+ {
+ /* known file handle -> end current read sequence and start a new one */
+
+ apr_size_t cluster = (apr_size_t)(location / cluster_size);
+
+ store_read_info(handle_info);
+
+ handle_info->last_read_size = 0;
+ handle_info->last_read_start = location;
+ handle_info->file->seek_count++;
+
+ /* if we seek to a location that had not been read from before,
+ * there will probably be a real I/O seek on the following read.
+ */
+ if ( handle_info->file->read_map->nelts <= cluster
+ || APR_ARRAY_IDX(handle_info->file->read_map, cluster, byte) == 0)
+ handle_info->file->uncached_seek_count++;
+ }
+}
+
+/* The given file HANDLE has been closed.
+ */
+static void
+close_file(int handle)
+{
+ /* for known file handles, end current read sequence */
+
+ handle_info_t *handle_info = apr_hash_get(handles, &handle, sizeof(handle));
+ if (handle_info)
+ store_read_info(handle_info);
+}
+
+/* Parse / process non-empty the LINE from an strace output.
+ */
+static void
+parse_line(svn_stringbuf_t *line)
+{
+ /* determine function name, first parameter and return value */
+ char *func_end = strchr(line->data, '(');
+ char *return_value = strrchr(line->data, ' ');
+
+ char *first_param_end = strchr(func_end, ',');
+ if (first_param_end == NULL)
+ first_param_end = strchr(func_end, ')');
+
+ *func_end++ = 0;
+ *first_param_end = 0;
+ ++return_value;
+
+ /* process those operations that we care about */
+ if (strcmp(line->data, "open") == 0)
+ {
+ /* remove double quotes from file name parameter */
+ *func_end++ = 0;
+ *--first_param_end = 0;
+
+ open_file(func_end, atoi(return_value));
+ }
+ else if (strcmp(line->data, "read") == 0)
+ read_file(atoi(func_end), atoi(return_value));
+ else if (strcmp(line->data, "lseek") == 0)
+ seek_file(atoi(func_end), atoi(return_value));
+ else if (strcmp(line->data, "close") == 0)
+ close_file(atoi(func_end));
+}
+
+/* Process the strace output stored in FILE.
+ */
+static void
+parse_file(apr_file_t *file)
+{
+ apr_pool_t *pool = svn_pool_create(NULL);
+ apr_pool_t *iter_pool = svn_pool_create(pool);
+
+ /* limit lines to 4k (usually, we need less than 200 bytes) */
+ svn_stringbuf_t *line = svn_stringbuf_create_ensure(4096, pool);
+
+ do
+ {
+ svn_error_t *err = NULL;
+
+ line->len = line->blocksize-1;
+ err = svn_io_read_length_line(file, line->data, &line->len, iter_pool);
+ svn_error_clear(err);
+ if (err)
+ break;
+
+ parse_line(line);
+ apr_pool_clear(iter_pool);
+ }
+ while (line->len > 0);
+}
+
+/* qsort() callback. Sort files by revision number.
+ */
+static int
+compare_files(file_stats_t **lhs, file_stats_t **rhs)
+{
+ return (*lhs)->rev_num < (*rhs)->rev_num;
+}
+
+/* Return all rev (and packed rev) files sorted by revision number.
+ * Allocate the result in POOL.
+ */
+static apr_array_header_t *
+get_rev_files(apr_pool_t *pool)
+{
+ apr_hash_index_t *hi;
+ apr_array_header_t *result = apr_array_make(pool,
+ apr_hash_count(files),
+ sizeof(file_stats_t *));
+
+ /* select all files that have a rev number */
+ for (hi = apr_hash_first(pool, files); hi; hi = apr_hash_next(hi))
+ {
+ const char *name = NULL;
+ apr_ssize_t len = 0;
+ file_stats_t *file = NULL;
+
+ apr_hash_this(hi, (const void **)&name, &len, (void**)&file);
+ if (file->rev_num >= 0)
+ APR_ARRAY_PUSH(result, file_stats_t *) = file;
+ }
+
+ /* sort them */
+ qsort(result->elts, result->nelts, result->elt_size,
+ (int (*)(const void *, const void *))compare_files);
+
+ /* return the result */
+ return result;
+}
+
+/* store VALUE to DEST in little-endian format. Assume that the target
+ * buffer is filled with 0.
+ */
+static void
+write_number(byte *dest, int value)
+{
+ while (value)
+ {
+ *dest = (byte)(value % 256);
+ value /= 256;
+ ++dest;
+ }
+}
+
+/* Return a linearly interpolated y value for X with X0 <= X <= X1 and
+ * the corresponding Y0 and Y1 values.
+ */
+static int
+interpolate(int y0, int x0, int y1, int x1, int x)
+{
+ return y0 + ((y1 - y0) * (x - x0)) / (x1 - x0);
+}
+
+/* Return the BMP-encoded 24 bit COLOR for the given value.
+ */
+static void
+select_color(byte color[3], byte value)
+{
+ /* value -> color table. Missing values get interpolated.
+ * { count, B - G - R } */
+ byte table[7][4] =
+ {
+ { 0, 255, 255, 255 }, /* unread -> white */
+ { 1, 128, 128, 0 }, /* read once -> turquoise */
+ { 2, 0, 128, 0 }, /* twice -> green */
+ { 4, 0, 192, 192 }, /* 4x -> yellow */
+ { 16, 0, 0, 192 }, /* 16x -> red */
+ { 64, 192, 0, 128 }, /* 64x -> purple */
+ { 255, 0, 0, 0 } /* max -> black */
+ };
+
+ /* find upper limit entry for value */
+ int i;
+ for (i = 0; i < 7; ++i)
+ if (table[i][0] >= value)
+ break;
+
+ /* exact match? */
+ if (table[i][0] == value)
+ {
+ color[0] = table[i][1];
+ color[1] = table[i][2];
+ color[2] = table[i][3];
+ }
+ else
+ {
+ /* interpolate */
+ color[0] = (byte)interpolate(table[i-1][1], table[i-1][0],
+ table[i][1], table[i][0],
+ value);
+ color[1] = (byte)interpolate(table[i-1][2], table[i-1][0],
+ table[i][2], table[i][0],
+ value);
+ color[2] = (byte)interpolate(table[i-1][3], table[i-1][0],
+ table[i][3], table[i][0],
+ value);
+ }
+}
+
+/* write the cluster read map for all files in INFO as BMP image to FILE.
+ */
+static void
+write_bitmap(apr_array_header_t *info, apr_file_t *file)
+{
+ /* BMP file header (some values need to filled in later)*/
+ byte header[54] =
+ {
+ 'B', 'M', /* magic */
+ 0, 0, 0, 0, /* file size (to be written later) */
+ 0, 0, 0, 0, /* reserved, unused */
+ 54, 0, 0, 0, /* pixel map starts at offset 54dec */
+
+ 40, 0, 0, 0, /* DIB header has 40 bytes */
+ 0, 0, 0, 0, /* x size in pixel */
+ 0, 0, 0, 0, /* y size in pixel */
+ 1, 0, /* 1 color plane */
+ 24, 0, /* 24 bits / pixel */
+ 0, 0, 0, 0, /* no pixel compression used */
+ 0, 0, 0, 0, /* size of pixel array (to be written later) */
+ 0xe8, 3, 0, 0, /* 1 pixel / mm */
+ 0xe8, 3, 0, 0, /* 1 pixel / mm */
+ 0, 0, 0, 0, /* no colors in palette */
+ 0, 0, 0, 0 /* no colors to import */
+ };
+
+ int ysize = info->nelts;
+ int xsize = 0;
+ int x, y;
+ int row_size;
+ int padding;
+ apr_size_t written;
+
+ /* xsize = max cluster number */
+ for (y = 0; y < ysize; ++y)
+ if (xsize < APR_ARRAY_IDX(info, y, file_stats_t *)->read_map->nelts)
+ xsize = APR_ARRAY_IDX(info, y, file_stats_t *)->read_map->nelts;
+
+ /* rows in BMP files must be aligned to 4 bytes */
+ row_size = APR_ALIGN(xsize * 3, 4);
+ padding = row_size - xsize * 3;
+
+ /* write numbers to header */
+ write_number(header + 2, ysize * row_size + 54);
+ write_number(header + 18, xsize);
+ write_number(header + 22, ysize);
+ write_number(header + 38, ysize * row_size);
+
+ /* write header to file */
+ written = sizeof(header);
+ apr_file_write(file, header, &written);
+
+ /* write all rows */
+ for (y = 0; y < ysize; ++y)
+ {
+ file_stats_t *file_info = APR_ARRAY_IDX(info, y, file_stats_t *);
+ for (x = 0; x < xsize; ++x)
+ {
+ byte color[3] = { 128, 128, 128 };
+ if (x < file_info->read_map->nelts)
+ {
+ byte count = APR_ARRAY_IDX(file_info->read_map, x, byte);
+ select_color(color, count);
+ }
+
+ written = sizeof(color);
+ apr_file_write(file, color, &written);
+ }
+
+ if (padding)
+ {
+ char pad[3] = { 0 };
+ written = padding;
+ apr_file_write(file, pad, &written);
+ }
+ }
+}
+
+/* Write a summary of the I/O ops to stdout.
+ * Use POOL for temporaries.
+ */
+static void
+print_stats(apr_pool_t *pool)
+{
+ apr_int64_t open_count = 0;
+ apr_int64_t seek_count = 0;
+ apr_int64_t read_count = 0;
+ apr_int64_t read_size = 0;
+ apr_int64_t clusters_read = 0;
+ apr_int64_t unique_clusters_read = 0;
+ apr_int64_t uncached_seek_count = 0;
+
+ apr_hash_index_t *hi;
+ for (hi = apr_hash_first(pool, files); hi; hi = apr_hash_next(hi))
+ {
+ const char *name = NULL;
+ apr_ssize_t len = 0;
+ file_stats_t *file = NULL;
+
+ apr_hash_this(hi, (const void **)&name, &len, (void**)&file);
+
+ open_count += file->open_count;
+ seek_count += file->seek_count;
+ read_count += file->read_count;
+ read_size += file->read_size;
+ clusters_read += file->clusters_read;
+ unique_clusters_read += file->unique_clusters_read;
+ uncached_seek_count += file->uncached_seek_count;
+ }
+
+ printf("%20s files\n", svn__i64toa_sep(apr_hash_count(files), ',', pool));
+ printf("%20s files opened\n", svn__i64toa_sep(open_count, ',', pool));
+ printf("%20s seeks\n", svn__i64toa_sep(seek_count, ',', pool));
+ printf("%20s uncached seeks\n", svn__i64toa_sep(uncached_seek_count, ',', pool));
+ printf("%20s reads\n", svn__i64toa_sep(read_count, ',', pool));
+ printf("%20s unique clusters read\n", svn__i64toa_sep(unique_clusters_read, ',', pool));
+ printf("%20s clusters read\n", svn__i64toa_sep(clusters_read, ',', pool));
+ printf("%20s bytes read\n", svn__i64toa_sep(read_size, ',', pool));
+}
+
+/* Some help output. */
+static void
+print_usage()
+{
+ printf("fsfs-access-map <file>\n\n");
+ printf("Reads strace of some FSFS-based tool from <file>, prints some stats\n");
+ printf("and writes a cluster access map to 'access.bmp' the current folder.\n");
+ printf("Each pixel corresponds to one 64kB cluster and every line to a rev\n");
+ printf("or packed rev file in the repository. Turquoise and greed indicate\n");
+ printf("1 and 2 hits, yellow to read-ish colors for up to 20, shares of\n");
+ printf("for up to 100 and black for > 200 hits.\n\n");
+ printf("A typical strace invocation looks like this:\n");
+ printf("strace -e trace=open,close,read,lseek -o strace.txt svn log ...\n");
+}
+
+/* linear control flow */
+int main(int argc, const char *argv[])
+{
+ apr_pool_t *pool = NULL;
+ apr_file_t *file = NULL;
+
+ apr_initialize();
+ atexit(apr_terminate);
+
+ pool = svn_pool_create(NULL);
+ files = apr_hash_make(pool);
+ handles = apr_hash_make(pool);
+
+ if (argc == 1)
+ apr_file_open(&file, argv[1], APR_READ | APR_BUFFERED, APR_OS_DEFAULT,
+ pool);
+ if (file == NULL)
+ {
+ print_usage();
+ return 0;
+ }
+ parse_file(file);
+ apr_file_close(file);
+
+ print_stats(pool);
+
+ apr_file_open(&file, "access.bmp",
+ APR_WRITE | APR_CREATE | APR_TRUNCATE | APR_BUFFERED,
+ APR_OS_DEFAULT, pool);
+ write_bitmap(get_rev_files(pool), file);
+ apr_file_close(file);
+
+ return 0;
+}
\ No newline at end of file