You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@subversion.apache.org by gs...@apache.org on 2011/04/15 04:10:50 UTC
svn commit: r1092569 - in /subversion/trunk/tools/dev/benchmarks: ./ suite1/ suite1/benchmark.py suite1/run

Author: gstein
Date: Fri Apr 15 02:10:49 2011
New Revision: 1092569

URL: http://svn.apache.org/viewvc?rev=1092569&view=rev
Log:
Add Neels' benchmarks, from his email of April 7 to the dev list.

These are unchanged from his attachments. Additional changes will follow
as separate commits.

* dev/benchmarks: new directory to hold benchmarks

* dev/benchmarks/suite1: new directory for Neels' benchmark suite

* dev/benchmarks/suite1/benchmark.py: new benchmark tool

* dev/benchmarks/suite1/run:
    wrapper for benchmark.py to set up some basic conditions and parameters

Added:
    subversion/trunk/tools/dev/benchmarks/
    subversion/trunk/tools/dev/benchmarks/suite1/
    subversion/trunk/tools/dev/benchmarks/suite1/benchmark.py   (with props)
    subversion/trunk/tools/dev/benchmarks/suite1/run   (with props)

Added: subversion/trunk/tools/dev/benchmarks/suite1/benchmark.py
URL: http://svn.apache.org/viewvc/subversion/trunk/tools/dev/benchmarks/suite1/benchmark.py?rev=1092569&view=auto
==============================================================================
--- subversion/trunk/tools/dev/benchmarks/suite1/benchmark.py (added)
+++ subversion/trunk/tools/dev/benchmarks/suite1/benchmark.py Fri Apr 15 02:10:49 2011
@@ -0,0 +1,553 @@
+#!/usr/bin/env python
+
+"""
+usage: benchmark.py run <run_file> <levels> <spread> [N]
+       benchmark.py show <run_file> [<run_file> ...]
+       benchmark.py compare <run_file1> <run_file2>
+       benchmark.py combine <run_file1> <run_file2> [...] <dest_file>
+
+Test data is written to run_file.
+If a run_file exists, data is added to it.
+<levels> is the number of directory levels to create
+<spread> is the number of child trees spreading off each dir level
+If <N> is provided, the run is repeated N times.
+"""
+
+import os, sys, time
+import tempfile
+
+from datetime import datetime, timedelta
+from subprocess import Popen, PIPE, call
+import random
+import shutil
+
+import cPickle
+
+# When True, command lines and their captured output are echoed.
+VERBOSE = False
+
+# NOTE(review): not referenced anywhere in this file as shown -- confirm
+# whether it is still needed.
+DEFAULT_TIMINGS_PATH = './benchmark_py_last_run.py-pickle'
+
+# The Timings instance that svn() records into; assigned in the __main__
+# section before run() is called.
+timings = None
+
+def run_cmd(cmd, stdin=None, shell=False):
+
+  if shell:
+    printable_cmd = 'CMD: ' + cmd
+  else:
+    printable_cmd = 'CMD: ' + ' '.join(cmd)
+  if VERBOSE:
+    print printable_cmd
+
+  if stdin:
+    stdin_arg = PIPE
+  else:
+    stdin_arg = None
+
+  p = Popen(cmd, stdin=stdin_arg, stdout=PIPE, stderr=PIPE, shell=shell)
+  stdout,stderr = p.communicate(input=stdin)
+
+  if VERBOSE:
+    if (stdout):
+      print "STDOUT: [[[\n%s]]]" % ''.join(stdout)
+  if (stderr):
+    print "STDERR: [[[\n%s]]]" % ''.join(stderr)
+
+  return stdout,stderr
+
+def timedelta_to_seconds(td):
+  return ( float(td.seconds)
+           + float(td.microseconds) / (10**6)
+           + td.days * 24 * 60 * 60 )
+
+
+class Timings:
+
+  def __init__(self, *ignore_svn_cmds):
+    self.timings = {}
+    self.current_name = None
+    self.tic_at = None
+    self.ignore = ignore_svn_cmds
+    self.name = None
+
+  def tic(self, name):
+    if name in self.ignore:
+      return
+    self.toc()
+    self.current_name = name
+    self.tic_at = datetime.now()
+
+  def toc(self):
+    if self.current_name and self.tic_at:
+      toc_at = datetime.now()
+      self.submit_timing(self.current_name, 
+                         timedelta_to_seconds(toc_at - self.tic_at))
+    self.current_name = None
+    self.tic_at = None
+
+  def submit_timing(self, name, seconds):
+    times = self.timings.get(name)
+    if not times:
+      times = []
+      self.timings[name] = times
+    times.append(seconds)
+
+  def summary(self):
+    s = []
+    if self.name:
+      s.append('Timings for %s' % self.name)
+    s.append('    N   min     max     avg    operation  (unit is seconds)')
+
+    names = sorted(self.timings.keys())
+
+    for name in names:
+      timings = self.timings.get(name)
+      if not name or not timings: continue
+
+      s.append('%5d %7.3f %7.3f %7.3f  %s' % (
+                 len(timings),
+                 min(timings),
+                 max(timings),
+                 reduce(lambda x,y: x + y, timings) / len(timings),
+                 name))
+    return '\n'.join(s)
+
+
+  def compare_to(self, other):
+    selfname = self.name
+    if not selfname:
+      selfname = 'unnamed'
+    othername = other.name
+    if not othername:
+      othername = 'the other'
+
+    s = ['COMPARE %s to %s'%(othername, selfname),
+         '  1.23|+0.45  means factor=1.23, difference in seconds = 0.45',
+         '  factor < 1 or difference < 0 means \'%s\' is faster than \'%s\''
+           % (self.name, othername)]
+
+    s.append('      min              max              avg         operation')
+
+    def do_div(a, b):
+      if b:
+        return float(a) / float(b)
+      else:
+        return 0.0
+
+    def do_diff(a, b):
+      return float(a) - float(b)
+
+    def min_max_avg(ttimings):
+      return ( min(ttimings),
+               max(ttimings),
+               reduce(lambda x,y: x + y, ttimings) / len(ttimings) )
+
+    names = sorted(self.timings.keys())
+
+    for name in names:
+      timings = self.timings.get(name)
+      other_timings = other.timings.get(name)
+      if not other_timings:
+        continue
+
+
+      min_me, max_me, avg_me = min_max_avg(timings)
+      min_other, max_other, avg_other = min_max_avg(other_timings)
+
+      s.append('%-16s %-16s %-16s  %s' % (
+                 '%7.3f|%+7.4f' % (
+                     do_div(min_me, min_other),
+                     do_diff(min_me, min_other)
+                   ),
+
+                 '%7.3f|%+7.4f' % (
+                     do_div(max_me, max_other),
+                     do_diff(max_me, max_other)
+                   ),
+
+                 '%7.3f|%+7.4f' % (
+                     do_div(avg_me, avg_other),
+                     do_diff(avg_me, avg_other)
+                   ),
+
+                 name))
+    return '\n'.join(s)
+
+
+  def add(self, other):
+    for name, other_times in other.timings.items():
+      my_times = self.timings.get(name)
+      if not my_times:
+        my_times = []
+        self.timings[name] = my_times
+      my_times.extend(other_times)
+
+
+
+
+j = os.path.join
+
+_create_count = 0
+
+def next_name(prefix):
+  global _create_count
+  _create_count += 1
+  return '_'.join((prefix, str(_create_count)))
+
+def create_tree(in_dir, levels, spread=5):
+  try:
+    os.mkdir(in_dir)
+  except:
+    pass
+
+  for i in range(spread):
+    # files
+    fn = j(in_dir, next_name('file'))
+    f = open(fn, 'w')
+    f.write('This is %s\n' % fn)
+    f.close()
+
+    # dirs
+    if (levels > 1):
+      dn = j(in_dir, next_name('dir'))
+      create_tree(dn, levels - 1, spread)
+
+
+def svn(*args):
+  global timings
+  name = args[0]
+  cmd = ['svn']
+  cmd.extend(args)
+  if VERBOSE:
+    print 'svn cmd: ' + ' '.join(cmd)
+ 
+  stdin = None
+  if stdin:
+    stdin_arg = PIPE
+  else:
+    stdin_arg = None
+
+  timings.tic(name)
+  try:
+    p = Popen(cmd, stdin=stdin_arg, stdout=PIPE, stderr=PIPE, shell=False)
+    stdout,stderr = p.communicate(input=stdin)
+  finally:
+    timings.toc()
+
+  if VERBOSE:
+    if (stdout):
+      print "STDOUT: [[[\n%s]]]" % ''.join(stdout)
+    if (stderr):
+      print "STDERR: [[[\n%s]]]" % ''.join(stderr)
+
+  return stdout,stderr
+
+
+def add(*args):
+  """Run 'svn add' with ARGS; returns what svn() returns."""
+  return svn('add', *args)
+
+def ci(*args):
+  """Run 'svn commit' with the log message 'm' and ARGS."""
+  return svn('commit', '-mm', *args)
+
+def up(*args):
+  """Run 'svn update' with ARGS; returns what svn() returns."""
+  return svn('update', *args)
+
+def st(*args):
+  """Run 'svn status' with ARGS; returns what svn() returns."""
+  return svn('status', *args)
+
+_chars = [chr(x) for x in range(ord('a'), ord('z') +1)]
+
+def randstr(len=8):
+  return ''.join( [random.choice(_chars) for i in range(len)] )
+
+def _copy(path):
+  """'svn copy' PATH to a fresh name derived from PATH + '_copied'."""
+  dest = next_name(path + '_copied')
+  svn('copy', path, dest)
+
+def _move(path):
+  """'svn move' PATH to PATH + '_moved'."""
+  dest = path + '_moved'
+  svn('move', path, dest)
+
+def _propmod(path):
+  so, se = svn('proplist', path)
+  propnames = [line.strip() for line in so.strip().split('\n')[1:]]
+
+  # modify?
+  if len(propnames):
+    svn('ps', propnames[len(propnames) / 2], randstr(), path)
+
+  # del?
+  if len(propnames) > 1:
+    svn('propdel', propnames[len(propnames) / 2], path)
+
+
+def _propadd(path):
+  """Set a property with a random name and a random value on PATH."""
+  # set a new one.
+  svn('propset', randstr(), randstr(), path)
+
+
+def _mod(path):
+  if os.path.isdir(path):
+    return _propmod(path)
+
+  f = open(path, 'a')
+  f.write('\n%s\n' % randstr())
+  f.close()
+
+def _add(path):
+  if os.path.isfile(path):
+    return _mod(path)
+
+  if random.choice((True, False)):
+    # create a dir
+    svn('mkdir', j(path, next_name('new_dir')))
+  else:
+    # create a file
+    new_path = j(path, next_name('new_file'))
+    f = open(new_path, 'w')
+    f.write(randstr())
+    f.close()
+    svn('add', new_path)
+
+def _del(path):
+  """Run 'svn delete' on PATH."""
+  svn('delete', path)
+
+# The modifications modify_tree() picks from.  _copy, _move and _del are
+# commented out and therefore never picked.
+_mod_funcs = (_mod, _add, _propmod, _propadd, )#_copy,) # _move, _del)
+  
+def modify_tree(in_dir, fraction):
+  """Randomly modify entries below IN_DIR, recursing into subdirectories.
+
+  Each entry whose name does not start with '.' is modified with
+  probability FRACTION, using a randomly chosen function from _mod_funcs.
+  """
+  # The listing is taken once, before anything is modified, and used for
+  # both passes below.
+  child_names = os.listdir(in_dir)
+  for child_name in child_names:
+    if child_name[0] == '.':
+      continue
+    if random.random() < fraction:
+      path = j(in_dir, child_name)
+      random.choice(_mod_funcs)(path)
+
+  # Second pass: descend into the directories from that same listing.
+  for child_name in child_names:
+    if child_name[0] == '.': continue
+    path = j(in_dir, child_name)
+    if os.path.isdir(path):
+      modify_tree(path, fraction)
+  
+def propadd_tree(in_dir, fraction):
+  """Add a random property to entries below IN_DIR, recursively.
+
+  Each entry whose name does not start with '.' gets a new property with
+  probability FRACTION.
+  """
+  for child_name in os.listdir(in_dir):
+    if child_name[0] == '.': continue
+    path = j(in_dir, child_name)
+    if random.random() < fraction:
+      _propadd(path)
+    if os.path.isdir(path):
+      propadd_tree(path, fraction)
+
+
+def run(levels, spread, N):
+  """Run the benchmark scenario N times, recording into the global timings.
+
+  Every iteration works in a fresh temporary directory, removed again
+  afterwards, that holds a new repository and two working copies.  LEVELS
+  and SPREAD are passed on to create_tree().  A summary of all timings
+  collected so far is printed after each iteration.
+  """
+  global timings
+
+  # ensure identical modifications for every run of this script
+  random.seed(0)
+
+  for i in range(N):
+
+    base = tempfile.mkdtemp()
+    try:
+      repos = j(base, 'repos')
+      wc = j(base, 'wc')
+      wc2 = j(base, 'wc2')
+
+      file_url = 'file://%s' % repos
+
+      so, se = run_cmd(['which', 'svn'])
+      if not so:
+        print "Can't find svn."
+        exit(1)
+
+      print '\nRunning svn benchmark in', base
+      print 'dir levels: %s; new files and dirs per leaf: %s; run %d of %d' %(
+            levels, spread, i + 1, N)
+
+      so, se = svn('--version')
+      print ', '.join( so.split('\n')[:2] )
+      started = datetime.now()
+
+      try:
+        # Create the repository and import a generated tree as trunk.
+        run_cmd(['svnadmin', 'create', repos])
+        svn('checkout', file_url, wc)
+
+        trunk = j(wc, 'trunk')
+        create_tree(trunk, levels, spread)
+        add(trunk)
+        st(wc)
+        ci(wc)
+        up(wc)
+        propadd_tree(trunk, 0.5)
+        ci(wc)
+        up(wc)
+        st(wc)
+
+        trunk_url = file_url + '/trunk'
+        branch_url = file_url + '/branch'
+
+        # Branch trunk with a URL-to-URL copy.
+        svn('copy', '-mm', trunk_url, branch_url)
+        st(wc)
+
+        up(wc)
+        st(wc)
+
+        # Modify trunk through a second working copy.
+        svn('checkout', trunk_url, wc2)
+        st(wc2)
+        modify_tree(wc2, 0.5)
+        st(wc2)
+        ci(wc2)
+        up(wc2)
+        up(wc)
+
+        # Switch the second working copy to the branch and modify that.
+        svn('switch', branch_url, wc2)
+        modify_tree(wc2, 0.5)
+        st(wc2)
+        ci(wc2)
+        up(wc2)
+        up(wc)
+
+        # Modify trunk in the first working copy.
+        modify_tree(trunk, 0.5)
+        st(wc)
+        ci(wc)
+        up(wc2)
+        up(wc)
+
+        # Merge trunk into the branch working copy.
+        svn('merge', '--accept=postpone', trunk_url, wc2)
+        st(wc2)
+        svn('resolve', '--accept=mine-conflict', wc2)
+        st(wc2)
+        svn('resolved', '-R', wc2)
+        st(wc2)
+        ci(wc2)
+        up(wc2)
+        up(wc)
+
+        # Reintegrate the branch into trunk.
+        svn('merge', '--accept=postpone', '--reintegrate', branch_url, trunk)
+        st(wc)
+        svn('resolve', '--accept=mine-conflict', wc)
+        st(wc)
+        svn('resolved', '-R', wc)
+        st(wc)
+        ci(wc)
+        up(wc2)
+        up(wc)
+
+        # Delete the branch.
+        svn('delete', j(wc, 'branch'))
+        ci(wc)
+        up(wc2)
+        up(wc)
+
+
+      finally:
+        # The total is recorded even if a step above raised.
+        stopped = datetime.now()
+        print '\nDone with svn benchmark in', (stopped - started)
+        timings.submit_timing('TOTAL RUN', timedelta_to_seconds(stopped - started))
+
+        # rename ps to prop mod
+        if timings.timings.get('ps'):
+          has = timings.timings.get('prop mod')
+          if not has:
+            has = []
+            timings.timings['prop mod'] = has
+          has.extend( timings.timings['ps'] )
+          del timings.timings['ps']
+
+        print timings.summary()
+    finally:
+      shutil.rmtree(base)
+
+
+def read_from_file(file_path):
+  f = open(file_path, 'rb')
+  try:
+    instance = cPickle.load(f)
+    instance.name = os.path.basename(file_path)
+  finally:
+    f.close()
+  return instance
+
+
+def write_to_file(file_path, instance):
+  f = open(file_path, 'wb')
+  cPickle.dump(instance, f)
+  f.close()
+
+def usage():
+  """Print the module docstring, which holds the usage text."""
+  print __doc__
+
+if __name__ == '__main__':
+  if len(sys.argv) > 1 and 'compare'.startswith(sys.argv[1]):
+    if len(sys.argv) < 4:
+      usage()
+      exit(1)
+    
+    p1,p2 = sys.argv[2:4]
+
+    t1 = read_from_file(p1)
+    t2 = read_from_file(p2)
+
+    print t1.summary()
+    print '---'
+    print t2.summary()
+    print '---'
+    print t2.compare_to(t1)
+
+  elif len(sys.argv) > 1 and 'combine'.startswith(sys.argv[1]):
+    if len(sys.argv) < 5:
+      usage()
+      exit(1)
+    
+    dest = sys.argv[-1]
+    paths = sys.argv[2:-1]
+
+    total = Timings('--version');
+
+    for path in paths:
+      t = read_from_file(path)
+      total.add(t)
+
+    print total.summary()
+    write_to_file(dest, total)
+
+
+
+  elif len(sys.argv) > 1 and 'run'.startswith(sys.argv[1]):
+    try:
+      timings_path = sys.argv[2]
+      levels = int(sys.argv[3])
+      spread = int(sys.argv[4])
+
+      if len(sys.argv) > 5:
+        N = int(sys.argv[5])
+      else:
+        N = 1
+    except:
+      usage()
+      raise
+
+      
+    print '\n\nHi, going to run a Subversion benchmark series of %d runs...' % N
+
+    if os.path.isfile(timings_path):
+      print 'Going to add results to existing file', timings_path
+      timings = read_from_file(timings_path)
+    else:
+      print 'Going to write results to new file', timings_path
+      timings = Timings('--version')
+
+    run(levels, spread, N)
+
+    write_to_file(timings_path, timings)
+
+
+  elif len(sys.argv) > 1 and 'show'.startswith(sys.argv[1]):
+    if len(sys.argv) < 2:
+      usage()
+      exit(1)
+      
+    for timings_path in sys.argv[2:]:
+      timings = read_from_file(timings_path)
+      print '---\n%s' % timings_path
+      print timings.summary()
+
+  else: usage()
+

Propchange: subversion/trunk/tools/dev/benchmarks/suite1/benchmark.py
------------------------------------------------------------------------------
    svn:executable = *

Added: subversion/trunk/tools/dev/benchmarks/suite1/run
URL: http://svn.apache.org/viewvc/subversion/trunk/tools/dev/benchmarks/suite1/run?rev=1092569&view=auto
==============================================================================
--- subversion/trunk/tools/dev/benchmarks/suite1/run (added)
+++ subversion/trunk/tools/dev/benchmarks/suite1/run Fri Apr 15 02:10:49 2011
@@ -0,0 +1,87 @@
+#!/usr/bin/env bash
+benchmark="$PWD/benchmark.py"
+
+parent="$(date +"%Y%m%d-%H%M%S")"
+inital_workdir="$PWD"
+mkdir "$parent"
+cd "$parent"
+pwd
+
+ORIGINAL_PATH="$PATH"
+
+use(){
+  svn="$1"
+  if [ "$svn" = "trunk" ]; then
+    export PATH="$HOME/pat/trunk/prefix/bin:$ORIGINAL_PATH"
+  else
+    export PATH="$HOME/pat/stable/prefix/bin:$ORIGINAL_PATH"
+  fi
+}
+
+
+# batch LEVELS SPREAD N
+# Run the benchmark N times with the 1.6 build and N times with the trunk
+# build, then print a comparison of the two result files.
+# Note that levels, spread, N and pre are assigned as global variables.
+batch(){
+  levels="$1"
+  spread="$2"
+  N="$3"
+  pre="${levels}x${spread}_"
+  use 1.6
+  "$benchmark" run ${pre}1.6 $levels $spread $N
+  use trunk
+  "$benchmark" run ${pre}trunk $levels $spread $N
+  echo 
+  echo "Results for dir levels: $levels  spread: $spread"
+  "$benchmark" compare ${pre}1.6 ${pre}trunk
+}
+
+N=6
+al=5
+as=5
+bl=100
+bs=1
+cl=1
+cs=100
+
+##DEBUG
+#N=1
+#al=1
+#as=1
+#bl=2
+#bs=1
+#cl=1
+#cs=2
+##DEBUG
+
+
+# Everything printed inside the braces also goes to results.txt.
+{
+started="$(date)"
+echo "Started at $started"
+echo 
+
+batch $al $as $N
+batch $bl $bs $N
+batch $cl $cs $N
+
+echo
+echo =========================================================================
+echo
+echo "calculating total of 1.6..."
+"$benchmark" combine *x*_1.6 total_1.6
+
+echo
+echo "calculating total of trunk..."
+"$benchmark" combine *x*_trunk total_trunk
+
+echo
+echo "comparing averaged totals..."
+"$benchmark" compare total_1.6 total_trunk
+
+echo
+echo "Had started at $started,"
+echo "       done at $(date)"
+pwd
+} 2>&1 | tee results.txt
+
+cd "$inital_workdir"
+# NOTE(review): when total_trunk exists, the whole result directory is
+# removed, including results.txt -- confirm that this is intended.
+if [ -f "$parent/total_trunk" ]; then
+  rm -rf "$parent"
+fi

Propchange: subversion/trunk/tools/dev/benchmarks/suite1/run
------------------------------------------------------------------------------
    svn:executable = *