You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@accumulo.apache.org by ec...@apache.org on 2012/12/20 20:01:50 UTC
svn commit: r1424638 - in /accumulo/branches/1.4/test/system/bench: ./ cloudstone1/ lib/

Author: ecn
Date: Thu Dec 20 19:01:50 2012
New Revision: 1424638

URL: http://svn.apache.org/viewvc?rev=1424638&view=rev
Log:
ACCUMULO-897 get the benchmarks to run again

Modified:
    accumulo/branches/1.4/test/system/bench/README
    accumulo/branches/1.4/test/system/bench/cloudstone1/cloudstone1.py
    accumulo/branches/1.4/test/system/bench/lib/Benchmark.py
    accumulo/branches/1.4/test/system/bench/lib/CreateTablesBenchmark.py
    accumulo/branches/1.4/test/system/bench/lib/IngestBenchmark.py
    accumulo/branches/1.4/test/system/bench/lib/RowHashBenchmark.py
    accumulo/branches/1.4/test/system/bench/lib/TableSplitsBenchmark.py
    accumulo/branches/1.4/test/system/bench/lib/TeraSortBenchmark.py
    accumulo/branches/1.4/test/system/bench/lib/cloudshell.py
    accumulo/branches/1.4/test/system/bench/lib/path.py
    accumulo/branches/1.4/test/system/bench/run.py

Modified: accumulo/branches/1.4/test/system/bench/README
URL: http://svn.apache.org/viewvc/accumulo/branches/1.4/test/system/bench/README?rev=1424638&r1=1424637&r2=1424638&view=diff
==============================================================================
--- accumulo/branches/1.4/test/system/bench/README (original)
+++ accumulo/branches/1.4/test/system/bench/README Thu Dec 20 19:01:50 2012
@@ -38,5 +38,6 @@ The 4th Benchmark is Terasort.  Run the 
 
 4. Misc
 
-These benchmarks create tables in accumulo named 'test_ingest' and 'CloudIngestTest'.  These tables are *NOT* deleted
-at the end of the benchmarks.
+These benchmarks create tables in accumulo named 'test_ingest' and 'CloudIngestTest'.  These tables are deleted
+at the end of the benchmarks. The benchmarks will also alter user auths while they run. It is recommended that
+a dedicated benchmark user be created.

Modified: accumulo/branches/1.4/test/system/bench/cloudstone1/cloudstone1.py
URL: http://svn.apache.org/viewvc/accumulo/branches/1.4/test/system/bench/cloudstone1/cloudstone1.py?rev=1424638&r1=1424637&r2=1424638&view=diff
==============================================================================
--- accumulo/branches/1.4/test/system/bench/cloudstone1/cloudstone1.py (original)
+++ accumulo/branches/1.4/test/system/bench/cloudstone1/cloudstone1.py Thu Dec 20 19:01:50 2012
@@ -29,7 +29,7 @@ class CloudStone1(Benchmark):
                'and we can reach all the slaves. Lower is better.'
 
     def runTest(self):
-        code, out, err = cloudshell.run(self.username, self.password, 'table !METADATA\nscan\n')
+        code, out, err = cloudshell.run(self.username, self.password, 'table !METADATA\nscan -np\n')
         results = runAll('echo help | %s shell' %
                          accumulo('bin', 'accumulo'))
                          

Modified: accumulo/branches/1.4/test/system/bench/lib/Benchmark.py
URL: http://svn.apache.org/viewvc/accumulo/branches/1.4/test/system/bench/lib/Benchmark.py?rev=1424638&r1=1424637&r2=1424638&view=diff
==============================================================================
--- accumulo/branches/1.4/test/system/bench/lib/Benchmark.py (original)
+++ accumulo/branches/1.4/test/system/bench/lib/Benchmark.py Thu Dec 20 19:01:50 2012
@@ -91,10 +91,7 @@ class Benchmark(unittest.TestCase):
         return 0
     
     def findjar(self, path):
-        globjar = glob.glob(path)
-        for j in globjar:
-            if j.find('javadoc') >= 0 or j.find('sources') >= 0:
-                globjar.remove(j)
+        globjar = [ j for j in glob.glob(path) if j.find('javadoc') == -1 and j.find('sources') == -1 ]
         return globjar[0]
         
     # Returns the location of the local examples jar
@@ -104,10 +101,12 @@ class Benchmark(unittest.TestCase):
     # Returns a string of core, thrift and zookeeper jars with a specified delim
     def getjars(self, delim=','):
         accumulo_core_jar = self.findjar(accumulo('lib', 'accumulo-core*.jar'))
-        accumulo_start_jar = self.findjar(accumulo('lib', 'accumulo-start*.jar'))
+        accumulo_start_jar = self.findjar(accumulo('lib', 'accumulo-start*.jar'))       
+        cloudtrace_jar = self.findjar(accumulo('lib', 'cloudtrace*.jar'))
         accumulo_thrift_jar = self.findjar(accumulo('lib', 'libthrift*.jar'))
         accumulo_zookeeper_jar = self.findjar(os.path.join(os.getenv('ZOOKEEPER_HOME'), 'zookeeper*.jar'))
-        return delim.join([accumulo_core_jar, accumulo_thrift_jar, accumulo_zookeeper_jar, accumulo_start_jar])
+        return delim.join([accumulo_core_jar, accumulo_thrift_jar, accumulo_zookeeper_jar, accumulo_start_jar,
+            cloudtrace_jar])
        
     # Builds the running command for the map/reduce class specified sans the arguments
     def buildcommand(self, classname, *args):

Modified: accumulo/branches/1.4/test/system/bench/lib/CreateTablesBenchmark.py
URL: http://svn.apache.org/viewvc/accumulo/branches/1.4/test/system/bench/lib/CreateTablesBenchmark.py?rev=1424638&r1=1424637&r2=1424638&view=diff
==============================================================================
--- accumulo/branches/1.4/test/system/bench/lib/CreateTablesBenchmark.py (original)
+++ accumulo/branches/1.4/test/system/bench/lib/CreateTablesBenchmark.py Thu Dec 20 19:01:50 2012
@@ -30,20 +30,31 @@ class CreateTablesBenchmark(Benchmark):
     tables = 1000
 
     def setUp(self): 
+        for x in range(1, self.tables):
+            currentTable = 'test_ingest%d' % (x)      
+            log.debug("Checking for table existence: %s" % currentTable)
+            code, out, err = cloudshell.run(self.username, self.password, 'table %s\n' % currentTable)
+            if out.find('does not exist') == -1:
+                command = 'deletetable -f %s\n' % (currentTable)
+                log.debug("Running Command %r", command)
+                code, out, err = cloudshell.run(self.username, self.password, command)
+                self.assertEqual(code, 0, 'Did not successfully delete table: %s' % currentTable)
         Benchmark.setUp(self)  
-        
+
     def runTest(self):
         for x in range(1, self.tables):
             currentTable = 'test_ingest%d' % (x)      
             command = 'createtable %s\n' % (currentTable)
             log.debug("Running Command %r", command)
             code, out, err = cloudshell.run(self.username, self.password, command)
+            self.assertEqual(code, 0, 'Did not successfully create table: %s' % currentTable)
             # print err
         for x in range(1, self.tables):
             currentTable = 'test_ingest%d' % (x)      
-            command = 'deletetable %s\n' % (currentTable)
+            command = 'deletetable -f %s\n' % (currentTable)
             log.debug("Running Command %r", command)
             code, out, err = cloudshell.run(self.username, self.password, command)
+            self.assertEqual(code, 0, 'Did not successfully delete table: %s' % currentTable)
             # print err
         log.debug("Process finished")
         return code, out, err

Modified: accumulo/branches/1.4/test/system/bench/lib/IngestBenchmark.py
URL: http://svn.apache.org/viewvc/accumulo/branches/1.4/test/system/bench/lib/IngestBenchmark.py?rev=1424638&r1=1424637&r2=1424638&view=diff
==============================================================================
--- accumulo/branches/1.4/test/system/bench/lib/IngestBenchmark.py (original)
+++ accumulo/branches/1.4/test/system/bench/lib/IngestBenchmark.py Thu Dec 20 19:01:50 2012
@@ -30,14 +30,21 @@ class IngestBenchmark(Benchmark):
 
     def setUp(self):
         code, out, err = cloudshell.run(self.username, self.password, 'table test_ingest\n')
-        if out.find('no such table') >= 0:
+        if out.find('does not exist') == -1:
             log.debug("Deleting table test_ingest")
-            code, out, err = cloudshell.run(self.username, self.password, 'deletetable test_ingest\n')
-            self.sleep(10)
+            code, out, err = cloudshell.run(self.username, self.password, 'deletetable -f test_ingest\n')
+            self.assertEquals(code, 0, "Could not delete the table 'test_ingest'")
         code, out, err = cloudshell.run(self.username, self.password, 'createtable test_ingest\n')
-        self.assertEqual(code, 0)
+        self.assertEqual(code, 0, "Could not create the table 'test_ingest'")
         Benchmark.setUp(self)
 
+    def tearDown(self):
+        command = 'deletetable -f test_ingest\n'
+        log.debug("Running Command %r", command)
+        code, out, err = cloudshell.run(self.username, self.password, command)
+        self.assertEqual(code, 0, "Could not delete the table 'test_ingest'")
+        Benchmark.tearDown(self)
+
     def size(self):
         return 50
 
@@ -67,10 +74,6 @@ class IngestBenchmark(Benchmark):
         for code, slaves in codes.items():
             if code != 0:
                 self.assertEqual(code, 0, "Bad exit code (%d) from slaves %r" % (code, slaves))
-        command = 'deletetable test_ingest\n'
-        log.debug("Running Command %r", command)
-        code, out, err = cloudshell.run(self.username, self.password, command)
-        # print err
 
     def score(self):
         if self.finished:

Modified: accumulo/branches/1.4/test/system/bench/lib/RowHashBenchmark.py
URL: http://svn.apache.org/viewvc/accumulo/branches/1.4/test/system/bench/lib/RowHashBenchmark.py?rev=1424638&r1=1424637&r2=1424638&view=diff
==============================================================================
--- accumulo/branches/1.4/test/system/bench/lib/RowHashBenchmark.py (original)
+++ accumulo/branches/1.4/test/system/bench/lib/RowHashBenchmark.py Thu Dec 20 19:01:50 2012
@@ -42,23 +42,18 @@ class RowHashBenchmark(Benchmark):
     output_table = 'RowHashTestOutput'
 
     def setUp(self): 
-        random.jumpahead(int(time.time()))
-        num = random.randint(1, 100000)
-        self.input_table = self.input_table + "_" + str(num) 
-        self.output_table = self.output_table + "_" + str(num)    
-        #if (not os.getenv("HADOOP_CLASSPATH")):
-        #    os.putenv("HADOOP_CLASSPATH", self.getjars(":"))
         dir = os.path.dirname(os.path.realpath(__file__))
         file = os.path.join( dir, 'splits' )  
-        # code, out, err = cloudshell.run(self.username, self.password, 'table RowHashTestInput\n') 
-        # if out.find('no such table') == -1:
-        #    code, out, err = cloudshell.run(self.username, self.password, 'deletetable RowHashTestInput\n') 
-        #    self.sleep(15)
+        code, out, err = cloudshell.run(self.username, self.password, 'table %s\n' % self.input_table) 
+        if out.find('does not exist') == -1:
+           code, out, err = cloudshell.run(self.username, self.password, 'deletetable -f %s\n' % self.input_table) 
+           self.sleep(15)
         code, out, err = cloudshell.run(self.username, self.password, "createtable %s -sf %s\n" % (self.input_table, file))
-        #code, out, err = cloudshell.run('table RowHashTest\n') 
-        #if out.find('no such table') == -1:
-        #    code, out, err = cloudshell.run('user root\nsecret\ndeletetable RowHashTest\n') 
-        #    self.sleep(15)
+        code, out, err = cloudshell.run(self.username, self.password, 'table %s\n' % self.output_table) 
+        if out.find('does not exist') == -1:
+            code, out, err = cloudshell.run(self.username, self.password, 'deletetable -f %s\n' %
+                    self.output_table) 
+            self.sleep(15)
         code, out, err = cloudshell.run(self.username, self.password, "createtable %s -sf %s\n" % (self.output_table, file))
         command = self.buildcommand('org.apache.accumulo.examples.simple.mapreduce.TeraSortIngest',
                                     self.numrows(),
@@ -77,6 +72,13 @@ class RowHashBenchmark(Benchmark):
         out, err = handle.communicate("")  
         Benchmark.setUp(self)
 
+    def tearDown(self):
+        code, out, err = cloudshell.run(self.username, self.password, "deletetable -f %s\n" % self.input_table)
+        self.assertEqual(code, 0, 'Could not delete %s, %s' % (self.input_table, out))
+        code, out, err = cloudshell.run(self.username, self.password, "deletetable -f %s\n" % self.output_table)
+        self.assertEqual(code, 0, 'Could not delete %s, %s' % (self.output_table, out))
+        Benchmark.tearDown(self)
+
     def keysizemin(self):
         return self.keymin
 
@@ -109,7 +111,7 @@ class RowHashBenchmark(Benchmark):
         return handle.returncode, out, err
     
     def shortDescription(self):
-        return 'Hashes %d rows from one tableand outputs them into another Table. '\
+        return 'Hashes %d rows from one table and outputs them into another table. '\
                'Lower score is better.' % (self.numrows())
                
     def setSpeed(self, speed):

Modified: accumulo/branches/1.4/test/system/bench/lib/TableSplitsBenchmark.py
URL: http://svn.apache.org/viewvc/accumulo/branches/1.4/test/system/bench/lib/TableSplitsBenchmark.py?rev=1424638&r1=1424637&r2=1424638&view=diff
==============================================================================
--- accumulo/branches/1.4/test/system/bench/lib/TableSplitsBenchmark.py (original)
+++ accumulo/branches/1.4/test/system/bench/lib/TableSplitsBenchmark.py Thu Dec 20 19:01:50 2012
@@ -35,35 +35,31 @@ class TableSplitsBenchmark(Benchmark):
     tablename = 'test_splits'
 
     def setUp(self): 
-        random.jumpahead(int(time.time()))
-        num = random.randint(1, 100000)
-        self.tablename = self.tablename + "_" + str(num)     
         # Need to generate a splits file for each speed
-        #code, out, err = cloudshell.run(self.username, self.password, 'table %s\n' % self.tablename)
-        #if out.find('no such table') == -1:
-        #    log.debug('Deleting table %s' % self.tablename)
-        #    code, out, err = cloudshell.run('user %s\n%s\ndeletetable %s\n' % (self.user, 
-        #                                                                          self.password, 
-        #                                                                          self.tablename))
-        #    self.sleep(5)
+        code, out, err = cloudshell.run(self.username, self.password, 'table %s\n' % self.tablename)
+        if out.find('does not exist') == -1:
+            log.debug('Deleting table %s' % self.tablename)
+            code, out, err = cloudshell.run(self.username, self.password, 'deletetable -f %s\n' % self.tablename)
+            self.assertEqual(code, 0, "Could not delete table")
         Benchmark.setUp(self)
 
     def runTest(self):             
         command = 'createtable %s -sf %s\n' % (self.tablename, self.splitsfile)
         log.debug("Running Command %r", command)
         code, out, err = cloudshell.run(self.username, self.password, command)
+        self.assertEqual(code, 0, 'Could not create table: %s' % out)
         return code, out, err
 
     def shortDescription(self):
         return 'Creates a table with splits. Lower score is better.'
         
     def tearDown(self):
+        command = 'deletetable -f %s\n' % self.tablename
+        log.debug("Running Command %r", command)
+        code, out, err = cloudshell.run(self.username, self.password, command)
+        self.assertEqual(code, 0, "Could not delete table")
+        log.debug("Process finished")        
         Benchmark.tearDown(self)
-        # self.sleep(5)
-        # command = 'deletetable test_splits\n'
-        # log.debug("Running Command %r", command)
-        # code, out, err = cloudshell.run(self.username, self.password, command)
-        # log.debug("Process finished")        
 
     def setSpeed(self, speed):
         dir = os.path.dirname(os.path.realpath(__file__))

Modified: accumulo/branches/1.4/test/system/bench/lib/TeraSortBenchmark.py
URL: http://svn.apache.org/viewvc/accumulo/branches/1.4/test/system/bench/lib/TeraSortBenchmark.py?rev=1424638&r1=1424637&r2=1424638&view=diff
==============================================================================
--- accumulo/branches/1.4/test/system/bench/lib/TeraSortBenchmark.py (original)
+++ accumulo/branches/1.4/test/system/bench/lib/TeraSortBenchmark.py Thu Dec 20 19:01:50 2012
@@ -42,16 +42,16 @@ class TeraSortBenchmark(Benchmark):
 
 
     def setUp(self): 
-        random.jumpahead(int(time.time()))
-        num = random.randint(1, 100000)   
-        #self.tablename = self.tablename + "-" + str(num)  
-        # Find which hadoop version
-        # code, out, err = cloudshell.run(self.username, self.password, 'table %s\n' % self.tablename)
-        #if out.find('no such table') == -1:
-        #    log.debug('Deleting table %s' % self.tablename)
-        #    code, out, err = cloudshell.run(self.username, self.password, 'deletetable %s\n' % self.tablename)
-        #    self.sleep(10)
+        code, out, err = cloudshell.run(self.username, self.password, 'table %s\n' % self.tablename)
+        if out.find('does not exist') == -1:
+            log.debug('Deleting table %s' % self.tablename)
+            code, out, err = cloudshell.run(self.username, self.password, 'deletetable -f %s\n' % self.tablename)
         Benchmark.setUp(self)
+
+    def tearDown(self):
+        code, out, err = cloudshell.run(self.username, self.password, "deletetable -f %s\n" % self.tablename)
+        self.assertEqual(code, 0, 'Could not delete %s, %s' % (self.tablename, out))
+        Benchmark.tearDown(self)
         
     def keysizemin(self):
         return self.keymin
@@ -88,6 +88,7 @@ class TeraSortBenchmark(Benchmark):
         log.debug("Running: %r", command)
         out, err = handle.communicate("")
         log.debug("Process finished: %d (%s)", handle.returncode, ' '.join(handle.command))
+        self.assertEqual(handle.returncode, 0, "Job did not complete successfully")
         return handle.returncode, out, err
         
     def needsAuthentication(self):

Modified: accumulo/branches/1.4/test/system/bench/lib/cloudshell.py
URL: http://svn.apache.org/viewvc/accumulo/branches/1.4/test/system/bench/lib/cloudshell.py?rev=1424638&r1=1424637&r2=1424638&view=diff
==============================================================================
--- accumulo/branches/1.4/test/system/bench/lib/cloudshell.py (original)
+++ accumulo/branches/1.4/test/system/bench/lib/cloudshell.py Thu Dec 20 19:01:50 2012
@@ -23,7 +23,7 @@ from lib.options import log
     
 def run(username, password, input):
     "Run a command in accumulo"
-    handle = runner.start([path.accumulo('bin', 'accumulo'), 'shell -u %s -p %s' % (username, password) ],
+    handle = runner.start([path.accumulo('bin', 'accumulo'), 'shell', '-u', username, '-p', password],
                           stdin=subprocess.PIPE)
     log.debug("Running: %r", input)
     out, err = handle.communicate(input)

Modified: accumulo/branches/1.4/test/system/bench/lib/path.py
URL: http://svn.apache.org/viewvc/accumulo/branches/1.4/test/system/bench/lib/path.py?rev=1424638&r1=1424637&r2=1424638&view=diff
==============================================================================
--- accumulo/branches/1.4/test/system/bench/lib/path.py (original)
+++ accumulo/branches/1.4/test/system/bench/lib/path.py Thu Dec 20 19:01:50 2012
@@ -17,9 +17,7 @@
 import os
 
 HERE = os.path.dirname(__file__)
-ACCUMULO_HOME = os.path.normpath(
-    os.path.join(HERE, *(os.pardir,)*4)
-    )
+ACCUMULO_HOME = os.getenv('ACCUMULO_HOME')
 
 def accumulo(*args):
     return os.path.join(ACCUMULO_HOME, *args)

Modified: accumulo/branches/1.4/test/system/bench/run.py
URL: http://svn.apache.org/viewvc/accumulo/branches/1.4/test/system/bench/run.py?rev=1424638&r1=1424637&r2=1424638&view=diff
==============================================================================
--- accumulo/branches/1.4/test/system/bench/run.py (original)
+++ accumulo/branches/1.4/test/system/bench/run.py Thu Dec 20 19:01:50 2012
@@ -55,6 +55,9 @@ def main():
     if not os.getenv('ZOOKEEPER_HOME'):
         print 'Please set the environment variable \'ZOOKEEPER_HOME\' before running the benchmarks'
         sys.exit(0)
+    if not os.getenv('ACCUMULO_HOME'):
+        print 'Please set the environment variable \'ACCUMULO_HOME\' before running the benchmarks'
+        sys.exit(0)
     import textwrap
     benchmarks = getBenchmarks()
     benchmarks.sort(benchComparator)