You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by na...@apache.org on 2017/08/17 18:44:13 UTC

systemml git commit: [MINOR] fixes for HDFS path

Repository: systemml
Updated Branches:
  refs/heads/master 4384ebbda -> 114200724


[MINOR] fixes for HDFS path

Closes #624


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/11420072
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/11420072
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/11420072

Branch: refs/heads/master
Commit: 11420072412c0c873b72267d1e9764c87abc57b4
Parents: 4384ebb
Author: krishnakalyan3 <kr...@gmail.com>
Authored: Thu Aug 17 11:43:49 2017 -0700
Committer: Nakul Jindal <na...@gmail.com>
Committed: Thu Aug 17 11:43:49 2017 -0700

----------------------------------------------------------------------
 bin/utils.py                            |  9 +++----
 scripts/perftest/python/run_perftest.py | 17 +++++++-------
 scripts/perftest/python/utils_exec.py   | 19 ++++++++++++++-
 scripts/perftest/python/utils_fs.py     | 11 +++++++++
 scripts/perftest/python/utils_misc.py   | 35 ++++++++++++++++++++++++++++
 5 files changed, 78 insertions(+), 13 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/11420072/bin/utils.py
----------------------------------------------------------------------
diff --git a/bin/utils.py b/bin/utils.py
index 6f40881..cf17960 100644
--- a/bin/utils.py
+++ b/bin/utils.py
@@ -74,15 +74,16 @@ def find_dml_file(systemml_home, script_file):
     Location of the dml script
     """
     scripts_dir = join(systemml_home, 'scripts')
-    if not (exists(script_file)):
-        script_file = find_file(script_file, scripts_dir)
-        if script_file is None:
+    if not exists(script_file):
+        script_file_path = find_file(script_file, scripts_dir)
+        if script_file_path is not None:
+            return script_file_path
+        else:
             print('Could not find DML script: ' + script_file)
             sys.exit()
 
     return script_file
 
-
 def log4j_path(systemml_home):
     """
     Create log4j.properties from the template if not exist

http://git-wip-us.apache.org/repos/asf/systemml/blob/11420072/scripts/perftest/python/run_perftest.py
----------------------------------------------------------------------
diff --git a/scripts/perftest/python/run_perftest.py b/scripts/perftest/python/run_perftest.py
index 8c3d1fa..20f5380 100755
--- a/scripts/perftest/python/run_perftest.py
+++ b/scripts/perftest/python/run_perftest.py
@@ -32,7 +32,8 @@ from datagen import config_packets_datagen
 from train import config_packets_train
 from predict import config_packets_predict
 from utils_misc import get_families, config_reader, \
-    exec_dml_and_parse_time, exec_test_data, check_predict, get_folder_metrics, split_config_args
+    exec_dml_and_parse_time, exec_test_data, check_predict, get_folder_metrics, split_config_args, \
+    get_default_dir
 from utils_fs import create_dir_local, write_success, check_SUCCESS_file_exists
 
 # A packet is a dictionary
@@ -275,7 +276,7 @@ if __name__ == '__main__':
     default_mat_shape = ['10k_100']
 
     # Default temp directory, contains everything generated in perftest
-    default_temp_dir = join(systemml_home, 'scripts', 'perftest', 'temp')
+    default_config_dir = join(systemml_home, 'scripts', 'perftest', 'temp')
 
     # Initialize time
     start_time = time.time()
@@ -308,7 +309,7 @@ if __name__ == '__main__':
     cparser.add_argument('--mat-shape', default=default_mat_shape, help='space separated list of shapes of matrices '
                          'to generate (e.g 10k_1k, 20M_4k)', metavar='', nargs='+')
 
-    cparser.add_argument('--config-dir', default=default_temp_dir, help='temporary directory '
+    cparser.add_argument('--config-dir', default=default_config_dir, help='temporary directory '
                          'where generated, training and prediction data is put', metavar='')
     cparser.add_argument('--filename', default='perf_test', help='name of the output file for the perf'
                          ' metrics', metavar='')
@@ -316,8 +317,7 @@ if __name__ == '__main__':
                          help='space separated list of types of workloads to run (available: data-gen, train, predict)',
                          metavar='', choices=workload, nargs='+')
     # Change this to temp-dir
-    cparser.add_argument('--temp-dir', default=default_temp_dir,
-                         help='define the file system to work on', metavar='')
+    cparser.add_argument('--temp-dir', help='define the file system to work on', metavar='')
 
     # Configuration Options
     cparser.add_argument('-stats', help='Monitor and report caching/recompilation statistics, '
@@ -350,8 +350,8 @@ if __name__ == '__main__':
     # Global variables
     perftest_args_dict, systemml_args_dict, backend_args_dict = split_config_args(all_arg_dict)
 
-    # Debug arguments
-    # print(arg_dict)
+    # temp_dir hdfs / local path check
+    perftest_args_dict['temp_dir'] = get_default_dir(args.temp_dir, args.exec_type, default_config_dir)
 
     # default_mat_type validity
     if len(args.mat_type) > 2:
@@ -401,4 +401,5 @@ if __name__ == '__main__':
     perf_test_entry(**perftest_args_dict)
 
     total_time = (time.time() - start_time)
-    logging.info('Performance tests complete {0:.3f} secs \n'.format(total_time))
+    logging.info('total_time,none,none,none,none,{}'.format(total_time))
+    logging.info('Performance tests complete')

http://git-wip-us.apache.org/repos/asf/systemml/blob/11420072/scripts/perftest/python/utils_exec.py
----------------------------------------------------------------------
diff --git a/scripts/perftest/python/utils_exec.py b/scripts/perftest/python/utils_exec.py
index cf98d0f..92a267f 100755
--- a/scripts/perftest/python/utils_exec.py
+++ b/scripts/perftest/python/utils_exec.py
@@ -20,6 +20,7 @@
 #
 #-------------------------------------------------------------
 
+import sys
 import subprocess
 import shlex
 import re
@@ -45,7 +46,7 @@ def subprocess_exec(cmd_string, log_file_path=None, extract=None):
     Based on extract we return the relevant string
     """
     # Debug
-    # print(cmd_string)
+    #print(cmd_string)
     exec_command = shlex.split(cmd_string)
     proc1 = subprocess.Popen(exec_command, stdout=subprocess.PIPE,
                              stderr=subprocess.PIPE)
@@ -63,6 +64,8 @@ def subprocess_exec(cmd_string, log_file_path=None, extract=None):
             return_data = parse_time(std_outs)
         if extract == 'dir':
             return_data = parse_hdfs_paths(std_outs)
+        if extract == 'hdfs_base':
+            return_data = parse_hdfs_base(std_outs)
         if extract is None:
             return_data = 0
 
@@ -73,6 +76,20 @@ def subprocess_exec(cmd_string, log_file_path=None, extract=None):
     return return_data
 
 
+def parse_hdfs_base(std_outs):
+    """
+    return: String
+    hdfs base uri
+    """
+    hdfs_uri = None
+    for line in std_outs:
+        if line.startswith('hdfs://'):
+            hdfs_uri = line
+    if hdfs_uri is None:
+        sys.exit('HDFS URI not found')
+    return hdfs_uri
+
+
 def write_logs(std_outs, log_file_path):
     """
     Write all logs to the specified location

http://git-wip-us.apache.org/repos/asf/systemml/blob/11420072/scripts/perftest/python/utils_fs.py
----------------------------------------------------------------------
diff --git a/scripts/perftest/python/utils_fs.py b/scripts/perftest/python/utils_fs.py
index 7e04907..b3cc659 100755
--- a/scripts/perftest/python/utils_fs.py
+++ b/scripts/perftest/python/utils_fs.py
@@ -21,6 +21,7 @@
 #-------------------------------------------------------------
 
 import os
+import sys
 from os.path import join
 import glob
 from functools import reduce
@@ -101,6 +102,16 @@ def contains_dir(hdfs_dirs, sub_folder):
     return False
 
 
+def check_hdfs_path(path):
+    """
+    Check if a path is present in HDFS
+    """
+    cmd = ['hdfs', 'dfs', '-test', '-e', path]
+    return_code = subprocess_exec(' '.join(cmd))
+    if return_code != 0:
+        return sys.exit('Please create {}'.format(path))
+
+
 def relevant_folders(path, algo, family, matrix_type, matrix_shape, mode):
     """
     Finds the right folder to read the data based on given parameters

http://git-wip-us.apache.org/repos/asf/systemml/blob/11420072/scripts/perftest/python/utils_misc.py
----------------------------------------------------------------------
diff --git a/scripts/perftest/python/utils_misc.py b/scripts/perftest/python/utils_misc.py
index a3c98c2..704f22b 100755
--- a/scripts/perftest/python/utils_misc.py
+++ b/scripts/perftest/python/utils_misc.py
@@ -25,7 +25,9 @@ import os
 import json
 import re
 import sys
+import getpass
 from utils_exec import subprocess_exec
+from utils_fs import check_hdfs_path
 
 # This file contains all misc utility functions required by performance test module
 
@@ -361,3 +363,36 @@ def mat_type_check(current_family, matrix_types, dense_algos):
             current_type.append(current_matrix_type)
 
     return current_type
+
+
+def get_default_dir(temp_dir, exec_mode, config_dir):
+    """
+    temp_dir: String
+    exec_mode: String
+    config_dir: String
+
+    return: String
+    Local or HDFS home directory
+    """
+
+    if exec_mode == 'singlenode':
+        if temp_dir is None:
+            return config_dir
+        if temp_dir is not None:
+            return temp_dir
+
+    if exec_mode == 'hybrid_spark':
+        cmd = ['hdfs', 'getconf', '-confKey', 'fs.default.name']
+        hdfs_base = subprocess_exec(' '.join(cmd), extract='hdfs_base')
+
+        if temp_dir is None:
+            hdfs_home = join(hdfs_base, 'user', getpass.getuser())
+            check_hdfs_path(hdfs_home)
+            return hdfs_home
+
+        if temp_dir is not None:
+            if temp_dir.startswith('hdfs'):
+                return temp_dir
+            else:
+                hdfs_home = join(hdfs_base, 'user', getpass.getuser(), temp_dir)
+                return hdfs_home