You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by na...@apache.org on 2017/08/17 18:44:13 UTC
systemml git commit: [MINOR] fixes for HDFS path
Repository: systemml
Updated Branches:
refs/heads/master 4384ebbda -> 114200724
[MINOR] fixes for HDFS path
Closes #624
Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/11420072
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/11420072
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/11420072
Branch: refs/heads/master
Commit: 11420072412c0c873b72267d1e9764c87abc57b4
Parents: 4384ebb
Author: krishnakalyan3 <kr...@gmail.com>
Authored: Thu Aug 17 11:43:49 2017 -0700
Committer: Nakul Jindal <na...@gmail.com>
Committed: Thu Aug 17 11:43:49 2017 -0700
----------------------------------------------------------------------
bin/utils.py | 9 +++----
scripts/perftest/python/run_perftest.py | 17 +++++++-------
scripts/perftest/python/utils_exec.py | 19 ++++++++++++++-
scripts/perftest/python/utils_fs.py | 11 +++++++++
scripts/perftest/python/utils_misc.py | 35 ++++++++++++++++++++++++++++
5 files changed, 78 insertions(+), 13 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/systemml/blob/11420072/bin/utils.py
----------------------------------------------------------------------
diff --git a/bin/utils.py b/bin/utils.py
index 6f40881..cf17960 100644
--- a/bin/utils.py
+++ b/bin/utils.py
@@ -74,15 +74,16 @@ def find_dml_file(systemml_home, script_file):
Location of the dml script
"""
scripts_dir = join(systemml_home, 'scripts')
- if not (exists(script_file)):
- script_file = find_file(script_file, scripts_dir)
- if script_file is None:
+ if not exists(script_file):
+ script_file_path = find_file(script_file, scripts_dir)
+ if script_file_path is not None:
+ return script_file_path
+ else:
print('Could not find DML script: ' + script_file)
sys.exit()
return script_file
-
def log4j_path(systemml_home):
"""
Create log4j.properties from the template if not exist
http://git-wip-us.apache.org/repos/asf/systemml/blob/11420072/scripts/perftest/python/run_perftest.py
----------------------------------------------------------------------
diff --git a/scripts/perftest/python/run_perftest.py b/scripts/perftest/python/run_perftest.py
index 8c3d1fa..20f5380 100755
--- a/scripts/perftest/python/run_perftest.py
+++ b/scripts/perftest/python/run_perftest.py
@@ -32,7 +32,8 @@ from datagen import config_packets_datagen
from train import config_packets_train
from predict import config_packets_predict
from utils_misc import get_families, config_reader, \
- exec_dml_and_parse_time, exec_test_data, check_predict, get_folder_metrics, split_config_args
+ exec_dml_and_parse_time, exec_test_data, check_predict, get_folder_metrics, split_config_args, \
+ get_default_dir
from utils_fs import create_dir_local, write_success, check_SUCCESS_file_exists
# A packet is a dictionary
@@ -275,7 +276,7 @@ if __name__ == '__main__':
default_mat_shape = ['10k_100']
# Default temp directory, contains everything generated in perftest
- default_temp_dir = join(systemml_home, 'scripts', 'perftest', 'temp')
+ default_config_dir = join(systemml_home, 'scripts', 'perftest', 'temp')
# Initialize time
start_time = time.time()
@@ -308,7 +309,7 @@ if __name__ == '__main__':
cparser.add_argument('--mat-shape', default=default_mat_shape, help='space separated list of shapes of matrices '
'to generate (e.g 10k_1k, 20M_4k)', metavar='', nargs='+')
- cparser.add_argument('--config-dir', default=default_temp_dir, help='temporary directory '
+ cparser.add_argument('--config-dir', default=default_config_dir, help='temporary directory '
'where generated, training and prediction data is put', metavar='')
cparser.add_argument('--filename', default='perf_test', help='name of the output file for the perf'
' metrics', metavar='')
@@ -316,8 +317,7 @@ if __name__ == '__main__':
help='space separated list of types of workloads to run (available: data-gen, train, predict)',
metavar='', choices=workload, nargs='+')
# Change this to temp-dir
- cparser.add_argument('--temp-dir', default=default_temp_dir,
- help='define the file system to work on', metavar='')
+ cparser.add_argument('--temp-dir', help='define the file system to work on', metavar='')
# Configuration Options
cparser.add_argument('-stats', help='Monitor and report caching/recompilation statistics, '
@@ -350,8 +350,8 @@ if __name__ == '__main__':
# Global variables
perftest_args_dict, systemml_args_dict, backend_args_dict = split_config_args(all_arg_dict)
- # Debug arguments
- # print(arg_dict)
+ # temp_dir hdfs / local path check
+ perftest_args_dict['temp_dir'] = get_default_dir(args.temp_dir, args.exec_type, default_config_dir)
# default_mat_type validity
if len(args.mat_type) > 2:
@@ -401,4 +401,5 @@ if __name__ == '__main__':
perf_test_entry(**perftest_args_dict)
total_time = (time.time() - start_time)
- logging.info('Performance tests complete {0:.3f} secs \n'.format(total_time))
+ logging.info('total_time,none,none,none,none,{}'.format(total_time))
+ logging.info('Performance tests complete')
http://git-wip-us.apache.org/repos/asf/systemml/blob/11420072/scripts/perftest/python/utils_exec.py
----------------------------------------------------------------------
diff --git a/scripts/perftest/python/utils_exec.py b/scripts/perftest/python/utils_exec.py
index cf98d0f..92a267f 100755
--- a/scripts/perftest/python/utils_exec.py
+++ b/scripts/perftest/python/utils_exec.py
@@ -20,6 +20,7 @@
#
#-------------------------------------------------------------
+import sys
import subprocess
import shlex
import re
@@ -45,7 +46,7 @@ def subprocess_exec(cmd_string, log_file_path=None, extract=None):
Based on extract we return the relevant string
"""
# Debug
- # print(cmd_string)
+ #print(cmd_string)
exec_command = shlex.split(cmd_string)
proc1 = subprocess.Popen(exec_command, stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
@@ -63,6 +64,8 @@ def subprocess_exec(cmd_string, log_file_path=None, extract=None):
return_data = parse_time(std_outs)
if extract == 'dir':
return_data = parse_hdfs_paths(std_outs)
+ if extract == 'hdfs_base':
+ return_data = parse_hdfs_base(std_outs)
if extract is None:
return_data = 0
@@ -73,6 +76,20 @@ def subprocess_exec(cmd_string, log_file_path=None, extract=None):
return return_data
+def parse_hdfs_base(std_outs):
+ """
+ return: String
+ hdfs base uri
+ """
+ hdfs_uri = None
+ for line in std_outs:
+ if line.startswith('hdfs://'):
+ hdfs_uri = line
+ if hdfs_uri is None:
+ sys.exit('HDFS URI not found')
+ return hdfs_uri
+
+
def write_logs(std_outs, log_file_path):
"""
Write all logs to the specified location
http://git-wip-us.apache.org/repos/asf/systemml/blob/11420072/scripts/perftest/python/utils_fs.py
----------------------------------------------------------------------
diff --git a/scripts/perftest/python/utils_fs.py b/scripts/perftest/python/utils_fs.py
index 7e04907..b3cc659 100755
--- a/scripts/perftest/python/utils_fs.py
+++ b/scripts/perftest/python/utils_fs.py
@@ -21,6 +21,7 @@
#-------------------------------------------------------------
import os
+import sys
from os.path import join
import glob
from functools import reduce
@@ -101,6 +102,16 @@ def contains_dir(hdfs_dirs, sub_folder):
return False
+def check_hdfs_path(path):
+ """
+ Check if a path is present in HDFS
+ """
+ cmd = ['hdfs', 'dfs', '-test', '-e', path]
+ return_code = subprocess_exec(' '.join(cmd))
+ if return_code != 0:
+ return sys.exit('Please create {}'.format(path))
+
+
def relevant_folders(path, algo, family, matrix_type, matrix_shape, mode):
"""
Finds the right folder to read the data based on given parameters
http://git-wip-us.apache.org/repos/asf/systemml/blob/11420072/scripts/perftest/python/utils_misc.py
----------------------------------------------------------------------
diff --git a/scripts/perftest/python/utils_misc.py b/scripts/perftest/python/utils_misc.py
index a3c98c2..704f22b 100755
--- a/scripts/perftest/python/utils_misc.py
+++ b/scripts/perftest/python/utils_misc.py
@@ -25,7 +25,9 @@ import os
import json
import re
import sys
+import getpass
from utils_exec import subprocess_exec
+from utils_fs import check_hdfs_path
# This file contains all misc utility functions required by performance test module
@@ -361,3 +363,36 @@ def mat_type_check(current_family, matrix_types, dense_algos):
current_type.append(current_matrix_type)
return current_type
+
+
+def get_default_dir(temp_dir, exec_mode, config_dir):
+ """
+ temp_dir: String
+ exec_mode: String
+ config_dir: String
+
+ return: String
+ Local or HDFS home directory
+ """
+
+ if exec_mode == 'singlenode':
+ if temp_dir is None:
+ return config_dir
+ if temp_dir is not None:
+ return temp_dir
+
+ if exec_mode == 'hybrid_spark':
+ cmd = ['hdfs', 'getconf', '-confKey', 'fs.default.name']
+ hdfs_base = subprocess_exec(' '.join(cmd), extract='hdfs_base')
+
+ if temp_dir is None:
+ hdfs_home = join(hdfs_base, 'user', getpass.getuser())
+ check_hdfs_path(hdfs_home)
+ return hdfs_home
+
+ if temp_dir is not None:
+ if temp_dir.startswith('hdfs'):
+ return temp_dir
+ else:
+ hdfs_home = join(hdfs_base, 'user', getpass.getuser(), temp_dir)
+ return hdfs_home