You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by na...@apache.org on 2017/09/28 22:30:01 UTC
systemml git commit: [MINOR] bug fixes & feature added in perf test &
spark-submit python scripts
Repository: systemml
Updated Branches:
refs/heads/master 0cb2f7f68 -> a725b2d2e
[MINOR] bug fixes & feature added in perf test & spark-submit python scripts
Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/a725b2d2
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/a725b2d2
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/a725b2d2
Branch: refs/heads/master
Commit: a725b2d2ebf6dcb56f4edb68376c3849c8991b27
Parents: 0cb2f7f
Author: Nakul Jindal <na...@gmail.com>
Authored: Thu Sep 28 15:28:17 2017 -0700
Committer: Nakul Jindal <na...@gmail.com>
Committed: Thu Sep 28 15:28:17 2017 -0700
----------------------------------------------------------------------
bin/systemml-spark-submit.py | 30 ++++++++-----
scripts/perftest/python/datagen.py | 2 +-
scripts/perftest/python/predict.py | 2 +-
scripts/perftest/python/run_perftest.py | 19 ++++++---
scripts/perftest/python/train.py | 2 +-
scripts/perftest/python/utils_misc.py | 63 +++++++++++++++-------------
6 files changed, 70 insertions(+), 48 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/systemml/blob/a725b2d2/bin/systemml-spark-submit.py
----------------------------------------------------------------------
diff --git a/bin/systemml-spark-submit.py b/bin/systemml-spark-submit.py
index b6426b3..b4da801 100755
--- a/bin/systemml-spark-submit.py
+++ b/bin/systemml-spark-submit.py
@@ -92,25 +92,35 @@ def spark_submit_entry(master, driver_memory, num_executors, executor_memory,
ml_options.append(stats)
if gpu is not None:
ml_options.append('-gpu')
- ml_options.append(gpu)
+ if gpu is not 'no_option':
+ ml_options.append(gpu)
if len(ml_options) < 1:
ml_options = ''
# stats, explain, target_jars
cmd_spark = [spark_path, '--class', 'org.apache.sysml.api.DMLScript',
- '--master', master, '--driver-memory', driver_memory,
- '--num-executors', num_executors, '--executor-memory', executor_memory,
- '--executor-cores', executor_cores, '--conf', default_conf,
+ '--master', master,
+ '--driver-memory', driver_memory,
+ '--conf', default_conf,
'--jars', cuda_jars, systemml_jars]
+ if num_executors is not None:
+ cmd_spark = cmd_spark + ['--num-executors', num_executors]
+
+ if executor_memory is not None:
+ cmd_spark = cmd_spark + ['--executor-memory', executor_memory]
+
+ if executor_cores is not None:
+ cmd_spark = cmd_spark + ['--executor-cores', executor_cores]
+
cmd_system_ml = ['-config', default_config,
'-exec', 'hybrid_spark', '-f', script_file, ' '.join(ml_options)]
cmd = cmd_spark + cmd_system_ml
# Debug
- # print(' '.join(cmd))
+ print(' '.join(cmd))
return_code = os.system(' '.join(cmd))
return return_code
@@ -120,10 +130,10 @@ if __name__ == '__main__':
description='System-ML Spark Submit Script')
# SPARK-SUBMIT Options
cparser.add_argument('--master', default='local[*]', help='local, yarn-client, yarn-cluster', metavar='')
- cparser.add_argument('--driver-memory', default='5G', help='Memory for driver (e.g. 512M)', metavar='')
- cparser.add_argument('--num-executors', default='2', help='Number of executors to launch', metavar='')
- cparser.add_argument('--executor-memory', default='2G', help='Memory per executor', metavar='')
- cparser.add_argument('--executor-cores', default='1', help='Number of cores', metavar='')
+ cparser.add_argument('--driver-memory', default='8G', help='Memory for driver (e.g. 512M, 1G)', metavar='')
+ cparser.add_argument('--num-executors', nargs=1, help='Number of executors to launch', metavar='')
+ cparser.add_argument('--executor-memory', nargs=1, help='Memory per executor', metavar='')
+ cparser.add_argument('--executor-cores', nargs=1, help='Number of executor cores', metavar='')
cparser.add_argument('--conf', help='Spark configuration file', nargs='+', metavar='')
# SYSTEM-ML Options
@@ -138,7 +148,7 @@ if __name__ == '__main__':
metavar='')
cparser.add_argument('-gpu', help='uses CUDA instructions when reasonable, '
'set <force> option to skip conservative memory estimates '
- 'and use GPU wherever possible', nargs='?')
+ 'and use GPU wherever possible', nargs='?', const='no_option')
cparser.add_argument('-f', required=True, help='specifies dml/pydml file to execute; '
'path can be local/hdfs/gpfs', metavar='')
http://git-wip-us.apache.org/repos/asf/systemml/blob/a725b2d2/scripts/perftest/python/datagen.py
----------------------------------------------------------------------
diff --git a/scripts/perftest/python/datagen.py b/scripts/perftest/python/datagen.py
index 9b9edf1..6794187 100755
--- a/scripts/perftest/python/datagen.py
+++ b/scripts/perftest/python/datagen.py
@@ -25,7 +25,7 @@ from os.path import join
from utils_misc import split_rowcol, config_writer, mat_type_check
# This file contains configuration settings for data generation
-DATA_FORMAT = 'csv'
+DATA_FORMAT = 'binary'
MATRIX_TYPE_DICT = {'dense': '0.9',
'sparse': '0.01'}
http://git-wip-us.apache.org/repos/asf/systemml/blob/a725b2d2/scripts/perftest/python/predict.py
----------------------------------------------------------------------
diff --git a/scripts/perftest/python/predict.py b/scripts/perftest/python/predict.py
index 21eed6a..67467b1 100755
--- a/scripts/perftest/python/predict.py
+++ b/scripts/perftest/python/predict.py
@@ -26,7 +26,7 @@ from utils_misc import config_writer, mat_type_check
from utils_fs import relevant_folders
# Contains configuration setting for predicting
-DATA_FORMAT = 'csv'
+DATA_FORMAT = 'binary'
def m_svm_predict(save_folder_name, datagen_dir, train_dir, predict_dir, config_dir):
http://git-wip-us.apache.org/repos/asf/systemml/blob/a725b2d2/scripts/perftest/python/run_perftest.py
----------------------------------------------------------------------
diff --git a/scripts/perftest/python/run_perftest.py b/scripts/perftest/python/run_perftest.py
index dffb7a2..f0b272a 100755
--- a/scripts/perftest/python/run_perftest.py
+++ b/scripts/perftest/python/run_perftest.py
@@ -141,7 +141,7 @@ def algorithm_workflow(algo, exec_type, config_path, dml_file_name, action_mode,
return exit_flag_success
-def perf_test_entry(family, algo, exec_type, mat_type, mat_shape, config_dir, mode, temp_dir):
+def perf_test_entry(family, algo, exec_type, mat_type, mat_shape, config_dir, mode, temp_dir, file_system_type):
"""
This function is the entry point for performance testing
@@ -168,6 +168,9 @@ def perf_test_entry(family, algo, exec_type, mat_type, mat_shape, config_dir, mo
temp_dir: String
Location to store all output files created during perf test
+
+ file_system_type: String
+
"""
# algos to run is a list of tuples with
# [(m-svm, binomial), (m-svm, multinomial)...]
@@ -275,6 +278,7 @@ if __name__ == '__main__':
mat_type = ['dense', 'sparse', 'all']
workload = ['data-gen', 'train', 'predict']
execution_mode = ['hybrid_spark', 'singlenode']
+ file_system_type = ['hdfs', 'local']
# Default Arguments
default_mat_shape = ['10k_100']
@@ -308,7 +312,6 @@ if __name__ == '__main__':
'spark.driver.extraJavaOptions=\"-Xms20g -Xmn2g\"'
-
# Argparse Module
cparser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter,
description='SystemML Performance Test Script')
@@ -335,8 +338,12 @@ if __name__ == '__main__':
cparser.add_argument('--mode', default=workload,
help='space separated list of types of workloads to run (available: data-gen, train, predict)',
metavar='', choices=workload, nargs='+')
- # Change this to temp-dir
- cparser.add_argument('--temp-dir', help='define the file system to work on', metavar='')
+ cparser.add_argument('--temp-dir', help='the path on the file system to place the working temporary directory at',
+ metavar='')
+ cparser.add_argument('--file-system-type', choices=file_system_type, metavar='',
+ help='file system for temp directory, '
+ 'supported types are \'hdfs\' for hybrid_spark and \'local\' for standalone;'
+ 'default for hybrid_spark is \'hdfs\' and for standalone is \'local\'')
# Configuration Options
cparser.add_argument('-stats', help='Monitor and report caching/recompilation statistics, '
@@ -347,7 +354,7 @@ if __name__ == '__main__':
cparser.add_argument('-config', help='System-ML configuration file (e.g SystemML-config.xml)', metavar='')
cparser.add_argument('-gpu', help='uses CUDA instructions when reasonable, '
'set <force> option to skip conservative memory estimates '
- 'and use GPU wherever possible', nargs='?')
+ 'and use GPU wherever possible', nargs='?', const='no_option')
# Spark Configuration Option
cparser.add_argument('--master', help='local, yarn-client, yarn-cluster', metavar='')
cparser.add_argument('--driver-memory', help='Memory for driver (e.g. 512M)', metavar='')
@@ -371,7 +378,7 @@ if __name__ == '__main__':
perftest_args_dict, systemml_args_dict, backend_args_dict = split_config_args(all_arg_dict)
# temp_dir hdfs / local path check
- perftest_args_dict['temp_dir'] = get_default_dir(args.temp_dir, args.exec_type, default_config_dir)
+ perftest_args_dict['temp_dir'] = get_default_dir(args.file_system_type, args.temp_dir, args.exec_type, default_config_dir)
# default_mat_type validity
if len(args.mat_type) > 2:
http://git-wip-us.apache.org/repos/asf/systemml/blob/a725b2d2/scripts/perftest/python/train.py
----------------------------------------------------------------------
diff --git a/scripts/perftest/python/train.py b/scripts/perftest/python/train.py
index 4717ff7..a95950d 100755
--- a/scripts/perftest/python/train.py
+++ b/scripts/perftest/python/train.py
@@ -27,7 +27,7 @@ from functools import reduce
from utils_fs import relevant_folders
# Contains configuration setting for training
-DATA_FORMAT = 'csv'
+DATA_FORMAT = 'binary'
def binomial_m_svm_train(save_folder_name, datagen_dir, train_dir, config_dir):
http://git-wip-us.apache.org/repos/asf/systemml/blob/a725b2d2/scripts/perftest/python/utils_misc.py
----------------------------------------------------------------------
diff --git a/scripts/perftest/python/utils_misc.py b/scripts/perftest/python/utils_misc.py
index 92dbc73..87b870b 100755
--- a/scripts/perftest/python/utils_misc.py
+++ b/scripts/perftest/python/utils_misc.py
@@ -54,30 +54,28 @@ def split_config_args(args):
perftest_args_dict['filename'] = args['filename']
perftest_args_dict['mode'] = args['mode']
perftest_args_dict['temp_dir'] = args['temp_dir']
+ perftest_args_dict['file_system_type'] = args['file_system_type']
systemml_args_dict = {}
- if 'stats' in args.keys():
- if args['stats'] is not None:
- systemml_args_dict['-stats'] = args['stats']
- else:
- systemml_args_dict['-stats'] = ''
+ if args['stats'] is not None:
+ systemml_args_dict['-stats'] = args['stats']
+ else:
+ systemml_args_dict['-stats'] = ''
- if 'explain' in args.keys():
- if args['explain'] is not None:
- systemml_args_dict['-explain'] = args['explain']
- else:
- systemml_args_dict['-explain'] = ''
+ if args['explain'] is not None:
+ systemml_args_dict['-explain'] = args['explain']
+ else:
+ systemml_args_dict['-explain'] = ''
- if 'config' in args.keys():
- if args['config'] is not None:
- systemml_args_dict['-config'] = args['config']
+ if args['config'] is not None:
+ systemml_args_dict['-config'] = args['config']
- if 'gpu' in args.keys():
- if args['gpu'] is not None:
- systemml_args_dict['-gpu'] = args['gpu']
- else:
+ if args['gpu'] is not None:
+ if args['gpu'] == 'no_option':
systemml_args_dict['-gpu'] = ''
+ else:
+ systemml_args_dict['-gpu'] = args['gpu']
backend_args_dict = {}
exec_type = args['exec_type']
@@ -373,8 +371,9 @@ def mat_type_check(current_family, matrix_types, dense_algos):
return current_type
-def get_default_dir(temp_dir, exec_mode, config_dir):
+def get_default_dir(file_system_type, temp_dir, exec_mode, config_dir):
"""
+ file_system_type: String
temp_dir: String
exec_mode: String
config_dir: String
@@ -390,17 +389,23 @@ def get_default_dir(temp_dir, exec_mode, config_dir):
return temp_dir
if exec_mode == 'hybrid_spark':
- cmd = ['hdfs', 'getconf', '-confKey', 'fs.default.name']
- hdfs_base = subprocess_exec(' '.join(cmd), extract='hdfs_base')
+ if file_system_type == 'hdfs':
+ cmd = ['hdfs', 'getconf', '-confKey', 'fs.default.name']
+ hdfs_base = subprocess_exec(' '.join(cmd), extract='hdfs_base')
- if temp_dir is None:
- hdfs_home = join(hdfs_base, 'user', getpass.getuser())
- check_hdfs_path(hdfs_home)
- return hdfs_home
+ if temp_dir is None:
+ hdfs_home = join(hdfs_base, 'user', getpass.getuser())
+ check_hdfs_path(hdfs_home)
+ return hdfs_home
- if temp_dir is not None:
- if temp_dir.startswith('hdfs'):
+ if temp_dir is not None:
+ if temp_dir.startswith('hdfs'):
+ return temp_dir
+ else:
+ hdfs_home = join(hdfs_base, 'user', getpass.getuser(), temp_dir)
+ return hdfs_home
+ else:
+ if temp_dir is None:
+ return config_dir
+ if temp_dir is not None:
return temp_dir
- else:
- hdfs_home = join(hdfs_base, 'user', getpass.getuser(), temp_dir)
- return hdfs_home