You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by kk...@apache.org on 2017/11/09 19:48:26 UTC

systemml git commit: [SYSTEMML-1978] Add PCA to Performance Test Suite

Repository: systemml
Updated Branches:
  refs/heads/master bd139a575 -> d69686273


[SYSTEMML-1978] Add PCA to Performance Test Suite

Closes #694


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/d6968627
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/d6968627
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/d6968627

Branch: refs/heads/master
Commit: d69686273da8bf4dc09441ec34ef3863eb437629
Parents: bd139a5
Author: Krishna Kalyan <kr...@gmail.com>
Authored: Thu Nov 9 20:42:49 2017 +0100
Committer: Krishna Kalyan <kr...@gmail.com>
Committed: Thu Nov 9 20:42:49 2017 +0100

----------------------------------------------------------------------
 scripts/perftest/python/datagen.py      | 17 +++++++++++++++++
 scripts/perftest/python/run_perftest.py | 11 +++++++----
 scripts/perftest/python/train.py        | 15 +++++++++++++++
 3 files changed, 39 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/d6968627/scripts/perftest/python/datagen.py
----------------------------------------------------------------------
diff --git a/scripts/perftest/python/datagen.py b/scripts/perftest/python/datagen.py
index 54f2eff..55dd06d 100755
--- a/scripts/perftest/python/datagen.py
+++ b/scripts/perftest/python/datagen.py
@@ -215,6 +215,23 @@ def stats2_datagen(matrix_dim, matrix_type, datagen_dir, config_dir):
     return save_path
 
 
+def dimreduction_datagen(matrix_dim, matrix_type, datagen_dir, config_dir):
+
+    path_name = '.'.join(['dimreduction', matrix_type, str(matrix_dim)])
+    datagen_write = join(datagen_dir, path_name)
+    save_path = join(config_dir, path_name)
+    row, col = split_rowcol(matrix_dim)
+
+    R = row
+    C = col
+    OUT = join(datagen_write, 'X.data')
+
+    config = dict(R=R, C=C, OUT=OUT, FMT=DATA_FORMAT)
+
+    config_writer(save_path + '.json', config)
+    return save_path
+
+
 def config_packets_datagen(algo_payload, matrix_type, matrix_shape, datagen_dir, dense_algos, config_dir):
     """
     This function has two responsibilities. Generate the configuration files for

http://git-wip-us.apache.org/repos/asf/systemml/blob/d6968627/scripts/perftest/python/run_perftest.py
----------------------------------------------------------------------
diff --git a/scripts/perftest/python/run_perftest.py b/scripts/perftest/python/run_perftest.py
index 6e87261..1f78a75 100755
--- a/scripts/perftest/python/run_perftest.py
+++ b/scripts/perftest/python/run_perftest.py
@@ -47,7 +47,8 @@ ML_ALGO = {'binomial': ['MultiLogReg', 'l2-svm', 'm-svm'],
            'regression1': ['LinearRegDS', 'LinearRegCG'],
            'regression2': ['GLM_poisson', 'GLM_gamma', 'GLM_binomial'],
            'stats1': ['Univar-Stats', 'bivar-stats'],
-           'stats2': ['stratstats']}
+           'stats2': ['stratstats'],
+           'dimreduction': ['PCA']}
 
 ML_GENDATA = {'binomial': 'genRandData4LogisticRegression',
               'clustering': 'genRandData4Kmeans',
@@ -55,7 +56,8 @@ ML_GENDATA = {'binomial': 'genRandData4LogisticRegression',
               'regression1': 'genRandData4LogisticRegression',
               'regression2': 'genRandData4LogisticRegression',
               'stats1': 'genRandData4DescriptiveStats',
-              'stats2': 'genRandData4StratStats'}
+              'stats2': 'genRandData4StratStats',
+              'dimreduction': 'genRandData4PCA'}
 
 ML_TRAIN = {'GLM_poisson': 'GLM',
             'GLM_gamma': 'GLM',
@@ -69,7 +71,8 @@ ML_TRAIN = {'GLM_poisson': 'GLM',
             'm-svm': 'm-svm',
             'l2-svm': 'l2-svm',
             'MultiLogReg': 'MultiLogReg',
-            'naive-bayes': 'naive-bayes'}
+            'naive-bayes': 'naive-bayes',
+            'PCA': 'PCA'}
 
 ML_PREDICT = {'Kmeans': 'Kmeans-predict',
               'LinearRegCG': 'GLM-predict',
@@ -82,7 +85,7 @@ ML_PREDICT = {'Kmeans': 'Kmeans-predict',
               'GLM_gamma': 'GLM-predict',
               'GLM_binomial': 'GLM-predict'}
 
-DENSE_TYPE_ALGOS = ['clustering', 'stats1', 'stats2']
+DENSE_TYPE_ALGOS = ['clustering', 'stats1', 'stats2', 'dimreduction']
 
 
 # Responsible for execution and metric logging

http://git-wip-us.apache.org/repos/asf/systemml/blob/d6968627/scripts/perftest/python/train.py
----------------------------------------------------------------------
diff --git a/scripts/perftest/python/train.py b/scripts/perftest/python/train.py
index 4428e8f..907c2b9 100755
--- a/scripts/perftest/python/train.py
+++ b/scripts/perftest/python/train.py
@@ -338,6 +338,21 @@ def regression2_glm_poisson_train(save_folder_name, datagen_dir, train_dir, conf
     return data_folders
 
 
+def dimreduction_pca_train(save_folder_name, datagen_dir, train_dir, config_dir):
+    save_path = join(config_dir, save_folder_name)
+    train_write = join(train_dir, save_folder_name)
+
+    INPUT = join(datagen_dir, 'X.data')
+    SCALE = '1'
+    PROJDATA = '1'
+    OUTPUT = join(train_write, 'Output.data')
+
+    config = dict(INPUT=INPUT, SCALE=SCALE, PROJDATA=PROJDATA, OUTPUT=OUTPUT, OFMT=DATA_FORMAT)
+    config_writer(save_path + '.json', config)
+
+    return [save_path]
+
+
 def config_packets_train(algo_payload, matrix_type, matrix_shape, datagen_dir, train_dir, dense_algos, config_dir):
     """
     This function has two responsibilities. Generate the configuration files for