You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemds.apache.org by mb...@apache.org on 2021/10/09 20:50:00 UTC
[systemds] branch master updated: [MINOR] Additional fixes perftest (algorithms location, datagen)
This is an automated email from the ASF dual-hosted git repository.
mboehm7 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/master by this push:
new 788a42d [MINOR] Additional fixes perftest (algorithms location, datagen)
788a42d is described below
commit 788a42d2e6c0b9880eb85aeacc46a5a996a76653
Author: Matthias Boehm <mb...@gmail.com>
AuthorDate: Sat Oct 9 22:47:56 2021 +0200
[MINOR] Additional fixes perftest (algorithms location, datagen)
---
scripts/perftest/runAll.sh | 14 +++++++++-----
scripts/perftest/runAllBinomial.sh | 13 ++-----------
scripts/perftest/runAllMultinomial.sh | 12 ++----------
scripts/perftest/runAllRegression.sh | 12 ++----------
scripts/perftest/runGLM_binomial_probit.sh | 4 ++--
scripts/perftest/runGLM_gamma_log.sh | 4 ++--
scripts/perftest/runGLM_poisson_log.sh | 4 ++--
scripts/perftest/runL2SVM.sh | 4 ++--
scripts/perftest/runLinearRegCG.sh | 4 ++--
scripts/perftest/runLinearRegDS.sh | 4 ++--
scripts/perftest/runMSVM.sh | 4 ++--
scripts/perftest/runMultiLogReg.sh | 4 ++--
scripts/perftest/runNaiveBayes.sh | 4 ++--
scripts/perftest/scripts/extractTestData.dml | 2 +-
14 files changed, 34 insertions(+), 55 deletions(-)
diff --git a/scripts/perftest/runAll.sh b/scripts/perftest/runAll.sh
index 3206e66..22693b7 100755
--- a/scripts/perftest/runAll.sh
+++ b/scripts/perftest/runAll.sh
@@ -37,19 +37,23 @@ CMD="./sparkDML.sh"
# . ~/intel/oneapi/setvars.sh intel64
# . /opt/intel/bin/compilervars.sh intel64
-
### Micro Benchmarks:
#./MatrixMult.sh
#./MatrixTranspose.sh
-
-### Algorithms Benchmarks:
-
# init time measurement
+if [ ! -d logs ]; then mkdir -p logs ; fi
+if [ ! -d results ]; then mkdir -p results ; fi
if [ ! -d results ]; then mkdir -p results ; fi
date >> results/times.txt
-# TODO Use the built-in function lmPredict instead of the GLM-predict.dml script, for linear regression.
+### Data Generation
+#echo "-- Generating binomial data: " >> results/times.txt;
+./genBinomialData.sh ${CMD} ${TEMPFOLDER} &>> logs/genBinomialData.out
+echo "-- Generating multinomial data." >> results/times.txt;
+./genMultinomialData.sh ${CMD} ${TEMPFOLDER} &>> logs/genMultinomialData.out
+
+### Algorithms Benchmarks:
./runAllBinomial.sh $CMD $TEMPFOLDER
./runAllMultinomial.sh $CMD $TEMPFOLDER
./runAllRegression.sh $CMD $TEMPFOLDER
diff --git a/scripts/perftest/runAllBinomial.sh b/scripts/perftest/runAllBinomial.sh
index 9c8b738..65f5734 100755
--- a/scripts/perftest/runAllBinomial.sh
+++ b/scripts/perftest/runAllBinomial.sh
@@ -21,10 +21,7 @@
#-------------------------------------------------------------
COMMAND=$1
-if [ "$COMMAND" == "" ]; then COMMAND="systemds" ; fi
-
TEMPFOLDER=$2
-if [ "$TEMPFOLDER" == "" ]; then TEMPFOLDER=temp ; fi
BASE=${TEMPFOLDER}/binomial
MAXITR=20
@@ -35,17 +32,11 @@ err_report() {
}
trap 'err_report $LINENO' ERR
-if [ ! -d logs ]; then mkdir -p logs ; fi
-if [ ! -d results ]; then mkdir -p results ; fi
-
echo "RUN BINOMIAL EXPERIMENTS: "$(date) >> results/times.txt;
-# data generation
-echo "-- Generating binomial data: " >> results/times.txt;
-./genBinomialData.sh ${COMMAND} ${TEMPFOLDER} &>> logs/genBinomialData.out
-
# run all classifiers with binomial labels on all datasets
-for d in "10k_1k_dense" "10k_1k_sparse" # "100k_1k_dense" "100k_1k_sparse" "1M_1k_dense" "1M_1k_sparse" "10M_1k_dense" "10M_1k_sparse" #"_KDD" "100M_1k_dense" "100M_1k_sparse"
+# see genBinomialData
+for d in "10k_1k_dense" "10k_1k_sparse" "100k_1k_dense" "100k_1k_sparse" "1M_1k_dense" "1M_1k_sparse" "10M_1k_dense" "10M_1k_sparse" #"_KDD" "100M_1k_dense" "100M_1k_sparse"
do
for f in "runMultiLogReg" "runL2SVM" "runMSVM"
do
diff --git a/scripts/perftest/runAllMultinomial.sh b/scripts/perftest/runAllMultinomial.sh
index da6277a..4df9931 100755
--- a/scripts/perftest/runAllMultinomial.sh
+++ b/scripts/perftest/runAllMultinomial.sh
@@ -21,8 +21,6 @@
#-------------------------------------------------------------
COMMAND=$1
-if [ "$COMMAND" == "" ]; then COMMAND="systemds" ; fi
-
TEMPFOLDER=$2
if [ "$TEMPFOLDER" == "" ]; then TEMPFOLDER=temp ; fi
@@ -36,17 +34,11 @@ err_report() {
}
trap 'err_report $LINENO' ERR
-if [ ! -d logs ]; then mkdir -p logs ; fi
-if [ ! -d results ]; then mkdir -p results ; fi
-
echo " RUN MULTINOMIAL EXPERIMENTS: "$(date) >> results/times.txt;
-# data generation
-echo "-- Generating multinomial data." >> results/times.txt;
-./genMultinomialData.sh ${COMMAND} ${TEMPFOLDER} &>> logs/genMultinomialData.out
-
# run all classifiers with binomial labels on all datasets
-for d in "10k_1k_dense" "10k_1k_sparse" # "100k_1k_dense" "100k_1k_sparse" "1M_1k_dense" "1M_1k_sparse" "10M_1k_dense" "10M_1k_sparse" "100M_1k_dense" "100M_1k_sparse"
+# see genMultinomialData
+for d in "10k_1k_dense" "10k_1k_sparse" "100k_1k_dense" "100k_1k_sparse" "1M_1k_dense" "1M_1k_sparse" "10M_1k_dense" "10M_1k_sparse" #"100M_1k_dense" "100M_1k_sparse"
do
for f in "runNaiveBayes"
do
diff --git a/scripts/perftest/runAllRegression.sh b/scripts/perftest/runAllRegression.sh
index 4e3d098..1322560 100755
--- a/scripts/perftest/runAllRegression.sh
+++ b/scripts/perftest/runAllRegression.sh
@@ -21,8 +21,6 @@
#-------------------------------------------------------------
COMMAND=$1
-if [ "$COMMAND" == "" ]; then COMMAND="systemds" ; fi
-
TEMPFOLDER=$2
if [ "$TEMPFOLDER" == "" ]; then TEMPFOLDER=temp ; fi
@@ -35,17 +33,11 @@ err_report() {
}
trap 'err_report $LINENO' ERR
-if [ ! -d logs ]; then mkdir -p logs ; fi
-if [ ! -d results ]; then mkdir -p results ; fi
-
echo "RUN REGRESSION EXPERIMENTS" $(date) >> results/times.txt;
-# data generation
-echo "-- Generating binomial data: " >> results/times.txt;
-./genBinomialData.sh ${COMMAND} ${TEMPFOLDER} &>> logs/genBinomialData.out
-
# run all regression algorithms with binomial labels on all datasets
-for d in "10k_1k_dense" "10k_1k_sparse" # "100k_1k_dense" "100k_1k_sparse" "1M_1k_dense" "1M_1k_sparse" "10M_1k_dense" "10M_1k_sparse" #"_KDD" "100M_1k_dense" "100M_1k_sparse"
+# see genBinomialData
+for d in "10k_1k_dense" "10k_1k_sparse" "100k_1k_dense" "100k_1k_sparse" "1M_1k_dense" "1M_1k_sparse" "10M_1k_dense" "10M_1k_sparse" #"_KDD" "100M_1k_dense" "100M_1k_sparse"
do
# -------------------------------------------------------------------------------------------------------------------
diff --git a/scripts/perftest/runGLM_binomial_probit.sh b/scripts/perftest/runGLM_binomial_probit.sh
index e101083..5068a57 100755
--- a/scripts/perftest/runGLM_binomial_probit.sh
+++ b/scripts/perftest/runGLM_binomial_probit.sh
@@ -30,7 +30,7 @@ for i in 0 1 2; do
#training
tstart=$(date +%s.%N)
- # ${CMD} -f ../algorithms/GLM.dml \
+ # ${CMD} -f ./algorithms/GLM.dml \
${CMD} -f scripts/GLM.dml \
--config conf/SystemDS-config.xml \
--stats \
@@ -41,7 +41,7 @@ for i in 0 1 2; do
#predict
tstart=$(date +%s.%N)
- ${CMD} -f ../algorithms/GLM-predict.dml \
+ ${CMD} -f ./algorithms/GLM-predict.dml \
--config conf/SystemDS-config.xml \
--stats \
--nvargs dfam=2 link=3 fmt=csv X=$1_test B=${BASE}/b Y=$2_test M=${BASE}/m O=${BASE}/out.csv
diff --git a/scripts/perftest/runGLM_gamma_log.sh b/scripts/perftest/runGLM_gamma_log.sh
index 9eb8e9f..787a6c7 100755
--- a/scripts/perftest/runGLM_gamma_log.sh
+++ b/scripts/perftest/runGLM_gamma_log.sh
@@ -30,7 +30,7 @@ for i in 0 1 2; do
#training
tstart=$(date +%s.%N)
- #${CMD} -f ../algorithms/GLM.dml \
+ #${CMD} -f ./algorithms/GLM.dml \
${CMD} -f scripts/GLM.dml \
--config conf/SystemDS-config.xml \
--stats \
@@ -41,7 +41,7 @@ for i in 0 1 2; do
#predict
tstart=$(date +%s.%N)
- ${CMD} -f ../algorithms/GLM-predict.dml \
+ ${CMD} -f ./algorithms/GLM-predict.dml \
--config conf/SystemDS-config.xml \
--stats \
--nvargs dfam=1 vpow=2.0 link=1 lpow=0.0 fmt=csv X=$1_test B=${BASE}/b Y=$2_test M=${BASE}/m O=${BASE}/out.csv
diff --git a/scripts/perftest/runGLM_poisson_log.sh b/scripts/perftest/runGLM_poisson_log.sh
index 69c2419..f8e1861 100755
--- a/scripts/perftest/runGLM_poisson_log.sh
+++ b/scripts/perftest/runGLM_poisson_log.sh
@@ -30,7 +30,7 @@ for i in 0 1 2; do
#training
tstart=$(date +%s.%N)
- #${CMD} -f ../algorithms/GLM.dml \
+ #${CMD} -f ./algorithms/GLM.dml \
${CMD} -f scripts/GLM.dml \
--config conf/SystemDS-config.xml \
--stats \
@@ -41,7 +41,7 @@ for i in 0 1 2; do
#predict
tstart=$(date +%s.%N)
- ${CMD} -f ../algorithms/GLM-predict.dml \
+ ${CMD} -f ./algorithms/GLM-predict.dml \
--config conf/SystemDS-config.xml \
--stats \
--nvargs dfam=1 vpow=1.0 link=1 lpow=0.0 fmt=csv X=$1_test B=${BASE}/b Y=$2_test M=${BASE}/m O=${BASE}/out.csv
diff --git a/scripts/perftest/runL2SVM.sh b/scripts/perftest/runL2SVM.sh
index 9b0f2a9..03e966d 100755
--- a/scripts/perftest/runL2SVM.sh
+++ b/scripts/perftest/runL2SVM.sh
@@ -30,7 +30,7 @@ for i in 0 1; do
tstart=$(date +%s.%N)
# /algorithms/l2-svm.dml already calls a built-in function for the l2 svm.
- ${CMD} -f ../algorithms/l2-svm.dml \
+ ${CMD} -f ./algorithms/l2-svm.dml \
--config conf/SystemDS-config.xml \
--stats \
--nvargs X=$1 Y=$2 icpt=$i tol=0.0001 reg=0.01 maxiter=$5 model=${BASE}/b fmt="csv"
@@ -40,7 +40,7 @@ for i in 0 1; do
#predict
tstart=$(date +%s.%N)
- #${CMD} -f ../algorithms/l2-svm-predict.dml \
+ #${CMD} -f ./algorithms/l2-svm-predict.dml \
${CMD} -f scripts/l2-svm-predict.dml \
--config conf/SystemDS-config.xml \
--stats \
diff --git a/scripts/perftest/runLinearRegCG.sh b/scripts/perftest/runLinearRegCG.sh
index e61fd97..ae22a12 100755
--- a/scripts/perftest/runLinearRegCG.sh
+++ b/scripts/perftest/runLinearRegCG.sh
@@ -31,7 +31,7 @@ do
#training
tstart=$(date +%s.%N)
- #${CMD} -f ../algorithms/LinearRegCG.dml \
+ #${CMD} -f ./algorithms/LinearRegCG.dml \
${CMD} -f scripts/LinearRegCG.dml \
--config conf/SystemDS-config.xml \
--stats \
@@ -42,7 +42,7 @@ do
#predict
tstart=$(date +%s.%N)
- ${CMD} -f ../algorithms/GLM-predict.dml \
+ ${CMD} -f ./algorithms/GLM-predict.dml \
--config conf/SystemDS-config.xml \
--stats \
--nvargs dfam=1 link=1 vpow=0.0 lpow=1.0 fmt=csv X=$1_test B=${BASE}/b Y=$2_test M=${BASE}/m O=${BASE}/out.csv
diff --git a/scripts/perftest/runLinearRegDS.sh b/scripts/perftest/runLinearRegDS.sh
index 547ce0b..ad4617b 100755
--- a/scripts/perftest/runLinearRegDS.sh
+++ b/scripts/perftest/runLinearRegDS.sh
@@ -31,7 +31,7 @@ do
#training
tstart=$(date +%s.%N)
- #${CMD} -f ../algorithms/LinearRegDS.dml \
+ #${CMD} -f ./algorithms/LinearRegDS.dml \
${CMD} -f scripts/LinearRegDS.dml \
--config conf/SystemDS-config.xml \
--stats \
@@ -42,7 +42,7 @@ do
#predict
tstart=$(date +%s.%N)
- ${CMD} -f ../algorithms/GLM-predict.dml \
+ ${CMD} -f ./algorithms/GLM-predict.dml \
--config conf/SystemDS-config.xml \
--stats \
--nvargs dfam=1 link=1 vpow=0.0 lpow=1.0 fmt=csv X=$1_test B=${BASE}/b Y=$2_test M=${BASE}/m O=${BASE}/out.csv
diff --git a/scripts/perftest/runMSVM.sh b/scripts/perftest/runMSVM.sh
index fde7cb5..8cabc4d 100755
--- a/scripts/perftest/runMSVM.sh
+++ b/scripts/perftest/runMSVM.sh
@@ -28,7 +28,7 @@ BASE=$4
for i in 0 1; do
#training
tstart=$(date +%s.%N)
- # ${CMD} -f ../algorithms/m-svm.dml \
+ # ${CMD} -f ./algorithms/m-svm.dml \
${CMD} -f scripts/m-svm.dml \
--config conf/SystemDS-config.xml \
--stats \
@@ -39,7 +39,7 @@ for i in 0 1; do
#predict
tstart=$(date +%s.%N)
- #${CMD} -f ../algorithms/m-svm-predict.dml \
+ #${CMD} -f ./algorithms/m-svm-predict.dml \
${CMD} -f scripts/m-svm-predict.dml \
--config conf/SystemDS-config.xml \
--stats \
diff --git a/scripts/perftest/runMultiLogReg.sh b/scripts/perftest/runMultiLogReg.sh
index 6dcb385..9cdbf36 100755
--- a/scripts/perftest/runMultiLogReg.sh
+++ b/scripts/perftest/runMultiLogReg.sh
@@ -31,7 +31,7 @@ if [ $3 -gt 2 ]; then DFAM=3; fi
for i in 0 1 2; do
#training
tstart=$(date +%s.%N)
- # ${CMD} -f ../algorithms/MultiLogReg.dml \
+ # ${CMD} -f ./algorithms/MultiLogReg.dml \
${CMD} -f scripts/MultiLogReg.dml \
--config conf/SystemDS-config.xml \
--stats \
@@ -42,7 +42,7 @@ for i in 0 1 2; do
#predict
tstart=$(date +%s.%N)
- ${CMD} -f ../algorithms/GLM-predict.dml \
+ ${CMD} -f ./algorithms/GLM-predict.dml \
--config conf/SystemDS-config.xml \
--stats \
--nvargs dfam=$DFAM vpow=-1 link=2 lpow=-1 fmt=csv X=$1_test B=${BASE}/b Y=$2_test M=${BASE}/m O=${BASE}/out.csv
diff --git a/scripts/perftest/runNaiveBayes.sh b/scripts/perftest/runNaiveBayes.sh
index 88ce467..f4931db 100755
--- a/scripts/perftest/runNaiveBayes.sh
+++ b/scripts/perftest/runNaiveBayes.sh
@@ -26,7 +26,7 @@ BASE=$4
#training
tstart=$(date +%s.%N)
-#${CMD} -f ../algorithms/naive-bayes.dml \
+#${CMD} -f ./algorithms/naive-bayes.dml \
${CMD} -f scripts/naive-bayes.dml \
--config conf/SystemDS-config.xml \
--stats \
@@ -37,7 +37,7 @@ echo "NaiveBayes train on "$1": "$ttrain >> results/times.txt
#predict
tstart=$(date +%s.%N)
-#${CMD} -f ../algorithms/naive-bayes-predict.dml \
+#${CMD} -f ./algorithms/naive-bayes-predict.dml \
${CMD} -f scripts/naive-bayes-predict.dml \
--config conf/SystemDS-config.xml \
--stats \
diff --git a/scripts/perftest/scripts/extractTestData.dml b/scripts/perftest/scripts/extractTestData.dml
index 702ddbd..2d6c6d6 100755
--- a/scripts/perftest/scripts/extractTestData.dml
+++ b/scripts/perftest/scripts/extractTestData.dml
@@ -22,7 +22,7 @@
X = read($1);
y = read($2);
-[X,y,Xtest,ytest] = split(X=X, Y=y, f=0.8);
+[X,Xtest,y,ytest] = split(X=X, Y=y, f=0.8);
write(Xtest, $3, format=$5);
write(ytest, $4, format=$5);