You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by de...@apache.org on 2017/05/26 06:06:06 UTC

incubator-systemml git commit: [SYSTEMML-1380] Kmeans isY and verb parameters should be boolean

Repository: incubator-systemml
Updated Branches:
  refs/heads/master d69f3441c -> 0a89676fa


[SYSTEMML-1380] Kmeans isY and verb parameters should be boolean

Change Kmeans.dml isY and verb parameters to boolean.

Closes #516.


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/0a89676f
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/0a89676f
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/0a89676f

Branch: refs/heads/master
Commit: 0a89676fa67a891bdcc87bc526071e53a35a2d87
Parents: d69f344
Author: krishnakalyan3 <kr...@gmail.com>
Authored: Thu May 25 23:00:52 2017 -0700
Committer: Deron Eriksson <de...@us.ibm.com>
Committed: Thu May 25 23:00:52 2017 -0700

----------------------------------------------------------------------
 docs/algorithms-clustering.md | 14 +++++++-------
 scripts/algorithms/Kmeans.dml | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/0a89676f/docs/algorithms-clustering.md
----------------------------------------------------------------------
diff --git a/docs/algorithms-clustering.md b/docs/algorithms-clustering.md
index 0c91fa1..7554660 100644
--- a/docs/algorithms-clustering.md
+++ b/docs/algorithms-clustering.md
@@ -123,10 +123,10 @@ apart is a "false negative" etc.
                                     maxi=[int]
                                     tol=[double]
                                     samp=[int]
-                                    isY=[int]
+                                    isY=[boolean]
                                     Y=[file]
                                     fmt=[format]
-                                    verb=[int]
+                                    verb=[boolean]
 </div>
 <div data-lang="Spark" markdown="1">
     $SPARK_HOME/bin/spark-submit --master yarn-cluster
@@ -143,10 +143,10 @@ apart is a "false negative" etc.
                                          maxi=[int]
                                          tol=[double]
                                          samp=[int]
-                                         isY=[int]
+                                         isY=[boolean]
                                          Y=[file]
                                          fmt=[format]
-                                         verb=[int]
+                                         verb=[boolean]
 </div>
 </div>
 
@@ -203,14 +203,14 @@ in the centroid initialization procedure
 available mapping of records to clusters (defined by the output
 centroids)
 
-**isY**: (default: `0`) `0` = do not write matrix $Y$, `1` = write $Y$
+**isY**: (default: `FALSE`) `FALSE` = do not write matrix $Y$, `TRUE` = write $Y$
 
 **fmt**: (default: `"text"`) Matrix file output format, such as `text`,
 `mm`, or `csv`; see read/write functions in
 SystemML Language Reference for details.
 
-**verb**: (default: `0`) `0` = do not print per-iteration statistics for
-each run, `1` = print them (the "verbose" option)
+**verb**: (default: `FALSE`) `FALSE` = do not print per-iteration statistics for
+each run, `TRUE` = print them (the "verbose" option)
 
 
 ### Arguments - K-Means Prediction

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/0a89676f/scripts/algorithms/Kmeans.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/Kmeans.dml b/scripts/algorithms/Kmeans.dml
index 85e13c5..386c85f 100644
--- a/scripts/algorithms/Kmeans.dml
+++ b/scripts/algorithms/Kmeans.dml
@@ -33,15 +33,15 @@
 # tol   Double 0.000001 Tolerance (epsilon) for WCSS change ratio
 # samp  Int       50    Average number of records per centroid in data samples
 # C     String  "C.mtx" Location to store the output matrix with the centroids
-# isY   Int        0    0 = do not write Y,  1 = write Y
+# isY   Boolean FALSE   FALSE = do not write Y, TRUE = write Y
 # Y     String  "Y.mtx" Location to store the mapping of records to centroids
 # fmt   String  "text"  Matrix output format, usually "text" or "csv"
-# verb  Int        0    0 = do not print per-iteration stats, 1 = print them
+# verb  Boolean FALSE   FALSE = do not print per-iteration stats, TRUE = print them
 # ----------------------------------------------------------------------------
 #
 # Example:
 # hadoop jar SystemML.jar -f Kmeans.dml -nvargs X=X.mtx k=5 C=centroids.mtx
-# hadoop jar SystemML.jar -f Kmeans.dml -nvargs X=X.mtx k=5 runs=100 maxi=5000 tol=0.00000001 samp=20 C=centroids.mtx isY=1 Y=clusters.mtx verb=1
+# hadoop jar SystemML.jar -f Kmeans.dml -nvargs X=X.mtx k=5 runs=100 maxi=5000 tol=0.00000001 samp=20 C=centroids.mtx isY=TRUE Y=clusters.mtx verb=TRUE
 
 fileX = $X;
 fileY = ifdef ($Y, "Y.mtx");
@@ -51,8 +51,8 @@ num_centroids = $k;
 num_runs   = ifdef ($runs, 10);      # $runs=10;
 max_iter   = ifdef ($maxi, 1000);    # $maxi=1000;
 eps        = ifdef ($tol, 0.000001); # $tol=0.000001;
-is_write_Y = ifdef ($isY, 0);        # $isY=0;
-is_verbose = ifdef ($verb, 0);       # $verb=0;
+is_write_Y = ifdef ($isY, FALSE);    # $isY=FALSE;
+is_verbose = ifdef ($verb, FALSE);   # $verb=FALSE;
 fmtCY      = ifdef ($fmt, "text");   # $fmt="text";
 avg_sample_size_per_centroid = ifdef ($samp, 50);  # $samp=50;
 
@@ -149,7 +149,7 @@ parfor (run_index in 1 : num_runs, check = 0)
         # Compute the current centroid-based within-cluster sum of squares (WCSS)
         wcss_old = wcss;
         wcss = sumXsq + sum (minD);
-        if (is_verbose == 1) {
+        if (is_verbose == TRUE) {
             if (iter_count == 0) {
                 print ("Run " + run_index + ", At Start-Up:  Centroid WCSS = " + wcss);
             } else {
@@ -208,7 +208,7 @@ if (num_successful_runs > 0) {
     C = All_Centroids [(num_centroids * (best_index - 1) + 1) : (num_centroids * best_index), ];
     print ("Writing out the best-WCSS centroids...");
     write (C, fileC, format=fmtCY);
-    if (is_write_Y == 1) {
+    if (is_write_Y == TRUE) {
         print ("Writing out the best-WCSS cluster labels...");
         D =  -2 * (X %*% t(C)) + t(rowSums (C ^ 2));
         P = (D <= rowMins (D));