You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by de...@apache.org on 2016/03/30 23:58:39 UTC

incubator-systemml git commit: [SYSTEMML-586] Fix in-memory distribution for JMLC

Repository: incubator-systemml
Updated Branches:
  refs/heads/master 772a329de -> c7ae748c3


[SYSTEMML-586] Fix in-memory distribution for JMLC

Update pom.xml to build in-memory distribution.
Update inmemory.xml to include necessary dependencies for JMLC.
Update JMLC example in jmlc.md.

Closes #94.


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/c7ae748c
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/c7ae748c
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/c7ae748c

Branch: refs/heads/master
Commit: c7ae748c38b3580fbf7bef6b8b5dd1519f657415
Parents: 772a329
Author: Deron Eriksson <de...@us.ibm.com>
Authored: Wed Mar 30 14:53:37 2016 -0700
Committer: Deron Eriksson <de...@us.ibm.com>
Committed: Wed Mar 30 14:53:37 2016 -0700

----------------------------------------------------------------------
 docs/jmlc.md              | 101 ++++++++++++++++++++++++++---------------
 pom.xml                   |   5 +-
 src/assembly/inmemory.xml |  21 +++++----
 3 files changed, 78 insertions(+), 49 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/c7ae748c/docs/jmlc.md
----------------------------------------------------------------------
diff --git a/docs/jmlc.md b/docs/jmlc.md
index 0bc0b82..2183700 100644
--- a/docs/jmlc.md
+++ b/docs/jmlc.md
@@ -26,7 +26,7 @@ limitations under the License.
 # Overview
 
 The `Java Machine Learning Connector (JMLC)` API is a programmatic interface for interacting with SystemML
-in an embedded fashion. To use JMLC, the SystemML jar file needs to be included on the
+in an embedded fashion. To use JMLC, the small footprint "in-memory" SystemML jar file needs to be included on the
 classpath of the Java application, since JMLC invokes SystemML in an existing Java Virtual Machine. Because
 of this, JMLC allows access to SystemML's optimizations and fast linear algebra, but the bulk performance
 gain from running SystemML on a large Spark or Hadoop cluster is not available. However, this embeddable nature
@@ -83,8 +83,7 @@ W = read("./tmp/W", rows=-1, cols=-1);
 numRows = nrow(X);
 numCols = ncol(X);
 b = W[numCols+1,]
-ones = matrix(1, rows=numRows, cols=1)
-scores = X %*% W[1:numCols,] + ones %*% b;
+scores = X %*% W[1:numCols,] + b;
 predicted_y = rowIndexMax(scores);
 
 write(predicted_y, "./tmp", format="text");
@@ -95,10 +94,8 @@ In the Java below, we initialize SystemML by obtaining a `Connection` object. Ne
 (`"scoring-example.dml"`) as a `String`. We precompile this script by calling the `prepareScript` method on the
 `Connection` object with the names of the inputs (`"W"` and `"X"`) and outputs (`"predicted_y"`) to register.
 
-Following this, we read in the model (`"sentiment_model.mtx"`) and convert the model to a 47x46 matrix, where the
-last row of the matrix is for the `b` values. We set this matrix as the `"W"` input. Next, we create a random 46x46 matrix
-of doubles for test data with a sparsity of 0.7 and set this matrix as the `"X"` input. We then execute the script and
-read the `"predicted_y"` result matrix.
+Following this, we set matrix `"W"` and we set a matrix of input data `"X"`. We execute the script and read
+the resulting `"predicted_y"` matrix. We repeat this process. When done, we close the SystemML `Connection`.
 
 
 #### Java
@@ -110,49 +107,79 @@ read the `"predicted_y"` result matrix.
  
  import org.apache.sysml.api.jmlc.Connection;
  import org.apache.sysml.api.jmlc.PreparedScript;
- import org.apache.sysml.api.jmlc.ResultVariables;
  
  public class JMLCExample {
  
     public static void main(String[] args) throws Exception {
  
-       // obtain connection to SystemML
-       Connection conn = new Connection();
+        // obtain connection to SystemML
+        Connection conn = new Connection();
  
-       // read in and precompile DML script, registering inputs and outputs
-       String dml = conn.readScript("scoring-example.dml");
-       PreparedScript script = conn.prepareScript(dml, new String[] { "W", "X" }, new String[] { "predicted_y" }, false);
+        // read in and precompile DML script, registering inputs and outputs
+        String dml = conn.readScript("scoring-example.dml");
+        PreparedScript script = conn.prepareScript(dml, new String[] { "W", "X" }, new String[] { "predicted_y" }, false);
  
-      // read in model and set model
-       String model = conn.readScript("sentiment_model.mtx");
-       double[][] w = conn.convertToDoubleMatrix(model, 47, 46);
-       script.setMatrix("W", w);
+        double[][] mtx = matrix(4, 3, new double[] { 1, 2, 3, 4, 5, 6, 7, 8, 9 });
+        double[][] result = null;
  
-       // read in data and set data
-       double[][] x = generateRandomMatrix(46, 46, -1, 1, 0.7, System.nanoTime());
-       script.setMatrix("X", x);
+        // set inputs, execute script, and obtain output
+        script.setMatrix("W", mtx);
+        script.setMatrix("X", randomMatrix(3, 3, -1, 1, 0.7));
+        result = script.executeScript().getMatrix("predicted_y");
+        displayMatrix(result);
  
-       // execute script and get output
-       ResultVariables results = script.executeScript();
-       double[][] y = results.getMatrix("predicted_y");
+        script.setMatrix("W", mtx);
+        script.setMatrix("X", randomMatrix(3, 3, -1, 1, 0.7));
+        result = script.executeScript().getMatrix("predicted_y");
+        displayMatrix(result);
  
-       // close connection
-       conn.close();
+        script.setMatrix("W", mtx);
+        script.setMatrix("X", randomMatrix(3, 3, -1, 1, 0.7));
+        result = script.executeScript().getMatrix("predicted_y");
+        displayMatrix(result);
+ 
+        // close connection
+        conn.close();
+    }
+ 
+     public static double[][] matrix(int rows, int cols, double[] vals) {
+        double[][] matrix = new double[rows][cols];
+        if ((vals == null) || (vals.length == 0)) {
+            return matrix;
+        }
+        for (int i = 0; i < vals.length; i++) {
+            matrix[i / cols][i % cols] = vals[i];
+        }
+        return matrix;
     }
  
-    public static double[][] generateRandomMatrix(int rows, int cols, double min, double max, double sparsity, long seed) {
-       double[][] matrix = new double[rows][cols];
-       Random random = (seed == -1) ? new Random(System.currentTimeMillis()) : new Random(seed);
-       for (int i = 0; i < rows; i++) {
-          for (int j = 0; j < cols; j++) {
-             if (random.nextDouble() > sparsity) {
-                continue;
-             }
-             matrix[i][j] = (random.nextDouble() * (max - min) + min);
-          }
-       }
-       return matrix;
+    public static double[][] randomMatrix(int rows, int cols, double min, double max, double sparsity) {
+        double[][] matrix = new double[rows][cols];
+        Random random = new Random(System.currentTimeMillis());
+        for (int i = 0; i < rows; i++) {
+            for (int j = 0; j < cols; j++) {
+                if (random.nextDouble() > sparsity) {
+                    continue;
+                }
+                matrix[i][j] = (random.nextDouble() * (max - min) + min);
+            }
+        }
+        return matrix;
     }
+ 
+    public static void displayMatrix(double[][] matrix) {
+        System.out.println("Matrix size:" + matrix.length + "x" + matrix[0].length);
+        for (int i = 0; i < matrix.length; i++) {
+            for (int j = 0; j < matrix[0].length; j++) {
+                if (j > 0) {
+                    System.out.print(", ");
+                }
+                System.out.print("[" + i + "," + j + "]:" + matrix[i][j]);
+            }
+            System.out.println();
+        }
+    }
+ 
  }
 {% endhighlight %}
 

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/c7ae748c/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 67022e9..6ba66d7 100644
--- a/pom.xml
+++ b/pom.xml
@@ -112,7 +112,6 @@
 									<groupId>org.apache.hadoop</groupId>
 									<artifactId>hadoop-test</artifactId>
 									<version>1.2.1</version>
-									<!-- <classifier>jar</classifier> -->
 									<type>jar</type>
 									<overWrite>true</overWrite>
 									<outputDirectory>${project.build.directory}/hadoop-test</outputDirectory>
@@ -613,7 +612,7 @@
 								</configuration>
 							</execution>
 
-							<!--execution>
+							<execution>
 								<id>create-inmemory-jar</id>
 								<phase>package</phase>
 								<goals>
@@ -624,7 +623,7 @@
 										<descriptor>src/assembly/inmemory.xml</descriptor>
 									</descriptors>
 								</configuration>
-							</execution-->
+							</execution>
 						</executions>
 					</plugin>
 

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/c7ae748c/src/assembly/inmemory.xml
----------------------------------------------------------------------
diff --git a/src/assembly/inmemory.xml b/src/assembly/inmemory.xml
index 3b33848..5d3ef03 100644
--- a/src/assembly/inmemory.xml
+++ b/src/assembly/inmemory.xml
@@ -38,13 +38,6 @@
 			<outputDirectory>.</outputDirectory>
 		</fileSet>
 
-		<fileSet>
-			<directory>${basedir}/src/main/config</directory>
-			<includes>
-				<include>log4j.properties</include>
-			</includes>
-			<outputDirectory>.</outputDirectory>
-		</fileSet>
 	</fileSets>
 
 	<!-- 
@@ -57,11 +50,21 @@
 	</files>
 	-->
 
-	<!--  Include all the libraries needed to run in standalone mode. -->
+	<!--  Include all the libraries needed to run in in-memory (JMLC) mode. -->
 	<dependencySets>
 		<dependencySet>
 			<includes>
-				<include>*:*system-ml*</include>
+				<include>*:commons-collections*</include>
+				<include>*:commons-configuration*</include>
+				<include>*:commons-lang*</include>
+				<include>*:commons-logging*</include>
+				<include>*:hadoop-auth*</include>
+				<include>*:hadoop-common*</include>
+				<include>*:hadoop-mapreduce-client-core*</include>
+				<include>*:log4j*</include>
+				<include>*:slf4j-api*</include>
+				<include>*:slf4j-log4j*</include>
+				<include>*:systemml*</include>
 			</includes>
 			<outputDirectory>.</outputDirectory>
 			<scope>provided</scope>