You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by lr...@apache.org on 2015/11/19 21:47:30 UTC

[48/50] [abbrv] incubator-systemml git commit: [SYSML-345] Add New Unary Aggregate "SUM_SQ" Operator

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/31bf3285/src/test/java/com/ibm/bi/dml/test/integration/functions/aggregate/SumSqTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/com/ibm/bi/dml/test/integration/functions/aggregate/SumSqTest.java b/src/test/java/com/ibm/bi/dml/test/integration/functions/aggregate/SumSqTest.java
new file mode 100644
index 0000000..a1867ab
--- /dev/null
+++ b/src/test/java/com/ibm/bi/dml/test/integration/functions/aggregate/SumSqTest.java
@@ -0,0 +1,268 @@
+/**
+ * (C) Copyright IBM Corp. 2010, 2015
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package com.ibm.bi.dml.test.integration.functions.aggregate;
+
+import com.ibm.bi.dml.api.DMLScript;
+import com.ibm.bi.dml.api.DMLScript.RUNTIME_PLATFORM;
+import com.ibm.bi.dml.hops.OptimizerUtils;
+import com.ibm.bi.dml.lops.LopProperties.ExecType;
+import com.ibm.bi.dml.runtime.instructions.Instruction;
+import com.ibm.bi.dml.runtime.matrix.data.MatrixValue.CellIndex;
+import com.ibm.bi.dml.test.integration.AutomatedTestBase;
+import com.ibm.bi.dml.test.integration.TestConfiguration;
+import com.ibm.bi.dml.test.utils.TestUtils;
+import com.ibm.bi.dml.utils.Statistics;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.HashMap;
+
+/**
+ * Test the sum of squared values function, "sum(X^2)".
+ */
+public class SumSqTest extends AutomatedTestBase {
+
+    private static final String TEST_NAME = "SumSq";
+    private static final String TEST_DIR = "functions/aggregate/";
+    private static final String INPUT_NAME = "X";
+    private static final String OUTPUT_NAME = "sumSq";
+
+    private static final String op = "uasqk+"; // opcode searched for in the statistics to detect the rewrite
+    private static final int rows = 1234;
+    private static final int cols = 567;
+    private static final double sparsity1 = 1; // sparsity used for the "dense" cases
+    private static final double sparsity2 = 0.2; // sparsity used for the "sparse" cases
+    private static final double eps = Math.pow(10, -10); // tolerance for the DML vs. R comparison
+
+    @Override
+    public void setUp() {
+        TestUtils.clearAssertionInformation();
+        TestConfiguration config = new TestConfiguration(TEST_DIR, TEST_NAME);
+        addTestConfiguration(TEST_NAME, config);
+    }
+
+    // Dense matrix (rows x cols), algebraic rewrites enabled
+    @Test
+    public void testSumSquaredDenseMatrixRewriteCP() {
+        testSumSquared(TEST_NAME, false, false, true, ExecType.CP);
+    }
+
+    @Test
+    public void testSumSquaredDenseMatrixRewriteSpark() {
+        testSumSquared(TEST_NAME, false, false, true, ExecType.SPARK);
+    }
+
+    @Test
+    public void testSumSquaredDenseMatrixRewriteMR() {
+        testSumSquared(TEST_NAME, false, false, true, ExecType.MR);
+    }
+
+    // Dense matrix (rows x cols), algebraic rewrites disabled
+    @Test
+    public void testSumSquaredDenseMatrixNoRewriteCP() {
+        testSumSquared(TEST_NAME, false, false, false, ExecType.CP);
+    }
+
+    @Test
+    public void testSumSquaredDenseMatrixNoRewriteSpark() {
+        testSumSquared(TEST_NAME, false, false, false, ExecType.SPARK);
+    }
+
+    @Test
+    public void testSumSquaredDenseMatrixNoRewriteMR() {
+        testSumSquared(TEST_NAME, false, false, false, ExecType.MR);
+    }
+
+    // Dense vector (rows x 1), algebraic rewrites enabled
+    @Test
+    public void testSumSquaredDenseVectorRewriteCP() {
+        testSumSquared(TEST_NAME, false, true, true, ExecType.CP);
+    }
+
+    @Test
+    public void testSumSquaredDenseVectorRewriteSpark() {
+        testSumSquared(TEST_NAME, false, true, true, ExecType.SPARK);
+    }
+
+    @Test
+    public void testSumSquaredDenseVectorRewriteMR() {
+        testSumSquared(TEST_NAME, false, true, true, ExecType.MR);
+    }
+
+    // Dense vector (rows x 1), algebraic rewrites disabled
+    @Test
+    public void testSumSquaredDenseVectorNoRewriteCP() {
+        testSumSquared(TEST_NAME, false, true, false, ExecType.CP);
+    }
+
+    @Test
+    public void testSumSquaredDenseVectorNoRewriteSpark() {
+        testSumSquared(TEST_NAME, false, true, false, ExecType.SPARK);
+    }
+
+    @Test
+    public void testSumSquaredDenseVectorNoRewriteMR() {
+        testSumSquared(TEST_NAME, false, true, false, ExecType.MR);
+    }
+
+    // Sparse matrix (rows x cols), algebraic rewrites enabled
+    @Test
+    public void testSumSquaredSparseMatrixRewriteCP() {
+        testSumSquared(TEST_NAME, true, false, true, ExecType.CP);
+    }
+
+    @Test
+    public void testSumSquaredSparseMatrixRewriteSpark() {
+        testSumSquared(TEST_NAME, true, false, true, ExecType.SPARK);
+    }
+
+    @Test
+    public void testSumSquaredSparseMatrixRewriteMR() {
+        testSumSquared(TEST_NAME, true, false, true, ExecType.MR);
+    }
+
+    // Sparse matrix (rows x cols), algebraic rewrites disabled
+    @Test
+    public void testSumSquaredSparseMatrixNoRewriteCP() {
+        testSumSquared(TEST_NAME, true, false, false, ExecType.CP);
+    }
+
+    @Test
+    public void testSumSquaredSparseMatrixNoRewriteSpark() {
+        testSumSquared(TEST_NAME, true, false, false, ExecType.SPARK);
+    }
+
+    @Test
+    public void testSumSquaredSparseMatrixNoRewriteMR() {
+        testSumSquared(TEST_NAME, true, false, false, ExecType.MR);
+    }
+
+    // Sparse vector (rows x 1), algebraic rewrites enabled
+    @Test
+    public void testSumSquaredSparseVectorRewriteCP() {
+        testSumSquared(TEST_NAME, true, true, true, ExecType.CP);
+    }
+
+    @Test
+    public void testSumSquaredSparseVectorRewriteSpark() {
+        testSumSquared(TEST_NAME, true, true, true, ExecType.SPARK);
+    }
+
+    @Test
+    public void testSumSquaredSparseVectorRewriteMR() {
+        testSumSquared(TEST_NAME, true, true, true, ExecType.MR);
+    }
+
+    // Sparse vector (rows x 1), algebraic rewrites disabled
+    @Test
+    public void testSumSquaredSparseVectorNoRewriteCP() {
+        testSumSquared(TEST_NAME, true, true, false, ExecType.CP);
+    }
+
+    @Test
+    public void testSumSquaredSparseVectorNoRewriteSpark() {
+        testSumSquared(TEST_NAME, true, true, false, ExecType.SPARK);
+    }
+
+    @Test
+    public void testSumSquaredSparseVectorNoRewriteMR() {
+        testSumSquared(TEST_NAME, true, true, false, ExecType.MR);
+    }
+
+    /**
+     * Run the "sum(X^2)" DML script and its R counterpart on generated
+     * data, compare the results, and (CP/Spark with rewrites only) check
+     * whether the "uasqk+" opcode shows up in the heavy-hitter opcodes.
+     *
+     * @param testName The name of this test case; also the base name of the .dml/.R scripts.
+     * @param sparse If true, generate data with sparsity2; otherwise with sparsity1.
+     * @param vector If true, generate a single-column input; otherwise use cols columns.
+     * @param rewrites Value assigned to OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION.
+     * @param platform Selection between CP/Spark/MR platforms (anything else maps to SINGLE_NODE).
+     */
+    private void testSumSquared(String testName, boolean sparse, boolean vector,
+                                boolean rewrites, ExecType platform) {
+        // Save the global settings and override them for this test case
+        boolean rewritesOld = OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION;
+        OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION = rewrites;
+
+        RUNTIME_PLATFORM platformOld = rtplatform;
+        switch (platform) {
+            case MR:
+                rtplatform = RUNTIME_PLATFORM.HADOOP;
+                break;
+            case SPARK:
+                rtplatform = RUNTIME_PLATFORM.SPARK;
+                break;
+            default:
+                rtplatform = RUNTIME_PLATFORM.SINGLE_NODE;
+                break;
+        }
+
+        boolean sparkConfigOld = DMLScript.USE_LOCAL_SPARK_CONFIG;
+        if (rtplatform == RUNTIME_PLATFORM.SPARK)
+            DMLScript.USE_LOCAL_SPARK_CONFIG = true;
+
+        try {
+            // Look up the test configuration and set up script paths and arguments
+            TestConfiguration config = getTestConfiguration(testName);
+            String HOME = SCRIPT_DIR + TEST_DIR;
+            fullDMLScriptName = HOME + testName + ".dml";
+            programArgs = new String[]{"-explain", "-stats", "-args",
+                    HOME + INPUT_DIR + INPUT_NAME,
+                    HOME + OUTPUT_DIR + OUTPUT_NAME};
+            fullRScriptName = HOME + testName + ".R";
+            rCmd = "Rscript" + " " + fullRScriptName + " " +
+                    HOME + INPUT_DIR + " " + HOME + EXPECTED_DIR;
+            loadTestConfiguration(config);
+
+            // Generate input data (trailing args presumably min=-1, max=1, sparsity, seed=7 -- TODO confirm)
+            double sparsity = sparse ? sparsity2 : sparsity1;
+            int columns = vector ? 1 : cols;
+            double[][] X = getRandomMatrix(rows, columns, -1, 1, sparsity, 7);
+            writeInputMatrixWithMTD(INPUT_NAME, X, true);
+
+            // Run the DML script, then the R script that serves as the reference
+            runTest(true, false, null, -1);
+            runRScript(true);
+
+            // Compare the DML output against the R output within tolerance eps
+            HashMap<CellIndex, Double> dmlfile = readDMLMatrixFromHDFS(OUTPUT_NAME);
+            HashMap<CellIndex, Double> rfile  = readRMatrixFromFS(OUTPUT_NAME);
+            TestUtils.compareMatrices(dmlfile, rfile, eps, "Stat-DML", "Stat-R");
+
+            // On CP and Spark modes, check via the heavy-hitter opcodes that the
+            // rewrite actually occurred for matrix cases and not for vector cases.
+            if (rewrites && (platform == ExecType.SPARK || platform == ExecType.CP)) {
+                String prefix = (platform == ExecType.SPARK) ? Instruction.SP_INST_PREFIX : "";
+                String opcode = prefix + op;
+                boolean rewriteApplied = Statistics.getCPHeavyHitterOpCodes().contains(opcode);
+                if (vector)
+                    Assert.assertFalse("Rewrite applied to vector case.", rewriteApplied);
+                else
+                    Assert.assertTrue("Rewrite not applied to matrix case.", rewriteApplied);
+            }
+        }
+        finally {
+            // Restore the global settings saved above, even if the test failed
+            OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION = rewritesOld;
+            rtplatform = platformOld;
+            DMLScript.USE_LOCAL_SPARK_CONFIG = sparkConfigOld;
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/31bf3285/src/test/scripts/functions/aggregate/ColSumsSq.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/aggregate/ColSumsSq.R b/src/test/scripts/functions/aggregate/ColSumsSq.R
new file mode 100644
index 0000000..9bdaefb
--- /dev/null
+++ b/src/test/scripts/functions/aggregate/ColSumsSq.R
@@ -0,0 +1,26 @@
+#-------------------------------------------------------------
+#
+# (C) Copyright IBM Corp. 2010, 2015
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)  # args[1]: input path prefix, args[2]: output path prefix
+options(digits=22)
+library("Matrix")  # provides readMM / writeMM
+
+X <- as.matrix(readMM(paste(args[1], "X.mtx", sep="")))  # load X.mtx as a dense R matrix
+colSumsSq <- t(colSums(X^2))  # t() turns the colSums vector into a 1-row matrix
+
+writeMM(as(colSumsSq, "CsparseMatrix"), paste(args[2], "colSumsSq", sep=""));  # write result as Matrix Market

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/31bf3285/src/test/scripts/functions/aggregate/ColSumsSq.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/aggregate/ColSumsSq.dml b/src/test/scripts/functions/aggregate/ColSumsSq.dml
new file mode 100644
index 0000000..600f9b0
--- /dev/null
+++ b/src/test/scripts/functions/aggregate/ColSumsSq.dml
@@ -0,0 +1,21 @@
+#-------------------------------------------------------------
+#
+# (C) Copyright IBM Corp. 2010, 2015
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+#-------------------------------------------------------------
+
+X = read($1);  # $1: path of the input matrix X
+colSumsSq = colSums(X^2)  # column sums of the squared entries of X
+write(colSumsSq, $2);  # $2: path the result is written to

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/31bf3285/src/test/scripts/functions/aggregate/RowSumsSq.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/aggregate/RowSumsSq.R b/src/test/scripts/functions/aggregate/RowSumsSq.R
new file mode 100644
index 0000000..5317bfc
--- /dev/null
+++ b/src/test/scripts/functions/aggregate/RowSumsSq.R
@@ -0,0 +1,26 @@
+#-------------------------------------------------------------
+#
+# (C) Copyright IBM Corp. 2010, 2015
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)  # args[1]: input path prefix, args[2]: output path prefix
+options(digits=22)
+library("Matrix")  # provides readMM / writeMM
+
+X <- as.matrix(readMM(paste(args[1], "X.mtx", sep="")))  # load X.mtx as a dense R matrix
+rowSumsSq <- rowSums(X^2)  # row sums of the squared entries of X
+
+writeMM(as(rowSumsSq, "CsparseMatrix"), paste(args[2], "rowSumsSq", sep=""));  # write result as Matrix Market

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/31bf3285/src/test/scripts/functions/aggregate/RowSumsSq.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/aggregate/RowSumsSq.dml b/src/test/scripts/functions/aggregate/RowSumsSq.dml
new file mode 100644
index 0000000..8f0b14e
--- /dev/null
+++ b/src/test/scripts/functions/aggregate/RowSumsSq.dml
@@ -0,0 +1,21 @@
+#-------------------------------------------------------------
+#
+# (C) Copyright IBM Corp. 2010, 2015
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+#-------------------------------------------------------------
+
+X = read($1);  # $1: path of the input matrix X
+rowSumsSq = rowSums(X^2)  # row sums of the squared entries of X
+write(rowSumsSq, $2);  # $2: path the result is written to

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/31bf3285/src/test/scripts/functions/aggregate/SumSq.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/aggregate/SumSq.R b/src/test/scripts/functions/aggregate/SumSq.R
new file mode 100644
index 0000000..226fee6
--- /dev/null
+++ b/src/test/scripts/functions/aggregate/SumSq.R
@@ -0,0 +1,26 @@
+#-------------------------------------------------------------
+#
+# (C) Copyright IBM Corp. 2010, 2015
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)  # args[1]: input path prefix, args[2]: output path prefix
+options(digits=22)
+library("Matrix")  # provides readMM / writeMM
+
+X <- as.matrix(readMM(paste(args[1], "X.mtx", sep="")))  # load X.mtx as a dense R matrix
+sumSq <- sum(X^2)  # sum of the squared entries of X
+
+writeMM(as(sumSq, "CsparseMatrix"), paste(args[2], "sumSq", sep=""));  # write result as Matrix Market

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/31bf3285/src/test/scripts/functions/aggregate/SumSq.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/aggregate/SumSq.dml b/src/test/scripts/functions/aggregate/SumSq.dml
new file mode 100644
index 0000000..ae342ab
--- /dev/null
+++ b/src/test/scripts/functions/aggregate/SumSq.dml
@@ -0,0 +1,21 @@
+#-------------------------------------------------------------
+#
+# (C) Copyright IBM Corp. 2010, 2015
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+#-------------------------------------------------------------
+
+X = read($1);  # $1: path of the input matrix X
+sumSq = as.matrix(sum(X^2))  # sum of the squared entries of X, wrapped as a matrix
+write(sumSq, $2);  # $2: path the result is written to