You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemds.apache.org by ss...@apache.org on 2021/03/11 13:22:14 UTC

[systemds] branch master updated: [SYSTEMDS-2891] Builtin Gaussian Classifier - TODO: replace the looped implementation of the covariance matrix with a vectorized builtin DIA project WS2020/21 Closes #1153.

This is an automated email from the ASF dual-hosted git repository.

ssiddiqi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemds.git


The following commit(s) were added to refs/heads/master by this push:
     new c6c460d  [SYSTEMDS-2891] Builtin Gaussian Classifier   - TODO: replace the looped implementation of the covariance matrix with     a vectorized builtin DIA project WS2020/21 Closes #1153.
c6c460d is described below

commit c6c460d0a5000b14c698ff8ef9e013a6d78d162c
Author: Kevin Pretterhofer <ke...@student.tugraz.at>
AuthorDate: Thu Mar 11 14:19:51 2021 +0100

    [SYSTEMDS-2891] Builtin Gaussian Classifier
      - TODO: replace the looped implementation of the covariance matrix
        with a vectorized builtin
    DIA project WS2020/21
    Closes #1153.
---
 scripts/builtin/gaussianClassifier.dml             | 128 ++++++++++++++++
 .../java/org/apache/sysds/common/Builtins.java     |   1 +
 .../builtin/BuiltinGaussianClassifierTest.java     | 168 +++++++++++++++++++++
 .../scripts/functions/builtin/GaussianClassifier.R | 102 +++++++++++++
 .../functions/builtin/GaussianClassifier.dml       |  37 +++++
 .../builtin/GaussianClassifierPrediction.dml       |  68 +++++++++
 6 files changed, 504 insertions(+)

diff --git a/scripts/builtin/gaussianClassifier.dml b/scripts/builtin/gaussianClassifier.dml
new file mode 100644
index 0000000..c17db9f
--- /dev/null
+++ b/scripts/builtin/gaussianClassifier.dml
@@ -0,0 +1,128 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+#
+# Computes the parameters needed for Gaussian Classification.
+# Thus it computes the following per class: the prior probability,
+# the inverse covariance matrix, the mean per feature and the determinant
+# of the covariance matrix. Furthermore (if not explicitly defined), it
+# adds some small smoothing value along the variances, to prevent
+# numerical errors / instabilities.
+#
+#
+# INPUT PARAMETERS:
+# -------------------------------------------------------------------------------------------------
+# NAME           TYPE               DEFAULT  MEANING
+# -------------------------------------------------------------------------------------------------
+# D              Matrix[Double]     ---      Input matrix (training set)
+# C              Matrix[Double]     ---      Target vector
+# varSmoothing   Double             1e-9     Smoothing factor for variances
+# verbose        Boolean            TRUE     Print accuracy of the training set
+# ---------------------------------------------------------------------------------------------
+# OUTPUT:
+# ---------------------------------------------------------------------------------------------
+# NAME                  TYPE             DEFAULT  MEANING
+# ---------------------------------------------------------------------------------------------
+# classPriors           Matrix[Double]   ---      Vector storing the class prior probabilities
+# classMeans            Matrix[Double]   ---      Matrix storing the means of the classes
+# classInvCovariances   List[Unknown]    ---      List of inverse covariance matrices
+# determinants          Matrix[Double]   ---      Vector storing the determinants of the classes
+# ---------------------------------------------------------------------------------------------
+#
+
+
+m_gaussianClassifier = function(Matrix[Double] D, Matrix[Double] C, Double varSmoothing=1e-9, Boolean verbose = TRUE)
+  return (Matrix[Double] classPriors, Matrix[Double] classMeans,
+  List[Unknown] classInvCovariances, Matrix[Double] determinants)
+{
+  #Retrieve number of samples, classes and features
+  nSamples = nrow(D)
+  nClasses = max(C)
+  nFeats = ncol(D)
+
+  #Compute means, variances and priors
+  classCounts = aggregate(target=C, groups=C, fn="count", ngroups=as.integer(nClasses));
+  classMeans = aggregate(target=D, groups=C, fn="mean", ngroups=as.integer(nClasses));
+  classVars = aggregate(target=D, groups=C, fn="variance", ngroups=as.integer(nClasses));
+  classPriors = classCounts / nSamples
+
+  smoothedVar = diag(matrix(1.0, rows=nFeats, cols=1)) * max(classVars) * varSmoothing
+
+  classInvCovariances = list()
+  determinants = matrix(0, rows=nClasses, cols=1)
+
+  #Compute determinants and inverseCovariances
+  for (class in 1:nClasses)
+  {
+    covMatrix = matrix(0, rows=nFeats, cols=nFeats)
+    classMatrix = removeEmpty(target=D, margin="rows", select=(C==class))
+    # TODO replace with implementation of new built-in for var-cov matrix
+    # possible vectorized implementation but results are varying in some digits
+    # difference = classMatrix - classMeans[class,]
+    # cov_S = 1/nrow(classMatrix) * (t(difference) %*% difference)
+    
+    for (i in 1:nFeats)
+    {
+      for (j in 1:nFeats)
+      {
+        if (j == i)
+          covMatrix[i,j] = classVars[class, j]
+        else if (j < i)
+          covMatrix[i,j] = covMatrix[j,i]
+        else
+          covMatrix[i,j] = cov(classMatrix[,i], classMatrix[,j])
+      }
+    }
+
+    #Apply smoothing of the variances, to avoid numerical errors
+    covMatrix = covMatrix + smoothedVar
+
+    #Compute inverse
+    [eVals, eVecs] = eigen(covMatrix)
+    lam = diag(eVals^(-1))
+    invCovMatrix = eVecs %*% lam %*% t(eVecs)
+
+    #Compute determinant
+    det = prod(eVals)
+
+    determinants[class, 1] = det
+    classInvCovariances = append(classInvCovariances, invCovMatrix)
+  }
+
+  #Compute accuracy on the training set
+  if (verbose)
+  {
+    results = matrix(0, rows=nSamples, cols=nClasses)
+    parfor (class in 1:nClasses)
+    {
+      for (i in 1:nSamples)
+      {
+        intermediate = 0
+        meanDiff = (D[i,] - classMeans[class,])
+        intermediate = -1/2 * log((2*pi)^nFeats * determinants[class,])
+        intermediate = intermediate - 1/2 * (meanDiff %*% as.matrix(classInvCovariances[class]) %*% t(meanDiff))
+        intermediate = log(classPriors[class,]) + intermediate
+        results[i, class] = intermediate
+      }
+    }
+    acc = sum(rowIndexMax(results) == C) / nSamples * 100
+    print("Training Accuracy (%): " + acc)
+  }
+}
diff --git a/src/main/java/org/apache/sysds/common/Builtins.java b/src/main/java/org/apache/sysds/common/Builtins.java
index 60a84a5..316f6e3 100644
--- a/src/main/java/org/apache/sysds/common/Builtins.java
+++ b/src/main/java/org/apache/sysds/common/Builtins.java
@@ -118,6 +118,7 @@ public enum Builtins {
 	EVAL("eval", false),
 	FLOOR("floor", false),
 	FRAME_SORT("frameSort", true),
+	GAUSSIAN_CLASSIFIER("gaussianClassifier", true),
 	GET_PERMUTATIONS("getPermutations", true),
 	GLM("glm", true),
 	GMM("gmm", true),
diff --git a/src/test/java/org/apache/sysds/test/functions/builtin/BuiltinGaussianClassifierTest.java b/src/test/java/org/apache/sysds/test/functions/builtin/BuiltinGaussianClassifierTest.java
new file mode 100644
index 0000000..38ac980
--- /dev/null
+++ b/src/test/java/org/apache/sysds/test/functions/builtin/BuiltinGaussianClassifierTest.java
@@ -0,0 +1,168 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.test.functions.builtin;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+
+import org.apache.sysds.runtime.matrix.data.MatrixValue.CellIndex;
+import org.apache.sysds.test.AutomatedTestBase;
+import org.apache.sysds.test.TestConfiguration;
+import org.apache.sysds.test.TestUtils;
+import org.junit.Test;
+
+public class BuiltinGaussianClassifierTest extends AutomatedTestBase
+{
+	private final static String TEST_NAME = "GaussianClassifier";
+	private final static String TEST_DIR = "functions/builtin/";
+	private final static String TEST_CLASS_DIR = TEST_DIR + BuiltinGaussianClassifierTest.class.getSimpleName() + "/";
+
+	private final static String DATASET = SCRIPT_DIR + "functions/transform/input/iris/iris.csv";
+
+
+	@Override
+	public void setUp() {
+		addTestConfiguration(TEST_NAME,new TestConfiguration(TEST_CLASS_DIR, TEST_NAME,new String[]{"B"})); 
+	}
+
+	@Test
+	public void testSmallDenseFiveClasses() {
+		testGaussianClassifier(80, 10, 0.9, 5);
+	}
+
+	@Test
+	public void testSmallDenseTenClasses() {
+		testGaussianClassifier(80, 30, 0.9, 10);
+	}
+
+	@Test
+	public void testBiggerDenseFiveClasses() {
+		testGaussianClassifier(200, 50, 0.9, 5);
+	}
+
+	@Test
+	public void testBiggerDenseTenClasses() {
+		testGaussianClassifier(200, 50, 0.9, 10);
+	}
+
+	@Test
+	public void testBiggerSparseFiveClasses() {
+		testGaussianClassifier(200, 50, 0.3, 5);
+	}
+
+	@Test
+	public void testBiggerSparseTenClasses() {
+		testGaussianClassifier(200, 50, 0.3, 10);
+	}
+
+	@Test
+	public void testSmallSparseFiveClasses() {
+		testGaussianClassifier(80, 30, 0.3, 5);
+	}
+
+	@Test
+	public void testSmallSparseTenClasses() {
+		testGaussianClassifier(80, 30, 0.3, 10);
+	}
+
+	public void testGaussianClassifier(int rows, int cols, double sparsity, int classes)
+	{
+		loadTestConfiguration(getTestConfiguration(TEST_NAME));
+		String HOME = SCRIPT_DIR + TEST_DIR;
+		fullDMLScriptName = HOME + TEST_NAME + ".dml";
+
+		double varSmoothing = 1e-9;
+
+		List<String> proArgs = new ArrayList<>();
+		proArgs.add("-args");
+		proArgs.add(input("X"));
+		proArgs.add(input("Y"));
+		proArgs.add(String.valueOf(varSmoothing));
+		proArgs.add(output("priors"));
+		proArgs.add(output("means"));
+		proArgs.add(output("determinants"));
+		proArgs.add(output("invcovs"));
+
+		programArgs = proArgs.toArray(new String[proArgs.size()]);
+
+		rCmd = getRCmd(inputDir(), Double.toString(varSmoothing), expectedDir());
+		
+		double[][] X = getRandomMatrix(rows, cols, 0, 100, sparsity, -1);
+		double[][] Y = getRandomMatrix(rows, 1, 0, 1, 1, -1);
+		for(int i=0; i<rows; i++){
+			Y[i][0] = (int)(Y[i][0]*classes) + 1;
+			Y[i][0] = (Y[i][0] > classes) ? classes : Y[i][0];
+		}
+
+		writeInputMatrixWithMTD("X", X, true);
+		writeInputMatrixWithMTD("Y", Y, true);
+
+		runTest(true, EXCEPTION_NOT_EXPECTED, null, -1);
+
+		runRScript(true);
+
+		HashMap<CellIndex, Double> priorR = readRMatrixFromExpectedDir("priors");
+		HashMap<CellIndex, Double> priorSYSTEMDS= readDMLMatrixFromOutputDir("priors");
+		HashMap<CellIndex, Double> meansRtemp = readRMatrixFromExpectedDir("means");
+		HashMap<CellIndex, Double> meansSYSTEMDStemp = readDMLMatrixFromOutputDir("means");
+		HashMap<CellIndex, Double> determinantsRtemp = readRMatrixFromExpectedDir("determinants");
+		HashMap<CellIndex, Double> determinantsSYSTEMDStemp = readDMLMatrixFromOutputDir("determinants");
+		HashMap<CellIndex, Double> invcovsRtemp = readRMatrixFromExpectedDir("invcovs");
+		HashMap<CellIndex, Double> invcovsSYSTEMDStemp = readDMLMatrixFromOutputDir("invcovs");
+
+		double[][] meansR = TestUtils.convertHashMapToDoubleArray(meansRtemp);
+		double[][] meansSYSTEMDS = TestUtils.convertHashMapToDoubleArray(meansSYSTEMDStemp);
+		double[][] determinantsR = TestUtils.convertHashMapToDoubleArray(determinantsRtemp);
+		double[][] determinantsSYSTEMDS = TestUtils.convertHashMapToDoubleArray(determinantsSYSTEMDStemp);
+		double[][] invcovsR = TestUtils.convertHashMapToDoubleArray(invcovsRtemp);
+		double[][] invcovsSYSTEMDS = TestUtils.convertHashMapToDoubleArray(invcovsSYSTEMDStemp);
+
+		TestUtils.compareMatrices(priorR, priorSYSTEMDS, Math.pow(10, -5.0), "priorR", "priorSYSTEMDS");
+		TestUtils.compareMatricesBitAvgDistance(meansR, meansSYSTEMDS, 5L,5L, this.toString());
+		TestUtils.compareMatricesBitAvgDistance(determinantsR, determinantsSYSTEMDS, (long)2E+12,(long)2E+12, this.toString());
+		TestUtils.compareMatricesBitAvgDistance(invcovsR, invcovsSYSTEMDS, (long)2E+20,(long)2E+20, this.toString());
+	}
+
+	@Test
+	public void testIrisPrediction()
+	{
+		loadTestConfiguration(getTestConfiguration(TEST_NAME));
+		String HOME = SCRIPT_DIR + TEST_DIR;
+		fullDMLScriptName = HOME + TEST_NAME + "Prediction.dml";
+
+		double varSmoothing = 1e-9;
+
+		List<String> proArgs = new ArrayList<>();
+		proArgs.add("-args");
+		proArgs.add(DATASET);
+		proArgs.add(String.valueOf(varSmoothing));
+		proArgs.add(output("trueLabels"));
+		proArgs.add(output("predLabels"));
+
+		programArgs = proArgs.toArray(new String[proArgs.size()]);
+		runTest(true, EXCEPTION_NOT_EXPECTED, null, -1);
+
+		HashMap<CellIndex, Double> trueLabels = readDMLMatrixFromOutputDir("trueLabels");
+		HashMap<CellIndex, Double> predLabels = readDMLMatrixFromOutputDir("predLabels");
+
+		TestUtils.compareMatrices(trueLabels, predLabels, 0, "trueLabels", "predLabels");
+	}
+}
diff --git a/src/test/scripts/functions/builtin/GaussianClassifier.R b/src/test/scripts/functions/builtin/GaussianClassifier.R
new file mode 100644
index 0000000..5a0ba1a
--- /dev/null
+++ b/src/test/scripts/functions/builtin/GaussianClassifier.R
@@ -0,0 +1,102 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+args <- commandArgs(TRUE)
+library("Matrix")
+
+D <- as.matrix(readMM(paste(args[1], "X.mtx", sep="")))
+c <- as.matrix(readMM(paste(args[1], "Y.mtx", sep="")))
+
+nClasses <- as.integer(max(c))
+varSmoothing <- as.double(args[2])
+
+nSamples <- nrow(D)
+nFeatures <- ncol(D)
+
+classInvCovariances <- list()
+
+classMeans <- aggregate(D, by=list(c), FUN= mean)
+classMeans <- classMeans[1:nFeatures+1]
+
+classVars <- aggregate(D, by=list(c), FUN=var)
+classVars[is.na(classVars)] <- 0
+smoothedVar <- varSmoothing * max(classVars) * diag(nFeatures)
+
+classCounts <- aggregate(c, by=list(c), FUN=length)
+classCounts <- classCounts[2]
+classPriors <- classCounts / nSamples
+
+determinants <- matrix(0, nrow=nClasses, ncol=1)
+
+for (i in 1:nClasses)
+{
+  classMatrix <- subset(D, c==i)
+  covMatrix <- cov(x=classMatrix, use="all.obs")
+  covMatrix[is.na(covMatrix)] <- 0
+  covMatrix <- covMatrix + smoothedVar
+  #determinant <- det(covMatrix)
+  #determinants[i] <- det(covMatrix)
+
+  ev <- eigen(covMatrix)
+  vecs <- ev$vectors
+  vals <- ev$values
+  lam <- diag(vals^(-1))
+  determinants[i] <- prod(vals)
+
+  invCovMatrix <- vecs %*% lam %*% t(vecs)
+  invCovMatrix[is.na(invCovMatrix)] <- 0
+  classInvCovariances[[i]] <- invCovMatrix
+}
+
+
+#Calc accuracy
+results <- matrix(0, nrow=nSamples, ncol=nClasses)
+for (class in 1:nClasses)
+{
+  for (i in 1:nSamples)
+  {
+    intermediate <- 0
+    meanDiff <- (D[i,] - classMeans[class,])
+    intermediate <- -1/2 * log((2*pi)^nFeatures * determinants[class,])
+    intermediate <- intermediate - 1/2 * (as.matrix(meanDiff) %*% as.matrix(classInvCovariances[[class]]) 
+    %*% t(as.matrix(meanDiff)))
+    intermediate <- log(classPriors[class,]) + intermediate
+    results[i, class] <- intermediate
+  }
+}
+
+pred <- max.col(results)
+acc <- sum(pred == c) / nSamples * 100
+print(paste("Training Accuracy (%): ", acc, sep=""))
+
+classPriors <- data.matrix(classPriors)
+classMeans <- data.matrix(classMeans)
+
+#Cbind the inverse covariance matrices, to make them comparable in the unit tests
+stackedInvCovs <- classInvCovariances[[1]]
+for (i in 2:nClasses)
+{
+  stackedInvCovs <- cbind(stackedInvCovs, classInvCovariances[[i]])
+}
+
+writeMM(as(classPriors, "CsparseMatrix"), paste(args[3], "priors", sep=""));
+writeMM(as(classMeans, "CsparseMatrix"), paste(args[3], "means", sep=""));
+writeMM(as(determinants, "CsparseMatrix"), paste(args[3], "determinants", sep=""));
+writeMM(as(stackedInvCovs, "CsparseMatrix"), paste(args[3], "invcovs", sep=""));
diff --git a/src/test/scripts/functions/builtin/GaussianClassifier.dml b/src/test/scripts/functions/builtin/GaussianClassifier.dml
new file mode 100644
index 0000000..f868b27
--- /dev/null
+++ b/src/test/scripts/functions/builtin/GaussianClassifier.dml
@@ -0,0 +1,37 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+X = read($1);
+y = read($2);
+
+[prior, means, covs, det] = gaussianClassifier(D=X, C=y, varSmoothing=$3);
+
+#Cbind the inverse covariance matrices, to make them comparable in the unit tests
+invcovs = as.matrix(covs[1])
+for (i in 2:max(y))
+{
+  invcovs = cbind(invcovs, as.matrix(covs[i]))
+}
+
+write(prior, $4);
+write(means, $5);
+write(det, $6);
+write(invcovs, $7);
diff --git a/src/test/scripts/functions/builtin/GaussianClassifierPrediction.dml b/src/test/scripts/functions/builtin/GaussianClassifierPrediction.dml
new file mode 100644
index 0000000..88153d1
--- /dev/null
+++ b/src/test/scripts/functions/builtin/GaussianClassifierPrediction.dml
@@ -0,0 +1,68 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+O = read($1, data_type = "frame", format = "csv")
+
+#Create feature matrix to learn parameters (45 samples per class)
+X = as.matrix(O[1:45, 2:ncol(O)-1]) #setosa
+X = rbind(X, as.matrix(O[51:95, 2:ncol(O)-1])) #versicolor
+X = rbind(X, as.matrix(O[101:145, 2:ncol(O)-1])) #virginica
+
+y = matrix (1, rows=45, cols=1)
+y = rbind(y, matrix(2,  rows=45, cols=1))
+y = rbind(y, matrix(3, rows=45, cols=1))
+
+[prior, means, covs, det] = gaussianClassifier(D=X, C=y, varSmoothing=$2);
+
+#Create feature matrix for prediction (5 samples per class)
+Xp = as.matrix(O[46:50, 2:ncol(O)-1]) #setosa
+Xp = rbind(Xp, as.matrix(O[96:100, 2:ncol(O)-1])) #versicolor
+Xp = rbind(Xp, as.matrix(O[146:150, 2:ncol(O)-1])) #virginica
+
+#Set the target labels
+yp = matrix(1, rows=5, cols=1)
+yp = rbind(yp, matrix(2, rows=5, cols=1))
+yp = rbind(yp, matrix(3, rows=5, cols=1))
+
+#Compute the prediction with the learned parameters
+nSamples = nrow(Xp)
+nClasses = max(yp)
+nFeats = ncol(Xp)
+results = matrix(0, rows=nSamples, cols=nClasses)
+
+for (class in 1:nClasses)
+{
+  for (i in 1:nSamples)
+  {
+    intermediate = 0
+    meanDiff = (Xp[i,] - means[class,])
+    intermediate = -1/2 * log((2*pi)^nFeats * det[class,])
+    intermediate = intermediate - 1/2 * (meanDiff %*% as.matrix(covs[class]) %*% t(meanDiff))
+    intermediate = log(prior[class,]) + intermediate
+    results[i, class] = intermediate
+  }
+}
+
+#Get the predicted labels from the result matrix
+predicted = rowIndexMax(results)
+
+write(yp, $3);
+write(predicted, $4);