Posted to commits@systemds.apache.org by mb...@apache.org on 2021/07/18 15:27:05 UTC

[systemds] branch master updated: [SYSTEMDS-2849] Example GAN implementation and tests

This is an automated email from the ASF dual-hosted git repository.

mboehm7 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemds.git


The following commit(s) were added to refs/heads/master by this push:
     new 8d7e52b  [SYSTEMDS-2849] Example GAN implementation and tests
8d7e52b is described below

commit 8d7e52b3c01661cda9c25e87a5436906e209c1f3
Author: Christoph Kainz <ch...@student.tugraz.at>
AuthorDate: Thu Jun 24 13:20:53 2021 +0200

    [SYSTEMDS-2849] Example GAN implementation and tests
    
    AMLS project SS2021.
    Closes #1324.
---
 .../apache/sysds/test/applications/GANTest.java    |  83 ++++
 src/test/scripts/applications/GAN/GAN_cnn.dml      | 510 +++++++++++++++++++++
 src/test/scripts/applications/GAN/GAN_mnist.dml    |  74 +++
 src/test/scripts/applications/GAN/GAN_simple.dml   | 358 +++++++++++++++
 4 files changed, 1025 insertions(+)

diff --git a/src/test/java/org/apache/sysds/test/applications/GANTest.java b/src/test/java/org/apache/sysds/test/applications/GANTest.java
new file mode 100644
index 0000000..3ee26a7
--- /dev/null
+++ b/src/test/java/org/apache/sysds/test/applications/GANTest.java
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.test.applications;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.List;
+
+import org.apache.sysds.test.TestConfiguration;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameters;
+import org.apache.sysds.common.Types.ExecMode;
+import org.apache.sysds.test.AutomatedTestBase;
+
+@RunWith(value = Parameterized.class)
+@net.jcip.annotations.NotThreadSafe
+public class GANTest extends AutomatedTestBase
+{
+	protected final static String TEST_DIR = "applications/GAN/";
+	protected final static String TEST_NAME = "GAN_mnist";
+	protected String TEST_CLASS_DIR = TEST_DIR + GANTest.class.getSimpleName() + "/";
+
+	protected String _type;
+
+	public GANTest(String type) {
+		_type = type;
+	}
+
+	@Parameters
+	public static Collection<Object[]> data() {
+		Object[][] data = new Object[][] { 
+			// {"cnn"}, //TODO investigate why this takes so long, and why spark instructions appear in hybrid mode
+			{"simple"}};
+		return Arrays.asList(data);
+	}
+
+	@Override
+	public void setUp() {
+		addTestConfiguration(TEST_NAME, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME, new String[] {"accuracy.scalar"}));
+	}
+
+	@Test
+	public void testGAN() {
+		System.out.println("Running GAN test");
+		getAndLoadTestConfiguration(TEST_NAME);
+		ExecMode modeOld = setExecMode(ExecMode.SINGLE_NODE);
+		try {
+			fullDMLScriptName = getScript();
+			List<String> proArgs = new ArrayList<>();
+			proArgs.add("-args");
+			proArgs.add(_type);
+			proArgs.add(output(""));
+			programArgs = proArgs.toArray(new String[proArgs.size()]);
+			runTest(true, EXCEPTION_NOT_EXPECTED, null, -1);
+	
+			writeExpectedScalar("accuracy", 0.95); //0.5 w/ 5000 instead of 1000
+			compareResults(0.15);
+		}
+		finally {
+			resetExecMode(modeOld);
+		}
+	}
+}
diff --git a/src/test/scripts/applications/GAN/GAN_cnn.dml b/src/test/scripts/applications/GAN/GAN_cnn.dml
new file mode 100644
index 0000000..8759b2d
--- /dev/null
+++ b/src/test/scripts/applications/GAN/GAN_cnn.dml
@@ -0,0 +1,510 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+source("nn/layers/affine.dml") as affine
+source("nn/layers/leaky_relu.dml") as leaky_relu
+source("nn/layers/conv2d_builtin.dml") as conv2d
+source("nn/layers/conv2d_transpose.dml") as conv2d_transpose
+source("nn/layers/log_loss.dml") as log_loss
+source("nn/layers/dropout.dml") as dropout
+source("nn/layers/batch_norm1d.dml") as batch_norm_1d
+source("nn/layers/batch_norm2d.dml") as batch_norm_2d
+source("nn/layers/softmax.dml") as softmax
+source("nn/layers/sigmoid.dml") as sigmoid
+source("nn/layers/tanh.dml") as tanh
+source("nn/optim/adam.dml") as adam
+
+train = function(matrix[double] X, int iterations)
+    return (matrix[double] GW_1, matrix[double] Gb_1, matrix[double] GW_2, matrix[double] Gb_2, matrix[double] GW_3,
+            matrix[double] Gb_3, matrix[double] GW_4, matrix[double] Gb_4, matrix[double] DW_1, matrix[double] Db_1,
+            matrix[double] DW_2, matrix[double] Db_2, matrix[double] DW_3, matrix[double] Db_3)
+{
+/*
+   * Trains the generator and the discriminator of the GAN.
+   *
+   * The input matrix, X, has N examples, each with 784 features.
+   *
+   * Inputs:
+   *  - X: Input data matrix, of shape (N, 784).
+   *  - iterations: number of iterations for training
+   *
+   * Outputs:
+   *  - GW_1: Generator 1st layer weights (parameters) matrix, of shape (100, D).
+   *  - Gb_1: Generator 1st layer biases vector, of shape (1, D).
+   *  - GW_2: Generator 2nd layer weights (parameters) matrix, of shape (256, 128*HWf*HWf).
+   *  - Gb_2: Generator 2nd layer biases vector, of shape (128, 1).
+   *  - GW_3: Generator 3rd layer weights (parameters) matrix, of shape (128, 64*HWf*HWf).
+   *  - Gb_3: Generator 3rd layer biases vector, of shape (64, 1).
+   *  - GW_4: Generator 4th layer weights (parameters) matrix, of shape (64, 1*HWf*HWf).
+   *  - Gb_4: Generator 4th layer biases vector, of shape (1, 1).
+   *  - DW_1: Discriminator 1st layer weights (parameters) matrix, of shape (64, 1*HWf*HWf).
+   *  - Db_1: Discriminator 1st layer biases vector, of shape (64, 1).
+   *  - DW_2: Discriminator 2nd layer weights (parameters) matrix, of shape (128, 64*HWf*HWf).
+   *  - Db_2: Discriminator 2nd layer biases vector, of shape (128, 1).
+   *  - DW_3: Discriminator 3rd layer weights (parameters) matrix, of shape (6272, 1).
+   *  - Db_3: Discriminator 3rd layer biases vector, of shape (1, 1).
+*/
+    N = nrow(X)
+    batch_size = 128
+    half_batch = batch_size / 2
+    D = 7*7*256
+    HWf = 5
+
+    #Define Generator:
+    [GW_1, Gb_1] = affine::init(100, D, -1)
+    [GW_2, Gb_2] = conv2d_transpose::init(128, 256, HWf, HWf)
+    [GW_3, Gb_3] = conv2d_transpose::init(64, 128, HWf, HWf)
+    [GW_4, Gb_4] = conv2d_transpose::init(1, 64, HWf, HWf)
+    [mGW_1, vGW_1] = adam::init(GW_1)
+    [mGb_1, vGb_1] = adam::init(Gb_1)
+    [mGW_2, vGW_2] = adam::init(GW_2)
+    [mGb_2, vGb_2] = adam::init(Gb_2)
+    [mGW_3, vGW_3] = adam::init(GW_3)
+    [mGb_3, vGb_3] = adam::init(Gb_3)
+    [mGW_4, vGW_4] = adam::init(GW_4)
+    [mGb_4, vGb_4] = adam::init(Gb_4)
+
+    gen_model = list(GW_1, Gb_1, GW_2, Gb_2, GW_3, Gb_3, GW_4, Gb_4)
+    gen_grad = list(mGW_1, vGW_1, mGb_1, vGb_1, mGW_2, vGW_2, mGb_2, vGb_2, mGW_3, vGW_3, mGb_3, vGb_3, mGW_4, vGW_4, mGb_4, vGb_4)
+
+    #Define Discriminator:
+    [DW_1, Db_1] = conv2d::init(64, 1, HWf, HWf, -1)
+    [DW_2, Db_2] = conv2d::init(128, 64, HWf, HWf, -1)
+    [DW_3, Db_3] = affine::init(6272, 1, -1)
+    [mDW_1, vDW_1] = adam::init(DW_1)
+    [mDb_1, vDb_1] = adam::init(Db_1)
+    [mDW_2, vDW_2] = adam::init(DW_2)
+    [mDb_2, vDb_2] = adam::init(Db_2)
+    [mDW_3, vDW_3] = adam::init(DW_3)
+    [mDb_3, vDb_3] = adam::init(Db_3)
+
+    disc_model = list(DW_1, Db_1, DW_2, Db_2, DW_3, Db_3)
+    disc_grad = list(mDW_1, vDW_1, mDb_1, vDb_1, mDW_2, vDW_2, mDb_2, vDb_2, mDW_3, vDW_3, mDb_3, vDb_3)
+
+    fake = matrix(0, 0, 784)
+
+    for(i in 1:iterations)
+    {
+        print('step ' + toString(i) + ' / ' + toString(iterations))
+        #generate samples
+        noise = rand(rows = half_batch, cols = 100, min = 0.0, max = 1.0)
+        [fake_images, gen_params] = gen_forward(noise, gen_model, 'train')
+        rand = sample(N, half_batch)
+        real_images = matrix(0, half_batch, 784)
+        for(r in 1:half_batch)
+        {
+            real_images[r,] = X[as.scalar(rand[r]),]
+        }
+
+        #train discriminator
+        [decision, disc_params] = disc_forward(real_images, disc_model)
+        targets = matrix(1, half_batch, 1)
+        dloss1 = log_loss::forward(decision, targets)
+        [dX, disc_model, disc_grad] = disc_backward(decision, targets, FALSE, i, disc_model, disc_grad, disc_params)
+        [decision, disc_params] = disc_forward(fake_images, disc_model)
+        targets = matrix(0, half_batch, 1)
+        dloss2 = log_loss::forward(decision, targets)
+        [dX, disc_model, disc_grad] = disc_backward(decision, targets, FALSE, i, disc_model, disc_grad, disc_params)
+        print('discriminator_loss: ' + toString((dloss1 + dloss2)))
+
+        #generate samples
+        noise = rand(rows = batch_size, cols = 100, min = 0.0, max = 1.0)
+        [fake_images, gen_params] = gen_forward(noise, gen_model, 'train')
+
+        #train generator
+        [decision, disc_params] = disc_forward(fake_images, disc_model)
+        targets = matrix(1, batch_size, 1)
+        gloss = log_loss::forward(decision, targets)
+        [dX, disc_model, disc_grad] = disc_backward(decision, targets, TRUE, i, disc_model, disc_grad, disc_params)
+        [gen_model, gen_grad] = gen_backward(dX, i, gen_model, gen_grad, gen_params, 'train')
+        print('generator_loss: ' + toString(gloss))
+
+        # get sample generated image to observe evolution of generated images
+        if(i %% (iterations/10) == 0)
+        {
+            fake = rbind(fake, fake_images[1])
+        }
+    }
+    out_dir = "target/testTemp/applications/GAN/GANTest/"
+    fake = 0.5 * fake + 0.5
+    write(fake, out_dir+"/evo")
+    DW_1 = as.matrix(disc_model[1])
+    Db_1 = as.matrix(disc_model[2])
+    DW_2 = as.matrix(disc_model[3])
+    Db_2 = as.matrix(disc_model[4])
+    DW_3 = as.matrix(disc_model[5])
+    Db_3 = as.matrix(disc_model[6])
+    GW_1 = as.matrix(gen_model[1])
+    Gb_1 = as.matrix(gen_model[2])
+    GW_2 = as.matrix(gen_model[3])
+    Gb_2 = as.matrix(gen_model[4])
+    GW_3 = as.matrix(gen_model[5])
+    Gb_3 = as.matrix(gen_model[6])
+    GW_4 = as.matrix(gen_model[7])
+    Gb_4 = as.matrix(gen_model[8])
+}
+
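+# A minimal usage sketch for train() above; the data path and preprocessing
+# are illustrative assumptions only (GAN_mnist.dml is the actual test driver):
+#
+#   source("src/test/scripts/applications/GAN/GAN_cnn.dml") as GAN
+#   data = read("data/mnist.csv", format="csv")   # hypothetical input path
+#   X = data[,2:ncol(data)] / 127.5 - 1           # scale pixels to [-1, 1]
+#   [GW1, Gb1, GW2, Gb2, GW3, Gb3, GW4, Gb4,
+#    DW1, Db1, DW2, Db2, DW3, Db3] = GAN::train(X, 500)
+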
+gen_forward = function(matrix[double] noise, list[unknown] model, String mode)
+    return(matrix[double] images, list[unknown] params)
+{
+/*
+   * Computes the forward pass of the generator.
+   * Generates fake images from input noise.
+   *
+   * Inputs:
+   *  - noise: Randomly generated noise, of shape (N, 100).
+   *  - model: List containing the generator weights and biases.
+   *  - mode: 'train' or 'test' for batch normalization layers.
+   *
+   * Outputs:
+   *  - images: Generated images, of shape (N, 784).
+   *  - params: List of outputs of the generator layers, needed for backward pass.
+*/
+    D = 7*7*256
+    HWf = 5
+    pad = 2
+    stride = 2
+
+    GW_1 = as.matrix(model[1])
+    Gb_1 = as.matrix(model[2])
+    GW_2 = as.matrix(model[3])
+    Gb_2 = as.matrix(model[4])
+    GW_3 = as.matrix(model[5])
+    Gb_3 = as.matrix(model[6])
+    GW_4 = as.matrix(model[7])
+    Gb_4 = as.matrix(model[8])
+
+    #Generator forward:
+    #Layer 1
+    out_1G = affine::forward(noise, GW_1, Gb_1)
+    [out_1G_batch_norm, ema_mean_upd_1, ema_var_upd_1, cache_mean_1, cache_var_1, cache_norm_1] = batch_norm_1d::forward(out_1G,
+                                                matrix(1,1,D), matrix(0,1,D), mode, matrix(0,1,D), matrix(1,1,D), 0.99, 0.001)
+    out_1G_leaky_relu = leaky_relu::forward(out_1G_batch_norm)
+    #Layer 2
+    [out_2G, hout_2G, wout_2G] = conv2d_transpose::forward(out_1G_leaky_relu, GW_2, Gb_2, 256, 7, 7, HWf, HWf, 1, 1,
+                                                                   pad, pad, 0, 0)
+    [out_2G_batch_norm, ema_mean_upd_2, ema_var_upd_2, cache_mean_2, cache_inv_var_2] = batch_norm_2d::forward(out_2G,
+                matrix(1,128,1), matrix(0,128,1), 128, hout_2G, wout_2G, mode, matrix(0,128,1), matrix(1,128,1), 0.99, 0.001)
+    out_2G_leaky_relu = leaky_relu::forward(out_2G_batch_norm)
+
+    #Layer 3
+    [out_3G, hout_3G, wout_3G] = conv2d_transpose::forward(out_2G_leaky_relu, GW_3, Gb_3, 128, hout_2G, wout_2G, HWf,
+                                                                   HWf, stride, stride, pad, pad, 1, 1)
+    [out_3G_batch_norm, ema_mean_upd_3, ema_var_upd_3, cache_mean_3, cache_inv_var_3] = batch_norm_2d::forward(out_3G,
+                matrix(1,64,1), matrix(0,64,1), 64, hout_3G, wout_3G, mode, matrix(0,64,1), matrix(1,64,1), 0.99, 0.001)
+    out_3G_leaky_relu = leaky_relu::forward(out_3G_batch_norm)
+
+    #Output Layer
+    [out_4G, hout_4G, wout_4G] = conv2d_transpose::forward(out_3G_leaky_relu, GW_4, Gb_4, 64, hout_3G, wout_3G, HWf,
+                                                                   HWf, stride, stride, pad, pad, 1, 1)
+    out_4G_tanh = tanh::forward(out_4G)
+
+    images = out_4G_tanh
+    params = list(noise, out_1G, out_1G_batch_norm, ema_mean_upd_1, ema_var_upd_1, cache_mean_1, cache_var_1,
+                   cache_norm_1, out_1G_leaky_relu, out_2G, hout_2G, wout_2G, out_2G_batch_norm, cache_mean_2, cache_inv_var_2,
+                   out_2G_leaky_relu, out_3G, hout_3G, wout_3G, out_3G_batch_norm, cache_mean_3, cache_inv_var_3, out_3G_leaky_relu,
+                   out_4G, hout_4G, wout_4G)
+}
+
+disc_forward = function(matrix[double] X, list[unknown] model)
+    return(matrix[double] decision, list[unknown] params)
+{
+/*
+   * Computes the forward pass of the discriminator.
+   * Decides if input images are real or fake.
+   *
+   * Inputs:
+   *  - X: Input matrix containing sample images, of shape (N, 784).
+   *  - model: List containing the discriminator weights and biases.
+   *
+   * Outputs:
+   *  - decision: Decisions for realness of input, of shape (N, 1).
+   *  - params: List of outputs of the discriminator layers, needed for backward pass.
+*/
+    HWin = 28
+    HWf = 5
+    pad = 2
+    stride = 2
+
+    DW_1 = as.matrix(model[1])
+    Db_1 = as.matrix(model[2])
+    DW_2 = as.matrix(model[3])
+    Db_2 = as.matrix(model[4])
+    DW_3 = as.matrix(model[5])
+    Db_3 = as.matrix(model[6])
+
+    #Discriminator forward
+    #Layer 1
+    [out_1D, hout_1D, wout_1D] = conv2d::forward(X, DW_1, Db_1, 1, HWin, HWin, HWf, HWf, stride, stride, pad, pad)
+    out_1D_leaky_relu = leaky_relu::forward(out_1D)
+    [out_1D_dropout, mask_1] = dropout::forward(out_1D_leaky_relu, 0.3, -1)
+
+    #Layer 2
+    [out_2D, hout_2D, wout_2D] = conv2d::forward(out_1D_dropout, DW_2, Db_2, 64, hout_1D, wout_1D, HWf, HWf, stride,
+                                                         stride, pad, pad)
+    out_2D_leaky_relu = leaky_relu::forward(out_2D)
+    [out_2D_dropout, mask_2] = dropout::forward(out_2D_leaky_relu, 0.3, -1)
+
+    #Output Layer
+    out_3D = affine::forward(out_2D_dropout, DW_3, Db_3)
+    decision = sigmoid::forward(out_3D)
+    params = list(X, out_1D, hout_1D, wout_1D, out_1D_leaky_relu, out_1D_dropout, mask_1, out_2D, hout_2D, wout_2D,
+                  out_2D_leaky_relu, out_2D_dropout, mask_2, out_3D)
+}
+
+disc_backward = function(matrix[double] decision, matrix[double] targets, boolean lock, int iteration, list[unknown] model, list[unknown] gradients,
+                         list[unknown] params)
+    return(matrix[double] dX, list[unknown] model, list[unknown] gradients)
+{
+/*
+   * Computes the backward pass of the discriminator.
+   * Updates gradients and weights of the discriminator.
+   *
+   * Inputs:
+   *  - decision: Input matrix containing discriminator decisions, of shape (N, 1).
+   *  - targets: Target values for the decisions, of shape (N, 1).
+   *  - lock: Boolean that governs if discriminator weights are to be updated, TRUE means the weights are not updated.
+   *  - iteration: Current iteration of the training.
+   *  - model: List containing the discriminator weights and biases.
+   *  - gradients: List containing the discriminator gradients.
+   *  - params: List of outputs of the discriminator layers from the forward pass.
+   *
+   * Outputs:
+   *  - dX: Gradient wrt `X`, of shape (N, 784).
+   *  - model: List containing the updated discriminator weights and biases.
+   *  - gradients: List containing the updated discriminator gradients.
+*/
+    HWin = 28
+    HWf = 5
+    pad = 2
+    stride = 2
+
+    lr = 0.0002
+    beta1 = 0.5
+    beta2 = 0.999
+    epsilon = 1e-07
+
+    DW_1 = as.matrix(model[1])
+    Db_1 = as.matrix(model[2])
+    DW_2 = as.matrix(model[3])
+    Db_2 = as.matrix(model[4])
+    DW_3 = as.matrix(model[5])
+    Db_3 = as.matrix(model[6])
+
+    mDW_1 = as.matrix(gradients[1])
+    vDW_1 = as.matrix(gradients[2])
+    mDb_1 = as.matrix(gradients[3])
+    vDb_1 = as.matrix(gradients[4])
+    mDW_2 = as.matrix(gradients[5])
+    vDW_2 = as.matrix(gradients[6])
+    mDb_2 = as.matrix(gradients[7])
+    vDb_2 = as.matrix(gradients[8])
+    mDW_3 = as.matrix(gradients[9])
+    vDW_3 = as.matrix(gradients[10])
+    mDb_3 = as.matrix(gradients[11])
+    vDb_3 = as.matrix(gradients[12])
+
+    #Discriminator backward
+    #Output Layer
+    dloss = log_loss::backward(decision, targets)
+    dout_3D = sigmoid::backward(dloss, as.matrix(params[14]))
+    [dout_2D, dDW_3, dDb_3] = affine::backward(dout_3D, as.matrix(params[12]), DW_3, Db_3)
+
+    #Layer 2
+    dout_2D_dropout = dropout::backward(dout_2D, as.matrix(params[11]), 0.3, as.matrix(params[13]))
+    dout_2D_leaky_relu = leaky_relu::backward(dout_2D_dropout, as.matrix(params[8]))
+    [dout_1D, dDW_2, dDb_2] = conv2d::backward(dout_2D_leaky_relu, as.scalar(params[9]), as.scalar(params[10]),
+                                               as.matrix(params[6]), DW_2, Db_2, 64, as.scalar(params[3]),
+                                               as.scalar(params[4]), HWf, HWf, stride, stride, pad, pad)
+
+    #Layer 1
+    dout_1D_dropout = dropout::backward(dout_1D, as.matrix(params[5]), 0.3, as.matrix(params[7]))
+    dout_1D_leaky_relu = leaky_relu::backward(dout_1D_dropout, as.matrix(params[2]))
+    [dX, dDW_1, dDb_1] = conv2d::backward(dout_1D_leaky_relu, as.scalar(params[3]), as.scalar(params[4]),
+                                          as.matrix(params[1]), DW_1, Db_1, 1, HWin, HWin, HWf, HWf, stride, stride,
+                                          pad, pad)
+
+    if(!lock)
+    {
+        #optimize
+        [DW_1, mDW_1, vDW_1] = adam::update(DW_1, dDW_1, lr, beta1, beta2, epsilon, iteration, mDW_1, vDW_1)
+        [Db_1, mDb_1, vDb_1] = adam::update(Db_1, dDb_1, lr, beta1, beta2, epsilon, iteration, mDb_1, vDb_1)
+        [DW_2, mDW_2, vDW_2] = adam::update(DW_2, dDW_2, lr, beta1, beta2, epsilon, iteration, mDW_2, vDW_2)
+        [Db_2, mDb_2, vDb_2] = adam::update(Db_2, dDb_2, lr, beta1, beta2, epsilon, iteration, mDb_2, vDb_2)
+        [DW_3, mDW_3, vDW_3] = adam::update(DW_3, dDW_3, lr, beta1, beta2, epsilon, iteration, mDW_3, vDW_3)
+        [Db_3, mDb_3, vDb_3] = adam::update(Db_3, dDb_3, lr, beta1, beta2, epsilon, iteration, mDb_3, vDb_3)
+
+        model = list(DW_1, Db_1, DW_2, Db_2, DW_3, Db_3)
+        gradients = list(mDW_1, vDW_1, mDb_1, vDb_1, mDW_2, vDW_2, mDb_2, vDb_2, mDW_3, vDW_3, mDb_3, vDb_3)
+    }
+}
+
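+# The `lock` flag lets the same backward pass serve both training phases:
+# lock=FALSE updates the discriminator's own weights (its training step),
+# while lock=TRUE leaves them frozen and only returns dX so the generator
+# can be trained against a fixed discriminator, as in train() above:
+#
+#   [dX, disc_model, disc_grad] = disc_backward(decision, targets, FALSE, i, disc_model, disc_grad, disc_params)  # D step
+#   [dX, disc_model, disc_grad] = disc_backward(decision, targets, TRUE, i, disc_model, disc_grad, disc_params)   # G step
+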
+gen_backward = function(matrix[double] dX, int iteration, list[unknown] model, list[unknown] gradients, list[unknown] params,
+                        String mode)
+    return(list[unknown] model, list[unknown] gradients)
+{
+/*
+   * Computes the backward pass of the generator.
+   * Updates gradients and weights of the generator.
+   *
+   * Inputs:
+   *  - dX: Gradient wrt `X`, of shape (N, 784).
+   *  - iteration: Current iteration of the training.
+   *  - model: List containing the generator weights and biases.
+   *  - gradients: List containing the generator gradients.
+   *  - params: List of outputs of the generator layers from the forward pass.
+   *
+   * Outputs:
+   *  - model: List containing the updated generator weights and biases.
+   *  - gradients: List containing the updated generator gradients.
+*/
+    D = 7*7*256
+    HWf = 5
+    pad = 2
+    stride = 2
+
+    lr = 0.0002
+    beta1 = 0.5
+    beta2 = 0.999
+    epsilon = 1e-07
+
+    GW_1 = as.matrix(model[1])
+    Gb_1 = as.matrix(model[2])
+    GW_2 = as.matrix(model[3])
+    Gb_2 = as.matrix(model[4])
+    GW_3 = as.matrix(model[5])
+    Gb_3 = as.matrix(model[6])
+    GW_4 = as.matrix(model[7])
+    Gb_4 = as.matrix(model[8])
+
+    mGW_1 = as.matrix(gradients[1])
+    vGW_1 = as.matrix(gradients[2])
+    mGb_1 = as.matrix(gradients[3])
+    vGb_1 = as.matrix(gradients[4])
+    mGW_2 = as.matrix(gradients[5])
+    vGW_2 = as.matrix(gradients[6])
+    mGb_2 = as.matrix(gradients[7])
+    vGb_2 = as.matrix(gradients[8])
+    mGW_3 = as.matrix(gradients[9])
+    vGW_3 = as.matrix(gradients[10])
+    mGb_3 = as.matrix(gradients[11])
+    vGb_3 = as.matrix(gradients[12])
+    mGW_4 = as.matrix(gradients[13])
+    vGW_4 = as.matrix(gradients[14])
+    mGb_4 = as.matrix(gradients[15])
+    vGb_4 = as.matrix(gradients[16])
+
+    #Generator backward
+    #Output Layer
+    dout_4G_tanh = tanh::backward(dX, as.matrix(params[24]))
+    [dout_4G, dGW_4, dGb_4] = conv2d_transpose::backward(dout_4G_tanh, as.scalar(params[25]), as.scalar(params[26]),
+                              as.matrix(params[23]), GW_4, Gb_4, 64, as.scalar(params[21]), as.scalar(params[22]),
+                              HWf, HWf, stride, stride, pad, pad)
+    #Layer 3
+    dout_3G_leaky_relu = leaky_relu::backward(dout_4G, as.matrix(params[20]))
+    [dout_3G_batch_norm, dgamma_3G, dbeta_3G] = batch_norm_2d::backward(dout_3G_leaky_relu, as.matrix(params[21]), as.matrix(params[22]),
+                                                         as.matrix(params[17]), matrix(1,64,1), 64, as.scalar(params[18]),
+                                                         as.scalar(params[19]), 0.001)
+    [dout_3G, dGW_3, dGb_3] = conv2d_transpose::backward(dout_3G_batch_norm, as.scalar(params[18]), as.scalar(params[19]),
+                              as.matrix(params[16]), GW_3, Gb_3, 128, as.scalar(params[11]), as.scalar(params[12]), HWf,
+                              HWf, stride, stride, pad, pad)
+
+    #Layer 2
+    dout_2G_leaky_relu = leaky_relu::backward(dout_3G, as.matrix(params[13]))
+    [dout_2G_batch_norm, dgamma_2G, bbeta_2G] = batch_norm_2d::backward(dout_2G_leaky_relu, as.matrix(params[14]),
+                                                as.matrix(params[15]), as.matrix(params[10]), matrix(1,128,1), 128,
+                                                as.scalar(params[11]), as.scalar(params[12]), 0.001)
+    [dout_2G, dGW_2, dGb_2] = conv2d_transpose::backward(dout_2G_batch_norm, as.scalar(params[11]), as.scalar(params[12]),
+                                                         as.matrix(params[9]), GW_2, Gb_2, 256, 7, 7, HWf, HWf, 1, 1, pad, pad)
+
+    #Layer 1
+    dout_1G_leaky_relu = leaky_relu::backward(dout_2G, as.matrix(params[3]))
+    [dout_1G_batch_norm, dgamma_1G, dbeta_1G] = batch_norm_1d::backward(dout_1G_leaky_relu, as.matrix(params[3]),
+                                                as.matrix(params[4]), as.matrix(params[5]), as.matrix(params[6]),
+                                                as.matrix(params[7]), as.matrix(params[8]), as.matrix(params[2]),
+                                                matrix(1,1,D), matrix(0,1,D), mode, matrix(0,1,D), matrix(1,1,D), 0.99, 0.001)
+    [dout_1G, dGW_1, dGb_1] = affine::backward(dout_1G_batch_norm, as.matrix(params[1]), GW_1, Gb_1)
+
+    #optimize
+    [GW_1, mGW_1, vGW_1] = adam::update(GW_1, dGW_1, lr, beta1, beta2, epsilon, iteration, mGW_1, vGW_1)
+    [Gb_1, mGb_1, vGb_1] = adam::update(Gb_1, dGb_1, lr, beta1, beta2, epsilon, iteration, mGb_1, vGb_1)
+    [GW_2, mGW_2, vGW_2] = adam::update(GW_2, dGW_2, lr, beta1, beta2, epsilon, iteration, mGW_2, vGW_2)
+    [Gb_2, mGb_2, vGb_2] = adam::update(Gb_2, dGb_2, lr, beta1, beta2, epsilon, iteration, mGb_2, vGb_2)
+    [GW_3, mGW_3, vGW_3] = adam::update(GW_3, dGW_3, lr, beta1, beta2, epsilon, iteration, mGW_3, vGW_3)
+    [Gb_3, mGb_3, vGb_3] = adam::update(Gb_3, dGb_3, lr, beta1, beta2, epsilon, iteration, mGb_3, vGb_3)
+    [GW_4, mGW_4, vGW_4] = adam::update(GW_4, dGW_4, lr, beta1, beta2, epsilon, iteration, mGW_4, vGW_4)
+    [Gb_4, mGb_4, vGb_4] = adam::update(Gb_4, dGb_4, lr, beta1, beta2, epsilon, iteration, mGb_4, vGb_4)
+
+    model = list(GW_1, Gb_1, GW_2, Gb_2, GW_3, Gb_3, GW_4, Gb_4)
+    gradients = list(mGW_1, vGW_1, mGb_1, vGb_1, mGW_2, vGW_2, mGb_2, vGb_2, mGW_3, vGW_3, mGb_3, vGb_3, mGW_4, vGW_4, mGb_4, vGb_4)
+}
+
+generate = function(int amount, matrix[double] GW_1, matrix[double] Gb_1, matrix[double] GW_2, matrix[double] Gb_2,
+                    matrix[double] GW_3, matrix[double] Gb_3, matrix[double] GW_4, matrix[double] Gb_4)
+    return(matrix[double] images)
+{
+/*
+   * Generates `amount` images from random noise.
+   *
+   * Inputs:
+   *  - amount: Number of images to generate.
+   *  - GW_1: Generator 1st layer weights (parameters) matrix, of shape (100, D).
+   *  - Gb_1: Generator 1st layer biases vector, of shape (1, D).
+   *  - GW_2: Generator 2nd layer weights (parameters) matrix, of shape (256, 128*HWf*HWf).
+   *  - Gb_2: Generator 2nd layer biases vector, of shape (128, 1).
+   *  - GW_3: Generator 3rd layer weights (parameters) matrix, of shape (128, 64*HWf*HWf).
+   *  - Gb_3: Generator 3rd layer biases vector, of shape (64, 1).
+   *  - GW_4: Generator 4th layer weights (parameters) matrix, of shape (64, 1*HWf*HWf).
+   *  - Gb_4: Generator 4th layer biases vector, of shape (1, 1).
+   *
+   * Outputs:
+   *  - images: Matrix of generated images, of shape (amount, 784).
+*/
+
+    noise = rand(rows = amount, cols = 100, min = 0.0, max = 1.0)
+    # note: batch norm runs in 'train' mode here too, so generation uses batch statistics
+    [images, params] = gen_forward(noise, list(GW_1, Gb_1, GW_2, Gb_2, GW_3, Gb_3, GW_4, Gb_4), 'train')
+}
+
+eval = function(matrix[double] images, matrix[double] DW_1, matrix[double] Db_1, matrix[double] DW_2, matrix[double] Db_2,
+                matrix[double] DW_3, matrix[double] Db_3)
+    return(matrix[double] decision)
+{
+/*
+   * Predicts whether a set of input images is real or fake.
+   *
+   * Inputs:
+   *  - images: Matrix of input images, of shape (N, 784).
+   *  - DW_1: Discriminator 1st layer weights (parameters) matrix, of shape (64, 1*HWf*HWf).
+   *  - Db_1: Discriminator 1st layer biases vector, of shape (64, 1).
+   *  - DW_2: Discriminator 2nd layer weights (parameters) matrix, of shape (128, 64*HWf*HWf).
+   *  - Db_2: Discriminator 2nd layer biases vector, of shape (128, 1).
+   *  - DW_3: Discriminator 3rd layer weights (parameters) matrix, of shape (6272, 1).
+   *  - Db_3: Discriminator 3rd layer biases vector, of shape (1, 1).
+   *
+   * Outputs:
+   *  - decision: Matrix of predictions, of shape (N, 1).
+*/
+
+    [decision, disc_params] = disc_forward(images, list(DW_1, Db_1, DW_2, Db_2, DW_3, Db_3))
+}
+
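+# Sketch of scoring freshly generated images with the trained discriminator;
+# the weight variables are assumed to come from a prior train() call and the
+# sample count is arbitrary:
+#
+#   imgs = generate(16, GW_1, Gb_1, GW_2, Gb_2, GW_3, Gb_3, GW_4, Gb_4)
+#   scores = eval(imgs, DW_1, Db_1, DW_2, Db_2, DW_3, Db_3)
+#   print("mean realness score: " + mean(scores))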
diff --git a/src/test/scripts/applications/GAN/GAN_mnist.dml b/src/test/scripts/applications/GAN/GAN_mnist.dml
new file mode 100644
index 0000000..7018259
--- /dev/null
+++ b/src/test/scripts/applications/GAN/GAN_mnist.dml
@@ -0,0 +1,74 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Generates sample images for a GAN model and reports discriminator accuracy.
+# Can be called with either 'simple' or 'cnn' for a GAN model with affine
+# layers only or a model using a CNN, respectively.
+# This file is called by GANTest.java.
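+#
+# Example standalone invocation (assuming the systemds launcher; the output
+# directory argument is illustrative):
+#
+#   ./bin/systemds src/test/scripts/applications/GAN/GAN_mnist.dml -args simple target/out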
+
+source("src/test/scripts/applications/GAN/GAN_cnn.dml") as GANC
+source("src/test/scripts/applications/GAN/GAN_simple.dml") as GANS
+
+# note: both splits read the small MNIST test csv, presumably to keep the test fast
+train = read("src/test/resources/datasets/MNIST/mnist_test.csv", format="csv")
+test = read("src/test/resources/datasets/MNIST/mnist_test.csv", format="csv")
+out_dir = $2
+
+X_train = train[,2:ncol(train)] / 127.5 - 1
+X_test = test[,2:ncol(test)] / 127.5 - 1
+
+# amount of samples to be generated
+amount = 10
+
+# generate fake images
+if($1 == 'cnn') {
+    [GW1, Gb1, GW2, Gb2, GW3, Gb3, GW4, Gb4, DW1, Db1, DW2, Db2, DW3, Db3] = GANC::train(X_train, 500)
+    fake = GANC::generate(amount, GW1, Gb1, GW2, Gb2, GW3, Gb3, GW4, Gb4)
+}
+else if($1 == 'simple') {
+    [GW1, Gb1, GW2, Gb2, DW1, Db1, DW2, Db2] = GANS::train(X_train, 500)
+    fake = GANS::generate(amount, GW1, Gb1, GW2, Gb2)
+}
+
+# draw `amount` real images at random
+rand = sample(nrow(X_test), amount)
+real = matrix(0, amount, 784)
+for(r in 1:amount)
+    real[r,] = X_test[as.scalar(rand[r]),]
+
+# create labels
+images = rbind(fake, real)
+labels_fake = matrix(0, amount, 1)
+labels_real = matrix(1, amount, 1)
+labels = rbind(labels_fake, labels_real)
+
+# get prediction
+if($1 == 'cnn')
+  predicted = GANC::eval(images, DW1, Db1, DW2, Db2, DW3, Db3)
+else if($1 == 'simple')
+  predicted = GANS::eval(images, DW1, Db1, DW2, Db2)
+
+correct_pred = (round(predicted) == labels)
+accuracy = mean(correct_pred)
+print("accuracy: " + accuracy)
+
+# rescale generated images from [-1, 1] back to [0, 1]
+fake = 0.5 * fake + 0.5
+
+write(accuracy, out_dir+"/accuracy", format="text")
diff --git a/src/test/scripts/applications/GAN/GAN_simple.dml b/src/test/scripts/applications/GAN/GAN_simple.dml
new file mode 100644
index 0000000..ba165c2
--- /dev/null
+++ b/src/test/scripts/applications/GAN/GAN_simple.dml
@@ -0,0 +1,358 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+source("nn/layers/affine.dml") as affine
+source("nn/layers/leaky_relu.dml") as leaky_relu
+source("nn/layers/log_loss.dml") as log_loss
+source("nn/layers/softmax.dml") as softmax
+source("nn/layers/sigmoid.dml") as sigmoid
+source("nn/layers/tanh.dml") as tanh
+source("nn/optim/adam.dml") as adam
+
+train = function(matrix[double] X, int iterations)
+    return (matrix[double] GW_1, matrix[double] Gb_1, matrix[double] GW_2, matrix[double] Gb_2, matrix[double] DW_1,
+            matrix[double] Db_1, matrix[double] DW_2, matrix[double] Db_2)
+{
+/*
+   * Trains the generator and the discriminator of the GAN.
+   *
+   * The input matrix, X, has N examples, each with 784 features.
+   *
+   * Inputs:
+   *  - X: Input data matrix, of shape (N, 784).
+   *  - iterations: number of iterations for training
+   *
+   * Outputs:
+   *  - GW_1: Generator 1st layer weights (parameters) matrix, of shape (100, 128).
+   *  - Gb_1: Generator 1st layer biases vector, of shape (1, 128).
+   *  - GW_2: Generator 2nd layer weights (parameters) matrix, of shape (128, 784).
+   *  - Gb_2: Generator 2nd layer biases vector, of shape (1, 784).
+   *  - DW_1: Discriminator 1st layer weights (parameters) matrix, of shape (784, 128).
+   *  - Db_1: Discriminator 1st layer biases vector, of shape (1, 128).
+   *  - DW_2: Discriminator 2nd layer weights (parameters) matrix, of shape (128, 1).
+   *  - Db_2: Discriminator 2nd layer biases vector, of shape (1, 1).
+*/
+    N = nrow(X)
+    batch_size = 128
+    half_batch = batch_size / 2
+
+    #Define Generator:
+    [GW_1, Gb_1] = affine::init(100, 128, -1)
+    [GW_2, Gb_2] = affine::init(128, 28*28, -1)
+    [mGW_1, vGW_1] = adam::init(GW_1)
+    [mGb_1, vGb_1] = adam::init(Gb_1)
+    [mGW_2, vGW_2] = adam::init(GW_2)
+    [mGb_2, vGb_2] = adam::init(Gb_2)
+
+    gen_model = list(GW_1, Gb_1, GW_2, Gb_2)
+    gen_grad = list(mGW_1, vGW_1, mGb_1, vGb_1, mGW_2, vGW_2, mGb_2, vGb_2)
+
+    #Define Discriminator:
+    [DW_1, Db_1] = affine::init(28*28, 128, -1)
+    [DW_2, Db_2] = affine::init(128, 1, -1)
+    [mDW_1, vDW_1] = adam::init(DW_1)
+    [mDb_1, vDb_1] = adam::init(Db_1)
+    [mDW_2, vDW_2] = adam::init(DW_2)
+    [mDb_2, vDb_2] = adam::init(Db_2)
+
+    disc_model = list(DW_1, Db_1, DW_2, Db_2)
+    disc_grad = list(mDW_1, vDW_1, mDb_1, vDb_1, mDW_2, vDW_2, mDb_2, vDb_2)
+
+    fake = matrix(0, 0, 784)
+
+    for(i in 1:iterations)
+    {
+        print('step ' + toString(i) + ' / ' + toString(iterations))
+        #generate samples
+        noise = rand(rows = half_batch, cols = 100, min = 0.0, max = 1.0)
+        [fake_images, gen_params] = gen_forward(noise, gen_model)
+        rand = sample(N, half_batch)
+        real_images = matrix(0, half_batch, 784)
+        for(r in 1:half_batch)
+        {
+            real_images[r,] = X[as.scalar(rand[r]),]
+        }
+
+        #train discriminator
+        [decision, disc_params] = disc_forward(real_images, disc_model)
+        targets = matrix(1, half_batch, 1)
+        dloss1 = log_loss::forward(decision, targets)
+        [dX, disc_model, disc_grad] = disc_backward(decision, targets, FALSE, i, disc_model, disc_grad, disc_params)
+        [decision, disc_params] = disc_forward(fake_images, disc_model)
+        targets = matrix(0, half_batch, 1)
+        dloss2 = log_loss::forward(decision, targets)
+        [dX, disc_model, disc_grad] = disc_backward(decision, targets, FALSE, i, disc_model, disc_grad, disc_params)
+        print('discriminator_loss: ' + toString((dloss1 + dloss2)))
+
+        #generate samples
+        noise = rand(rows = batch_size, cols = 100, min = 0.0, max = 1.0)
+        [fake_images, gen_params] = gen_forward(noise, gen_model)
+
+        #train generator
+        [decision, disc_params] = disc_forward(fake_images, disc_model)
+        targets = matrix(1, batch_size, 1)
+        gloss = log_loss::forward(decision, targets)
+        [dX, disc_model, disc_grad] = disc_backward(decision, targets, TRUE, i, disc_model, disc_grad, disc_params)
+        [gen_model, gen_grad] = gen_backward(dX, i, gen_model, gen_grad, gen_params)
+        print('generator_loss: ' + toString(gloss))
+
+        # get sample generated image to observe evolution of generated images
+        if(i %% (iterations/10) == 0)
+        {
+            fake = rbind(fake, fake_images[1])
+        }
+    }
+    out_dir = "target/testTemp/applications/GAN/GANTest/"
+    fake = 0.5 * fake + 0.5
+    write(fake, out_dir+"/evo")
+    DW_1 = as.matrix(disc_model[1])
+    Db_1 = as.matrix(disc_model[2])
+    DW_2 = as.matrix(disc_model[3])
+    Db_2 = as.matrix(disc_model[4])
+    GW_1 = as.matrix(gen_model[1])
+    Gb_1 = as.matrix(gen_model[2])
+    GW_2 = as.matrix(gen_model[3])
+    Gb_2 = as.matrix(gen_model[4])
+}
+
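+# A minimal usage sketch mirroring GAN_mnist.dml; the data path and iteration
+# count are illustrative assumptions:
+#
+#   source("src/test/scripts/applications/GAN/GAN_simple.dml") as GAN
+#   data = read("data/mnist.csv", format="csv")   # hypothetical input path
+#   X = data[,2:ncol(data)] / 127.5 - 1           # scale pixels to [-1, 1]
+#   [GW1, Gb1, GW2, Gb2, DW1, Db1, DW2, Db2] = GAN::train(X, 500)
+#   fakes = GAN::generate(10, GW1, Gb1, GW2, Gb2)
+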
+gen_forward = function(matrix[double] noise, list[unknown] model)
+    return(matrix[double] images, list[unknown] params)
+{
+/*
+   * Computes the forward pass of the generator.
+   * Generates fake images from input noise.
+   *
+   * Inputs:
+   *  - noise: Randomly generated noise, of shape (N, 100).
+   *  - model: List containing the generator weights and biases.
+   *
+   * Outputs:
+   *  - images: Generated images, of shape (N, 784).
+   *  - params: List of outputs of the generator layers, needed for backward pass.
+*/
+    GW_1 = as.matrix(model[1])
+    Gb_1 = as.matrix(model[2])
+    GW_2 = as.matrix(model[3])
+    Gb_2 = as.matrix(model[4])
+
+    #Generator forward:
+    #Layer 1
+    out_1G = affine::forward(noise, GW_1, Gb_1)
+    out_1G_leaky_relu = leaky_relu::forward(out_1G)
+    #Layer 2
+    out_2G = affine::forward(out_1G_leaky_relu, GW_2, Gb_2)
+    out_2G_tanh = tanh::forward(out_2G)
+    images = out_2G_tanh
+    params = list(noise, out_1G, out_1G_leaky_relu, out_2G)
+}
+
+disc_forward = function(matrix[double] X, list[unknown] model)
+    return(matrix[double] decision, list[unknown] params)
+{
+/*
+   * Computes the forward pass of the discriminator.
+   * Decides if input images are real or fake.
+   *
+   * Inputs:
+   *  - X: Input matrix containing sample images, of shape (N, 784).
+   *  - model: List containing the discriminator weights and biases.
+   *
+   * Outputs:
+   *  - decision: Decisions for realness of input, of shape (N, 1).
+   *  - params: List of outputs of the discriminator layers, needed for backward pass.
+*/
+    DW_1 = as.matrix(model[1])
+    Db_1 = as.matrix(model[2])
+    DW_2 = as.matrix(model[3])
+    Db_2 = as.matrix(model[4])
+
+    #Discriminator forward
+    #Layer 1
+    out_1D = affine::forward(X, DW_1, Db_1)
+    out_1D_leaky_relu = leaky_relu::forward(out_1D)
+
+    #Layer 2
+    out_2D = affine::forward(out_1D_leaky_relu, DW_2, Db_2)
+    decision = sigmoid::forward(out_2D)
+    params = list(X, out_1D, out_1D_leaky_relu, out_2D)
+}
+
+disc_backward = function(matrix[double] decision, matrix[double] targets, boolean lock, int iteration, list[unknown] model, list[unknown] gradients,
+                         list[unknown] params)
+    return(matrix[double] dX, list[unknown] model, list[unknown] gradients)
+{
+/*
+   * Computes the backward pass of the discriminator.
+   * Updates gradients and weights of the discriminator.
+   *
+   * Inputs:
+   *  - decision: Input matrix containing discriminator decisions, of shape (N, 1).
+   *  - targets: Target values for the decisions, of shape (N, 1).
+   *  - lock: Boolean that governs if discriminator weights are to be updated, TRUE means the weights are not updated.
+   *  - iteration: Current iteration of the training.
+   *  - model: List containing the discriminator weights and biases.
+   *  - gradients: List containing the discriminator gradients.
+   *  - params: List of outputs of the discriminator layers from the forward pass.
+   *
+   * Outputs:
+   *  - dX: Gradient wrt `X`, of shape (N, 784).
+   *  - model: List containing the updated discriminator weights and biases.
+   *  - gradients: List containing the updated discriminator gradients.
+*/
+    lr = 0.0002
+    beta1 = 0.5
+    beta2 = 0.999
+    epsilon = 1e-07
+
+    DW_1 = as.matrix(model[1])
+    Db_1 = as.matrix(model[2])
+    DW_2 = as.matrix(model[3])
+    Db_2 = as.matrix(model[4])
+
+    mDW_1 = as.matrix(gradients[1])
+    vDW_1 = as.matrix(gradients[2])
+    mDb_1 = as.matrix(gradients[3])
+    vDb_1 = as.matrix(gradients[4])
+    mDW_2 = as.matrix(gradients[5])
+    vDW_2 = as.matrix(gradients[6])
+    mDb_2 = as.matrix(gradients[7])
+    vDb_2 = as.matrix(gradients[8])
+
+    #Discriminator backward
+    #Layer 2
+    dloss = log_loss::backward(decision, targets)
+    dout_2D = sigmoid::backward(dloss, as.matrix(params[4]))
+    [dout_1D, dDW_2, dDb_2] = affine::backward(dout_2D, as.matrix(params[3]), DW_2, Db_2)
+
+    #Layer 1
+    dout_1D_leaky_relu = leaky_relu::backward(dout_1D, as.matrix(params[2]))
+    [dX, dDW_1, dDb_1] = affine::backward(dout_1D_leaky_relu, as.matrix(params[1]), DW_1, Db_1)
+
+    if(!lock)
+    {
+        #optimize
+        [DW_1, mDW_1, vDW_1] = adam::update(DW_1, dDW_1, lr, beta1, beta2, epsilon, iteration, mDW_1, vDW_1)
+        [Db_1, mDb_1, vDb_1] = adam::update(Db_1, dDb_1, lr, beta1, beta2, epsilon, iteration, mDb_1, vDb_1)
+        [DW_2, mDW_2, vDW_2] = adam::update(DW_2, dDW_2, lr, beta1, beta2, epsilon, iteration, mDW_2, vDW_2)
+        [Db_2, mDb_2, vDb_2] = adam::update(Db_2, dDb_2, lr, beta1, beta2, epsilon, iteration, mDb_2, vDb_2)
+        model = list(DW_1, Db_1, DW_2, Db_2)
+        gradients = list(mDW_1, vDW_1, mDb_1, vDb_1, mDW_2, vDW_2, mDb_2, vDb_2)
+    }
+}
+
+gen_backward = function(matrix[double] dX, int iteration, list[unknown] model, list[unknown] gradients, list[unknown] params)
+    return(list[unknown] model, list[unknown] gradients)
+{
+/*
+   * Computes the backward pass of the generator.
+   * Updates gradients and weights of the generator.
+   *
+   * Inputs:
+   *  - dX: Gradient wrt `X`, of shape (N, 784).
+   *  - iteration: Current iteration of the training.
+   *  - model: List containing the generator weights and biases.
+   *  - gradients: List containing the generator gradients.
+   *  - params: List of outputs of the generator layers from the forward pass.
+   *
+   * Outputs:
+   *  - model: List containing the updated generator weights and biases.
+   *  - gradients: List containing the updated generator gradients.
+*/
+    lr = 0.0002
+    beta1 = 0.5
+    beta2 = 0.999
+    epsilon = 1e-07
+
+    GW_1 = as.matrix(model[1])
+    Gb_1 = as.matrix(model[2])
+    GW_2 = as.matrix(model[3])
+    Gb_2 = as.matrix(model[4])
+
+    mGW_1 = as.matrix(gradients[1])
+    vGW_1 = as.matrix(gradients[2])
+    mGb_1 = as.matrix(gradients[3])
+    vGb_1 = as.matrix(gradients[4])
+    mGW_2 = as.matrix(gradients[5])
+    vGW_2 = as.matrix(gradients[6])
+    mGb_2 = as.matrix(gradients[7])
+    vGb_2 = as.matrix(gradients[8])
+
+    #Layer 2
+    dout_2G_tanh = tanh::backward(dX, as.matrix(params[4]))
+    [dout_2G, dGW_2, dGb_2] = affine::backward(dout_2G_tanh, as.matrix(params[3]), GW_2, Gb_2)
+
+    #Layer 1
+    dout_1G_leaky_relu = leaky_relu::backward(dout_2G, as.matrix(params[2]))
+    [dout_1G, dGW_1, dGb_1] = affine::backward(dout_1G_leaky_relu, as.matrix(params[1]), GW_1, Gb_1)
+
+    #optimize
+    [GW_1, mGW_1, vGW_1] = adam::update(GW_1, dGW_1, lr, beta1, beta2, epsilon, iteration, mGW_1, vGW_1)
+    [Gb_1, mGb_1, vGb_1] = adam::update(Gb_1, dGb_1, lr, beta1, beta2, epsilon, iteration, mGb_1, vGb_1)
+    [GW_2, mGW_2, vGW_2] = adam::update(GW_2, dGW_2, lr, beta1, beta2, epsilon, iteration, mGW_2, vGW_2)
+    [Gb_2, mGb_2, vGb_2] = adam::update(Gb_2, dGb_2, lr, beta1, beta2, epsilon, iteration, mGb_2, vGb_2)
+
+    model = list(GW_1, Gb_1, GW_2, Gb_2)
+    gradients = list(mGW_1, vGW_1, mGb_1, vGb_1, mGW_2, vGW_2, mGb_2, vGb_2)
+}
+
+generate = function(int amount, matrix[double] GW_1, matrix[double] Gb_1, matrix[double] GW_2, matrix[double] Gb_2)
+    return(matrix[double] images)
+{
+/*
+   * Generates `amount` images from random noise.
+   *
+   * Inputs:
+   *  - amount: Number of images to generate.
+   *  - GW_1: Generator 1st layer weights (parameters) matrix, of shape (100, 128).
+   *  - Gb_1: Generator 1st layer biases vector, of shape (1, 128).
+   *  - GW_2: Generator 2nd layer weights (parameters) matrix, of shape (128, 784).
+   *  - Gb_2: Generator 2nd layer biases vector, of shape (1, 784).
+   *
+   * Outputs:
+   *  - images: Matrix of generated images, of shape (amount, 784).
+*/
+
+    noise = rand(rows = amount, cols = 100, min = 0.0, max = 1.0)
+    [images, params] = gen_forward(noise, list(GW_1, Gb_1, GW_2, Gb_2))
+}
+
+eval = function(matrix[double] images, matrix[double] DW_1, matrix[double] Db_1, matrix[double] DW_2, matrix[double] Db_2)
+    return(matrix[double] decision)
+{
+/*
+   * Predicts whether a set of input images is real or fake.
+   *
+   * Inputs:
+   *  - images: Matrix of sample images, of shape (N, 784).
+   *  - DW_1: Discriminator 1st layer weights (parameters) matrix, of shape (784, 128).
+   *  - Db_1: Discriminator 1st layer biases vector, of shape (1, 128).
+   *  - DW_2: Discriminator 2nd layer weights (parameters) matrix, of shape (128, 1).
+   *  - Db_2: Discriminator 2nd layer biases vector, of shape (1, 1).
+   *
+   * Outputs:
+   *  - decision: Matrix of predictions, of shape (N, 1).
+*/
+
+    [decision, disc_params] = disc_forward(images, list(DW_1, Db_1, DW_2, Db_2))
+}
+