You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by ni...@apache.org on 2019/03/29 17:26:19 UTC
[systemml] branch master updated: [SYSTEMML-540] Added performance tests for ResNet200

This is an automated email from the ASF dual-hosted git repository.

niketanpansare pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemml.git


The following commit(s) were added to refs/heads/master by this push:
     new 794c5a2  [SYSTEMML-540] Added performance tests for ResNet200
794c5a2 is described below

commit 794c5a232a3f462e2a85836dea55570f102e1682
Author: Niketan Pansare <np...@us.ibm.com>
AuthorDate: Fri Mar 29 10:26:04 2019 -0700

    [SYSTEMML-540] Added performance tests for ResNet200
    
    These tests compare the effect of different eviction policies when
    training ResNet and also perform a baseline comparison with Unified
    Memory, TF and TF-GPU.
---
 scripts/perftest/gpu_resnet_perftest/resnet.py | 282 +++++++++++++++++++++++++
 scripts/perftest/gpu_resnet_perftest/run.py    | 219 +++++++++++++++++++
 scripts/perftest/gpu_resnet_perftest/run.sh    |  72 +++++++
 3 files changed, 573 insertions(+)

diff --git a/scripts/perftest/gpu_resnet_perftest/resnet.py b/scripts/perftest/gpu_resnet_perftest/resnet.py
new file mode 100644
index 0000000..a2e8514
--- /dev/null
+++ b/scripts/perftest/gpu_resnet_perftest/resnet.py
@@ -0,0 +1,282 @@
+# -------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# -------------------------------------------------------------
+
+from __future__ import division
+
+import six
+from keras.models import Model
+from keras.layers import (
+    Input,
+    Activation,
+    Dense,
+    Flatten
+)
+from keras.layers.convolutional import (
+    Conv2D,
+    MaxPooling2D,
+    AveragePooling2D
+)
+from keras.layers.merge import add
+from keras.layers.normalization import BatchNormalization
+from keras.regularizers import l2
+from keras import backend as K
+
+
+def _bn_relu(input):
+    """Apply batch normalization over the channel axis, followed by a relu.
+    """
+    normalized = BatchNormalization(axis=CHANNEL_AXIS)(input)
+    relu = Activation("relu")
+    return relu(normalized)
+
+
+def _conv_bn_relu(**conv_params):
+    """Return a function that applies conv -> BN -> relu to a tensor.
+
+    Required keywords: `filters` and `kernel_size`. Optional keywords and
+    their defaults: `strides` (1, 1), `kernel_initializer` "he_normal",
+    `padding` "same" and `kernel_regularizer` l2(1.e-4).
+    """
+    conv_kwargs = dict(
+        filters=conv_params["filters"],
+        kernel_size=conv_params["kernel_size"],
+        strides=conv_params.get("strides", (1, 1)),
+        kernel_initializer=conv_params.get("kernel_initializer", "he_normal"),
+        padding=conv_params.get("padding", "same"),
+        kernel_regularizer=conv_params.get("kernel_regularizer", l2(1.e-4)),
+    )
+
+    def f(input):
+        return _bn_relu(Conv2D(**conv_kwargs)(input))
+
+    return f
+
+
+def _bn_relu_conv(**conv_params):
+    """Return a function that applies BN -> relu -> conv to a tensor.
+    This is the improved ordering proposed in http://arxiv.org/pdf/1603.05027v2.pdf
+
+    Required keywords: `filters` and `kernel_size`. Optional keywords and
+    their defaults: `strides` (1, 1), `kernel_initializer` "he_normal",
+    `padding` "same" and `kernel_regularizer` l2(1.e-4).
+    """
+    conv_kwargs = dict(
+        filters=conv_params["filters"],
+        kernel_size=conv_params["kernel_size"],
+        strides=conv_params.get("strides", (1, 1)),
+        kernel_initializer=conv_params.get("kernel_initializer", "he_normal"),
+        padding=conv_params.get("padding", "same"),
+        kernel_regularizer=conv_params.get("kernel_regularizer", l2(1.e-4)),
+    )
+
+    def f(input):
+        pre_activated = _bn_relu(input)
+        return Conv2D(**conv_kwargs)(pre_activated)
+
+    return f
+
+
+def _shortcut(input, residual):
+    """Merge `input` and `residual` with an element-wise "sum".
+
+    If the two tensors differ in spatial size or in channel count, `input` is
+    first passed through a strided 1 X 1 convolution so that its shape matches
+    `residual`; otherwise it is added unchanged.
+    """
+    in_shape = K.int_shape(input)
+    res_shape = K.int_shape(residual)
+    # These ratios should be whole numbers if the network is configured correctly.
+    row_stride = int(round(in_shape[ROW_AXIS] / res_shape[ROW_AXIS]))
+    col_stride = int(round(in_shape[COL_AXIS] / res_shape[COL_AXIS]))
+    channels_differ = in_shape[CHANNEL_AXIS] != res_shape[CHANNEL_AXIS]
+
+    if row_stride <= 1 and col_stride <= 1 and not channels_differ:
+        # Shapes already agree: identity shortcut.
+        return add([input, residual])
+
+    # Project the input so channels and (rows, cols) match the residual.
+    projection = Conv2D(filters=res_shape[CHANNEL_AXIS],
+                        kernel_size=(1, 1),
+                        strides=(row_stride, col_stride),
+                        padding="valid",
+                        kernel_initializer="he_normal",
+                        kernel_regularizer=l2(0.0001))(input)
+    return add([projection, residual])
+
+
+def _residual_block(block_function, filters, repetitions, is_first_layer=False):
+    """Return a function that chains `repetitions` blocks made by `block_function`.
+
+    The first block uses (2, 2) strides unless `is_first_layer` is set; every
+    other block uses (1, 1) strides.
+    """
+    def f(input):
+        tensor = input
+        for index in range(repetitions):
+            leads_stage = index == 0
+            strides = (2, 2) if leads_stage and not is_first_layer else (1, 1)
+            apply_block = block_function(filters=filters, init_strides=strides,
+                                         is_first_block_of_first_layer=(is_first_layer and leads_stage))
+            tensor = apply_block(tensor)
+        return tensor
+
+    return f
+
+
+def basic_block(filters, init_strides=(1, 1), is_first_block_of_first_layer=False):
+    """Two stacked 3 X 3 convolutions plus a shortcut, for resnets with layers <= 34.
+    Follows the improved scheme proposed in http://arxiv.org/pdf/1603.05027v2.pdf
+    """
+    def f(input):
+        if is_first_block_of_first_layer:
+            # bn->relu->maxpool was just applied, so start with a bare conv
+            first_conv = Conv2D(filters=filters, kernel_size=(3, 3),
+                                strides=init_strides,
+                                padding="same",
+                                kernel_initializer="he_normal",
+                                kernel_regularizer=l2(1e-4))
+        else:
+            first_conv = _bn_relu_conv(filters=filters, kernel_size=(3, 3),
+                                       strides=init_strides)
+        conv1 = first_conv(input)
+
+        second_conv = _bn_relu_conv(filters=filters, kernel_size=(3, 3))
+        return _shortcut(input, second_conv(conv1))
+
+    return f
+
+
+def bottleneck(filters, init_strides=(1, 1), is_first_block_of_first_layer=False):
+    """Bottleneck (1 X 1 -> 3 X 3 -> 1 X 1) block for > 34 layer resnet.
+    Follows the improved scheme proposed in http://arxiv.org/pdf/1603.05027v2.pdf
+
+    Returns:
+        A function whose final conv layer has filters * 4 output channels
+    """
+    def f(input):
+        if is_first_block_of_first_layer:
+            # bn->relu->maxpool was just applied, so start with a bare conv
+            reduce_conv = Conv2D(filters=filters, kernel_size=(1, 1),
+                                 strides=init_strides,
+                                 padding="same",
+                                 kernel_initializer="he_normal",
+                                 kernel_regularizer=l2(1e-4))
+        else:
+            reduce_conv = _bn_relu_conv(filters=filters, kernel_size=(1, 1),
+                                        strides=init_strides)
+        conv_1_1 = reduce_conv(input)
+
+        conv_3_3 = _bn_relu_conv(filters=filters, kernel_size=(3, 3))(conv_1_1)
+        expand_conv = _bn_relu_conv(filters=filters * 4, kernel_size=(1, 1))
+        return _shortcut(input, expand_conv(conv_3_3))
+
+    return f
+
+
+def _handle_dim_ordering():
+    """Set the module-level ROW_AXIS, COL_AXIS and CHANNEL_AXIS indices.
+
+    The indices are chosen from the backend's current image data format:
+    channels come last (axis 3) for 'channels_last' and first (axis 1)
+    otherwise.
+    """
+    global ROW_AXIS
+    global COL_AXIS
+    global CHANNEL_AXIS
+    # K.image_data_format() is the Keras 2 replacement for the deprecated
+    # K.image_dim_ordering(); 'channels_last' corresponds to the old 'tf'.
+    if K.image_data_format() == 'channels_last':
+        ROW_AXIS, COL_AXIS, CHANNEL_AXIS = 1, 2, 3
+    else:
+        CHANNEL_AXIS, ROW_AXIS, COL_AXIS = 1, 2, 3
+
+
+def _get_block(identifier):
+    """Resolve a block function from its name or pass a non-string through.
+
+    Strings are looked up among this module's globals; a missing (or falsy)
+    entry raises ValueError. Any other identifier is returned unchanged.
+    """
+    if not isinstance(identifier, six.string_types):
+        return identifier
+    block = globals().get(identifier)
+    if block:
+        return block
+    raise ValueError('Invalid {}'.format(identifier))
+
+
+class ResnetBuilder(object):
+    """Factory of static helpers that assemble ResNet-style Keras models."""
+
+    @staticmethod
+    def build(input_shape, num_outputs, block_fn, repetitions):
+        """Builds a custom ResNet like architecture.
+
+        Args:
+            input_shape: The input shape in the form (nb_channels, nb_rows, nb_cols)
+            num_outputs: The number of outputs at final softmax layer
+            block_fn: The block function to use. This is either `basic_block` or `bottleneck`,
+                given as the function itself or as its name.
+                The original paper used basic_block for layers < 50
+            repetitions: Number of repetitions of various block units.
+                At each block unit, the number of filters are doubled and the input size is halved
+
+        Returns:
+            The keras `Model`.
+
+        Raises:
+            ValueError: If `input_shape` does not have exactly three entries.
+        """
+        _handle_dim_ordering()
+        if len(input_shape) != 3:
+            # ValueError is a subclass of Exception, so existing handlers still match.
+            raise ValueError("Input shape should be a tuple (nb_channels, nb_rows, nb_cols)")
+
+        # Permute dimension order if necessary. K.image_data_format() replaces the
+        # deprecated K.image_dim_ordering(); 'channels_last' is the former 'tf'.
+        if K.image_data_format() == 'channels_last':
+            input_shape = (input_shape[1], input_shape[2], input_shape[0])
+
+        # Load function from str if needed.
+        block_fn = _get_block(block_fn)
+
+        # Stem: 7 X 7 strided conv -> BN -> relu, then 3 X 3 strided max pooling
+        model_input = Input(shape=input_shape)
+        conv1 = _conv_bn_relu(filters=64, kernel_size=(7, 7), strides=(2, 2))(model_input)
+        pool1 = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding="same")(conv1)
+
+        # One residual stage per entry of `repetitions`, doubling the filters each time
+        block = pool1
+        filters = 64
+        for i, r in enumerate(repetitions):
+            block = _residual_block(block_fn, filters=filters, repetitions=r, is_first_layer=(i == 0))(block)
+            filters *= 2
+
+        # Last activation
+        block = _bn_relu(block)
+
+        # Classifier block: average over the whole remaining feature map, then softmax
+        block_shape = K.int_shape(block)
+        pool2 = AveragePooling2D(pool_size=(block_shape[ROW_AXIS], block_shape[COL_AXIS]),
+                                 strides=(1, 1))(block)
+        flatten1 = Flatten()(pool2)
+        dense = Dense(units=num_outputs, kernel_initializer="he_normal",
+                      activation="softmax")(flatten1)
+
+        return Model(inputs=model_input, outputs=dense)
+
+    @staticmethod
+    def build_resnet_18(input_shape, num_outputs):
+        """Build a model from basic blocks repeated [2, 2, 2, 2] times."""
+        return ResnetBuilder.build(input_shape, num_outputs, basic_block, [2, 2, 2, 2])
+
+    @staticmethod
+    def build_resnet_34(input_shape, num_outputs):
+        """Build a model from basic blocks repeated [3, 4, 6, 3] times."""
+        return ResnetBuilder.build(input_shape, num_outputs, basic_block, [3, 4, 6, 3])
+
+    @staticmethod
+    def build_resnet_50(input_shape, num_outputs):
+        """Build a model from bottleneck blocks repeated [3, 4, 6, 3] times."""
+        return ResnetBuilder.build(input_shape, num_outputs, bottleneck, [3, 4, 6, 3])
+
+    @staticmethod
+    def build_resnet_101(input_shape, num_outputs):
+        """Build a model from bottleneck blocks repeated [3, 4, 23, 3] times."""
+        return ResnetBuilder.build(input_shape, num_outputs, bottleneck, [3, 4, 23, 3])
+
+    @staticmethod
+    def build_resnet_152(input_shape, num_outputs):
+        """Build a model from bottleneck blocks repeated [3, 8, 36, 3] times."""
+        return ResnetBuilder.build(input_shape, num_outputs, bottleneck, [3, 8, 36, 3])
+
+    @staticmethod
+    def build_resnet_200(input_shape, num_outputs):
+        """Build a model from bottleneck blocks repeated [3, 24, 36, 3] times."""
+        return ResnetBuilder.build(input_shape, num_outputs, bottleneck, [3, 24, 36, 3])
+
+    @staticmethod
+    def build_resnet_1001(input_shape, num_outputs):
+        """Build a model from bottleneck blocks repeated [16, 64, 128, 256] times."""
+        # TODO: From https://github.com/KaimingHe/resnet-1k-layers/blob/master/resnet-pre-act.lua
+        return ResnetBuilder.build(input_shape, num_outputs, bottleneck, [16, 64, 128, 256])
diff --git a/scripts/perftest/gpu_resnet_perftest/run.py b/scripts/perftest/gpu_resnet_perftest/run.py
new file mode 100644
index 0000000..eb7cc14
--- /dev/null
+++ b/scripts/perftest/gpu_resnet_perftest/run.py
@@ -0,0 +1,219 @@
+# -------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# -------------------------------------------------------------
+
+import time, os, argparse, sys, math
+import numpy as np
+
+from pyspark import SparkContext
+sc = SparkContext()
+from pyspark.sql import SparkSession
+spark = SparkSession.builder.getOrCreate()
+
+# Command-line interface of the benchmark; every option has a default.
+parser = argparse.ArgumentParser("Testing deep networks for different batches")
+parser.add_argument(
+	'--network', type=str, default='vgg16',
+	choices=['vgg16', 'vgg19', 'resnet200', 'resnet1001', 'unet'])
+parser.add_argument(
+	'--allocator', type=str, default='cuda',
+	choices=['cuda', 'unified_memory'])
+parser.add_argument(
+	'--batch_size', type=int, default=64,
+	help='Batch size. Default: 64')
+parser.add_argument(
+	'--num_images', type=int, default=2048,
+	help='Number of images. Default: 2048')
+parser.add_argument(
+	'--eviction_policy', type=str, default='align_memory',
+	choices=['align_memory', 'lru', 'fifo', 'min_evict', 'lfu', 'mru'],
+	help='Eviction policy. Default: align_memory')
+parser.add_argument(
+	'--framework', type=str, default='systemml',
+	choices=['systemml', 'tensorflow', 'systemml_force_gpu', 'tensorflow-gpu'],
+	help='The framework to use for running the benchmark. Default: systemml')
+parser.add_argument(
+	'--num_channels', type=int, default=3,
+	help='Number of channels. Default: 3')
+parser.add_argument(
+	'--height', type=int, default=224,
+	help='Height. Default: 224')
+parser.add_argument(
+	'--width', type=int, default=224,
+	help='Width. Default: 224')
+args = parser.parse_args()
+
+#######################################################################
+# Required to ensure that TF only uses exactly 1 GPU if framework is tensorflow-gpu, else no gpu.
+# Device ids follow the PCI bus order; only device 0 is exposed for tensorflow-gpu,
+# and an empty device list hides every GPU from this Python process otherwise.
+os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
+os.environ['CUDA_VISIBLE_DEVICES'] = '0' if args.framework == 'tensorflow-gpu' else ''
+#######################################################################
+
+# To discount the transfer time of batches, we use one randomly generated batch
+# and scale the number of epochs
+batch_size = args.batch_size
+# Round the number of images down to a whole number of batches
+num_images = args.num_images - (args.num_images % batch_size)
+# Floor division keeps this an integer on both Python 2 and Python 3;
+# the value is later passed as the `epochs` argument of fit()
+n_batches_for_epoch = num_images // batch_size
+
+# Model-specific parameters
+num_classes = 1000
+input_shape = (args.num_channels, args.height, args.width)
+if args.network == 'unet' and input_shape != (1, 256, 256):
+	raise ValueError('Incorrect input shape for unet: ' + str(input_shape) + '. Supported input shape for unet: (1, 256, 256)' )
+# Number of values in one flattened image
+num_pixels = input_shape[0]*input_shape[1]*input_shape[2]
+
+# Keras is imported only after CUDA_VISIBLE_DEVICES has been set above.
+import keras
+from keras.utils import np_utils
+from keras import backend as K
+# For the systemml* frameworks, switch Keras to the channels_first image data format;
+# for the other frameworks the Keras setting is left untouched.
+if args.framework.startswith('systemml'):
+	K.set_image_data_format('channels_first')
+# NOTE(review): os and numpy are already imported at the top of this script,
+# so the next two imports are redundant.
+import os 
+import numpy as np
+from keras.models import *
+from keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D, Dropout, Cropping2D, concatenate # merge
+from keras.optimizers import *
+
+#####################################################################################
+# Double precision would have been the preferred basis for the comparison, because
+# SystemML's CPU backend only supports double precision.
+# TF 1.7 crashes with double precision though, so only single precision is tested.
+use_double_precision = False
+if use_double_precision:
+	K.set_floatx('float64')
+
+# For tensorflow-gpu, register a session whose GPU memory options follow --allocator
+if args.framework == 'tensorflow-gpu':
+	import tensorflow as tf
+	from keras.backend.tensorflow_backend import set_session
+	session_config = tf.ConfigProto()
+	if args.allocator == 'unified_memory':
+		session_config.gpu_options.allow_growth = True
+	elif args.allocator == 'cuda':
+		session_config.gpu_options.per_process_gpu_memory_fraction = 0.9
+	set_session(tf.Session(config=session_config))
+#####################################################################################
+
+# Set to True when the SystemML run fails, so that no timing is recorded
+error_occured = False
+print("Building model ... ")
+if args.network == 'vgg16':
+	model = keras.applications.vgg16.VGG16(weights='imagenet', classes=num_classes)
+elif args.network == 'vgg19':
+	model = keras.applications.vgg19.VGG19(weights='imagenet', classes=num_classes)
+elif args.network == 'resnet200':
+	import resnet
+	model = resnet.ResnetBuilder.build_resnet_200(input_shape, num_classes)
+elif args.network == 'resnet1001':
+	import resnet
+	model = resnet.ResnetBuilder.build_resnet_1001(input_shape, num_classes)
+elif args.network == 'unet':
+	def conv3x3(input, num_filters):
+		"""Apply two successive 3x3 relu convolutions with 'same' padding."""
+		conv = Conv2D(num_filters, 3, activation = 'relu', padding = 'same')(input)
+		conv = Conv2D(num_filters, 3, activation = 'relu', padding = 'same')(conv)
+		return conv
+	num_filters = [64, 128, 256, 512, 1024]
+	# input_shape is (channels, height, width); the unet input is built as (height, width, channels)
+	model_input = Input((input_shape[1], input_shape[2], input_shape[0]))
+	tensor = model_input
+	side_inputs = []
+	# Contracting path: one conv3x3 stage per filter count
+	for i in range(len(num_filters)):
+		# Apply max pooling for all except first down_conv
+		tensor = MaxPooling2D(pool_size=(2, 2))(tensor) if i != 0 else tensor
+		tensor = conv3x3(tensor, num_filters[i])
+		# Apply dropouts to only last 2 down_conv
+		tensor = Dropout(0.5)(tensor) if i >= len(num_filters)-2 else tensor
+		side_inputs.append(tensor)
+	# The deepest stage is the starting point of the expanding path
+	tensor = side_inputs.pop()
+	num_filters.pop()
+	# Expanding path: range() is evaluated once, so this runs once per remaining
+	# filter count even though the list is consumed inside the loop
+	for i in range(len(num_filters)):
+		filters = num_filters.pop()
+		tensor = Conv2D(filters, 3, activation = 'relu', padding = 'same')(UpSampling2D(size = (2,2))(tensor))
+		# Concatenate with the output of the matching contracting stage
+		tensor = concatenate([side_inputs.pop(), tensor])
+		tensor = conv3x3(tensor, filters)
+	conv1 = Conv2D(2, 3, activation = 'relu', padding = 'same')(tensor)
+	model_output = Conv2D(1, 1, activation = 'sigmoid')(conv1)
+	# Keras 2 names these arguments inputs/outputs (input/output is the legacy Keras 1 spelling)
+	model = Model(inputs = model_input, outputs = model_output)
+else:
+	raise ValueError('Unsupported network:' + args.network)
+# Same optimizer for every network; only the loss differs for unet
+loss = 'mean_squared_error' if args.network == 'unet' else 'categorical_crossentropy'
+model.compile(optimizer = keras.optimizers.SGD(lr=1e-6, momentum=0.95, decay=5e-4, nesterov=True), loss = loss)
+
+#------------------------------------------------------------------------------------------
+# Use this for baseline experiments:
+# Alternate way to avoid eviction is to perform multiple forward/backward pass, aggregate gradients and finally perform update.
+looped_minibatch = False
+# Batch size of a single forward/backward pass; equals batch_size unless looped_minibatch is enabled
+local_batch_size = batch_size
+if looped_minibatch:
+	if args.network == 'resnet200':
+		local_batch_size = 16
+	else:
+		raise ValueError('looped_minibatch not yet implemented for ' + str(args.network))
+	# batch_size/local_batch_size passes are aggregated per update, so the division must be exact
+	if batch_size % local_batch_size != 0:
+		raise ValueError('batch size = ' + str(batch_size) + ' should be a multiple of local_batch_size = ' + str(local_batch_size))
+#------------------------------------------------------------------------------------------
+
+if args.framework.startswith('systemml'):
+	print("Initializing Keras2DML.")
+	from systemml.mllearn import Keras2DML
+	should_load_weights=False
+	sysml_model = Keras2DML(spark, model, load_keras_weights=should_load_weights, weights="tmp_weights1")
+	if looped_minibatch:
+		sysml_model.set(train_algo="looped_minibatch", parallel_batches=int(batch_size/local_batch_size), test_algo="batch") # systemml doesnot have a generator
+		sysml_model.set(weight_parallel_batches=False)
+	else:
+		sysml_model.set(train_algo="batch", test_algo="batch")
+	sysml_model.set(perform_fused_backward_update=True)
+	sysml_model.setStatistics(True).setStatisticsMaxHeavyHitters(100)
+	# Since this script is used for measuring performance and not for printing script, inline the nn library
+	sysml_model.set(inline_nn_library=True)
+	# For apples-to-apples comparison, donot force set the allocated array to 0
+	sysml_model.setConfigProperty("sysml.gpu.force.memSetZero", "false")
+	# Use single GPU
+	sysml_model.setConfigProperty("sysml.gpu.availableGPUs", "0")
+	# Use user-specified allocator: cuda (default) or unified_memory
+	sysml_model.setConfigProperty("sysml.gpu.memory.allocator", args.allocator)
+	# Use user-specified eviction policy
+	sysml_model.setConfigProperty("sysml.gpu.eviction.policy", args.eviction_policy)
+	# Please consider allocating large enough JVM and using large CPU cache
+	sysml_model.setConfigProperty("sysml.gpu.eviction.shadow.bufferSize", "0.5")
+	sysml_model.setConfigProperty("sysml.caching.bufferSize", "1.0")
+	# Use user-specified precision
+	if not use_double_precision:
+		sysml_model.setConfigProperty("sysml.floating.point.precision", "single")
+	sysml_model.setGPU(True).setForceGPU(args.framework=='systemml_force_gpu')
+	# One random batch with each image flattened to num_pixels values
+	Xb = np.random.uniform(0,1,num_pixels*batch_size)
+	Xb = Xb.reshape((batch_size, num_pixels))
+	if args.network == 'unet':
+		yb = np.random.randint(5, size=num_pixels*batch_size).reshape((batch_size, num_pixels))
+		sysml_model.set(perform_one_hot_encoding=False)
+	else:
+		yb = np.random.randint(num_classes, size=batch_size)
+	from py4j.protocol import Py4JJavaError
+	start = time.time()
+	try:
+		print("Invoking fit")
+		# The single batch is reused once per epoch; epochs must be an integer
+		sysml_model.fit(Xb, yb, batch_size=local_batch_size, epochs=int(n_batches_for_epoch))
+		print("Done with fit")
+	except (Py4JJavaError, AttributeError) as e:
+		# A failed run is reported but its time is not recorded
+		error_occured = True
+		print("Execution failed: " + str(e))
+elif args.framework.startswith('tensorflow'):
+	# One random batch laid out as (batch, height, width, channels)
+	Xb = np.random.randint(256, size=num_pixels*batch_size).reshape((batch_size, input_shape[1],input_shape[2], input_shape[0])) + 1
+	if args.network == 'unet':
+		yb = np.random.randint(5, size=num_pixels*batch_size).reshape((batch_size, input_shape[1],input_shape[2], input_shape[0]))
+	else:
+		yb = np.random.randint(num_classes, size=batch_size)
+		yb = np_utils.to_categorical(yb, num_classes)
+	start = time.time()
+	model.fit(Xb, yb, batch_size=batch_size, epochs=int(n_batches_for_epoch))
+# Stop the clock before tearing down the Keras session so that cleanup is not counted
+end = time.time()
+K.clear_session()
+if not error_occured:
+	# Appended columns: framework, network, synthetic_imagenet, 1, batch_size, 1,
+	# num_images, elapsed seconds, eviction_policy, allocator
+	with open('time.txt', 'a') as f:
+		f.write(args.framework + ',' + args.network + ',synthetic_imagenet,1,' + str(batch_size) + ',1,' + str(num_images) + "," + str(end-start) + "," + args.eviction_policy + ',' + args.allocator + '\n')
diff --git a/scripts/perftest/gpu_resnet_perftest/run.sh b/scripts/perftest/gpu_resnet_perftest/run.sh
new file mode 100644
index 0000000..30187f1
--- /dev/null
+++ b/scripts/perftest/gpu_resnet_perftest/run.sh
@@ -0,0 +1,72 @@
+#!/bin/bash
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+#rm -rf time.txt logs
+#mkdir logs
+
+SPARK_HOME='/home/.../spark-2.3.0-bin-hadoop2.7'
+DRIVER_MEMORY='200g'
+
+# Runs run.py once per baseline framework (tensorflow-gpu, tensorflow,
+# systemml_force_gpu) with the unified_memory allocator and lru eviction policy.
+# Arguments: network num_images batch_size num_channels height width
+# Output of each run is written to a log file under logs/.
+function compare_baseline {
+	local network=$1
+	local num_images=$2
+	local batch_size=$3
+	local num_channels=$4
+	local height=$5
+	local width=$6
+	local allocator='unified_memory'
+	local eviction_policy='lru'
+	local framework run_name
+	# The redirection below fails if the logs directory does not exist
+	mkdir -p logs
+	for framework in tensorflow-gpu tensorflow systemml_force_gpu
+	do
+		run_name="${framework}_${batch_size}_${network}_${num_images}_${eviction_policy}"
+		echo "Running $run_name"
+		# Remove leftovers of the previous run
+		rm -rf tmp_weights1 scratch_space spark-warehouse &> /dev/null
+		"$SPARK_HOME/bin/spark-submit" --driver-memory "$DRIVER_MEMORY" run.py --num_channels "$num_channels" --height "$height" --width "$width" --num_images "$num_images" --eviction_policy "$eviction_policy" --network "$network" --batch_size "$batch_size" --framework "$framework" --allocator "$allocator" &> "logs/${run_name}_${allocator}_${num_channels}_${height}_${width}.log"
+	done
+}
+
+# Runs run.py once per eviction policy (min_evict, align_memory, lru, lfu)
+# with the systemml_force_gpu framework and the cuda allocator.
+# Arguments: network num_images batch_size num_channels height width
+# Output of each run is written to a log file under logs/.
+function compare_eviction_policy {
+	local network=$1
+	local num_images=$2
+	local batch_size=$3
+	local num_channels=$4
+	local height=$5
+	local width=$6
+	local framework='systemml_force_gpu'
+	local allocator='cuda'
+	local eviction_policy run_name
+	# The redirection below fails if the logs directory does not exist
+	mkdir -p logs
+	for eviction_policy in min_evict align_memory lru lfu
+	do
+		run_name="${framework}_${batch_size}_${network}_${num_images}_${eviction_policy}"
+		echo "Running $run_name"
+		# Remove leftovers of the previous run
+		rm -rf tmp_weights1 scratch_space spark-warehouse &> /dev/null
+		"$SPARK_HOME/bin/spark-submit" --driver-memory "$DRIVER_MEMORY" run.py --num_channels "$num_channels" --height "$height" --width "$width" --num_images "$num_images" --eviction_policy "$eviction_policy" --network "$network" --batch_size "$batch_size" --framework "$framework" --allocator "$allocator" &> "logs/${run_name}_${allocator}_${num_channels}_${height}_${width}.log"
+	done
+}
+
+# Experiment 1: Very Deep ResNet-200
+# Arguments of both functions: network num_images batch_size num_channels height width
+compare_baseline resnet200 2 1 3 1792 1792
+compare_eviction_policy resnet200 2 1 3 1792 1792
+
+# Experiment 2: Pseudo in-memory ResNet-200
+# Repeats both comparisons on 15360 images of 3 x 224 x 224 for each batch size below
+for b in 32 96 64 48 16 4
+do
+	compare_baseline resnet200 15360 $b 3 224 224  
+	compare_eviction_policy resnet200 15360 $b 3 224 224
+done
\ No newline at end of file