You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by ni...@apache.org on 2019/03/29 17:26:19 UTC
[systemml] branch master updated: [SYSTEMML-540] Added performance
tests for ResNet200
This is an automated email from the ASF dual-hosted git repository.
niketanpansare pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemml.git
The following commit(s) were added to refs/heads/master by this push:
new 794c5a2 [SYSTEMML-540] Added performance tests for ResNet200
794c5a2 is described below
commit 794c5a232a3f462e2a85836dea55570f102e1682
Author: Niketan Pansare <np...@us.ibm.com>
AuthorDate: Fri Mar 29 10:26:04 2019 -0700
[SYSTEMML-540] Added performance tests for ResNet200
These tests compare the effect of different eviction policies when
training ResNet, and also perform a baseline comparison against Unified
Memory, TF and TF-GPU.
---
scripts/perftest/gpu_resnet_perftest/resnet.py | 282 +++++++++++++++++++++++++
scripts/perftest/gpu_resnet_perftest/run.py | 219 +++++++++++++++++++
scripts/perftest/gpu_resnet_perftest/run.sh | 72 +++++++
3 files changed, 573 insertions(+)
diff --git a/scripts/perftest/gpu_resnet_perftest/resnet.py b/scripts/perftest/gpu_resnet_perftest/resnet.py
new file mode 100644
index 0000000..a2e8514
--- /dev/null
+++ b/scripts/perftest/gpu_resnet_perftest/resnet.py
@@ -0,0 +1,282 @@
+# -------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# -------------------------------------------------------------
+
+from __future__ import division
+
+import six
+from keras.models import Model
+from keras.layers import (
+ Input,
+ Activation,
+ Dense,
+ Flatten
+)
+from keras.layers.convolutional import (
+ Conv2D,
+ MaxPooling2D,
+ AveragePooling2D
+)
+from keras.layers.merge import add
+from keras.layers.normalization import BatchNormalization
+from keras.regularizers import l2
+from keras import backend as K
+
+
+def _bn_relu(input):
+ """Helper to build a BN -> relu block
+ """
+ norm = BatchNormalization(axis=CHANNEL_AXIS)(input)
+ return Activation("relu")(norm)
+
+
+def _conv_bn_relu(**conv_params):
+ """Helper to build a conv -> BN -> relu block
+ """
+ filters = conv_params["filters"]
+ kernel_size = conv_params["kernel_size"]
+ strides = conv_params.setdefault("strides", (1, 1))
+ kernel_initializer = conv_params.setdefault("kernel_initializer", "he_normal")
+ padding = conv_params.setdefault("padding", "same")
+ kernel_regularizer = conv_params.setdefault("kernel_regularizer", l2(1.e-4))
+
+ def f(input):
+ conv = Conv2D(filters=filters, kernel_size=kernel_size,
+ strides=strides, padding=padding,
+ kernel_initializer=kernel_initializer,
+ kernel_regularizer=kernel_regularizer)(input)
+ return _bn_relu(conv)
+
+ return f
+
+
+def _bn_relu_conv(**conv_params):
+ """Helper to build a BN -> relu -> conv block.
+ This is an improved scheme proposed in http://arxiv.org/pdf/1603.05027v2.pdf
+ """
+ filters = conv_params["filters"]
+ kernel_size = conv_params["kernel_size"]
+ strides = conv_params.setdefault("strides", (1, 1))
+ kernel_initializer = conv_params.setdefault("kernel_initializer", "he_normal")
+ padding = conv_params.setdefault("padding", "same")
+ kernel_regularizer = conv_params.setdefault("kernel_regularizer", l2(1.e-4))
+
+ def f(input):
+ activation = _bn_relu(input)
+ return Conv2D(filters=filters, kernel_size=kernel_size,
+ strides=strides, padding=padding,
+ kernel_initializer=kernel_initializer,
+ kernel_regularizer=kernel_regularizer)(activation)
+
+ return f
+
+
+def _shortcut(input, residual):
+ """Adds a shortcut between input and residual block and merges them with "sum"
+ """
+ # Expand channels of shortcut to match residual.
+ # Stride appropriately to match residual (width, height)
+ # Should be int if network architecture is correctly configured.
+ input_shape = K.int_shape(input)
+ residual_shape = K.int_shape(residual)
+ stride_width = int(round(input_shape[ROW_AXIS] / residual_shape[ROW_AXIS]))
+ stride_height = int(round(input_shape[COL_AXIS] / residual_shape[COL_AXIS]))
+ equal_channels = input_shape[CHANNEL_AXIS] == residual_shape[CHANNEL_AXIS]
+
+ shortcut = input
+ # 1 X 1 conv if shape is different. Else identity.
+ if stride_width > 1 or stride_height > 1 or not equal_channels:
+ shortcut = Conv2D(filters=residual_shape[CHANNEL_AXIS],
+ kernel_size=(1, 1),
+ strides=(stride_width, stride_height),
+ padding="valid",
+ kernel_initializer="he_normal",
+ kernel_regularizer=l2(0.0001))(input)
+
+ return add([shortcut, residual])
+
+
+def _residual_block(block_function, filters, repetitions, is_first_layer=False):
+ """Builds a residual block with repeating bottleneck blocks.
+ """
+ def f(input):
+ for i in range(repetitions):
+ init_strides = (1, 1)
+ if i == 0 and not is_first_layer:
+ init_strides = (2, 2)
+ input = block_function(filters=filters, init_strides=init_strides,
+ is_first_block_of_first_layer=(is_first_layer and i == 0))(input)
+ return input
+
+ return f
+
+
+def basic_block(filters, init_strides=(1, 1), is_first_block_of_first_layer=False):
+ """Basic 3 X 3 convolution blocks for use on resnets with layers <= 34.
+ Follows improved proposed scheme in http://arxiv.org/pdf/1603.05027v2.pdf
+ """
+ def f(input):
+
+ if is_first_block_of_first_layer:
+ # don't repeat bn->relu since we just did bn->relu->maxpool
+ conv1 = Conv2D(filters=filters, kernel_size=(3, 3),
+ strides=init_strides,
+ padding="same",
+ kernel_initializer="he_normal",
+ kernel_regularizer=l2(1e-4))(input)
+ else:
+ conv1 = _bn_relu_conv(filters=filters, kernel_size=(3, 3),
+ strides=init_strides)(input)
+
+ residual = _bn_relu_conv(filters=filters, kernel_size=(3, 3))(conv1)
+ return _shortcut(input, residual)
+
+ return f
+
+
+def bottleneck(filters, init_strides=(1, 1), is_first_block_of_first_layer=False):
+ """Bottleneck architecture for > 34 layer resnet.
+ Follows improved proposed scheme in http://arxiv.org/pdf/1603.05027v2.pdf
+
+ Returns:
+ A final conv layer of filters * 4
+ """
+ def f(input):
+
+ if is_first_block_of_first_layer:
+ # don't repeat bn->relu since we just did bn->relu->maxpool
+ conv_1_1 = Conv2D(filters=filters, kernel_size=(1, 1),
+ strides=init_strides,
+ padding="same",
+ kernel_initializer="he_normal",
+ kernel_regularizer=l2(1e-4))(input)
+ else:
+ conv_1_1 = _bn_relu_conv(filters=filters, kernel_size=(1, 1),
+ strides=init_strides)(input)
+
+ conv_3_3 = _bn_relu_conv(filters=filters, kernel_size=(3, 3))(conv_1_1)
+ residual = _bn_relu_conv(filters=filters * 4, kernel_size=(1, 1))(conv_3_3)
+ return _shortcut(input, residual)
+
+ return f
+
+
+def _handle_dim_ordering():
+ global ROW_AXIS
+ global COL_AXIS
+ global CHANNEL_AXIS
+ if K.image_dim_ordering() == 'tf':
+ ROW_AXIS = 1
+ COL_AXIS = 2
+ CHANNEL_AXIS = 3
+ else:
+ CHANNEL_AXIS = 1
+ ROW_AXIS = 2
+ COL_AXIS = 3
+
+
+def _get_block(identifier):
+ if isinstance(identifier, six.string_types):
+ res = globals().get(identifier)
+ if not res:
+ raise ValueError('Invalid {}'.format(identifier))
+ return res
+ return identifier
+
+
+class ResnetBuilder(object):
+ @staticmethod
+ def build(input_shape, num_outputs, block_fn, repetitions):
+ """Builds a custom ResNet like architecture.
+
+ Args:
+ input_shape: The input shape in the form (nb_channels, nb_rows, nb_cols)
+ num_outputs: The number of outputs at final softmax layer
+ block_fn: The block function to use. This is either `basic_block` or `bottleneck`.
+ The original paper used basic_block for layers < 50
+ repetitions: Number of repetitions of various block units.
+ At each block unit, the number of filters are doubled and the input size is halved
+
+ Returns:
+ The keras `Model`.
+ """
+ _handle_dim_ordering()
+ if len(input_shape) != 3:
+ raise Exception("Input shape should be a tuple (nb_channels, nb_rows, nb_cols)")
+
+ # Permute dimension order if necessary
+ if K.image_dim_ordering() == 'tf':
+ input_shape = (input_shape[1], input_shape[2], input_shape[0])
+
+ # Load function from str if needed.
+ block_fn = _get_block(block_fn)
+
+ input = Input(shape=input_shape)
+ conv1 = _conv_bn_relu(filters=64, kernel_size=(7, 7), strides=(2, 2))(input)
+ pool1 = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding="same")(conv1)
+
+ block = pool1
+ filters = 64
+ for i, r in enumerate(repetitions):
+ block = _residual_block(block_fn, filters=filters, repetitions=r, is_first_layer=(i == 0))(block)
+ filters *= 2
+
+ # Last activation
+ block = _bn_relu(block)
+
+ # Classifier block
+ block_shape = K.int_shape(block)
+ pool2 = AveragePooling2D(pool_size=(block_shape[ROW_AXIS], block_shape[COL_AXIS]),
+ strides=(1, 1))(block)
+ flatten1 = Flatten()(pool2)
+ dense = Dense(units=num_outputs, kernel_initializer="he_normal",
+ activation="softmax")(flatten1)
+
+ model = Model(inputs=input, outputs=dense)
+ return model
+
+ @staticmethod
+ def build_resnet_18(input_shape, num_outputs):
+ return ResnetBuilder.build(input_shape, num_outputs, basic_block, [2, 2, 2, 2])
+
+ @staticmethod
+ def build_resnet_34(input_shape, num_outputs):
+ return ResnetBuilder.build(input_shape, num_outputs, basic_block, [3, 4, 6, 3])
+
+ @staticmethod
+ def build_resnet_50(input_shape, num_outputs):
+ return ResnetBuilder.build(input_shape, num_outputs, bottleneck, [3, 4, 6, 3])
+
+ @staticmethod
+ def build_resnet_101(input_shape, num_outputs):
+ return ResnetBuilder.build(input_shape, num_outputs, bottleneck, [3, 4, 23, 3])
+
+ @staticmethod
+ def build_resnet_152(input_shape, num_outputs):
+ return ResnetBuilder.build(input_shape, num_outputs, bottleneck, [3, 8, 36, 3])
+
+ @staticmethod
+ def build_resnet_200(input_shape, num_outputs):
+ return ResnetBuilder.build(input_shape, num_outputs, bottleneck, [3, 24, 36, 3])
+
+ @staticmethod
+ def build_resnet_1001(input_shape, num_outputs):
+ # TODO: From https://github.com/KaimingHe/resnet-1k-layers/blob/master/resnet-pre-act.lua
+ return ResnetBuilder.build(input_shape, num_outputs, bottleneck, [16, 64, 128, 256])
diff --git a/scripts/perftest/gpu_resnet_perftest/run.py b/scripts/perftest/gpu_resnet_perftest/run.py
new file mode 100644
index 0000000..eb7cc14
--- /dev/null
+++ b/scripts/perftest/gpu_resnet_perftest/run.py
@@ -0,0 +1,219 @@
+# -------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# -------------------------------------------------------------
+
+import time, os, argparse, sys, math
+import numpy as np
+
+from pyspark import SparkContext
+sc = SparkContext()
+from pyspark.sql import SparkSession
+spark = SparkSession.builder.getOrCreate()
+
+parser=argparse.ArgumentParser("Testing deep networks for different batches")
+parser.add_argument('--network', type=str, default='vgg16', choices=['vgg16', 'vgg19', 'resnet200', 'resnet1001', 'unet'])
+parser.add_argument('--allocator', type=str, default='cuda', choices=['cuda', 'unified_memory'])
+parser.add_argument('--batch_size', help='Batch size. Default: 64', type=int, default=64)
+parser.add_argument('--num_images', help='Number of images. Default: 2048', type=int, default=2048)
+parser.add_argument('--eviction_policy', help='Eviction policy. Default: align_memory', type=str, default='align_memory', choices=['align_memory', 'lru', 'fifo', 'min_evict', 'lfu', 'mru'])
+parser.add_argument('--framework', help='The framework to use for running the benchmark. Default: systemml', type=str, default='systemml', choices=['systemml', 'tensorflow', 'systemml_force_gpu', 'tensorflow-gpu'])
+parser.add_argument('--num_channels', help='Number of channels. Default: 3', type=int, default=3)
+parser.add_argument('--height', help='Height. Default: 224', type=int, default=224)
+parser.add_argument('--width', help='Width. Default: 224', type=int, default=224)
+args=parser.parse_args()
+
+#######################################################################
+# Required to ensure that TF only uses exactly 1 GPU if framework is tensorflow-gpu, else no gpu
+os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
+os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+if args.framework == 'tensorflow-gpu':
+ os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
+ os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+else:
+ # Disable tensorflow from grabbing the entire GPU memory
+ os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
+ os.environ['CUDA_VISIBLE_DEVICES'] = ''
+#######################################################################
+
+# To discount the transfer time of batches, we use one randomly generated batch
+# and scale the number of epochs
+batch_size = args.batch_size
+num_images = args.num_images
+num_images = num_images - int(num_images % batch_size)
+n_batches_for_epoch = num_images / batch_size
+
+# Model-specific parameters
+num_classes = 1000
+input_shape = (args.num_channels, args.height, args.width)
+if args.network == 'unet' and (input_shape[0] != 1 or input_shape[1] != 256 or input_shape[2] != 256):
+ raise ValueError('Incorrect input shape for unet: ' + str(input_shape) + '. Supported input shape fo unet: (1, 256, 256)' )
+num_pixels = input_shape[0]*input_shape[1]*input_shape[2]
+
+import keras
+from keras.utils import np_utils
+from keras import backend as K
+if args.framework.startswith('systemml'):
+ K.set_image_data_format('channels_first')
+import os
+import numpy as np
+from keras.models import *
+from keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D, Dropout, Cropping2D, concatenate # merge
+from keras.optimizers import *
+
+#####################################################################################
+# Ideally we would have preferred to compare the performance on double precision
+# as SystemML's CPU backend only supports double precision.
+# But since TF 1.7 crashes with double precision, we only test with single precision
+use_double_precision = False
+if use_double_precision:
+ K.set_floatx('float64')
+if args.framework == 'tensorflow-gpu':
+ import tensorflow as tf
+ from keras.backend.tensorflow_backend import set_session
+ tf_config = tf.ConfigProto()
+ if args.allocator =='cuda':
+ tf_config.gpu_options.per_process_gpu_memory_fraction = 0.9
+ elif args.allocator =='unified_memory':
+ tf_config.gpu_options.allow_growth = True
+ set_session(tf.Session(config=tf_config))
+#####################################################################################
+
+error_occured = False
+print("Building model ... ")
+if args.network == 'vgg16':
+ model = keras.applications.vgg16.VGG16(weights='imagenet', classes=num_classes)
+elif args.network == 'vgg19':
+ model = keras.applications.vgg19.VGG19(weights='imagenet', classes=num_classes)
+elif args.network == 'resnet200':
+ import resnet
+ model = resnet.ResnetBuilder.build_resnet_200(input_shape, num_classes)
+elif args.network == 'resnet1001':
+ import resnet
+ model = resnet.ResnetBuilder.build_resnet_1001(input_shape, num_classes)
+elif args.network == 'unet':
+ def conv3x3(input, num_filters):
+ conv = Conv2D(num_filters, 3, activation = 'relu', padding = 'same')(input)
+ conv = Conv2D(num_filters, 3, activation = 'relu', padding = 'same')(conv)
+ return conv
+ num_filters = [64, 128, 256, 512, 1024]
+ model_input = Input((input_shape[1], input_shape[2], input_shape[0]))
+ input = model_input
+ side_inputs = []
+ for i in range(len(num_filters)):
+ # Apply max pooling for all except first down_conv
+ input = MaxPooling2D(pool_size=(2, 2))(input) if i != 0 else input
+ input = conv3x3(input, num_filters[i])
+ # Apply dropouts to only last 2 down_conv
+ input = Dropout(0.5)(input) if i >= len(num_filters)-2 else input
+ side_inputs.append(input)
+ input = side_inputs.pop()
+ num_filters.pop()
+ for i in range(len(num_filters)):
+ filters = num_filters.pop()
+ input = Conv2D(filters, 3, activation = 'relu', padding = 'same')(UpSampling2D(size = (2,2))(input))
+ #input = merge([side_inputs.pop(), input], mode = 'concat', concat_axis = 3)
+ input = concatenate([side_inputs.pop(), input])
+ input = conv3x3(input, filters)
+ conv1 = Conv2D(2, 3, activation = 'relu', padding = 'same')(input)
+ model_output = Conv2D(1, 1, activation = 'sigmoid')(conv1)
+ model = Model(input = model_input, output = model_output)
+else:
+ raise ValueError('Unsupported network:' + args.network)
+if args.network == 'unet':
+ model.compile(optimizer = keras.optimizers.SGD(lr=1e-6, momentum=0.95, decay=5e-4, nesterov=True), loss = 'mean_squared_error')
+else:
+ model.compile(loss='categorical_crossentropy', optimizer=keras.optimizers.SGD(lr=1e-6, momentum=0.95, decay=5e-4, nesterov=True))
+
+#------------------------------------------------------------------------------------------
+# Use this for baseline experiments:
+# Alternate way to avoid eviction is to perform multiple forward/backward pass, aggregate gradients and finally perform update.
+looped_minibatch = False
+local_batch_size = batch_size
+if looped_minibatch:
+ if args.network == 'resnet200':
+ local_batch_size = 16
+ else:
+ raise ValueError('looped_minibatch not yet implemented for ' + str(args.network))
+ if batch_size % local_batch_size != 0:
+ raise ValueError('local_batch_size = ' + str(local_batch_size) + ' should be multiple of batch size=' + str(batch_size))
+#------------------------------------------------------------------------------------------
+
+if args.framework.startswith('systemml'):
+ print("Initializing Keras2DML.")
+ from systemml.mllearn import Keras2DML
+ should_load_weights=False
+ sysml_model = Keras2DML(spark, model, load_keras_weights=should_load_weights, weights="tmp_weights1")
+ if looped_minibatch:
+ sysml_model.set(train_algo="looped_minibatch", parallel_batches=int(batch_size/local_batch_size), test_algo="batch") # systemml doesnot have a generator
+ sysml_model.set(weight_parallel_batches=False)
+ else:
+ sysml_model.set(train_algo="batch", test_algo="batch")
+ sysml_model.set(perform_fused_backward_update=True)
+ sysml_model.setStatistics(True).setStatisticsMaxHeavyHitters(100)
+ # Since this script is used for measuring performance and not for printing script, inline the nn library
+ sysml_model.set(inline_nn_library=True)
+ # For apples-to-apples comparison, donot force set the allocated array to 0
+ sysml_model.setConfigProperty("sysml.gpu.force.memSetZero", "false")
+ # Use single GPU
+ sysml_model.setConfigProperty("sysml.gpu.availableGPUs", "0")
+ # Use user-specified allocator: cuda (default) or unified_memory
+ sysml_model.setConfigProperty("sysml.gpu.memory.allocator", args.allocator);
+ # Use user-specified eviction policy
+ sysml_model.setConfigProperty("sysml.gpu.eviction.policy", args.eviction_policy)
+ # Please consider allocating large enough JVM and using large CPU cache
+ sysml_model.setConfigProperty("sysml.gpu.eviction.shadow.bufferSize", "0.5")
+ sysml_model.setConfigProperty("sysml.caching.bufferSize", "1.0")
+ # Use user-specified precision
+ if not use_double_precision:
+ sysml_model.setConfigProperty("sysml.floating.point.precision", "single")
+ sysml_model.setGPU(True).setForceGPU(args.framework=='systemml_force_gpu')
+ Xb = np.random.uniform(0,1,num_pixels*batch_size)
+ Xb = Xb.reshape((batch_size, num_pixels))
+ if args.network == 'unet':
+ yb = np.random.randint(5, size=num_pixels*batch_size).reshape((batch_size, num_pixels))
+ sysml_model.set(perform_one_hot_encoding=False)
+ else:
+ yb = np.random.randint(num_classes, size=batch_size)
+ from py4j.protocol import Py4JJavaError
+ start = time.time()
+ try:
+ print("Invoking fit")
+ sysml_model.fit(Xb, yb, batch_size=local_batch_size, epochs=n_batches_for_epoch)
+ print("Done with fit")
+ except Py4JJavaError as e:
+ error_occured = True
+ print("Execution failed: " + str(e))
+ except AttributeError as e1:
+ error_occured = True
+ print("Execution failed: " + str(e1))
+elif args.framework.startswith('tensorflow'):
+ Xb = np.random.randint(256, size=num_pixels*batch_size).reshape((batch_size, input_shape[1],input_shape[2], input_shape[0])) + 1
+ if args.network == 'unet':
+ yb = np.random.randint(5, size=num_pixels*batch_size).reshape((batch_size, input_shape[1],input_shape[2], input_shape[0]))
+ else:
+ yb = np.random.randint(num_classes, size=batch_size)
+ yb = np_utils.to_categorical(yb, num_classes)
+ start = time.time()
+ model.fit(Xb, yb, batch_size=batch_size, epochs=n_batches_for_epoch)
+K.clear_session()
+end = time.time()
+if not error_occured:
+ with open('time.txt', 'a') as f:
+ f.write(args.framework + ',' + args.network + ',synthetic_imagenet,1,' + str(batch_size) + ',1,' + str(num_images) + "," + str(end-start) + "," + args.eviction_policy + ',' + args.allocator + '\n')
diff --git a/scripts/perftest/gpu_resnet_perftest/run.sh b/scripts/perftest/gpu_resnet_perftest/run.sh
new file mode 100644
index 0000000..30187f1
--- /dev/null
+++ b/scripts/perftest/gpu_resnet_perftest/run.sh
@@ -0,0 +1,72 @@
+#!/bin/bash
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+#rm -rf time.txt logs
+#mkdir logs
+
+# Spark installation used to launch run.py; the path is a placeholder and has
+# to be adapted to the local machine.
+SPARK_HOME="/home/.../spark-2.3.0-bin-hadoop2.7"
+# Value passed to spark-submit via --driver-memory.
+DRIVER_MEMORY="200g"
+
+function compare_baseline {
+ network=$1
+ num_images=$2
+ batch_size=$3
+ num_channels=$4
+ height=$5
+ width=$6
+ allocator='unified_memory'
+ eviction_policy='lru'
+ for framework in tensorflow-gpu tensorflow systemml_force_gpu
+ do
+ echo "Running "$framework"_"$batch_size"_"$network"_"$num_images"_"$eviction_policy
+ rm -rf tmp_weights1 scratch_space spark-warehouse &> /dev/null
+ $SPARK_HOME/bin/spark-submit --driver-memory $DRIVER_MEMORY run.py --num_channels $num_channels --height $height --width $width --num_images $num_images --eviction_policy $eviction_policy --network $network --batch_size $batch_size --framework $framework --allocator $allocator &> logs/$framework"_"$batch_size"_"$network"_"$num_images"_"$eviction_policy"_"$allocator"_"$num_channels"_"$height"_"$width".log"
+ done
+}
+
+function compare_eviction_policy {
+ network=$1
+ num_images=$2
+ batch_size=$3
+ num_channels=$4
+ height=$5
+ width=$6
+ framework='systemml_force_gpu'
+ allocator='cuda'
+ for eviction_policy in min_evict align_memory lru lfu
+ do
+ echo "Running "$framework"_"$batch_size"_"$network"_"$num_images"_"$eviction_policy
+ rm -rf tmp_weights1 scratch_space spark-warehouse &> /dev/null
+ $SPARK_HOME/bin/spark-submit --driver-memory $DRIVER_MEMORY run.py --num_channels $num_channels --height $height --width $width --num_images $num_images --eviction_policy $eviction_policy --network $network --batch_size $batch_size --framework $framework --allocator $allocator &> logs/$framework"_"$batch_size"_"$network"_"$num_images"_"$eviction_policy"_"$allocator"_"$num_channels"_"$height"_"$width".log"
+ done
+}
+
+# Experiment 1: Very Deep ResNet-200
+compare_baseline resnet200 2 1 3 1792 1792
+compare_eviction_policy resnet200 2 1 3 1792 1792
+
+# Experiment 2: Psuedo in-memory ResNet-200
+for b in 32 96 64 48 16 4
+do
+ compare_baseline resnet200 15360 $b 3 224 224
+ compare_eviction_policy resnet200 15360 $b 3 224 224
+done
\ No newline at end of file