You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by in...@apache.org on 2018/11/15 23:05:50 UTC
[incubator-mxnet] branch master updated: [Example] Gradcam consolidation in tutorial (#13255)
This is an automated email from the ASF dual-hosted git repository.
indhub pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git
The following commit(s) were added to refs/heads/master by this push:
new 8ac7fb9 [Example] Gradcam consolidation in tutorial (#13255)
8ac7fb9 is described below
commit 8ac7fb930fdfa6ef3ac61be7569a17eb95f1ad4c
Author: Ankit Khedia <36...@users.noreply.github.com>
AuthorDate: Thu Nov 15 15:05:36 2018 -0800
[Example] Gradcam consolidation in tutorial (#13255)
* fixing gradcam
* changed loading parameters code
* fixing type conversions issue with previous versions of matplotlib
* gradcam consolidation
* creating directory structures in utils
* changing location
* empty commit
---
docs/conf.py | 2 +-
.../vision}/cnn_visualization/gradcam.py | 0
docs/tutorials/vision/cnn_visualization.md | 3 +-
example/cnn_visualization/README.md | 17 ----
example/cnn_visualization/gradcam_demo.py | 110 ---------------------
example/cnn_visualization/vgg.py | 90 -----------------
6 files changed, 3 insertions(+), 219 deletions(-)
diff --git a/docs/conf.py b/docs/conf.py
index 656a1da..af23521 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -107,7 +107,7 @@ master_doc = 'index'
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
-exclude_patterns = ['3rdparty', 'build_version_doc', 'virtualenv', 'api/python/model.md', 'README.md']
+exclude_patterns = ['3rdparty', 'build_version_doc', 'virtualenv', 'api/python/model.md', 'README.md', 'tutorial_utils']
# The reST default role (used for this markup: `text`) to use for all documents.
#default_role = None
diff --git a/example/cnn_visualization/gradcam.py b/docs/tutorial_utils/vision/cnn_visualization/gradcam.py
similarity index 100%
rename from example/cnn_visualization/gradcam.py
rename to docs/tutorial_utils/vision/cnn_visualization/gradcam.py
diff --git a/docs/tutorials/vision/cnn_visualization.md b/docs/tutorials/vision/cnn_visualization.md
index a350fff..fd6a464 100644
--- a/docs/tutorials/vision/cnn_visualization.md
+++ b/docs/tutorials/vision/cnn_visualization.md
@@ -22,7 +22,7 @@ from matplotlib import pyplot as plt
import numpy as np
gradcam_file = "gradcam.py"
-base_url = "https://raw.githubusercontent.com/indhub/mxnet/cnnviz/example/cnn_visualization/{}?raw=true"
+base_url = "https://github.com/apache/incubator-mxnet/tree/master/docs/tutorial_utils/vision/cnn_visualization/{}?raw=true"
mx.test_utils.download(base_url.format(gradcam_file), fname=gradcam_file)
import gradcam
```
@@ -182,6 +182,7 @@ Next, we'll write a method to get an image, preprocess it, predict category and
2. **Guided Grad-CAM:** Guided Grad-CAM shows which exact pixels contributed the most to the CNN's decision.
3. **Saliency map:** Saliency map is a monochrome image showing which pixels contributed the most to the CNN's decision. Sometimes, it is easier to see the areas in the image that most influence the output in a monochrome image than in a color image.
+
```python
def visualize(net, img_path, conv_layer_name):
orig_img = mx.img.imread(img_path)
diff --git a/example/cnn_visualization/README.md b/example/cnn_visualization/README.md
deleted file mode 100644
index 10b9149..0000000
--- a/example/cnn_visualization/README.md
+++ /dev/null
@@ -1,17 +0,0 @@
-# Visualzing CNN decisions
-
-This folder contains an MXNet Gluon implementation of [Grad-CAM](https://arxiv.org/abs/1610.02391) that helps visualize CNN decisions.
-
-A tutorial on how to use this from Jupyter notebook is available [here](https://mxnet.incubator.apache.org/tutorials/vision/cnn_visualization.html).
-
-You can also do the visualization from terminal:
-```
-$ python gradcam_demo.py hummingbird.jpg
-Predicted category : hummingbird (94)
-Original Image : hummingbird_orig.jpg
-Grad-CAM : hummingbird_gradcam.jpg
-Guided Grad-CAM : hummingbird_guided_gradcam.jpg
-Saliency Map : hummingbird_saliency.jpg
-```
-
-![Output of gradcam_demo.py](https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/example/cnn_visualization/hummingbird_filenames.png)
diff --git a/example/cnn_visualization/gradcam_demo.py b/example/cnn_visualization/gradcam_demo.py
deleted file mode 100644
index d9ca5dd..0000000
--- a/example/cnn_visualization/gradcam_demo.py
+++ /dev/null
@@ -1,110 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-import mxnet as mx
-from mxnet import gluon
-
-import argparse
-import os
-import numpy as np
-import cv2
-
-import vgg
-import gradcam
-
-# Receive image path from command line
-parser = argparse.ArgumentParser(description='Grad-CAM demo')
-parser.add_argument('img_path', metavar='image_path', type=str, help='path to the image file')
-
-args = parser.parse_args()
-
-# We'll use VGG-16 for visualization
-network = vgg.vgg16(pretrained=True, ctx=mx.cpu())
-# We'll resize images to 224x244 as part of preprocessing
-image_sz = (224, 224)
-
-def preprocess(data):
- """Preprocess the image before running it through the network"""
- data = mx.image.imresize(data, image_sz[0], image_sz[1])
- data = data.astype(np.float32)
- data = data/255
- # These mean values were obtained from
- # https://mxnet.incubator.apache.org/api/python/gluon/model_zoo.html
- data = mx.image.color_normalize(data,
- mean=mx.nd.array([0.485, 0.456, 0.406]),
- std=mx.nd.array([0.229, 0.224, 0.225]))
- data = mx.nd.transpose(data, (2,0,1)) # Channel first
- return data
-
-def read_image_mxnet(path):
- with open(path, 'rb') as fp:
- img_bytes = fp.read()
- return mx.img.imdecode(img_bytes)
-
-def read_image_cv(path):
- return cv2.resize(cv2.cvtColor(cv2.imread(path), cv2.COLOR_BGR2RGB), image_sz)
-
-# synset.txt contains the names of Imagenet categories
-# Load the file to memory and create a helper method to query category_index -> category name
-synset_url = "http://data.mxnet.io/models/imagenet/synset.txt"
-synset_file_name = "synset.txt"
-mx.test_utils.download(synset_url, fname=synset_file_name)
-
-synset = []
-with open('synset.txt', 'r') as f:
- synset = [l.rstrip().split(' ', 1)[1].split(',')[0] for l in f]
-
-def get_class_name(cls_id):
- return "%s (%d)" % (synset[cls_id], cls_id)
-
-def run_inference(net, data):
- """Run the input image through the network and return the predicted category as integer"""
- out = net(data)
- return out.argmax(axis=1).asnumpy()[0].astype(int)
-
-def visualize(net, img_path, conv_layer_name):
- """Create Grad-CAM visualizations using the network 'net' and the image at 'img_path'
- conv_layer_name is the name of the top most layer of the feature extractor"""
- image = read_image_mxnet(img_path)
- image = preprocess(image)
- image = image.expand_dims(axis=0)
-
- pred_str = get_class_name(run_inference(net, image))
-
- orig_img = read_image_cv(img_path)
- vizs = gradcam.visualize(net, image, orig_img, conv_layer_name)
- return (pred_str, (orig_img, *vizs))
-
-# Create Grad-CAM visualization for the user provided image
-last_conv_layer_name = 'vgg0_conv2d12'
-cat, vizs = visualize(network, args.img_path, last_conv_layer_name)
-
-print("{0:20}: {1:80}".format("Predicted category", cat))
-
-# Write the visualiations into file
-img_name = os.path.split(args.img_path)[1].split('.')[0]
-suffixes = ['orig', 'gradcam', 'guided_gradcam', 'saliency']
-image_desc = ['Original Image', 'Grad-CAM', 'Guided Grad-CAM', 'Saliency Map']
-
-for i, img in enumerate(vizs):
- img = img.astype(np.float32)
- if len(img.shape) == 3:
- img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
- out_file_name = "%s_%s.jpg" % (img_name, suffixes[i])
- cv2.imwrite(out_file_name, img)
- print("{0:20}: {1:80}".format(image_desc[i], out_file_name))
-
diff --git a/example/cnn_visualization/vgg.py b/example/cnn_visualization/vgg.py
deleted file mode 100644
index a8a0ef6..0000000
--- a/example/cnn_visualization/vgg.py
+++ /dev/null
@@ -1,90 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-import mxnet as mx
-from mxnet import gluon
-
-import os
-from mxnet.gluon.model_zoo import model_store
-
-from mxnet.initializer import Xavier
-from mxnet.gluon.nn import MaxPool2D, Flatten, Dense, Dropout, BatchNorm
-from gradcam import Activation, Conv2D
-
-class VGG(mx.gluon.HybridBlock):
- def __init__(self, layers, filters, classes=1000, batch_norm=False, **kwargs):
- super(VGG, self).__init__(**kwargs)
- assert len(layers) == len(filters)
- with self.name_scope():
- self.features = self._make_features(layers, filters, batch_norm)
- self.features.add(Dense(4096, activation='relu',
- weight_initializer='normal',
- bias_initializer='zeros'))
- self.features.add(Dropout(rate=0.5))
- self.features.add(Dense(4096, activation='relu',
- weight_initializer='normal',
- bias_initializer='zeros'))
- self.features.add(Dropout(rate=0.5))
- self.output = Dense(classes,
- weight_initializer='normal',
- bias_initializer='zeros')
-
- def _make_features(self, layers, filters, batch_norm):
- featurizer = mx.gluon.nn.HybridSequential(prefix='')
- for i, num in enumerate(layers):
- for _ in range(num):
- featurizer.add(Conv2D(filters[i], kernel_size=3, padding=1,
- weight_initializer=Xavier(rnd_type='gaussian',
- factor_type='out',
- magnitude=2),
- bias_initializer='zeros'))
- if batch_norm:
- featurizer.add(BatchNorm())
- featurizer.add(Activation('relu'))
- featurizer.add(MaxPool2D(strides=2))
- return featurizer
-
- def hybrid_forward(self, F, x):
- x = self.features(x)
- x = self.output(x)
- return x
-
-vgg_spec = {11: ([1, 1, 2, 2, 2], [64, 128, 256, 512, 512]),
- 13: ([2, 2, 2, 2, 2], [64, 128, 256, 512, 512]),
- 16: ([2, 2, 3, 3, 3], [64, 128, 256, 512, 512]),
- 19: ([2, 2, 4, 4, 4], [64, 128, 256, 512, 512])}
-
-def get_vgg(num_layers, pretrained=False, ctx=mx.cpu(),
- root=os.path.join('~', '.mxnet', 'models'), **kwargs):
- layers, filters = vgg_spec[num_layers]
- net = VGG(layers, filters, **kwargs)
- net.initialize(ctx=ctx)
-
- # Get the pretrained model
- vgg = mx.gluon.model_zoo.vision.get_vgg(num_layers, pretrained=True, ctx=ctx)
-
- # Set the parameters in the new network
- params = vgg.collect_params()
- for key in params:
- param = params[key]
- net.collect_params()[net.prefix+key.replace(vgg.prefix, '')].set_data(param.data())
-
- return net
-
-def vgg16(**kwargs):
- return get_vgg(16, **kwargs)
-