You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@singa.apache.org by zh...@apache.org on 2016/10/05 14:14:23 UTC

[1/2] incubator-singa git commit: SINGA-251 - Implement image loader for pysinga

Repository: incubator-singa
Updated Branches:
  refs/heads/master deb187bb8 -> 17ac16025


SINGA-251 - Implement image loader for pysinga

Add ImageBatchIter in data.py, which prefetches images into mini-batches
and pre-processes images using user defined transform function.
Loading and pre-processing run in a separate process via multiprocessing.

Fixed a bug in the Merge layer: backward should replicate the gradient once per input in the returned values.


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/064f3fb9
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/064f3fb9
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/064f3fb9

Branch: refs/heads/master
Commit: 064f3fb97f37995c4db9565c6abac6676812c2ad
Parents: deb187b
Author: Wei Wang <wa...@gmail.com>
Authored: Tue Sep 27 12:31:40 2016 +0800
Committer: wang wei <wa...@comp.nus.edu.sg>
Committed: Fri Sep 30 20:55:52 2016 +0800

----------------------------------------------------------------------
 python/singa/data.py       | 133 ++++++++++++++++++++++++++++++++++++++++
 python/singa/image_tool.py |  11 ++--
 python/singa/layer.py      |   2 +-
 3 files changed, 138 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/064f3fb9/python/singa/data.py
----------------------------------------------------------------------
diff --git a/python/singa/data.py b/python/singa/data.py
new file mode 100644
index 0000000..db0776e
--- /dev/null
+++ b/python/singa/data.py
@@ -0,0 +1,148 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+'''Utilities for data loading and preprocessing'''
+
+import os
+import random
+import time
+from multiprocessing import Process, Queue
+import numpy as np
+
+
+class ImageBatchIter:
+    '''Utility for iterating over an image dataset to get mini-batches.
+
+    Mini-batches are assembled by a child process (see start) and handed
+    over through a bounded multiprocessing queue.
+
+    Args:
+        img_list_file(str): name of the file containing image meta data; each
+                            line consists of image_path_suffix delimeter label
+        batch_size(int): num of samples in one mini-batch
+        image_transform: a function for image augmentation; it accepts the full
+                        image path and outputs a list of augmented images.
+        shuffle(boolean): True for shuffling images in the list
+        delimeter(char): delimeter between image_path_suffix and label, e.g.,
+                         space or comma
+        image_folder(str): prefix of the image path; None means no prefix
+        capacity(int): the max num of mini-batches in the internal queue.
+    '''
+
+    def __init__(self, img_list_file, batch_size, image_transform,
+                 shuffle=True, delimeter=' ', image_folder=None, capacity=10):
+        self.img_list_file = img_list_file
+        self.queue = Queue(capacity)
+        self.batch_size = batch_size
+        self.image_transform = image_transform
+        self.shuffle = shuffle
+        self.delimeter = delimeter
+        self.image_folder = image_folder
+        self.stop = False
+        self.p = None
+        # Skip blank lines so the count agrees with what run() loads.
+        with open(img_list_file, 'r') as fd:
+            self.num_samples = sum(1 for line in fd if line.strip())
+
+    def start(self):
+        '''Launch the child process that fills the queue by executing run.'''
+        self.p = Process(target=self.run)
+        self.p.start()
+
+    def next(self):
+        '''Return the next mini-batch as (x, y), waiting until one is ready.'''
+        assert self.p is not None, 'call start before next'
+        return self.queue.get()  # get() blocks, so no polling is needed
+
+    def end(self):
+        '''Stop the child process if start has been called.'''
+        if self.p is not None:
+            # The child runs on its own copy of this object, so setting the
+            # flag here does not reach its loop; terminate() is what stops it.
+            self.stop = True
+            time.sleep(0.1)
+            self.p.terminate()
+            self.p.join()  # reap the terminated child
+
+    def run(self):
+        '''Load, augment and enqueue mini-batches until self.stop is set.
+
+        Each queue item is (x, y): x is np.asarray over one float32 array per
+        augmented image, transposed with (2, 0, 1); y holds int32 labels.
+        '''
+        img_list = []
+        with open(self.img_list_file, 'r') as fd:
+            for line in fd:
+                if not line.strip():
+                    continue  # blank lines carry no sample
+                item = line.split(self.delimeter)
+                img_list.append((int(item[1]), item[0]))
+        folder = self.image_folder or ''  # os.path.join cannot take None
+        if self.shuffle:
+            random.shuffle(img_list)
+        index = 0  # index for the image
+        while not self.stop:
+            if self.queue.full():
+                time.sleep(0.1)
+                continue
+            x = []
+            y = np.empty(self.batch_size, dtype=np.int32)
+            i = 0
+            while i < self.batch_size:
+                img_label, img_path = img_list[index]
+                aug_images = self.image_transform(
+                    os.path.join(folder, img_path))
+                assert i + len(aug_images) <= self.batch_size, \
+                    'too many images (%d) in a batch (%d)' % \
+                    (i + len(aug_images), self.batch_size)
+                for img in aug_images:
+                    ary = np.asarray(img.convert('RGB'), dtype=np.float32)
+                    x.append(ary.transpose(2, 0, 1))
+                    y[i] = img_label
+                    i += 1
+                index += 1
+                if index == len(img_list):
+                    index = 0  # reset to the first image
+                    if self.shuffle:
+                        # reshuffle at every pass, even one ending mid-batch
+                        random.shuffle(img_list)
+            # enqueue one mini-batch
+            self.queue.put((np.asarray(x), y))
+
+
+if __name__ == '__main__':
+    import image_tool
+    from PIL import Image
+    tool = image_tool.ImageTool()
+
+    def image_transform(img_path):
+        global tool
+        return tool.load(img_path).resize_by_range(
+            (112, 128)).random_crop(
+            (96, 96)).flip().get()
+
+    data = ImageBatchIter('train.txt', 3,
+                          image_transform, shuffle=True, delimeter=',',
+                          image_folder='images/',
+                          capacity=10)
+    imgs, labels = data.next()
+    for idx in range(imgs.shape[0]):
+        img = Image.fromarray(imgs[idx].astype(np.uint8).transpose(1, 2, 0),
+                              'RGB')
+        img.save('img%d.png' % idx)
+    data.end()

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/064f3fb9/python/singa/image_tool.py
----------------------------------------------------------------------
diff --git a/python/singa/image_tool.py b/python/singa/image_tool.py
index b03174d..9c1741d 100644
--- a/python/singa/image_tool.py
+++ b/python/singa/image_tool.py
@@ -207,6 +207,10 @@ class ImageTool():
     def get(self):
         return self.imgs
 
+    def num_augmentation(self):
+        '''Return the total number of augmentations to each image'''
+        pass
+
     def resize_by_range(self, rng, inplace=True):
         '''
         Args:
@@ -242,13 +246,6 @@ class ImageTool():
         else:
             return new_imgs
 
-    def resize_for_test(self, rng):
-        '''
-        Args:
-            rng: a tuple (begin,end)
-        '''
-        size_list = [rng[0], rng[0]/2+rng[1]/2, rng[1]]
-        return self.resize_by_list(size_list, num_case=3)
 
     def rotate_by_range(self, rng, inplace=True):
         '''

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/064f3fb9/python/singa/layer.py
----------------------------------------------------------------------
diff --git a/python/singa/layer.py b/python/singa/layer.py
index 8caf2bb..6eaf329 100644
--- a/python/singa/layer.py
+++ b/python/singa/layer.py
@@ -659,7 +659,7 @@ class Merge(Layer):
 
     def backward(self, flag, grad):
         assert isinstance(grad, tensor.Tensor), 'The input must be Tensor'
-        return [grad], []  # * self.num_input
+        return [grad] *self.num_input, []  # * self.num_input
 
 
 class Split(Layer):


[2/2] incubator-singa git commit: SINGA-251 - Implement image loader for pysinga

Posted by zh...@apache.org.
SINGA-251 - Implement image loader for pysinga

Add ImageBatchIter in data.py, which prefetches images into mini-batches
and pre-processes images using user defined transform function.
Loading and pre-processing run in a separate process via multiprocessing.

Fixed a bug in the Merge layer: backward should replicate the gradient once per input in the returned values.

To test this feature,
please prepare a train.txt file which consists of multiple lines, where each line has the form image_path, label, e.g.

image1.jpg, 0
image2.jpg, 3
image3.jpg, 1
...
The folder should be like

SINGA ROOT/
            train.txt
            images/
                    image1.jpg
                    image2.jpg
                    ...

Then you can run `python python/singa/data.py` (under the root dir of singa).
After that, you should see the saved files img0.png, img1.png, ..., which are transformed from the images in the images/ folder.


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/17ac1602
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/17ac1602
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/17ac1602

Branch: refs/heads/master
Commit: 17ac160258093973602eda883562b39069504fc8
Parents: 064f3fb
Author: zhaojing <zh...@comp.nus.edu.sg>
Authored: Wed Oct 5 22:10:20 2016 +0800
Committer: zhaojing <zh...@comp.nus.edu.sg>
Committed: Wed Oct 5 22:10:20 2016 +0800

----------------------------------------------------------------------
 python/singa/data.py | 1 +
 1 file changed, 1 insertion(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/17ac1602/python/singa/data.py
----------------------------------------------------------------------
diff --git a/python/singa/data.py b/python/singa/data.py
index db0776e..725b175 100644
--- a/python/singa/data.py
+++ b/python/singa/data.py
@@ -125,6 +125,7 @@ if __name__ == '__main__':
                           image_transform, shuffle=True, delimeter=',',
                           image_folder='images/',
                           capacity=10)
+    data.start()
     imgs, labels = data.next()
     for idx in range(imgs.shape[0]):
         img = Image.fromarray(imgs[idx].astype(np.uint8).transpose(1, 2, 0),