You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@singa.apache.org by zh...@apache.org on 2016/10/05 14:14:23 UTC
[1/2] incubator-singa git commit: SINGA-251 - Implement image loader
for pysinga
Repository: incubator-singa
Updated Branches:
refs/heads/master deb187bb8 -> 17ac16025
SINGA-251 - Implement image loader for pysinga
Add ImageBatchIter in data.py, which prefetches images into mini-batches
and pre-processes images using user defined transform function.
The loading and pre-processing run in a separate process via multiprocessing.
Also fixed a bug in the Merge layer: backward() should replicate the gradient once per input in the returned values.
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/064f3fb9
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/064f3fb9
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/064f3fb9
Branch: refs/heads/master
Commit: 064f3fb97f37995c4db9565c6abac6676812c2ad
Parents: deb187b
Author: Wei Wang <wa...@gmail.com>
Authored: Tue Sep 27 12:31:40 2016 +0800
Committer: wang wei <wa...@comp.nus.edu.sg>
Committed: Fri Sep 30 20:55:52 2016 +0800
----------------------------------------------------------------------
python/singa/data.py | 133 ++++++++++++++++++++++++++++++++++++++++
python/singa/image_tool.py | 11 ++--
python/singa/layer.py | 2 +-
3 files changed, 138 insertions(+), 8 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/064f3fb9/python/singa/data.py
----------------------------------------------------------------------
diff --git a/python/singa/data.py b/python/singa/data.py
new file mode 100644
index 0000000..db0776e
--- /dev/null
+++ b/python/singa/data.py
@@ -0,0 +1,133 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+'''Utilities for data loading and preprocessing'''
+
+import os
+import random
+import time
+from multiprocessing import Process, Queue
+import numpy as np
+
+
+class ImageBatchIter:
+ '''Utility for iterating over an image dataset to get mini-batches.
+
+ Args:
+ img_list_file(str): name of the file containing image meta data; each
+ line consists of image_path_suffix delimeter label
+ batch_size(int): num of samples in one mini-batch
+ image_transform: a function for image augmentation; it accepts the full
+ image path and outputs a list of augmented images.
+ shuffle(boolean): True for shuffling images in the list
+ delimeter(char): delimeter between image_path_suffix and label, e.g.,
+ space or comma
+ image_folder(boolean): prefix of the image path
+ capacity(int): the max num of mini-batches in the internal queue.
+ '''
+
+ def __init__(self, img_list_file, batch_size, image_transform,
+ shuffle=True, delimeter=' ', image_folder=None, capacity=10):
+ self.img_list_file = img_list_file
+ self.queue = Queue(capacity)
+ self.batch_size = batch_size
+ self.image_transform = image_transform
+ self.shuffle = shuffle
+ self.delimeter = delimeter
+ self.image_folder = image_folder
+ self.stop = False
+ self.p = None
+ with open(img_list_file, 'r') as fd:
+ self.num_samples = len(fd.readlines())
+
+ def start(self):
+ self.p = Process(target=self.run)
+ self.p.start()
+ return
+
+ def next(self):
+ assert self.p is not None, 'call start before next'
+ while self.queue.empty():
+ time.sleep(0.1)
+ x, y = self.queue.get() # dequeue one mini-batch
+ return x, y
+
+ def end(self):
+ if self.p is not None:
+ self.stop = True
+ time.sleep(0.1)
+ self.p.terminate()
+
+ def run(self):
+ img_list = []
+ for line in open(self.img_list_file, 'r'):
+ item = line.split(self.delimeter)
+ img_path = item[0]
+ img_label = int(item[1])
+ img_list.append((img_label, img_path))
+ index = 0 # index for the image
+ while not self.stop:
+ if index == 0 and self.shuffle:
+ random.shuffle(img_list)
+ if not self.queue.full():
+ x = []
+ y = np.empty(self.batch_size, dtype=np.int32)
+ i = 0
+ while i < self.batch_size:
+ img_label, img_path = img_list[index]
+ aug_images = self.image_transform(
+ os.path.join(self.image_folder, img_path))
+ assert i + len(aug_images) <= self.batch_size, \
+ 'too many images (%d) in a batch (%d)' % \
+ (i + len(aug_images), self.batch_size)
+ for img in aug_images:
+ ary = np.asarray(img.convert('RGB'), dtype=np.float32)
+ x.append(ary.transpose(2, 0, 1))
+ y[i] = img_label
+ i += 1
+ index += 1
+ if index == self.num_samples:
+ index = 0 # reset to the first image
+ # enqueue one mini-batch
+ self.queue.put((np.asarray(x), y))
+ else:
+ time.sleep(0.1)
+ return
+
+
+if __name__ == '__main__':
+ import image_tool
+ from PIL import Image
+ tool = image_tool.ImageTool()
+
+ def image_transform(img_path):
+ global tool
+ return tool.load(img_path).resize_by_range(
+ (112, 128)).random_crop(
+ (96, 96)).flip().get()
+
+ data = ImageBatchIter('train.txt', 3,
+ image_transform, shuffle=True, delimeter=',',
+ image_folder='images/',
+ capacity=10)
+ imgs, labels = data.next()
+ for idx in range(imgs.shape[0]):
+ img = Image.fromarray(imgs[idx].astype(np.uint8).transpose(1, 2, 0),
+ 'RGB')
+ img.save('img%d.png' % idx)
+ data.end()
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/064f3fb9/python/singa/image_tool.py
----------------------------------------------------------------------
diff --git a/python/singa/image_tool.py b/python/singa/image_tool.py
index b03174d..9c1741d 100644
--- a/python/singa/image_tool.py
+++ b/python/singa/image_tool.py
@@ -207,6 +207,10 @@ class ImageTool():
def get(self):
return self.imgs
+ def num_augmentation(self):
+ '''Return the total number of augmentations to each image'''
+ pass
+
def resize_by_range(self, rng, inplace=True):
'''
Args:
@@ -242,13 +246,6 @@ class ImageTool():
else:
return new_imgs
- def resize_for_test(self, rng):
- '''
- Args:
- rng: a tuple (begin,end)
- '''
- size_list = [rng[0], rng[0]/2+rng[1]/2, rng[1]]
- return self.resize_by_list(size_list, num_case=3)
def rotate_by_range(self, rng, inplace=True):
'''
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/064f3fb9/python/singa/layer.py
----------------------------------------------------------------------
diff --git a/python/singa/layer.py b/python/singa/layer.py
index 8caf2bb..6eaf329 100644
--- a/python/singa/layer.py
+++ b/python/singa/layer.py
@@ -659,7 +659,7 @@ class Merge(Layer):
def backward(self, flag, grad):
assert isinstance(grad, tensor.Tensor), 'The input must be Tensor'
- return [grad], [] # * self.num_input
+ return [grad] *self.num_input, [] # * self.num_input
class Split(Layer):
[2/2] incubator-singa git commit: SINGA-251 - Implement image loader
for pysinga
Posted by zh...@apache.org.
SINGA-251 - Implement image loader for pysinga
Add ImageBatchIter in data.py, which prefetches images into mini-batches
and pre-processes images using user defined transform function.
The loading and pre-processing run in a separate process via multiprocessing.
Also fixed a bug in the Merge layer: backward() should replicate the gradient once per input in the returned values.
To test this feature,
please prepare a train.txt file which consists of multiple lines, where each line is "image_path, label", e.g.
image1.jpg, 0
image2.jpg, 3
image3.jpg, 1
...
The folder should be like
SINGA ROOT/
train.txt
images/
image1.jpg
image2.jpg
...
Then you can run `python python/singa/data.py` (under the root dir of singa).
After that, you should see a list of saved images that are transformed from the images in the images/ folder.
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/17ac1602
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/17ac1602
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/17ac1602
Branch: refs/heads/master
Commit: 17ac160258093973602eda883562b39069504fc8
Parents: 064f3fb
Author: zhaojing <zh...@comp.nus.edu.sg>
Authored: Wed Oct 5 22:10:20 2016 +0800
Committer: zhaojing <zh...@comp.nus.edu.sg>
Committed: Wed Oct 5 22:10:20 2016 +0800
----------------------------------------------------------------------
python/singa/data.py | 1 +
1 file changed, 1 insertion(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/17ac1602/python/singa/data.py
----------------------------------------------------------------------
diff --git a/python/singa/data.py b/python/singa/data.py
index db0776e..725b175 100644
--- a/python/singa/data.py
+++ b/python/singa/data.py
@@ -125,6 +125,7 @@ if __name__ == '__main__':
image_transform, shuffle=True, delimeter=',',
image_folder='images/',
capacity=10)
+ data.start()
imgs, labels = data.next()
for idx in range(imgs.shape[0]):
img = Image.fromarray(imgs[idx].astype(np.uint8).transpose(1, 2, 0),