You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by GitBox <gi...@apache.org> on 2018/03/14 04:19:00 UTC

[GitHub] sxjscience commented on a change in pull request #10096: Fix multi worker

sxjscience commented on a change in pull request #10096: Fix multi worker
URL: https://github.com/apache/incubator-mxnet/pull/10096#discussion_r174349573
 
 

 ##########
 File path: python/mxnet/gluon/data/dataloader.py
 ##########
 @@ -119,6 +119,71 @@ def worker_loop(dataset, key_queue, data_queue, batchify_fn):
         batch = batchify_fn([dataset[i] for i in samples])
         data_queue.put((idx, batch))
 
+class _MultiWorkerIter(object):
+    """Internal multi-worker iterator for DataLoader."""
+    def __init__(self, num_workers, dataset, batchify_fn, batch_sampler):
+        assert num_workers > 0, "_MultiWorkerIter is not for {} workers".format(num_workers)
+        self._num_workers = num_workers
+        self._dataset = dataset
+        self._batchify_fn = batchify_fn
+        self._batch_sampler = batch_sampler
+        self._key_queue = Queue()
+        self._data_queue = Queue(2*self._num_workers)
+        self._data_buffer = {}
+        self._index = 0
+        self._shutdown = False
+
+        workers = []
+        for _ in range(self._num_workers):
+            worker = multiprocessing.Process(
+                target=worker_loop,
+                args=(self._dataset, self._key_queue, self._data_queue, self._batchify_fn))
+            worker.daemon = True
+            worker.start()
+            workers.append(worker)
+
+        for idx, batch in enumerate(self._batch_sampler):
+            self._key_queue.put((idx, batch))
 
 Review comment:
   We may still need to revise this logic in a later PR so that jobs are pushed into the key_queue dynamically, rather than enqueuing every batch up front as this loop does.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services