Posted to commits@mxnet.apache.org by GitBox <gi...@apache.org> on 2018/08/30 04:11:19 UTC

[GitHub] sandeep-krishnamurthy closed pull request #12291: [MXNET-817] Fixes to speech recognition example

sandeep-krishnamurthy closed pull request #12291: [MXNET-817] Fixes to speech recognition example
URL: https://github.com/apache/incubator-mxnet/pull/12291

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

diff --git a/example/speech_recognition/README.md b/example/speech_recognition/README.md
index 00d16660240..f95fddf2103 100644
--- a/example/speech_recognition/README.md
+++ b/example/speech_recognition/README.md
@@ -19,9 +19,9 @@ With rich functionalities and convenience explained above, you can build your ow
 ## **Environments**
 - MXNet version: 0.9.5+
 - GPU memory size: 2.4GB+
-- Install tensorboard for logging
+- Install mxboard for logging
 <pre>
-<code>pip install tensorboard</code>
+<code>pip install mxboard</code>
 </pre>  
 
 - [SoundFile](https://pypi.python.org/pypi/SoundFile/0.8.1) for audio preprocessing (If encounter errors about libsndfile, follow [this tutorial](http://www.linuxfromscratch.org/blfs/view/svn/multimedia/libsndfile.html).)
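
For readers following this README change, here is a minimal sketch of
mxboard's logging API (the directory name and tag are illustrative;
mxboard writes TensorBoard-compatible event files):

    from mxboard import SummaryWriter

    # Log one scalar per step; event files land under the given directory.
    sw = SummaryWriter(logdir='./mxlog')   # directory name is illustrative
    for step in range(10):
        sw.add_scalar(tag='loss', value=1.0 / (step + 1), global_step=step)
    sw.close()

The resulting logs can then be inspected with TensorBoard pointed at that
directory (tensorboard --logdir ./mxlog).
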
diff --git a/example/speech_recognition/deepspeech.cfg b/example/speech_recognition/deepspeech.cfg
index ec3af045958..69894ae7d64 100644
--- a/example/speech_recognition/deepspeech.cfg
+++ b/example/speech_recognition/deepspeech.cfg
@@ -26,14 +26,15 @@ prefix = deep_bucket
 # when mode is load or predict, model will be loaded from the file name with model_file under checkpoints
 model_file = deep_bucketn_epoch0n_batch-0018
 batch_size = 12
-#batch_size=4
+#use batch_size 4 with single GPU
+#batch_size = 4
 # log will be saved by the log_filename
 log_filename = deep_bucket.log
 # checkpoint set n to save checkpoints after n epoch
 save_checkpoint_every_n_epoch = 1
 save_checkpoint_every_n_batch = 3000
 is_bi_graphemes = True
-tensorboard_log_dir = tblog/deep_bucket
+mxboard_log_dir = mxlog/deep_bucket
 # if random_seed is -1 then it gets random seed from timestamp
 mx_random_seed = -1
 random_seed = -1
diff --git a/example/speech_recognition/default.cfg b/example/speech_recognition/default.cfg
index e4beb83d32d..b0869a9dad2 100644
--- a/example/speech_recognition/default.cfg
+++ b/example/speech_recognition/default.cfg
@@ -31,7 +31,7 @@ log_filename = test.log
 save_checkpoint_every_n_epoch = 20
 save_checkpoint_every_n_batch = 1000
 is_bi_graphemes = False
-tensorboard_log_dir = tblog/libri_sample
+mxboard_log_dir = mxlog/libri_sample
 # if random_seed is -1 then it gets random seed from timestamp
 mx_random_seed = 1234
 random_seed = 1234
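
Both config files rename the key tensorboard_log_dir to mxboard_log_dir;
train.py (further below) reads it from the [common] section. A minimal
sketch of that lookup, assuming the files use standard INI syntax parsed
with Python's configparser:

    import configparser

    config = configparser.ConfigParser()
    config.read('deepspeech.cfg')
    # The same lookup train.py performs after this change:
    mxlog_dir = config.get('common', 'mxboard_log_dir')  # 'mxlog/deep_bucket'
    print(mxlog_dir)
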
diff --git a/example/speech_recognition/singleton.py b/example/speech_recognition/singleton.py
index 1d68edfb3ca..01717e4df06 100644
--- a/example/speech_recognition/singleton.py
+++ b/example/speech_recognition/singleton.py
@@ -19,9 +19,9 @@
 import logging as log
 
 class Singleton:
-    def __init__(self, decrated):
-        log.debug("Singleton Init %s" % decrated)
-        self._decorated = decrated
+    def __init__(self, decorated):
+        log.debug("Singleton Init %s" % decorated)
+        self._decorated = decorated
 
     def getInstance(self):
         try:
@@ -30,25 +30,11 @@ def getInstance(self):
             self._instance = self._decorated()
             return self._instance
 
-    def __new__(class_, *args, **kwargs):
+    def __new__(cls, *args, **kwargs):
         print("__new__")
-        class_.instances[class_] = super(Singleton, class_).__new__(class_, *args, **kwargs)
-        return class_.instances[class_]
+        cls._instance = super(Singleton, cls).__new__(cls, *args, **kwargs)
+        return cls._instance
 
     def __call__(self):
         raise TypeError("Singletons must be accessed through 'getInstance()'")
 
-
-class SingletonInstane:
-  __instance = None
-
-  @classmethod
-  def __getInstance(cls):
-    return cls.__instance
-
-  @classmethod
-  def instance(cls, *args, **kargs):
-    cls.__instance = cls(*args, **kargs)
-    cls.instance = cls.__getInstance
-    return cls.__instance
-
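
The fixed class is a decorator-style singleton: the decorated class is
replaced by a Singleton wrapper, and all access goes through getInstance().
A simplified, runnable sketch of the same pattern (the __new__ override is
omitted for brevity, and AppConfig is a hypothetical example class, not
part of the PR):

    class Singleton:
        """Decorator restricting the wrapped class to one shared instance."""
        def __init__(self, decorated):
            self._decorated = decorated

        def getInstance(self):
            # Create the instance lazily on first access, then reuse it.
            try:
                return self._instance
            except AttributeError:
                self._instance = self._decorated()
                return self._instance

        def __call__(self):
            raise TypeError("Singletons must be accessed through 'getInstance()'")

    @Singleton
    class AppConfig:          # hypothetical example class
        def __init__(self):
            self.value = 42

    a = AppConfig.getInstance()
    b = AppConfig.getInstance()
    assert a is b             # both names refer to the same instance
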
diff --git a/example/speech_recognition/stt_metric.py b/example/speech_recognition/stt_metric.py
index fc1916b40c3..ec74fc063dc 100644
--- a/example/speech_recognition/stt_metric.py
+++ b/example/speech_recognition/stt_metric.py
@@ -47,6 +47,7 @@ def __init__(self, batch_size, num_gpu, is_epoch_end=False, is_logging=True):
         self.total_ctc_loss = 0.
         self.batch_loss = 0.
         self.is_logging = is_logging
+
     def update(self, labels, preds):
         check_label_shapes(labels, preds)
         if self.is_logging:
@@ -83,10 +84,15 @@ def update(self, labels, preds):
                     if self.is_logging:
                         log.info("loss: %f " % loss)
         self.total_ctc_loss += self.batch_loss
+
     def get_batch_loss(self):
         return self.batch_loss
+
     def get_name_value(self):
-        total_cer = float(self.total_l_dist) / float(self.total_n_label)
+        try:
+            total_cer = float(self.total_l_dist) / float(self.total_n_label)
+        except ZeroDivisionError:
+            total_cer = float('inf')
 
         return total_cer, self.total_n_label, self.total_l_dist, self.total_ctc_loss
 
@@ -244,4 +250,3 @@ def char_match_2way(label, pred):
     val = val1_max if val1_max > val2_max else val2_max
     val_matched = val1_max_matched if val1_max > val2_max else val2_max_matched
     return val, val_matched, n_whole_label
-
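
The new try/except in get_name_value() keeps the metric from crashing when
no label characters have been accumulated yet. The guard in isolation, as a
standalone sketch (the function name is illustrative):

    def character_error_rate(total_l_dist, total_n_label):
        # CER = accumulated edit distance / accumulated label length.
        # Report infinity, rather than raising ZeroDivisionError,
        # when no labels have been counted yet.
        try:
            return float(total_l_dist) / float(total_n_label)
        except ZeroDivisionError:
            return float('inf')
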
diff --git a/example/speech_recognition/train.py b/example/speech_recognition/train.py
index 0d04e4e47a5..b1ae50b0755 100644
--- a/example/speech_recognition/train.py
+++ b/example/speech_recognition/train.py
@@ -16,15 +16,14 @@
 # under the License.
 
 import sys
-
+import json
 sys.path.insert(0, "../../python")
 import os.path
+#mxboard setting
+from mxboard import SummaryWriter
 import mxnet as mx
 from config_util import get_checkpoint_path, parse_contexts
 from stt_metric import STTMetric
-#tensorboard setting
-from tensorboard import SummaryWriter
-import json
 from stt_bucketing_module import STTBucketingModule
 
 
@@ -65,7 +64,7 @@ def do_training(args, module, data_train, data_val, begin_epoch=0):
     contexts = parse_contexts(args)
     num_gpu = len(contexts)
     eval_metric = STTMetric(batch_size=batch_size, num_gpu=num_gpu, is_logging=enable_logging_validation_metric,is_epoch_end=True)
-    # tensorboard setting
+    # mxboard setting
     loss_metric = STTMetric(batch_size=batch_size, num_gpu=num_gpu, is_logging=enable_logging_train_metric,is_epoch_end=False)
 
     optimizer = args.config.get('optimizer', 'optimizer')
@@ -131,9 +130,9 @@ def reset_optimizer(force_init=False):
         data_train.reset()
         data_train.is_first_epoch = True
 
-    #tensorboard setting
-    tblog_dir = args.config.get('common', 'tensorboard_log_dir')
-    summary_writer = SummaryWriter(tblog_dir)
+    #mxboard setting
+    mxlog_dir = args.config.get('common', 'mxboard_log_dir')
+    summary_writer = SummaryWriter(mxlog_dir)
 
     while True:
 
@@ -144,7 +143,7 @@ def reset_optimizer(force_init=False):
         for nbatch, data_batch in enumerate(data_train):
             module.forward_backward(data_batch)
             module.update()
-            # tensorboard setting
+            # mxboard setting
             if (nbatch + 1) % show_every == 0:
                 module.update_metric(loss_metric, data_batch.label)
             #summary_writer.add_scalar('loss batch', loss_metric.get_batch_loss(), nbatch)
@@ -160,7 +159,7 @@ def reset_optimizer(force_init=False):
             module.forward(data_batch, is_train=True)
             module.update_metric(eval_metric, data_batch.label)
 
-        # tensorboard setting
+        # mxboard setting
         val_cer, val_n_label, val_l_dist, _ = eval_metric.get_name_value()
         log.info("Epoch[%d] val cer=%f (%d / %d)", n_epoch, val_cer, int(val_n_label - val_l_dist), val_n_label)
         curr_acc = val_cer
@@ -170,7 +169,7 @@ def reset_optimizer(force_init=False):
         data_train.reset()
         data_train.is_first_epoch = False
 
-        # tensorboard setting
+        # mxboard setting
         train_cer, train_n_label, train_l_dist, train_ctc_loss = loss_metric.get_name_value()
         summary_writer.add_scalar('loss epoch', train_ctc_loss, n_epoch)
         summary_writer.add_scalar('CER train', train_cer, n_epoch)
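
Putting the train.py changes together: the SummaryWriter is built from the
configured directory and receives one scalar per metric per epoch. A
minimal, self-contained sketch (the tag and variable names come from the
diff above; the loop and metric values are placeholders):

    from mxboard import SummaryWriter

    mxlog_dir = 'mxlog/deep_bucket'            # value of the mxboard_log_dir key
    summary_writer = SummaryWriter(mxlog_dir)

    for n_epoch in range(2):                   # stand-in for the training loop
        train_ctc_loss, train_cer = 42.0, 0.3  # placeholder metric values
        summary_writer.add_scalar('loss epoch', train_ctc_loss, n_epoch)
        summary_writer.add_scalar('CER train', train_cer, n_epoch)
    summary_writer.close()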

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services