Posted to commits@mxnet.apache.org by sk...@apache.org on 2018/08/30 04:11:39 UTC

[incubator-mxnet] branch master updated: Fix speech recognition example (#12291)

This is an automated email from the ASF dual-hosted git repository.

skm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
     new e456dc4  Fix speech recognition example (#12291)
e456dc4 is described below

commit e456dc45ce781bfb08a71d0d2e2b87fcb98250c7
Author: Vandana Kannan <va...@users.noreply.github.com>
AuthorDate: Wed Aug 29 21:11:18 2018 -0700

    Fix speech recognition example (#12291)
---
 example/speech_recognition/README.md      |  4 ++--
 example/speech_recognition/deepspeech.cfg |  5 +++--
 example/speech_recognition/default.cfg    |  2 +-
 example/speech_recognition/singleton.py   | 26 ++++++--------------------
 example/speech_recognition/stt_metric.py  |  9 +++++++--
 example/speech_recognition/train.py       | 21 ++++++++++-----------
 6 files changed, 29 insertions(+), 38 deletions(-)

diff --git a/example/speech_recognition/README.md b/example/speech_recognition/README.md
index 00d1666..f95fddf 100644
--- a/example/speech_recognition/README.md
+++ b/example/speech_recognition/README.md
@@ -19,9 +19,9 @@ With rich functionalities and convenience explained above, you can build your ow
 ## **Environments**
 - MXNet version: 0.9.5+
 - GPU memory size: 2.4GB+
-- Install tensorboard for logging
+- Install mxboard for logging
 <pre>
-<code>pip install tensorboard</code>
+<code>pip install mxboard</code>
 </pre>  
 
 - [SoundFile](https://pypi.python.org/pypi/SoundFile/0.8.1) for audio preprocessing (If encounter errors about libsndfile, follow [this tutorial](http://www.linuxfromscratch.org/blfs/view/svn/multimedia/libsndfile.html).)
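
A quick way to confirm the new logging dependency from the README change is in place after running pip install mxboard (a minimal sketch; the import is the same one train.py now uses):

    # Check that mxboard is importable; train.py imports SummaryWriter from it.
    from mxboard import SummaryWriter
    print(SummaryWriter)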
diff --git a/example/speech_recognition/deepspeech.cfg b/example/speech_recognition/deepspeech.cfg
index ec3af04..69894ae 100644
--- a/example/speech_recognition/deepspeech.cfg
+++ b/example/speech_recognition/deepspeech.cfg
@@ -26,14 +26,15 @@ prefix = deep_bucket
 # when mode is load or predict, model will be loaded from the file name with model_file under checkpoints
 model_file = deep_bucketn_epoch0n_batch-0018
 batch_size = 12
-#batch_size=4
+# use batch_size 4 with a single GPU
+#batch_size = 4
 # log will be saved by the log_filename
 log_filename = deep_bucket.log
 # checkpoint set n to save checkpoints after n epoch
 save_checkpoint_every_n_epoch = 1
 save_checkpoint_every_n_batch = 3000
 is_bi_graphemes = True
-tensorboard_log_dir = tblog/deep_bucket
+mxboard_log_dir = mxlog/deep_bucket
 # if random_seed is -1 then it gets random seed from timestamp
 mx_random_seed = -1
 random_seed = -1
diff --git a/example/speech_recognition/default.cfg b/example/speech_recognition/default.cfg
index e4beb83..b0869a9 100644
--- a/example/speech_recognition/default.cfg
+++ b/example/speech_recognition/default.cfg
@@ -31,7 +31,7 @@ log_filename = test.log
 save_checkpoint_every_n_epoch = 20
 save_checkpoint_every_n_batch = 1000
 is_bi_graphemes = False
-tensorboard_log_dir = tblog/libri_sample
+mxboard_log_dir = mxlog/libri_sample
 # if random_seed is -1 then it gets random seed from timestamp
 mx_random_seed = 1234
 random_seed = 1234
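
Both config files rename the logging key from tensorboard_log_dir to mxboard_log_dir. The training script reads it with the standard ConfigParser interface from the [common] section (see the args.config.get('common', 'mxboard_log_dir') call in the train.py diff below); a minimal stand-alone sketch of that lookup, assuming deepspeech.cfg sits in the working directory:

    # Read the renamed logging key the same way train.py does.
    try:
        import configparser                  # Python 3
    except ImportError:
        import ConfigParser as configparser  # Python 2

    config = configparser.ConfigParser()
    config.read('deepspeech.cfg')
    mxlog_dir = config.get('common', 'mxboard_log_dir')  # e.g. mxlog/deep_bucket
    print(mxlog_dir)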
diff --git a/example/speech_recognition/singleton.py b/example/speech_recognition/singleton.py
index 1d68edf..01717e4 100644
--- a/example/speech_recognition/singleton.py
+++ b/example/speech_recognition/singleton.py
@@ -19,9 +19,9 @@ from __future__ import print_function
 import logging as log
 
 class Singleton:
-    def __init__(self, decrated):
-        log.debug("Singleton Init %s" % decrated)
-        self._decorated = decrated
+    def __init__(self, decorated):
+        log.debug("Singleton Init %s" % decorated)
+        self._decorated = decorated
 
     def getInstance(self):
         try:
@@ -30,25 +30,11 @@ class Singleton:
             self._instance = self._decorated()
             return self._instance
 
-    def __new__(class_, *args, **kwargs):
+    def __new__(cls, *args, **kwargs):
         print("__new__")
-        class_.instances[class_] = super(Singleton, class_).__new__(class_, *args, **kwargs)
-        return class_.instances[class_]
+        cls._instance = super(Singleton, cls).__new__(cls, *args, **kwargs)
+        return cls._instance
 
     def __call__(self):
         raise TypeError("Singletons must be accessed through 'getInstance()'")
 
-
-class SingletonInstane:
-  __instance = None
-
-  @classmethod
-  def __getInstance(cls):
-    return cls.__instance
-
-  @classmethod
-  def instance(cls, *args, **kargs):
-    cls.__instance = cls(*args, **kargs)
-    cls.instance = cls.__getInstance
-    return cls.__instance
-
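
The singleton.py changes fix the misspelled constructor argument, store the shared instance on the class, and drop the unused SingletonInstane helper. For reference, a simplified self-contained sketch of the decorator pattern the cleaned-up class follows (not the file's exact code; ConfigStore is a hypothetical class used only for illustration):

    class Singleton:
        """Class decorator exposing one shared instance via getInstance()."""
        def __init__(self, decorated):
            self._decorated = decorated

        def getInstance(self):
            # Create the wrapped class lazily on first access, then reuse it.
            try:
                return self._instance
            except AttributeError:
                self._instance = self._decorated()
                return self._instance

        def __call__(self):
            raise TypeError("Singletons must be accessed through 'getInstance()'")

    @Singleton
    class ConfigStore:        # hypothetical example class
        def __init__(self):
            self.values = {}

    a = ConfigStore.getInstance()
    b = ConfigStore.getInstance()
    assert a is b             # both names refer to the same shared instance

Calling ConfigStore() directly raises TypeError, matching the __call__ guard kept in the patched file.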
diff --git a/example/speech_recognition/stt_metric.py b/example/speech_recognition/stt_metric.py
index fc1916b..ec74fc0 100644
--- a/example/speech_recognition/stt_metric.py
+++ b/example/speech_recognition/stt_metric.py
@@ -47,6 +47,7 @@ class STTMetric(mx.metric.EvalMetric):
         self.total_ctc_loss = 0.
         self.batch_loss = 0.
         self.is_logging = is_logging
+
     def update(self, labels, preds):
         check_label_shapes(labels, preds)
         if self.is_logging:
@@ -83,10 +84,15 @@ class STTMetric(mx.metric.EvalMetric):
                     if self.is_logging:
                         log.info("loss: %f " % loss)
         self.total_ctc_loss += self.batch_loss
+
     def get_batch_loss(self):
         return self.batch_loss
+
     def get_name_value(self):
-        total_cer = float(self.total_l_dist) / float(self.total_n_label)
+        try:
+            total_cer = float(self.total_l_dist) / float(self.total_n_label)
+        except ZeroDivisionError:
+            total_cer = float('inf')
 
         return total_cer, self.total_n_label, self.total_l_dist, self.total_ctc_loss
 
@@ -244,4 +250,3 @@ def char_match_2way(label, pred):
     val = val1_max if val1_max > val2_max else val2_max
     val_matched = val1_max_matched if val1_max > val2_max else val2_max_matched
     return val, val_matched, n_whole_label
-
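
The stt_metric.py change guards the character error rate against a zero label count (e.g. before any batch has been processed). A minimal stand-alone sketch of the same guard:

    def character_error_rate(total_l_dist, total_n_label):
        """Edit distance over label count; inf when no labels have been seen,
        mirroring the ZeroDivisionError guard added to get_name_value()."""
        try:
            return float(total_l_dist) / float(total_n_label)
        except ZeroDivisionError:
            return float('inf')

    print(character_error_rate(12, 100))  # 0.12
    print(character_error_rate(0, 0))     # inf instead of an unhandled exception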
diff --git a/example/speech_recognition/train.py b/example/speech_recognition/train.py
index 0d04e4e..b1ae50b 100644
--- a/example/speech_recognition/train.py
+++ b/example/speech_recognition/train.py
@@ -16,15 +16,14 @@
 # under the License.
 
 import sys
-
+import json
 sys.path.insert(0, "../../python")
 import os.path
+#mxboard setting
+from mxboard import SummaryWriter
 import mxnet as mx
 from config_util import get_checkpoint_path, parse_contexts
 from stt_metric import STTMetric
-#tensorboard setting
-from tensorboard import SummaryWriter
-import json
 from stt_bucketing_module import STTBucketingModule
 
 
@@ -65,7 +64,7 @@ def do_training(args, module, data_train, data_val, begin_epoch=0):
     contexts = parse_contexts(args)
     num_gpu = len(contexts)
     eval_metric = STTMetric(batch_size=batch_size, num_gpu=num_gpu, is_logging=enable_logging_validation_metric,is_epoch_end=True)
-    # tensorboard setting
+    # mxboard setting
     loss_metric = STTMetric(batch_size=batch_size, num_gpu=num_gpu, is_logging=enable_logging_train_metric,is_epoch_end=False)
 
     optimizer = args.config.get('optimizer', 'optimizer')
@@ -131,9 +130,9 @@ def do_training(args, module, data_train, data_val, begin_epoch=0):
         data_train.reset()
         data_train.is_first_epoch = True
 
-    #tensorboard setting
-    tblog_dir = args.config.get('common', 'tensorboard_log_dir')
-    summary_writer = SummaryWriter(tblog_dir)
+    #mxboard setting
+    mxlog_dir = args.config.get('common', 'mxboard_log_dir')
+    summary_writer = SummaryWriter(mxlog_dir)
 
     while True:
 
@@ -144,7 +143,7 @@ def do_training(args, module, data_train, data_val, begin_epoch=0):
         for nbatch, data_batch in enumerate(data_train):
             module.forward_backward(data_batch)
             module.update()
-            # tensorboard setting
+            # mxboard setting
             if (nbatch + 1) % show_every == 0:
                 module.update_metric(loss_metric, data_batch.label)
             #summary_writer.add_scalar('loss batch', loss_metric.get_batch_loss(), nbatch)
@@ -160,7 +159,7 @@ def do_training(args, module, data_train, data_val, begin_epoch=0):
             module.forward(data_batch, is_train=True)
             module.update_metric(eval_metric, data_batch.label)
 
-        # tensorboard setting
+        # mxboard setting
         val_cer, val_n_label, val_l_dist, _ = eval_metric.get_name_value()
         log.info("Epoch[%d] val cer=%f (%d / %d)", n_epoch, val_cer, int(val_n_label - val_l_dist), val_n_label)
         curr_acc = val_cer
@@ -170,7 +169,7 @@ def do_training(args, module, data_train, data_val, begin_epoch=0):
         data_train.reset()
         data_train.is_first_epoch = False
 
-        # tensorboard setting
+        # mxboard setting
         train_cer, train_n_label, train_l_dist, train_ctc_loss = loss_metric.get_name_value()
         summary_writer.add_scalar('loss epoch', train_ctc_loss, n_epoch)
         summary_writer.add_scalar('CER train', train_cer, n_epoch)
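
For readers wiring this up outside the example, the epoch-level logging pattern train.py now follows with mxboard looks roughly like the sketch below (the metric values are placeholders, not real training output):

    from mxboard import SummaryWriter

    # Per-epoch scalar logging as in the loop above; the log directory
    # normally comes from the mxboard_log_dir config key.
    summary_writer = SummaryWriter('mxlog/deep_bucket')
    for n_epoch in range(3):
        train_ctc_loss = 10.0 / (n_epoch + 1)  # placeholder value
        train_cer = 0.5 / (n_epoch + 1)        # placeholder value
        summary_writer.add_scalar('loss epoch', train_ctc_loss, n_epoch)
        summary_writer.add_scalar('CER train', train_cer, n_epoch)
    summary_writer.close()

The event files can then be inspected by pointing TensorBoard at the log directory (tensorboard --logdir=mxlog/deep_bucket).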