Posted to commits@singa.apache.org by wa...@apache.org on 2015/09/20 14:36:04 UTC

[4/7] incubator-singa git commit: SINGA-10 Add Support for Recurrent Neural Networks (RNN)

SINGA-10 Add Support for Recurrent Neural Networks (RNN)

* Move the functions of WordLayer into the EmbeddingLayer.
* Make the RNNLM DataLayer a subclass of both RNNLayer and singa::DataLayer.
* create_shard.cc wraps each WordRecord inside a singa::Record, and inserts
the singa::Record into the DataShard.
* Make the inheritance of base layer classes like InputLayer, NeuronLayer,
etc. from Layer virtual, to avoid compilation problems (duplicate Layer
sub-objects) if a future layer is declared to inherit from two base layer
classes; see the sketch after this list.
* Update the documentation on the website for RNNLM example.
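
As a plain C++ sketch of the virtual-inheritance point above (stand-in class
names, not actual SINGA code): a layer deriving from two base layer categories
keeps a single Layer sub-object only when both categories inherit virtually
from Layer.

#include <iostream>

// Stand-ins for the real classes; only the inheritance structure matters.
class Layer {
 public:
  virtual ~Layer() = default;
  virtual void Setup() { std::cout << "Layer::Setup\n"; }
};

// Each layer category inherits *virtually* from Layer.
class InputLayer : virtual public Layer { };
class NeuronLayer : virtual public Layer { };

// With virtual inheritance there is exactly one Layer sub-object here;
// without it, the cast and the Setup() call below would be ambiguous
// and the program would not compile.
class TwoCategoryLayer : public InputLayer, public NeuronLayer { };

int main() {
  TwoCategoryLayer layer;
  Layer* base = &layer;  // unambiguous
  base->Setup();
  return 0;
}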

The training speed still needs to be optimized. The PPL is similar to that
from the RNNLM Toolkit.
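
For reference, the "ppl before exp" metric printed during training can be
converted into the perplexity as PPL = 10^(ppl before exp), per the comment
added in rnnlm.cc. A minimal sketch, assuming the metric is the averaged
-log10 word probability:

#include <cmath>
#include <cstdio>

int main() {
  // Hypothetical value taken from a training log line.
  double ppl_before_exp = 2.33;
  double ppl = std::pow(10.0, ppl_before_exp);  // PPL = 10^(ppl before exp)
  std::printf("PPL = %.2f\n", ppl);
  return 0;
}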


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/ba3b1a5c
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/ba3b1a5c
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/ba3b1a5c

Branch: refs/heads/master
Commit: ba3b1a5c70a813090ede30b0d5c95caa08e94da8
Parents: c1c6a2e
Author: Wei Wang <wa...@comp.nus.edu.sg>
Authored: Fri Sep 18 16:40:32 2015 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Fri Sep 18 16:46:41 2015 +0800

----------------------------------------------------------------------
 examples/rnnlm/create_shard.cc       | 601 ++++++++++++++++--------------
 examples/rnnlm/job.conf              |  97 +++--
 examples/rnnlm/main.cc               |  11 +-
 examples/rnnlm/rnnlm.cc              | 114 +++---
 examples/rnnlm/rnnlm.h               |  43 +--
 examples/rnnlm/rnnlm.proto           |  17 +-
 include/neuralnet/connection_layer.h |   2 +-
 include/neuralnet/input_layer.h      |   2 +-
 include/neuralnet/layer.h            |   8 +-
 src/neuralnet/input_layer.cc         |  12 +-
 src/proto/common.proto               |   2 +
 src/trainer/worker.cc                |   8 +-
 12 files changed, 453 insertions(+), 464 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ba3b1a5c/examples/rnnlm/create_shard.cc
----------------------------------------------------------------------
diff --git a/examples/rnnlm/create_shard.cc b/examples/rnnlm/create_shard.cc
index 4da365b..536ce1f 100644
--- a/examples/rnnlm/create_shard.cc
+++ b/examples/rnnlm/create_shard.cc
@@ -32,6 +32,28 @@ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
+
+/************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*   http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied.  See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
 //
 // This code creates DataShard for RNNLM dataset.
 // The RNNLM dataset could be downloaded at
@@ -63,9 +85,9 @@ using std::min;
 using singa::DataShard;
 
 struct vocab_word {
-    int cn;
-    char word[MAX_STRING];
-    int class_index;
+  int cn;
+  char word[MAX_STRING];
+  int class_index;
 };
 
 struct vocab_word *vocab;
@@ -87,362 +109,363 @@ int valid_mode;
 int test_mode;
 
 unsigned int getWordHash(char *word) {
-    unsigned int hash, a;
+  unsigned int hash, a;
 
-    hash = 0;
-    for (a = 0; a < strlen(word); a++) hash = hash * 237 + word[a];
-    hash = hash % vocab_hash_size;
+  hash = 0;
+  for (a = 0; a < strlen(word); a++) hash = hash * 237 + word[a];
+  hash = hash % vocab_hash_size;
 
-    return hash;
+  return hash;
 }
 
 int searchVocab(char *word) {
-    int a;
-    unsigned int hash;
+  int a;
+  unsigned int hash;
 
-    hash = getWordHash(word);
+  hash = getWordHash(word);
 
-    if (vocab_hash[hash] == -1) return -1;
-    if (!strcmp(word, vocab[vocab_hash[hash]].word)) return vocab_hash[hash];
+  if (vocab_hash[hash] == -1) return -1;
+  if (!strcmp(word, vocab[vocab_hash[hash]].word)) return vocab_hash[hash];
 
-    for (a = 0; a < vocab_size; a++) {   // search in vocabulary
-        if (!strcmp(word, vocab[a].word)) {
-            vocab_hash[hash] = a;
-            return a;
-        }
+  for (a = 0; a < vocab_size; a++) {   // search in vocabulary
+    if (!strcmp(word, vocab[a].word)) {
+      vocab_hash[hash] = a;
+      return a;
     }
+  }
 
-    return -1;   // return OOV if not found
+  return -1;   // return OOV if not found
 }
 
 int addWordToVocab(char *word) {
-    unsigned int hash;
+  unsigned int hash;
 
-    snprintf(vocab[vocab_size].word, strlen(word)+1, "%s", word);
-    vocab[vocab_size].cn = 0;
-    vocab_size++;
+  snprintf(vocab[vocab_size].word, strlen(word)+1, "%s", word);
+  vocab[vocab_size].cn = 0;
+  vocab_size++;
 
-    if (vocab_size + 2 >= vocab_max_size) {   // reallocate memory if needed
-        vocab_max_size += 100;
-        vocab = (struct vocab_word *) realloc(
-                               vocab,
-                               vocab_max_size * sizeof(struct vocab_word));
-    }
+  if (vocab_size + 2 >= vocab_max_size) {   // reallocate memory if needed
+    vocab_max_size += 100;
+    vocab = (struct vocab_word *) realloc(
+        vocab,
+        vocab_max_size * sizeof(struct vocab_word));
+  }
 
-    hash = getWordHash(word);
-    vocab_hash[hash] = vocab_size - 1;
+  hash = getWordHash(word);
+  vocab_hash[hash] = vocab_size - 1;
 
-    return vocab_size - 1;
+  return vocab_size - 1;
 }
 
 void readWord(char *word, FILE *fin) {
-    int a = 0, ch;
-
-    while (!feof(fin)) {
-        ch = fgetc(fin);
-
-        if (ch == 13) continue;
-
-        if ((ch == ' ') || (ch == '\t') || (ch == '\n')) {
-            if (a > 0) {
-                if (ch == '\n') ungetc(ch, fin);
-                break;
-            }
-
-            if (ch == '\n') {
-                snprintf(word, strlen(NL_STRING) + 1,
-                         "%s", const_cast<char *>(NL_STRING));
-                return;
-            } else {
-                continue;
-            }
-        }
-
-        word[a] = static_cast<char>(ch);
-        a++;
-
-        if (a >= MAX_STRING) {
-            // printf("Too long word found!\n");   //truncate too long words
-            a--;
-        }
+  int a = 0, ch;
+
+  while (!feof(fin)) {
+    ch = fgetc(fin);
+
+    if (ch == 13) continue;
+
+    if ((ch == ' ') || (ch == '\t') || (ch == '\n')) {
+      if (a > 0) {
+        if (ch == '\n') ungetc(ch, fin);
+        break;
+      }
+
+      if (ch == '\n') {
+        snprintf(word, strlen(NL_STRING) + 1,
+            "%s", const_cast<char *>(NL_STRING));
+        return;
+      } else {
+        continue;
+      }
     }
-    word[a] = 0;
-}
 
-void sortVocab() {
-    int a, b, max;
-    vocab_word swap;
-
-    for (a = 1; a < vocab_size; a++) {
-        max = a;
-        for (b = a + 1; b < vocab_size; b++)
-            if (vocab[max].cn < vocab[b].cn) max = b;
+    word[a] = static_cast<char>(ch);
+    a++;
 
-        swap = vocab[max];
-        vocab[max] = vocab[a];
-        vocab[a] = swap;
+    if (a >= MAX_STRING) {
+      // printf("Too long word found!\n");   //truncate too long words
+      a--;
     }
+  }
+  word[a] = 0;
+}
+
+void sortVocab() {
+  int a, b, max;
+  vocab_word swap;
+
+  for (a = 1; a < vocab_size; a++) {
+    max = a;
+    for (b = a + 1; b < vocab_size; b++)
+      if (vocab[max].cn < vocab[b].cn) max = b;
+
+    swap = vocab[max];
+    vocab[max] = vocab[a];
+    vocab[a] = swap;
+  }
 }
 
 int learnVocabFromTrainFile() {
-    char word[MAX_STRING];
-    FILE *fin;
-    int a, i, train_wcn;
+  char word[MAX_STRING];
+  FILE *fin;
+  int a, i, train_wcn;
 
-    for (a = 0; a < vocab_hash_size; a++) vocab_hash[a] = -1;
+  for (a = 0; a < vocab_hash_size; a++) vocab_hash[a] = -1;
 
-    fin = fopen(train_file, "rb");
+  fin = fopen(train_file, "rb");
 
-    vocab_size = 0;
+  vocab_size = 0;
 
-    addWordToVocab(const_cast<char *>(NL_STRING));
+  addWordToVocab(const_cast<char *>(NL_STRING));
 
-    train_wcn = 0;
-    while (1) {
-        readWord(word, fin);
-        if (feof(fin)) break;
+  train_wcn = 0;
+  while (1) {
+    readWord(word, fin);
+    if (feof(fin)) break;
 
-        train_wcn++;
+    train_wcn++;
 
-        i = searchVocab(word);
-        if (i == -1) {
-            a = addWordToVocab(word);
-            vocab[a].cn = 1;
-        } else {
-            vocab[i].cn++;
-        }
+    i = searchVocab(word);
+    if (i == -1) {
+      a = addWordToVocab(word);
+      vocab[a].cn = 1;
+    } else {
+      vocab[i].cn++;
     }
+  }
 
-    sortVocab();
+  sortVocab();
 
-    if (debug_mode > 0) {
-        printf("Vocab size: %d\n", vocab_size);
-        printf("Words in train file: %d\n", train_wcn);
-    }
+  if (debug_mode > 0) {
+    printf("Vocab size: %d\n", vocab_size);
+    printf("Words in train file: %d\n", train_wcn);
+  }
 
-    fclose(fin);
-    return 0;
+  fclose(fin);
+  return 0;
 }
 
 int splitClasses() {
-    double df, dd;
-    int i, a, b;
-
-    df = 0;
-    dd = 0;
-    a = 0;
-    b = 0;
-
-    class_start = reinterpret_cast<int *>(calloc(class_size, sizeof(int)));
-    memset(class_start, 0x7f, sizeof(int) * class_size);
-    class_end = reinterpret_cast<int *>(calloc(class_size, sizeof(int)));
-    memset(class_end, 0, sizeof(int) * class_size);
-
-    if (old_classes) {    // old classes
-        for (i = 0; i < vocab_size; i++)
-            b += vocab[i].cn;
-        for (i = 0; i < vocab_size; i++) {
-            df += vocab[i].cn / static_cast<double>(b);
-            if (df > 1) df = 1;
-            if (df > (a + 1) / static_cast<double>(class_size)) {
-                vocab[i].class_index = a;
-                if (a < class_size - 1) a++;
-            } else {
-                vocab[i].class_index = a;
-            }
-        }
-    } else {            // new classes
-        for (i = 0; i < vocab_size; i++)
-            b += vocab[i].cn;
-        for (i = 0; i < vocab_size; i++)
-            dd += sqrt(vocab[i].cn / static_cast<double>(b));
-        for (i = 0; i < vocab_size; i++) {
-            df += sqrt(vocab[i].cn / static_cast<double>(b)) / dd;
-            if (df > 1) df = 1;
-            if (df > (a + 1) / static_cast<double>(class_size)) {
-                vocab[i].class_index = a;
-                if (a < class_size - 1) a++;
-            } else {
-                vocab[i].class_index = a;
-            }
-        }
+  double df, dd;
+  int i, a, b;
+
+  df = 0;
+  dd = 0;
+  a = 0;
+  b = 0;
+
+  class_start = reinterpret_cast<int *>(calloc(class_size, sizeof(int)));
+  memset(class_start, 0x7f, sizeof(int) * class_size);
+  class_end = reinterpret_cast<int *>(calloc(class_size, sizeof(int)));
+  memset(class_end, 0, sizeof(int) * class_size);
+
+  if (old_classes) {    // old classes
+    for (i = 0; i < vocab_size; i++)
+      b += vocab[i].cn;
+    for (i = 0; i < vocab_size; i++) {
+      df += vocab[i].cn / static_cast<double>(b);
+      if (df > 1) df = 1;
+      if (df > (a + 1) / static_cast<double>(class_size)) {
+        vocab[i].class_index = a;
+        if (a < class_size - 1) a++;
+      } else {
+        vocab[i].class_index = a;
+      }
     }
-
-    // after dividing classes, update class start and class end information
-    for (i = 0; i < vocab_size; i++)  {
-        a = vocab[i].class_index;
-        class_start[a] = min(i, class_start[a]);
-        class_end[a] = max(i + 1, class_end[a]);
+  } else {            // new classes
+    for (i = 0; i < vocab_size; i++)
+      b += vocab[i].cn;
+    for (i = 0; i < vocab_size; i++)
+      dd += sqrt(vocab[i].cn / static_cast<double>(b));
+    for (i = 0; i < vocab_size; i++) {
+      df += sqrt(vocab[i].cn / static_cast<double>(b)) / dd;
+      if (df > 1) df = 1;
+      if (df > (a + 1) / static_cast<double>(class_size)) {
+        vocab[i].class_index = a;
+        if (a < class_size - 1) a++;
+      } else {
+        vocab[i].class_index = a;
+      }
     }
-    return 0;
+  }
+
+  // after dividing classes, update class start and class end information
+  for (i = 0; i < vocab_size; i++)  {
+    a = vocab[i].class_index;
+    class_start[a] = min(i, class_start[a]);
+    class_end[a] = max(i + 1, class_end[a]);
+  }
+  return 0;
 }
 
 int init_class() {
-    // debug_mode = 1;
-    debug_mode = 0;
-    vocab_max_size = 100;  // largest length value for each word
-    vocab_size = 0;
-    vocab = (struct vocab_word *) calloc(vocab_max_size,
-                                         sizeof(struct vocab_word));
-    vocab_hash_size = 100000000;
-    vocab_hash = reinterpret_cast<int *>(calloc(vocab_hash_size, sizeof(int)));
-    old_classes = 1;
-
-    // read vocab
-    learnVocabFromTrainFile();
-
-    // split classes
-    splitClasses();
-
-    return 0;
+  // debug_mode = 1;
+  debug_mode = 0;
+  vocab_max_size = 100;  // largest length value for each word
+  vocab_size = 0;
+  vocab = (struct vocab_word *) calloc(vocab_max_size,
+      sizeof(struct vocab_word));
+  vocab_hash_size = 100000000;
+  vocab_hash = reinterpret_cast<int *>(calloc(vocab_hash_size, sizeof(int)));
+  old_classes = 1;
+
+  // read vocab
+  learnVocabFromTrainFile();
+
+  // split classes
+  splitClasses();
+
+  return 0;
 }
 
-int create_shard(const char *input_file, const char *output_file) {
-    DataShard dataShard(output_file, DataShard::kCreate);
-    singa::WordRecord wordRecord;
-
-    FILE *fin;
-    int a, i;
-    fin = fopen(input_file, "rb");
-
-    int wcnt = 0;
-    char str_buffer[BUFFER_LEN];
-    char word[MAX_STRING];
-    while (1) {
-        readWord(word, fin);
-        if (feof(fin)) break;
-        i = searchVocab(word);
-        if (i == -1) {
-            if (debug_mode) printf("unknown word [%s] detected!", word);
-        } else {
-            wordRecord.set_word(string(word));
-            wordRecord.set_word_index(i);
-            int class_idx = vocab[i].class_index;
-            wordRecord.set_class_index(class_idx);
-            wordRecord.set_class_start(class_start[class_idx]);
-            wordRecord.set_class_end(class_end[class_idx]);
-            int length = snprintf(str_buffer, BUFFER_LEN, "%05d", wcnt++);
-            dataShard.Insert(string(str_buffer, length), wordRecord);
-        }
+int create_shard(const char *input_file, const char *output) {
+  DataShard dataShard(output, DataShard::kCreate);
+  singa::Record record;
+  auto* wordRecord = record.MutableExtension(word);
+
+  FILE *fin;
+  int a, i;
+  fin = fopen(input_file, "rb");
+
+  int wcnt = 0;
+  char key[BUFFER_LEN];
+  char wordstr[MAX_STRING];
+  while (1) {
+    readWord(wordstr, fin);
+    if (feof(fin)) break;
+    i = searchVocab(wordstr);
+    if (i == -1) {
+      if (debug_mode) printf("unknown word [%s] detected!", wordstr);
+    } else {
+      wordRecord->set_word(string(wordstr));
+      wordRecord->set_word_index(i);
+      int class_idx = vocab[i].class_index;
+      wordRecord->set_class_index(class_idx);
+      wordRecord->set_class_start(class_start[class_idx]);
+      wordRecord->set_class_end(class_end[class_idx]);
+      int length = snprintf(key, BUFFER_LEN, "%05d", wcnt++);
+      dataShard.Insert(string(key, length), record);
     }
+  }
 
-    dataShard.Flush();
-    fclose(fin);
-    return 0;
+  dataShard.Flush();
+  fclose(fin);
+  return 0;
 }
 
 int argPos(char *str, int argc, char **argv) {
-    int a;
+  int a;
 
-    for (a = 1; a < argc; a++)
-        if (!strcmp(str, argv[a]))
-            return a;
+  for (a = 1; a < argc; a++)
+    if (!strcmp(str, argv[a]))
+      return a;
 
-    return -1;
+  return -1;
 }
 
 int main(int argc, char **argv) {
-    int i;
-    FILE *f;
-
-    // set debug mode
-    i = argPos(const_cast<char *>("-debug"), argc, argv);
-    if (i > 0) {
-        debug_mode = 1;
-        if (debug_mode > 0)
-            printf("debug mode: %d\n", debug_mode);
+  int i;
+  FILE *f;
+
+  // set debug mode
+  i = argPos(const_cast<char *>("-debug"), argc, argv);
+  if (i > 0) {
+    debug_mode = 1;
+    if (debug_mode > 0)
+      printf("debug mode: %d\n", debug_mode);
+  }
+
+  // search for train file
+  i = argPos(const_cast<char *>("-train"), argc, argv);
+  if (i > 0) {
+    if (i + 1 == argc) {
+      printf("ERROR: training data file not specified!\n");
+      return 0;
     }
 
-    // search for train file
-    i = argPos(const_cast<char *>("-train"), argc, argv);
-    if (i > 0) {
-        if (i + 1 == argc) {
-            printf("ERROR: training data file not specified!\n");
-            return 0;
-        }
-
-        snprintf(train_file, strlen(argv[i + 1])+1, "%s", argv[i + 1]);
-
-        if (debug_mode > 0)
-            printf("train file: %s\n", train_file);
-
-        f = fopen(train_file, "rb");
-        if (f == NULL) {
-            printf("ERROR: training data file not found!\n");
-            return 0;
-        }
-        fclose(f);
-    } else {
-        printf("ERROR: training data must be set.\n");
-    }
+    snprintf(train_file, strlen(argv[i + 1])+1, "%s", argv[i + 1]);
 
-    // search for valid file
-    i = argPos(const_cast<char *>("-valid"), argc, argv);
-    if (i > 0) {
-        if (i + 1 == argc) {
-            printf("ERROR: validating data file not specified!\n");
-            return 0;
-        }
-
-        snprintf(valid_file, strlen(argv[i + 1])+1, "%s", argv[i + 1]);
-
-        if (debug_mode > 0)
-            printf("valid file: %s\n", valid_file);
-
-        f = fopen(valid_file, "rb");
-        if (f == NULL) {
-            printf("ERROR: validating data file not found!\n");
-            return 0;
-        }
-        fclose(f);
-        valid_mode = 1;
-    }
+    if (debug_mode > 0)
+      printf("train file: %s\n", train_file);
 
-    // search for test file
-    i = argPos(const_cast<char *>("-test"), argc, argv);
-    if (i > 0) {
-        if (i + 1 == argc) {
-            printf("ERROR: testing data file not specified!\n");
-            return 0;
-        }
-
-        snprintf(test_file, strlen(argv[i + 1])+1, "%s", argv[i + 1]);
-
-        if (debug_mode > 0)
-            printf("test file: %s\n", test_file);
-
-        f = fopen(test_file, "rb");
-        if (f == NULL) {
-            printf("ERROR: testing data file not found!\n");
-            return 0;
-        }
-        fclose(f);
-        test_mode = 1;
+    f = fopen(train_file, "rb");
+    if (f == NULL) {
+      printf("ERROR: training data file not found!\n");
+      return 0;
+    }
+    fclose(f);
+  } else {
+    printf("ERROR: training data must be set.\n");
+  }
+
+  // search for valid file
+  i = argPos(const_cast<char *>("-valid"), argc, argv);
+  if (i > 0) {
+    if (i + 1 == argc) {
+      printf("ERROR: validating data file not specified!\n");
+      return 0;
     }
 
-    // search for class size
-    i = argPos(const_cast<char *>("-class_size"), argc, argv);
-    if (i > 0) {
-        if (i + 1 == argc) {
-            printf("ERROR: class size not specified!\n");
-            return 0;
-        }
+    snprintf(valid_file, strlen(argv[i + 1])+1, "%s", argv[i + 1]);
 
-        class_size = atoi(argv[i + 1]);
+    if (debug_mode > 0)
+      printf("valid file: %s\n", valid_file);
 
-        if (debug_mode > 0)
-            printf("class size: %d\n", class_size);
+    f = fopen(valid_file, "rb");
+    if (f == NULL) {
+      printf("ERROR: validating data file not found!\n");
+      return 0;
     }
-    if (class_size <= 0) {
-        printf("ERROR: no or invalid class size received!\n");
-        return 0;
+    fclose(f);
+    valid_mode = 1;
+  }
+
+  // search for test file
+  i = argPos(const_cast<char *>("-test"), argc, argv);
+  if (i > 0) {
+    if (i + 1 == argc) {
+      printf("ERROR: testing data file not specified!\n");
+      return 0;
     }
 
-    init_class();
+    snprintf(test_file, strlen(argv[i + 1])+1, "%s", argv[i + 1]);
 
-    create_shard(train_file, "train_shard");
-    if (valid_mode) create_shard(valid_file, "valid_shard");
-    if (test_mode) create_shard(test_file, "test_shard");
+    if (debug_mode > 0)
+      printf("test file: %s\n", test_file);
 
+    f = fopen(test_file, "rb");
+    if (f == NULL) {
+      printf("ERROR: testing data file not found!\n");
+      return 0;
+    }
+    fclose(f);
+    test_mode = 1;
+  }
+
+  // search for class size
+  i = argPos(const_cast<char *>("-class_size"), argc, argv);
+  if (i > 0) {
+    if (i + 1 == argc) {
+      printf("ERROR: class size not specified!\n");
+      return 0;
+    }
+
+    class_size = atoi(argv[i + 1]);
+
+    if (debug_mode > 0)
+      printf("class size: %d\n", class_size);
+  }
+  if (class_size <= 0) {
+    printf("ERROR: no or invalid class size received!\n");
     return 0;
+  }
+
+  init_class();
+
+  create_shard(train_file, "train_shard");
+  if (valid_mode) create_shard(valid_file, "valid_shard");
+  if (test_mode) create_shard(test_file, "test_shard");
+
+  return 0;
 }

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ba3b1a5c/examples/rnnlm/job.conf
----------------------------------------------------------------------
diff --git a/examples/rnnlm/job.conf b/examples/rnnlm/job.conf
index 98a4157..5779ff6 100644
--- a/examples/rnnlm/job.conf
+++ b/examples/rnnlm/job.conf
@@ -1,51 +1,38 @@
-cluster {
-  nworker_groups: 1
-  nserver_groups: 1
-  nservers_per_group: 1
-  nworkers_per_group: 1
-  nservers_per_procs: 1
-  nworkers_per_procs: 1
-  workspace: "examples/rnnlm/"
-}
-
-name: "recurrent-neural-network-language-model"
-#To scan the training file (71350) 30 times
-train_steps:214050
-#To scan the validation file (5829) once
-test_steps:583
-test_freq:7135
+name: "rnnlm"
+#To scan the training file (81350) 10 times
+train_steps:81350
+#To scan the validation file (6828) once
+test_steps:683
+test_freq:8135
 #disp_freq is specific to training
-disp_freq:1000
-
+disp_freq:8135
 train_one_batch {
-  alg: kBP
+alg: kBP
 }
-
 updater{
-   type: kSGD
-   #weight_decay:0.0000001
-   learning_rate {
-      type: kFixedStep
-      fixedstep_conf:{
+  type: kSGD
+  learning_rate {
+    type: kFixedStep
+    fixedstep_conf:{
       step:0
-      step:42810
-      step:49945
-      step:57080
-      step:64215
+      step:48810
+      step:56945
+      step:65080
+      step:73215
       step_lr:0.1
       step_lr:0.05
       step_lr:0.025
       step_lr:0.0125
       step_lr:0.00625
-      }
-   }
+    }
+  }
 }
 
 neuralnet {
 layer {
   name: "data"
-  user_type: "kRnnData"
-  [singa.input_conf] {
+  user_type: "kData"
+  [data_conf] {
     path: "examples/rnnlm/train_shard"
     max_window: 10
   }
@@ -54,8 +41,8 @@ layer {
 
 layer {
   name: "data"
-  user_type: "kRnnData"
-  [singa.input_conf] {
+  user_type: "kData"
+  [data_conf] {
     path: "examples/rnnlm/test_shard"
     max_window: 10
   }
@@ -63,25 +50,19 @@ layer {
 }
 
 layer{
-  name:"wordlayer"
-  user_type: "kWord"
+  name:"label"
+  user_type: "kLabel"
   srclayers: "data"
 }
 
 layer{
-  name:"labellayer"
-  user_type: "kRnnLabel"
-  srclayers: "data"
-}
-
-layer{
-  name: "embeddinglayer"
+  name: "embedding"
   user_type: "kEmbedding"
-  [singa.embedding_conf] {
+  srclayers: "data"
+  [embedding_conf] {
     word_dim: 15
     vocab_size: 3720
   }
-  srclayers: "wordlayer"
     param {
     name: "w1"
     init {
@@ -93,9 +74,9 @@ layer{
 }
 
 layer{
-  name: "hiddenlayer"
+  name: "hidden"
   user_type: "kHidden"
-  srclayers:"embeddinglayer"
+  srclayers:"embedding"
   param{
     name: "w2"
     init {
@@ -106,11 +87,11 @@ layer{
   }
 }
 layer{
-  name: "outputlayer"
-  user_type: "kOutput"
-  srclayers:"hiddenlayer"
-  srclayers:"labellayer"
-  [singa.output_conf] {
+  name: "loss"
+  user_type: "kLoss"
+  srclayers:"hidden"
+  srclayers:"label"
+  [loss_conf] {
     nclass:100
     vocab_size: 3720
   }
@@ -133,4 +114,12 @@ layer{
 }
 
 }
-
+cluster {
+  nworker_groups: 1
+  nserver_groups: 1
+  nservers_per_group: 1
+  nworkers_per_group: 1
+  nservers_per_procs: 1
+  nworkers_per_procs: 1
+  workspace: "examples/rnnlm/"
+}

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ba3b1a5c/examples/rnnlm/main.cc
----------------------------------------------------------------------
diff --git a/examples/rnnlm/main.cc b/examples/rnnlm/main.cc
index bf2a67a..87db06a 100644
--- a/examples/rnnlm/main.cc
+++ b/examples/rnnlm/main.cc
@@ -32,12 +32,11 @@ int main(int argc, char **argv) {
   bool resume = (resume_pos != -1);
 
   // register all layers for rnnlm
-  driver.RegisterLayer<singa::EmbeddingLayer, std::string>("kEmbedding");
-  driver.RegisterLayer<singa::HiddenLayer, std::string>("kHidden");
-  driver.RegisterLayer<singa::OutputLayer, std::string>("kOutput");
-  driver.RegisterLayer<singa::RnnDataLayer, std::string>("kRnnData");
-  driver.RegisterLayer<singa::WordLayer, std::string>("kWord");
-  driver.RegisterLayer<singa::RnnLabelLayer, std::string>("kRnnLabel");
+  driver.RegisterLayer<rnnlm::EmbeddingLayer, std::string>("kEmbedding");
+  driver.RegisterLayer<rnnlm::HiddenLayer, std::string>("kHidden");
+  driver.RegisterLayer<rnnlm::LossLayer, std::string>("kLoss");
+  driver.RegisterLayer<rnnlm::DataLayer, std::string>("kData");
+  driver.RegisterLayer<rnnlm::LabelLayer, std::string>("kLabel");
 
   singa::JobProto jobConf = driver.job_conf();
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ba3b1a5c/examples/rnnlm/rnnlm.cc
----------------------------------------------------------------------
diff --git a/examples/rnnlm/rnnlm.cc b/examples/rnnlm/rnnlm.cc
index 9cad077..0ad6dcd 100644
--- a/examples/rnnlm/rnnlm.cc
+++ b/examples/rnnlm/rnnlm.cc
@@ -20,22 +20,22 @@
 *************************************************************/
 #include <string>
 #include <algorithm>
-#include <vector>
 #include "mshadow/tensor.h"
 #include "mshadow/tensor_expr.h"
 #include "mshadow/cxxnet_op.h"
-#include "rnnlm.h"
-#include "rnnlm.pb.h"
+#include "./rnnlm.h"
+#include "./rnnlm.pb.h"
+
+namespace rnnlm {
+using std::vector;
+using std::string;
 
-namespace singa {
 using namespace mshadow;
 using mshadow::cpu;
-
 using mshadow::Shape;
 using mshadow::Shape1;
 using mshadow::Shape2;
 using mshadow::Tensor;
-// using mshadow::TensorContainer;
 
 inline Tensor<cpu, 2> RTensor2(Blob<float>* blob) {
   const vector<int>& shape = blob->shape();
@@ -50,34 +50,34 @@ inline Tensor<cpu, 1> RTensor1(Blob<float>* blob) {
 }
 
 
-/*******InputLayer**************/
-RnnDataLayer::~RnnDataLayer() {
+/*******DataLayer**************/
+DataLayer::~DataLayer() {
   if (shard_ != nullptr)
     delete shard_;
   shard_ = nullptr;
 }
 
-void RnnDataLayer::Setup(const LayerProto& proto, int npartitions) {
-  Layer::Setup(proto, npartitions);
-  shard_ = new DataShard(
-               proto.GetExtension(input_conf).path(),
-               DataShard::kRead);
+void DataLayer::Setup(const LayerProto& proto, int npartitions) {
+  RNNLayer::Setup(proto, npartitions);
+  shard_ = new singa::DataShard(
+               proto.GetExtension(data_conf).path(),
+               singa::DataShard::kRead);
   string key;
-  max_window_ = proto.GetExtension(input_conf).max_window();
+  max_window_ = proto.GetExtension(data_conf).max_window();
   records_.resize(max_window_ + 1);  // resize to # of records in data layer
   window_ = 0;
   shard_->Next(&key, &records_[window_]);
 }
 
-void RnnDataLayer::ComputeFeature(int flag, Metric *perf) {
+void DataLayer::ComputeFeature(int flag, Metric *perf) {
   CHECK(records_.size() <= shard_->Count());
   records_[0] = records_[window_];
   window_ = max_window_;
   for (int i = 1; i <= max_window_; i++) {
     string key;
     if (shard_->Next(&key, &records_[i])) {
-      if (records_[i].word_index() == 0) {
-        window_ = i;  // +1 ??
+      if (records_[i].GetExtension(word).word_index() == 0) {
+        window_ = i;
         break;
       }
     } else {
@@ -87,41 +87,24 @@ void RnnDataLayer::ComputeFeature(int flag, Metric *perf) {
   }
 }
 
-/*******WordLayer**************/
-void WordLayer::Setup(const LayerProto& proto, int npartitions) {
-  Layer::Setup(proto, npartitions);
-  CHECK_EQ(srclayers_.size(), 1);
-  int max_window = static_cast<RnnDataLayer*>(srclayers_[0])->max_window();
-  data_.Reshape(vector<int>{max_window});
-}
-
-void WordLayer::ComputeFeature(int flag, Metric *perf) {
-  auto records = static_cast<RnnDataLayer*>(srclayers_[0])->records();
-  float *word = data_.mutable_cpu_data();
-  window_ = static_cast<RNNLayer*>(srclayers_[0])->window();
-  for (int i = 0; i < window_; i++) {
-    word[i] = records[i].word_index();
-  }
-}
-
-
 /*******LabelLayer**************/
-void RnnLabelLayer::Setup(const LayerProto& proto, int npartitions) {
-  Layer::Setup(proto, npartitions);
+void LabelLayer::Setup(const LayerProto& proto, int npartitions) {
+  RNNLayer::Setup(proto, npartitions);
   CHECK_EQ(srclayers_.size(), 1);
-  int max_window = static_cast<RnnDataLayer*>(srclayers_[0])->max_window();
+  int max_window = dynamic_cast<DataLayer*>(srclayers_[0])->max_window();
   data_.Reshape(vector<int>{max_window, 4});
 }
 
-void RnnLabelLayer::ComputeFeature(int flag, Metric *perf) {
-  auto records = static_cast<RnnDataLayer*>(srclayers_[0])->records();
+void LabelLayer::ComputeFeature(int flag, Metric *perf) {
+  const auto& records = dynamic_cast<DataLayer*>(srclayers_[0])->records();
   float *label = data_.mutable_cpu_data();
-  window_ = static_cast<RNNLayer*>(srclayers_[0])->window();
+  window_ = dynamic_cast<RNNLayer*>(srclayers_[0])->window();
   for (int i = 0; i < window_; i++) {
-    label[4 * i + 0] = records[i + 1].class_start();
-    label[4 * i + 1] = records[i + 1].class_end();
-    label[4 * i + 2] = records[i + 1].word_index();
-    label[4 * i + 3] = records[i + 1].class_index();
+    WordRecord wordrecord = records[i + 1].GetExtension(word);
+    label[4 * i + 0] = wordrecord.class_start();
+    label[4 * i + 1] = wordrecord.class_end();
+    label[4 * i + 2] = wordrecord.word_index();
+    label[4 * i + 3] = wordrecord.class_index();
   }
 }
 
@@ -131,9 +114,9 @@ EmbeddingLayer::~EmbeddingLayer() {
 }
 
 void EmbeddingLayer::Setup(const LayerProto& proto, int npartitions) {
-  Layer::Setup(proto, npartitions);
+  RNNLayer::Setup(proto, npartitions);
   CHECK_EQ(srclayers_.size(), 1);
-  int max_window = srclayers_[0]->data(this).shape()[0];
+  int max_window = dynamic_cast<DataLayer*>(srclayers_[0])->max_window();
   word_dim_ = proto.GetExtension(embedding_conf).word_dim();
   data_.Reshape(vector<int>{max_window, word_dim_});
   grad_.ReshapeLike(data_);
@@ -143,13 +126,14 @@ void EmbeddingLayer::Setup(const LayerProto& proto, int npartitions) {
 }
 
 void EmbeddingLayer::ComputeFeature(int flag, Metric* perf) {
-  window_ = static_cast<RNNLayer*>(srclayers_[0])->window();
+  auto datalayer = dynamic_cast<DataLayer*>(srclayers_[0]);
+  window_ = datalayer->window();
+  auto records = datalayer->records();
   auto words = RTensor2(&data_);
   auto embed = RTensor2(embed_->mutable_data());
-  auto word_idx = RTensor1(srclayers_[0]->mutable_data(this));
 
   for (int t = 0; t < window_; t++) {
-    int idx = static_cast<int>(word_idx[t]);
+    int idx = static_cast<int>(records[t].GetExtension(word).word_index());
     CHECK_GE(idx, 0);
     CHECK_LT(idx, vocab_size_);
     Copy(words[t], embed[idx]);
@@ -159,10 +143,11 @@ void EmbeddingLayer::ComputeFeature(int flag, Metric* perf) {
 void EmbeddingLayer::ComputeGradient(int flag, Metric* perf) {
   auto grad = RTensor2(&grad_);
   auto gembed = RTensor2(embed_->mutable_grad());
-  auto word_idx = RTensor1(srclayers_[0]->mutable_data(this));
+  auto datalayer = dynamic_cast<DataLayer*>(srclayers_[0]);
+  auto records = datalayer->records();
   gembed = 0;
   for (int t = 0; t < window_; t++) {
-    int idx = static_cast<int>(word_idx[t]);
+    int idx = static_cast<int>(records[t].GetExtension(word).word_index());
     Copy(gembed[idx], grad[t]);
   }
 }
@@ -172,7 +157,7 @@ HiddenLayer::~HiddenLayer() {
 }
 
 void HiddenLayer::Setup(const LayerProto& proto, int npartitions) {
-  Layer::Setup(proto, npartitions);
+  RNNLayer::Setup(proto, npartitions);
   CHECK_EQ(srclayers_.size(), 1);
   const auto& innerproductData = srclayers_[0]->data(this);
   data_.ReshapeLike(srclayers_[0]->data(this));
@@ -184,7 +169,7 @@ void HiddenLayer::Setup(const LayerProto& proto, int npartitions) {
 
 // hid[t] = sigmoid(hid[t-1] * W + src[t])
 void HiddenLayer::ComputeFeature(int flag, Metric* perf) {
-  window_ = static_cast<RNNLayer*>(srclayers_[0])->window();
+  window_ = dynamic_cast<RNNLayer*>(srclayers_[0])->window();
   auto data = RTensor2(&data_);
   auto src = RTensor2(srclayers_[0]->mutable_data(this));
   auto weight = RTensor2(weight_->mutable_data());
@@ -219,20 +204,20 @@ void HiddenLayer::ComputeGradient(int flag, Metric* perf) {
   Copy(gsrc, grad);
 }
 
-/*********** 1-Implementation for OutputLayer **********/
-OutputLayer::~OutputLayer() {
+/*********** Implementation for LossLayer **********/
+LossLayer::~LossLayer() {
   delete word_weight_;
   delete class_weight_;
 }
 
-void OutputLayer::Setup(const LayerProto& proto, int npartitions) {
-  Layer::Setup(proto, npartitions);
+void LossLayer::Setup(const LayerProto& proto, int npartitions) {
+  RNNLayer::Setup(proto, npartitions);
   CHECK_EQ(srclayers_.size(), 2);
   const auto& src = srclayers_[0]->data(this);
   int max_window = src.shape()[0];
   int vdim = src.count() / max_window;   // Dimension of input
-  int vocab_size = proto.GetExtension(output_conf).vocab_size();
-  int nclass = proto.GetExtension(output_conf).nclass();
+  int vocab_size = proto.GetExtension(loss_conf).vocab_size();
+  int nclass = proto.GetExtension(loss_conf).nclass();
   word_weight_ = Param::Create(proto.param(0));
   word_weight_->Setup(vector<int>{vocab_size, vdim});
   class_weight_ = Param::Create(proto.param(1));
@@ -242,8 +227,8 @@ void OutputLayer::Setup(const LayerProto& proto, int npartitions) {
   pclass_.Reshape(vector<int>{max_window, nclass});
 }
 
-void OutputLayer::ComputeFeature(int flag, Metric* perf) {
-  window_ = static_cast<RNNLayer*>(srclayers_[0])->window();
+void LossLayer::ComputeFeature(int flag, Metric* perf) {
+  window_ = dynamic_cast<RNNLayer*>(srclayers_[0])->window();
   auto pclass = RTensor2(&pclass_);
   auto src = RTensor2(srclayers_[0]->mutable_data(this));
   auto word_weight = RTensor2(word_weight_->mutable_data());
@@ -274,10 +259,11 @@ void OutputLayer::ComputeFeature(int flag, Metric* perf) {
   }
 
   perf->Add("loss", loss, window_);
+  // users can compute the PPL value by 10^(ppl before exp)
   perf->Add("ppl before exp", ppl, window_);
 }
 
-void OutputLayer::ComputeGradient(int flag, Metric* perf) {
+void LossLayer::ComputeGradient(int flag, Metric* perf) {
   auto pclass = RTensor2(&pclass_);
   auto src = RTensor2(srclayers_[0]->mutable_data(this));
   auto gsrc = RTensor2(srclayers_[0]->mutable_grad(this));
@@ -313,4 +299,4 @@ void OutputLayer::ComputeGradient(int flag, Metric* perf) {
     gsrc[t] += dot(pclass[t], class_weight);
   }
 }
-}   // end of namespace singa
+}   // end of namespace rnnlm

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ba3b1a5c/examples/rnnlm/rnnlm.h
----------------------------------------------------------------------
diff --git a/examples/rnnlm/rnnlm.h b/examples/rnnlm/rnnlm.h
index d9032e6..9fc0bcf 100644
--- a/examples/rnnlm/rnnlm.h
+++ b/examples/rnnlm/rnnlm.h
@@ -23,12 +23,17 @@
 #include <vector>
 #include "./singa.h"
 #include "./rnnlm.pb.h"
-namespace singa {
 
+namespace rnnlm {
+using singa::LayerProto;
+using singa::Layer;
+using singa::Param;
+using singa::Blob;
+using singa::Metric;
 /**
  * Base RNN layer. May make it a base layer of SINGA.
  */
-class RNNLayer : public NeuronLayer {
+class RNNLayer : virtual public Layer {
  public:
   /**
    * The recurrent layers may be unrolled different times for different
@@ -49,44 +54,26 @@ class RNNLayer : public NeuronLayer {
 /**
  * Input layer that get read records from data shard
  */
-class RnnDataLayer : public RNNLayer {
+class DataLayer : public RNNLayer, public singa::DataLayer {
  public:
-  ~RnnDataLayer();
+  ~DataLayer();
   void Setup(const LayerProto& proto, int npartitions) override;
   void ComputeFeature(int flag, Metric *perf) override;
-  void ComputeGradient(int flag, Metric* perf) override {}
-  ConnectionType dst_layer_connection() const override {
-    return kOneToMany;
-  }
   int max_window() const {
     return max_window_;
   }
-  const std::vector<singa::WordRecord>& records() const {
-    return records_;
-  }
 
  private:
   int max_window_;
-  DataShard* shard_;
-  std::vector<singa::WordRecord> records_;
-};
-
-
-/**
- * WordLayer that read records_[0] to records_[window_ - 1] from RnnDataLayer to offer data for computation
- */
-class WordLayer : public RNNLayer {
- public:
-  void Setup(const LayerProto& proto, int npartitions) override;
-  void ComputeFeature(int flag, Metric *perf) override;
-  void ComputeGradient(int flag, Metric* perf) override {}
+  singa::DataShard* shard_;
 };
 
 
 /**
- * LabelLayer that read records_[1] to records_[window_] from RnnDataLayer to offer label information
+ * LabelLayer that read records_[1] to records_[window_] from DataLayer to
+ * offer label information
  */
-class RnnLabelLayer : public RNNLayer {
+class LabelLayer : public RNNLayer {
  public:
   void Setup(const LayerProto& proto, int npartitions) override;
   void ComputeFeature(int flag, Metric *perf) override;
@@ -142,9 +129,9 @@ class HiddenLayer : public RNNLayer {
  * p(w|c) = softmax(src[t]*Ww[Start(c):End(c)])
  * p(word at t+1 is w)=p(word at t+1 is from class c)*p(w|c)
  */
-class OutputLayer : public RNNLayer {
+class LossLayer : public RNNLayer {
  public:
-  ~OutputLayer();
+  ~LossLayer();
   void Setup(const LayerProto& proto, int npartitions) override;
   void ComputeFeature(int flag, Metric *perf) override;
   void ComputeGradient(int flag, Metric* perf) override;

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ba3b1a5c/examples/rnnlm/rnnlm.proto
----------------------------------------------------------------------
diff --git a/examples/rnnlm/rnnlm.proto b/examples/rnnlm/rnnlm.proto
index 32d2556..8feb3f9 100644
--- a/examples/rnnlm/rnnlm.proto
+++ b/examples/rnnlm/rnnlm.proto
@@ -19,29 +19,28 @@
 *
 *************************************************************/
 
-package singa;
 import "job.proto";
-
+import "common.proto";
 
 message EmbeddingProto {
   optional int32 word_dim = 1;
   optional int32 vocab_size = 2;
 }
 
-message OutputProto {
+message LossProto {
   optional int32 nclass = 1;
   optional int32 vocab_size = 2;
 }
 
-message InputProto {
+message DataProto {
   required string path = 1;
   optional int32 max_window = 2;
 }
 
-extend LayerProto {
+extend singa.LayerProto {
   optional EmbeddingProto embedding_conf = 101;
-  optional OutputProto output_conf = 102;
-  optional InputProto input_conf = 103;
+  optional LossProto loss_conf = 102;
+  optional DataProto data_conf = 103;
 }
 
 message WordRecord {
@@ -51,3 +50,7 @@ message WordRecord {
   optional int32 class_start = 4;
   optional int32 class_end = 5;
 }
+
+extend singa.Record {
+  optional WordRecord word = 101;
+}

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ba3b1a5c/include/neuralnet/connection_layer.h
----------------------------------------------------------------------
diff --git a/include/neuralnet/connection_layer.h b/include/neuralnet/connection_layer.h
index 17de22d..75f399c 100644
--- a/include/neuralnet/connection_layer.h
+++ b/include/neuralnet/connection_layer.h
@@ -30,7 +30,7 @@
  * base ConnectionLayer.
  */
 namespace singa {
-class BridgeLayer : public ConnectionLayer {
+class BridgeLayer : virtual public ConnectionLayer {
  public:
   void set_ready(bool a) {
     ready_ = a;

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ba3b1a5c/include/neuralnet/input_layer.h
----------------------------------------------------------------------
diff --git a/include/neuralnet/input_layer.h b/include/neuralnet/input_layer.h
index 42f0468..709912d 100644
--- a/include/neuralnet/input_layer.h
+++ b/include/neuralnet/input_layer.h
@@ -43,7 +43,7 @@ namespace singa {
 /**
  * Base layer for reading records from local Shard, HDFS, lmdb, etc.
  */
-class DataLayer: public InputLayer {
+class DataLayer: virtual public InputLayer {
  public:
   void ComputeGradient(int flag, Metric* perf) override {}
   Blob<float>* mutable_data(const Layer* layer) override {

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ba3b1a5c/include/neuralnet/layer.h
----------------------------------------------------------------------
diff --git a/include/neuralnet/layer.h b/include/neuralnet/layer.h
index 6c99cbd..05377b1 100644
--- a/include/neuralnet/layer.h
+++ b/include/neuralnet/layer.h
@@ -190,7 +190,7 @@ class Layer {
 /**
  * Base layer for connecting layers when neural net is partitioned.
  */
-class ConnectionLayer : public Layer {
+class ConnectionLayer : virtual public Layer {
   // defined as a layer category
 };
 
@@ -198,19 +198,19 @@ class ConnectionLayer : public Layer {
  * Base layer for getting input data. May include layers for loading records,
  * parsing records.
  */
-class InputLayer : public Layer {
+class InputLayer : virtual public Layer {
   // defined as a layer category
 };
 
 
-class NeuronLayer : public Layer {
+class NeuronLayer : virtual public Layer {
   // defined as a layer category
 };
 
 /**
  * Base layer for calculating loss and other metrics, e.g., precison.
  */
-class LossLayer : public Layer {
+class LossLayer : virtual public Layer {
  public:
   Blob<float>* mutable_grad(const Layer* layer) override {
     return nullptr;

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ba3b1a5c/src/neuralnet/input_layer.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/input_layer.cc b/src/neuralnet/input_layer.cc
index cf88c15..a608ba4 100644
--- a/src/neuralnet/input_layer.cc
+++ b/src/neuralnet/input_layer.cc
@@ -36,7 +36,7 @@ using std::vector;
 /************* Implementation for ParserLayer ***********/
 void ParserLayer::ComputeFeature(int flag, Metric *perf) {
   CHECK_EQ(srclayers_.size(), 1);
-  auto datalayer = static_cast<DataLayer*>(*srclayers_.begin());
+  auto datalayer = dynamic_cast<DataLayer*>(*srclayers_.begin());
   ParseRecords(flag, datalayer->records(), &data_);
 }
 
@@ -196,7 +196,7 @@ void ShardDataLayer::ComputeFeature(int flag, Metric* perf) {
 void LabelLayer::Setup(const LayerProto& proto, int npartitions) {
   Layer::Setup(proto, npartitions);
   CHECK_EQ(srclayers_.size(), 1);
-  int batchsize = static_cast<DataLayer*>(srclayers_[0])->batchsize();
+  int batchsize = dynamic_cast<DataLayer*>(srclayers_[0])->batchsize();
   data_.Reshape(vector<int>{batchsize});
 }
 
@@ -249,8 +249,8 @@ void MnistLayer::ParseRecords(int flag,
 void MnistLayer::Setup(const LayerProto& proto, int npartitions) {
   Layer::Setup(proto, npartitions);
   CHECK_EQ(srclayers_.size(), 1);
-  int batchsize = static_cast<DataLayer*>(srclayers_[0])->batchsize();
-  Record sample = static_cast<DataLayer*>(srclayers_[0])->sample();
+  int batchsize = dynamic_cast<DataLayer*>(srclayers_[0])->batchsize();
+  Record sample = dynamic_cast<DataLayer*>(srclayers_[0])->sample();
   norm_a_ = proto.mnist_conf().norm_a();
   norm_b_ = proto.mnist_conf().norm_b();
   int ndim = sample.image().shape_size();
@@ -321,8 +321,8 @@ void RGBImageLayer::Setup(const LayerProto& proto, int npartitions) {
   scale_ = proto.rgbimage_conf().scale();
   cropsize_ = proto.rgbimage_conf().cropsize();
   mirror_ = proto.rgbimage_conf().mirror();
-  int batchsize = static_cast<DataLayer*>(srclayers_[0])->batchsize();
-  Record sample = static_cast<DataLayer*>(srclayers_[0])->sample();
+  int batchsize = dynamic_cast<DataLayer*>(srclayers_[0])->batchsize();
+  Record sample = dynamic_cast<DataLayer*>(srclayers_[0])->sample();
   vector<int> shape;
   shape.push_back(batchsize);
   for (int x : sample.image().shape()) {

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ba3b1a5c/src/proto/common.proto
----------------------------------------------------------------------
diff --git a/src/proto/common.proto b/src/proto/common.proto
index 05e593f..dd2b04c 100644
--- a/src/proto/common.proto
+++ b/src/proto/common.proto
@@ -95,6 +95,8 @@ message Record {
   optional string user_type =2;
   // configuration for
   optional SingleLabelImageRecord image = 5;
+
+  extensions 101 to 200;
 }
 
 message SingleLabelImageRecord {

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ba3b1a5c/src/trainer/worker.cc
----------------------------------------------------------------------
diff --git a/src/trainer/worker.cc b/src/trainer/worker.cc
index 0e284e1..23382e3 100644
--- a/src/trainer/worker.cc
+++ b/src/trainer/worker.cc
@@ -285,7 +285,7 @@ void Worker::ReceiveBlobs(
     auto data = receive_layer->mutable_data(nullptr);
     msg->NextFrame();
     memcpy(data->mutable_cpu_data(), msg->FrameData(), msg->FrameSize());
-    static_cast<BridgeLayer*>(receive_layer)->set_ready(true);
+    dynamic_cast<BridgeLayer*>(receive_layer)->set_ready(true);
     delete msg;
   }
 }
@@ -360,7 +360,7 @@ void BPWorker::Forward(
   for (auto& layer : net->layers()) {
     if (layer->partition_id() == id_) {
       if (typeid(*layer) == typeid(BridgeDstLayer))  // recv data from other workers
-        ReceiveBlobs(true, false, static_cast<BridgeLayer*>(layer), net);
+        ReceiveBlobs(true, false, dynamic_cast<BridgeLayer*>(layer), net);
       if (phase == kTrain) {
         for (Param* p : layer->GetParams()) {  // wait until param is updated
           Collect(p, step);
@@ -368,7 +368,7 @@ void BPWorker::Forward(
       }
       layer->ComputeFeature(phase | kForward, perf);
       if (typeid(*layer) == typeid(BridgeSrcLayer))  // send data to other workers
-        SendBlobs(true, false, static_cast<BridgeLayer*>(layer), net);
+        SendBlobs(true, false, dynamic_cast<BridgeLayer*>(layer), net);
       if (DisplayDebugInfo(step))
         LOG(INFO) << layer->DebugString(step, phase | kForward);
     }
@@ -388,7 +388,7 @@ void BPWorker::Backward(int step, shared_ptr<NeuralNet> net) {
       for (Param* p : layer->GetParams())
         Update(p, step);
       if (typeid(layer) == typeid(BridgeDstLayer))  // recv data from other workers
-        SendBlobs(false, true, static_cast<BridgeDstLayer*>(layer), net);
+        SendBlobs(false, true, dynamic_cast<BridgeDstLayer*>(layer), net);
     }
   }
 }