Posted to commits@systemml.apache.org by pr...@apache.org on 2017/02/21 00:23:13 UTC

incubator-systemml git commit: [SYSTEMML-1339] Autoencoder script for acoustic signal modeling

Repository: incubator-systemml
Updated Branches:
  refs/heads/master 230bc63c7 -> 8eed1ec94


[SYSTEMML-1339] Autoencoder script for acoustic signal modeling

Closes #384.


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/8eed1ec9
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/8eed1ec9
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/8eed1ec9

Branch: refs/heads/master
Commit: 8eed1ec94b8070710d532358906a050cd4f727fc
Parents: 230bc63
Author: prithvirajsen <se...@us.ibm.com>
Authored: Mon Feb 20 16:22:24 2017 -0800
Committer: prithvirajsen <se...@us.ibm.com>
Committed: Mon Feb 20 16:22:24 2017 -0800

----------------------------------------------------------------------
 scripts/staging/autoencoder-2layer.dml | 248 ++++++++++++++++++++++++++++
 1 file changed, 248 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/8eed1ec9/scripts/staging/autoencoder-2layer.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/autoencoder-2layer.dml b/scripts/staging/autoencoder-2layer.dml
new file mode 100644
index 0000000..a17d86e
--- /dev/null
+++ b/scripts/staging/autoencoder-2layer.dml
@@ -0,0 +1,248 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Trains a 2-layer autoencoder with minibatch SGD, momentum, and step-size decay.
+# If invoked with H1 > H2, the result is a 'bowtie'-structured autoencoder.
+# Weights are initialized using the Glorot & Bengio (2010, AISTATS) scheme.
+# The script standardizes the input before training (this can be turned off).
+# It also randomly reshuffles rows before training.
+# Currently, tanh is the activation function; to change it, re-implement the
+# DML-bodied function 'func' (a commented-out sigmoid sketch is included below).
+
+# INPUT PARAMETERS:
+# ---------------------------------------------------------------------------------------------
+# NAME   	TYPE     DEFAULT  MEANING
+# ---------------------------------------------------------------------------------------------
+# X         String   ---      Filename where the input is stored
+# H1        Int      ---      Number of neurons in the 1st hidden layer
+# H2        Int      ---      Number of neurons in the 2nd hidden layer
+# EPOCH     Int      ---      Number of epochs to train for
+# fmt       String   'text'   Output format ("text", "csv", "binary" etc.)
+# OBJ       Boolean  FALSE    If TRUE, computes the objective function value
+#                             (squared loss) at the end of each epoch. Note that
+#                             computing the full objective can take a long time.
+# BATCH     Int      256      Mini-batch size (training parameter)
+# STEP      Double   1e-5     Initial step size (training parameter)
+# DECAY     Double   0.95     Decays step size after each epoch (training parameter)
+# MOMENTUM  Double   0.9      Momentum parameter (training parameter)
+# ---------------------------------------------------------------------------------------------
+#
+# OUTPUT PARAMETERS (all filenames):
+# ---------------------------------------------------------------------------------------------
+# NAME     TYPE   DEFAULT  MEANING
+# ---------------------------------------------------------------------------------------------
+# W1_out   String   ---   File to store weights between input layer and 1st hidden layer
+# b1_out   String   ---   File to store bias between input layer and 1st hidden layer
+# W2_out   String   ---   File to store weights between 1st hidden layer and 2nd hidden layer
+# b2_out   String   ---   File to store bias between 1st hidden layer and 2nd hidden layer
+# W3_out   String   ---   File to store weights between 2nd hidden layer and 3rd hidden layer
+# b3_out   String   ---   File to store bias between 2nd hidden layer and 3rd hidden layer
+# W4_out   String   ---   File to store weights between 3rd hidden layer and output layer
+# b4_out   String   ---   File to store bias between 3rd hidden layer and output layer
+# HIDDEN   String   " "   File to store the hidden (2nd) layer representation if needed
+# ---------------------------------------------------------------------------------------------
+#
+# INVOCATION:
+# -f autoencoder-2layer.dml --nvargs X=<input file> H1=500 H2=2 EPOCH=1 fmt="csv"
+#                                    W1_out=<weights from input to 1st hidden layer> b1_out=<bias from input to 1st hidden layer>
+#                                    W2_out=<weights from 1st hidden layer to 2nd hidden layer> b2_out=<bias from 1st hidden layer to 2nd hidden layer>
+#                                    W3_out=<weights from 2nd hidden layer to 3rd hidden layer> b3_out=<bias from 2nd hidden layer to 3rd hidden layer>
+#                                    W4_out=<weights from 3rd hidden layer to output> b4_out=<bias from 3rd hidden layer to output>
+#
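+# A concrete example (file names are placeholders; assumes the standalone
+# runner shipped with SystemML):
+#   ./runStandaloneSystemML.sh autoencoder-2layer.dml --nvargs X=data/X.mtx H1=500 H2=2 EPOCH=1 fmt="csv"
+#                                    W1_out=out/W1 b1_out=out/b1 W2_out=out/W2 b2_out=out/b2
+#                                    W3_out=out/W3 b3_out=out/b3 W4_out=out/W4 b4_out=out/b4
+#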
+
+#Implements tanh.
+#To use another activation function, implement a DML-bodied function
+#named 'func' and comment out this one (see the sigmoid sketch below).
+func = function(Matrix[Double] X) return(Matrix[Double] Y, Matrix[Double] Y_prime){
+     Y = (exp(2*X) - 1)/(exp(2*X) + 1)
+     Y_prime = 1 - Y^2
+}
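+
+#Example (a sketch, commented out): a logistic-sigmoid alternative; to use it,
+#uncomment this block and comment out the tanh 'func' above.
+#func = function(Matrix[Double] X) return(Matrix[Double] Y, Matrix[Double] Y_prime){
+#     Y = 1/(1 + exp(-X))      #sigmoid activation
+#     Y_prime = Y * (1 - Y)    #its derivative, expressed via Y
+#}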
+
+feedForward = function(Matrix[Double] X,
+                       Matrix[Double] W1, Matrix[Double] b1,
+                       Matrix[Double] W2, Matrix[Double] b2,
+                       Matrix[Double] W3, Matrix[Double] b3,
+                       Matrix[Double] W4, Matrix[Double] b4,
+                       Matrix[Double] Y)
+              return(Matrix[Double] H1, Matrix[Double] H1_prime,
+                     Matrix[Double] H2, Matrix[Double] H2_prime,
+                     Matrix[Double] H3, Matrix[Double] H3_prime,
+                     Matrix[Double] Yhat, Matrix[Double] Yhat_prime,
+                     Matrix[Double] E){
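+      #forward pass: X -> H1 -> H2 (code layer) -> H3 -> reconstruction Yhat;
+      #each call to 'func' returns the activation together with its derivative,
+      #which the gradient computation reuses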
+      H1_in = t(W1 %*% t(X) + b1)
+      [H1, H1_prime] = func(H1_in)
+
+      H2_in = t(W2 %*% t(H1) + b2)
+      [H2, H2_prime] = func(H2_in)
+
+      H3_in = t(W3 %*% t(H2) + b3)
+      [H3, H3_prime] = func(H3_in)
+
+      Yhat_in = t(W4 %*% t(H3) + b4)
+      [Yhat, Yhat_prime] = func(Yhat_in)
+      E = Yhat - Y
+}
+
+grad = function(Matrix[Double] X,
+                Matrix[Double] H1, Matrix[Double] H1_prime,
+                Matrix[Double] H2, Matrix[Double] H2_prime,
+                Matrix[Double] H3, Matrix[Double] H3_prime,
+                Matrix[Double] Yhat_prime,
+                Matrix[Double] E,
+                Matrix[Double] W1, Matrix[Double] W2, Matrix[Double] W3, Matrix[Double] W4)
+       return(Matrix[Double] W1_grad, Matrix[Double] b1_grad,
+              Matrix[Double] W2_grad, Matrix[Double] b2_grad,
+              Matrix[Double] W3_grad, Matrix[Double] b3_grad,
+              Matrix[Double] W4_grad, Matrix[Double] b4_grad){
+      #backprop
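+      #(delta_l = f'(z_l) * (delta_(l+1) %*% W_(l+1)), seeded at the output
+      # with the squared-loss error E)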
+      delta4 = E * Yhat_prime
+      delta3 = H3_prime * (delta4 %*% W4)
+      delta2 = H2_prime * (delta3 %*% W3)
+      delta1 = H1_prime * (delta2 %*% W2)
+
+      #compute gradients
+      b4_grad = t(colSums(delta4))
+      b3_grad = t(colSums(delta3))
+      b2_grad = t(colSums(delta2))
+      b1_grad = t(colSums(delta1))
+
+      W4_grad = t(delta4) %*% H3
+      W3_grad = t(delta3) %*% H2
+      W2_grad = t(delta2) %*% H1
+      W1_grad = t(delta1) %*% X
+}
+
+obj = function(Matrix[Double] E) return(Double val){
+    val = 0.5 * sum(E^2)
+}
+
+batch_size = ifdef($BATCH, 256)
+mu = ifdef($MOMENTUM, 0.9)
+step = ifdef($STEP, 1e-5)
+decay = ifdef($DECAY, 0.95)
+hfile = ifdef($HIDDEN, " ")
+fmt = ifdef($fmt, "text")
+full_obj = ifdef($OBJ, FALSE)
+
+X = read($X)
+num_hidden1 = $H1
+num_hidden2 = $H2
+max_epochs = $EPOCH
+
+n = nrow(X)
+m = ncol(X)
+
+#z-transform the input (a whitening operator would be even better);
+#the 1e-17 below guards against division by zero for constant columns
+means = colSums(X)/n
+stds = sqrt((colSums(X^2)/n - means*means)*n/(n-1)) + 1e-17
+X = (X - means)/stds
+
+#randomly reorder rows via a random permutation matrix (built with table()
+#over a random ordering; its transpose later undoes the shuffle)
+permut = table(seq(1,n,1), order(target=Rand(rows=n, cols=1, min=0, max=1, pdf="uniform"), by=1, index.return=TRUE), n, n)
+X = permut %*% X
+
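+#Glorot & Bengio (2010) uniform initialization:
+#W_l ~ U[-sqrt(6/(fan_in + fan_out)), +sqrt(6/(fan_in + fan_out))]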
+W1 = sqrt(6)/sqrt(m + num_hidden1) * Rand(rows=num_hidden1, cols=m, min=-1, max=1, pdf="uniform")
+b1 = matrix(0, rows=num_hidden1, cols=1)
+W2 = sqrt(6)/sqrt(num_hidden1 + num_hidden2) * Rand(rows=num_hidden2, cols=num_hidden1, min=-1, max=1, pdf="uniform")
+b2 = matrix(0, rows=num_hidden2, cols=1)
+W3 = sqrt(6)/sqrt(num_hidden2 + num_hidden1) * Rand(rows=num_hidden1, cols=num_hidden2, min=-1, max=1, pdf="uniform")
+b3 = matrix(0, rows=num_hidden1, cols=1)
+W4 = sqrt(6)/sqrt(num_hidden1 + m) * Rand(rows=m, cols=num_hidden1, min=-1, max=1, pdf="uniform")
+b4 = matrix(0, rows=m, cols=1)
+
+upd_W1 = matrix(0, rows=nrow(W1), cols=ncol(W1))
+upd_b1 = matrix(0, rows=nrow(b1), cols=ncol(b1))
+upd_W2 = matrix(0, rows=nrow(W2), cols=ncol(W2))
+upd_b2 = matrix(0, rows=nrow(b2), cols=ncol(b2))
+upd_W3 = matrix(0, rows=nrow(W3), cols=ncol(W3))
+upd_b3 = matrix(0, rows=nrow(b3), cols=ncol(b3))
+upd_W4 = matrix(0, rows=nrow(W4), cols=ncol(W4))
+upd_b4 = matrix(0, rows=nrow(b4), cols=ncol(b4))
+
+if( full_obj ){
+      [full_H1, full_H1_prime, full_H2, full_H2_prime, full_H3, full_H3_prime, full_Yhat, full_Yhat_prime, full_E] = feedForward(X, W1, b1, W2, b2, W3, b3, W4, b4, X)
+      full_o = obj(full_E)
+      print("EPOCHS=" + 0 + " OBJ (FULL DATA): " + full_o)
+}
+
+iter = 0
+num_iters_per_epoch = ceil(n / batch_size)
+max_iterations = max_epochs * num_iters_per_epoch
+#print("num_iters_per_epoch=" + num_iters_per_epoch + " max_iterations=" + max_iterations)
+beg = 1
+while( iter < max_iterations ){
+      end = beg + batch_size - 1
+      if(end > n) end = n
+      X_batch = X[beg:end,]
+
+      [H1, H1_prime, H2, H2_prime, H3, H3_prime, Yhat, Yhat_prime, E] = feedForward(X_batch, W1, b1, W2, b2, W3, b3, W4, b4, X_batch)
+      [W1_grad, b1_grad, W2_grad, b2_grad, W3_grad, b3_grad, W4_grad, b4_grad] = grad(X_batch, H1, H1_prime, H2, H2_prime, H3, H3_prime, Yhat_prime, E, W1, W2, W3, W4)
+
+      o = obj(E)
+      print("epochs=%5.4f BATCH beg=%d end=%d obj=%f", (iter / num_iters_per_epoch), beg, end, o)
+
+      #update
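+      #(momentum: upd <- mu*upd - (step/|batch|)*grad, then w <- w + upd)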
+      local_step = step / nrow(X_batch)
+      upd_W1 = mu * upd_W1 - local_step * W1_grad
+      upd_b1 = mu * upd_b1 - local_step * b1_grad
+      upd_W2 = mu * upd_W2 - local_step * W2_grad
+      upd_b2 = mu * upd_b2 - local_step * b2_grad
+      upd_W3 = mu * upd_W3 - local_step * W3_grad
+      upd_b3 = mu * upd_b3 - local_step * b3_grad
+      upd_W4 = mu * upd_W4 - local_step * W4_grad
+      upd_b4 = mu * upd_b4 - local_step * b4_grad
+      W1 = W1 + upd_W1
+      b1 = b1 + upd_b1
+      W2 = W2 + upd_W2
+      b2 = b2 + upd_b2
+      W3 = W3 + upd_W3
+      b3 = b3 + upd_b3
+      W4 = W4 + upd_W4
+      b4 = b4 + upd_b4
+
+      iter = iter + 1
+      if(end == n) beg = 1
+      else beg = end + 1
+
+      if( iter %% num_iters_per_epoch == 0 ) step = step * decay
+
+      if( full_obj & iter %% num_iters_per_epoch == 0 ){
+          [full_H1, full_H1_prime, full_H2, full_H2_prime, full_H3, full_H3_prime, full_Yhat, full_Yhat_prime, full_E] = feedForward(X, W1, b1, W2, b2, W3, b3, W4, b4, X)
+          full_o = obj(full_E)
+          epochs = iter %/% num_iters_per_epoch
+          print("EPOCHS=" + epochs + " iter=" + iter + " OBJ (FULL DATA)=" + full_o)
+      }
+}
+
+write(W1, $W1_out, format=fmt)
+write(b1, $b1_out, format=fmt)
+write(W2, $W2_out, format=fmt)
+write(b2, $b2_out, format=fmt)
+write(W3, $W3_out, format=fmt)
+write(b3, $b3_out, format=fmt)
+write(W4, $W4_out, format=fmt)
+write(b4, $b4_out, format=fmt)
+
+if( hfile != " " ){
+      [full_H1, full_H1_prime, full_H2, full_H2_prime, full_H3, full_H3_prime, full_Yhat, full_Yhat_prime, full_E] = feedForward(X, W1, b1, W2, b2, W3, b3, W4, b4, X)
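+      #undo the initial random row shuffle so the stored hidden representation
+      #lines up with the original row order of the input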
+      reordered_H = t(permut) %*% full_H2
+      write(reordered_H, hfile, format=fmt)
+}