Posted to commits@madlib.apache.org by ri...@apache.org on 2018/04/04 20:58:25 UTC

madlib git commit: MLP: Simplify initialization of model coefficients

Repository: madlib
Updated Branches:
  refs/heads/master 1670923bf -> e3d2eee9a


MLP: Simplify initialization of model coefficients

Changes:

1. Model initialization now happens in the C++ code instead of the
coefficients being generated in Python and passed in.
2. Warm start coefficients are still passed from Python and copied into
the model element by element. The vectorized form of the copy raises an
Eigen assertion when used with a MappedMatrix, possibly because the
actual block size is not available via the Map (see the sketch after
this list).
3. The distinction between a "TaskState" and an "AlgoState" within a
State type has been removed from MLPMiniBatchState. The demarcation was
confusing and provided little abstraction.
4. An unused bool named 'warm_start' has been removed from the main MLP
aggregates (igd and minibatch).
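
A minimal standalone sketch of the copy described in change 2, with a
hypothetical six-element buffer standing in for MADlib's MappedMatrix
(illustration only, not code from this commit):

    #include <Eigen/Dense>

    int main() {
        double storage[6] = {0, 0, 0, 0, 0, 0};
        // A Map views external memory as a matrix, much as MappedMatrix
        // wraps the aggregate's state buffer.
        Eigen::Map<Eigen::MatrixXd> model(storage, 2, 3);

        Eigen::VectorXd coeff(6);
        coeff << 1, 2, 3, 4, 5, 6;

        // Element-wise copy in column-major order: each assignment writes
        // one scalar through the Map, so no block-size information is needed.
        for (int j = 0; j < model.cols(); ++j)
            for (int i = 0; i < model.rows(); ++i)
                model(i, j) = coeff(j * model.rows() + i);

        // A vectorized form such as model << coeff is what raised the
        // Eigen assertion against a MappedMatrix.
        return 0;
    }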

Closes #251


Project: http://git-wip-us.apache.org/repos/asf/madlib/repo
Commit: http://git-wip-us.apache.org/repos/asf/madlib/commit/e3d2eee9
Tree: http://git-wip-us.apache.org/repos/asf/madlib/tree/e3d2eee9
Diff: http://git-wip-us.apache.org/repos/asf/madlib/diff/e3d2eee9

Branch: refs/heads/master
Commit: e3d2eee9a90fb91eb00d607f4c5cc5ad762b48b5
Parents: 1670923
Author: Rahul Iyer <ri...@apache.org>
Authored: Tue Mar 27 21:41:24 2018 -0700
Committer: Rahul Iyer <ri...@apache.org>
Committed: Wed Apr 4 13:57:51 2018 -0700

----------------------------------------------------------------------
 src/modules/convex/algo/igd.hpp                 |  83 +++----
 src/modules/convex/mlp_igd.cpp                  | 228 +++++++++----------
 src/modules/convex/type/model.hpp               |   2 +-
 src/modules/convex/type/state.hpp               |  56 +++--
 src/ports/postgres/modules/convex/mlp.sql_in    |   4 -
 src/ports/postgres/modules/convex/mlp_igd.py_in |  39 ++--
 6 files changed, 197 insertions(+), 215 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/madlib/blob/e3d2eee9/src/modules/convex/algo/igd.hpp
----------------------------------------------------------------------
diff --git a/src/modules/convex/algo/igd.hpp b/src/modules/convex/algo/igd.hpp
index 1d7fc0f..f5ec471 100644
--- a/src/modules/convex/algo/igd.hpp
+++ b/src/modules/convex/algo/igd.hpp
@@ -34,8 +34,8 @@ public:
     typedef typename Task::model_type model_type;
 
     static void transition(state_type &state, const tuple_type &tuple);
-    static void transitionInMiniBatch(state_type &state, const tuple_type &tuple);
     static void merge(state_type &state, const_state_type &otherState);
+    static void transitionInMiniBatch(state_type &state, const tuple_type &tuple);
     static void mergeInPlace(state_type &state, const_state_type &otherState);
     static void final(state_type &state);
 };
@@ -58,6 +58,35 @@ IGD<State, ConstState, Task>::transition(state_type &state,
             state.task.stepsize * tuple.weight);
 }
 
+template <class State, class ConstState, class Task>
+void
+IGD<State, ConstState, Task>::merge(state_type &state,
+        const_state_type &otherState) {
+    // Check for zero rows here to reduce the burden on the caller.
+    // This can be removed if it affects performance in the future,
+    // with the expectation that callers then do the zero checking.
+    if (state.algo.numRows == 0) {
+        state.algo.incrModel = otherState.algo.incrModel;
+        return;
+    } else if (otherState.algo.numRows == 0) {
+        return;
+    }
+
+    // The reason for this roundabout algorithm instead of the intuitive one
+    // -- (w1 * m1 + w2 * m2) / (w1 + w2): we have only one mutable state,
+    // therefore, (m1 * w1 / w2  + m2)  * w2 / (w1 + w2).
+    // Order:         111111111  22222  3333333333333333
+
+    // model averaging, weighted by rows seen
+    double totalNumRows = static_cast<double>(state.algo.numRows + otherState.algo.numRows);
+    state.algo.incrModel *= static_cast<double>(state.algo.numRows) /
+        static_cast<double>(otherState.algo.numRows);
+    state.algo.incrModel += otherState.algo.incrModel;
+    state.algo.incrModel *= static_cast<double>(otherState.algo.numRows) /
+        static_cast<double>(totalNumRows);
+}
+
+
 /**
   * @brief Update the transition state in mini-batches
   *
@@ -78,8 +107,8 @@ IGD<State, ConstState, Task>::transition(state_type &state,
                   std::runtime_error("Invalid data. Independent and dependent "
                                      "batches don't have same number of rows."));
 
-    int batch_size = state.algo.batchSize;
-    int n_epochs = state.algo.nEpochs;
+    int batch_size = state.batchSize;
+    int n_epochs = state.nEpochs;
 
     // n_rows/n_ind_cols are the rows/cols in a transition tuple.
     int n_rows = tuple.indVar.rows();
@@ -117,12 +146,12 @@ IGD<State, ConstState, Task>::transition(state_type &state,
                 Y_batch = tuple.depVar.block(curr_batch_row_index, 0, batch_size, tuple.depVar.cols());
             }
             loss += Task::getLossAndUpdateModel(
-                state.task.model, X_batch, Y_batch, state.task.stepsize);
+                state.model, X_batch, Y_batch, state.stepsize);
         }
 
         // The first epoch will most likely have the highest loss.
         // Being pessimistic, use the total loss only from the first epoch.
-        if (curr_epoch==0) state.algo.loss += loss;
+        if (curr_epoch==0) state.loss += loss;
     }
     return;
  }
@@ -130,51 +159,23 @@ IGD<State, ConstState, Task>::transition(state_type &state,
 
 template <class State, class ConstState, class Task>
 void
-IGD<State, ConstState, Task>::merge(state_type &state,
-        const_state_type &otherState) {
-    // Having zero checking here to reduce dependency to the caller.
-    // This can be removed if it affects performance in the future,
-    // with the expectation that callers should do the zero checking.
-    if (state.algo.numRows == 0) {
-        state.algo.incrModel = otherState.algo.incrModel;
-        return;
-    } else if (otherState.algo.numRows == 0) {
-        return;
-    }
-
-    // The reason of this weird algorithm instead of an intuitive one
-    // -- (w1 * m1 + w2 * m2) / (w1 + w2): we have only one mutable state,
-    // therefore, (m1 * w1 / w2  + m2)  * w2 / (w1 + w2).
-    // Order:         111111111  22222  3333333333333333
-
-    // model averaging, weighted by rows seen
-    double totalNumRows = static_cast<double>(state.algo.numRows + otherState.algo.numRows);
-    state.algo.incrModel *= static_cast<double>(state.algo.numRows) /
-        static_cast<double>(otherState.algo.numRows);
-    state.algo.incrModel += otherState.algo.incrModel;
-    state.algo.incrModel *= static_cast<double>(otherState.algo.numRows) /
-        static_cast<double>(totalNumRows);
-}
-
-template <class State, class ConstState, class Task>
-void
 IGD<State, ConstState, Task>::mergeInPlace(state_type &state,
         const_state_type &otherState) {
     // avoid division by zero
-    if (state.algo.numRows == 0) {
-        state.task.model = otherState.task.model;
+    if (state.numRows == 0) {
+        state.model = otherState.model;
         return;
-    } else if (otherState.algo.numRows == 0) {
+    } else if (otherState.numRows == 0) {
         return;
     }
 
     // model averaging, weighted by rows seen
-    double leftRows = static_cast<double>(state.algo.numRows + state.algo.numRows);
-    double rightRows = static_cast<double>(otherState.algo.numRows + otherState.algo.numRows);
+    double leftRows = static_cast<double>(state.numRows + state.numRows);
+    double rightRows = static_cast<double>(otherState.numRows + otherState.numRows);
     double totalNumRows = leftRows + rightRows;
-    state.task.model *= leftRows / rightRows;
-    state.task.model += otherState.task.model;
-    state.task.model *= rightRows / totalNumRows;
+    state.model *= leftRows / rightRows;
+    state.model += otherState.model;
+    state.model *= rightRows / totalNumRows;
 }
 
 template <class State, class ConstState, class Task>
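
For reference, the one-mutable-state formula in the relocated merge()
reduces to the intuitive weighted average: expanding
(m1 * w1 / w2 + m2) * w2 / (w1 + w2) gives
m1 * w1 / (w1 + w2) + m2 * w2 / (w1 + w2), which is
(w1 * m1 + w2 * m2) / (w1 + w2), computed with only scale-and-accumulate
operations on the single mutable incrModel.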

http://git-wip-us.apache.org/repos/asf/madlib/blob/e3d2eee9/src/modules/convex/mlp_igd.cpp
----------------------------------------------------------------------
diff --git a/src/modules/convex/mlp_igd.cpp b/src/modules/convex/mlp_igd.cpp
index e914c41..8cd0baa 100644
--- a/src/modules/convex/mlp_igd.cpp
+++ b/src/modules/convex/mlp_igd.cpp
@@ -83,38 +83,37 @@ mlp_igd_transition::run(AnyType &args) {
             ArrayHandle<double> numbersOfUnits = args[4].getAs<ArrayHandle<double> >();
             int numberOfStages = numbersOfUnits.size() - 1;
 
-            double stepsize = args[5].getAs<double>();
             state.allocate(*this, numberOfStages,
                            reinterpret_cast<const double *>(numbersOfUnits.ptr()));
-            state.task.stepsize = stepsize;
-
-            const int activation = args[6].getAs<int>();
-            const int is_classification = args[7].getAs<int>();
+            state.task.stepsize = args[5].getAs<double>();
+            state.task.model.activation = static_cast<double>(args[6].getAs<int>());
+            state.task.model.is_classification = static_cast<double>(args[7].getAs<int>());
             // args[8] is for weighting the input row, which is populated later.
-            const bool warm_start = args[9].getAs<bool>();
-            const double lambda = args[11].getAs<double>();
-            state.task.lambda = lambda;
-            MLPTask::lambda = lambda;
-
-            double is_classification_double = (double) is_classification;
-            double activation_double = (double) activation;
-            MappedColumnVector coeff = args[10].getAs<MappedColumnVector>();
-            state.task.model.rebind(&is_classification_double,&activation_double,
-                                    &coeff.data()[0], numberOfStages,
-                                    &numbersOfUnits[0]);
-
-            // state.task.model.is_classification =
-            //     static_cast<double>(is_classification);
-            // state.task.model.activation = static_cast<double>(activation);
-            // MappedColumnVector initial_coeff = args[10].getAs<MappedColumnVector>();
-            // // copy initial_coeff into the model
-            // Index fan_in, fan_out, layer_start = 0;
-            // for (size_t k = 0; k < numberOfStages; ++k){
-            //     fan_in = numbersOfUnits[k];
-            //     fan_out = numbersOfUnits[k+1];
-            //     state.task.model.u[k] << initial_coeff.segment(layer_start, (fan_in+1)*fan_out);
-            //     layer_start = (fan_in + 1) * fan_out;
-            // }
+            state.task.lambda = args[10].getAs<double>();
+            MLPTask::lambda = state.task.lambda;
+
+            if (!args[9].isNull()){
+                // initial coefficients are provided
+                MappedColumnVector warm_start_coeff = args[9].getAs<MappedColumnVector>();
+
+                // copy warm start into the task model
+                // state.reset() ensures algo.incrModel is copied from task.model
+                Index layer_start = 0;
+                for (size_t k = 0; k < numberOfStages; ++k){
+                    for (size_t j=0; j < state.task.model.u[k].cols(); ++j){
+                        for (size_t i=0; i < state.task.model.u[k].rows(); ++i){
+                            state.task.model.u[k](i, j) = warm_start_coeff(
+                                layer_start + j * state.task.model.u[k].rows() + i);
+                        }
+                    }
+                    layer_start += state.task.model.u[k].rows() * state.task.model.u[k].cols();
+                }
+            } else {
+                // initialize the model with appropriate coefficients
+                state.task.model.initialize(
+                    numberOfStages,
+                    reinterpret_cast<const double *>(numbersOfUnits.ptr()));
+            }
         }
         // resetting in either case
         state.reset();
@@ -144,6 +143,45 @@ mlp_igd_transition::run(AnyType &args) {
 
     return state;
 }
+
+/**
+ * @brief Perform the preliminary aggregation function: Merge transition states
+ */
+AnyType
+mlp_igd_merge::run(AnyType &args) {
+    MLPIGDState<MutableArrayHandle<double> > stateLeft = args[0];
+    MLPIGDState<ArrayHandle<double> > stateRight = args[1];
+
+    if (stateLeft.algo.numRows == 0) { return stateRight; }
+    else if (stateRight.algo.numRows == 0) { return stateLeft; }
+
+    MLPIGDAlgorithm::merge(stateLeft, stateRight);
+    MLPLossAlgorithm::merge(stateLeft, stateRight);
+
+    // The following numRows update cannot be put above, because the model
+    // averaging depends on their original values
+    stateLeft.algo.numRows += stateRight.algo.numRows;
+
+    return stateLeft;
+}
+/**
+ * @brief Perform the multilayer perceptron final step
+ */
+AnyType
+mlp_igd_final::run(AnyType &args) {
+    // We request a mutable object. Depending on the backend, this might perform
+    // a deep copy.
+    MLPIGDState<MutableArrayHandle<double> > state = args[0];
+
+    if (state.algo.numRows == 0) { return Null(); }
+
+    L2<MLPModelType>::lambda = state.task.lambda;
+    state.algo.loss = state.algo.loss/static_cast<double>(state.algo.numRows);
+    state.algo.loss += L2<MLPModelType>::loss(state.task.model);
+    MLPIGDAlgorithm::final(state);
+    return state;
+}
+
 /**
  * @brief Perform the multilayer perceptron minibatch transition step
  *
@@ -157,48 +195,48 @@ mlp_minibatch_transition::run(AnyType &args) {
     MLPMiniBatchState<MutableArrayHandle<double> > state = args[0];
 
     // initialize the state if first tuple
-    if (state.algo.numRows == 0) {
+    if (state.numRows == 0) {
         if (!args[3].isNull()) {
             MLPMiniBatchState<ArrayHandle<double> > previousState = args[3];
-            state.allocate(*this, previousState.task.numberOfStages,
-                           previousState.task.numbersOfUnits);
+            state.allocate(*this, previousState.numberOfStages,
+                           previousState.numbersOfUnits);
             state = previousState;
         } else {
             // configuration parameters
             ArrayHandle<double> numbersOfUnits = args[4].getAs<ArrayHandle<double> >();
             int numberOfStages = numbersOfUnits.size() - 1;
 
-            double stepsize = args[5].getAs<double>();
 
             state.allocate(*this, numberOfStages,
                            reinterpret_cast<const double *>(numbersOfUnits.ptr()));
-            state.task.stepsize = stepsize;
-            const int activation = args[6].getAs<int>();
-            const int is_classification = args[7].getAs<int>();
+            state.stepsize = args[5].getAs<double>();
+            state.model.activation = static_cast<double>(args[6].getAs<int>());
+            state.model.is_classification = static_cast<double>(args[7].getAs<int>());
             // args[8] is for weighting the input row, which is populated later.
-            const bool warm_start = args[9].getAs<bool>();
-            const double lambda = args[11].getAs<double>();
-            state.algo.batchSize = args[12].getAs<int>();
-            state.algo.nEpochs = args[13].getAs<int>();
-            state.task.lambda = lambda;
-            MLPTask::lambda = lambda;
-
-            /* FIXME: The state is set back to zero for second row onwards if
-               initialized as in IGD. The following avoids that, but there is
-               some failure with debug build that must be fixed.
-            */
-            state.task.model.is_classification =
-                static_cast<double>(is_classification);
-            state.task.model.activation = static_cast<double>(activation);
-            MappedColumnVector initial_coeff = args[10].getAs<MappedColumnVector>();
-            // copy initial_coeff into the model
-            Index fan_in, fan_out, layer_start = 0;
-            for (size_t k = 0; k < numberOfStages; ++k){
-                fan_in = numbersOfUnits[k];
-                fan_out = numbersOfUnits[k+1];
-                state.task.model.u[k] << initial_coeff.segment(layer_start, (fan_in+1)*fan_out);
-                layer_start = (fan_in + 1) * fan_out;
+            if (!args[9].isNull()){
+                // initial coefficients are provided; copy warm start into the model
+                MappedColumnVector warm_start_coeff = args[9].getAs<MappedColumnVector>();
+                Index layer_start = 0;
+                for (size_t k = 0; k < numberOfStages; ++k){
+                    for (size_t j=0; j < state.model.u[k].cols(); ++j){
+                        for (size_t i=0; i < state.model.u[k].rows(); ++i){
+                            state.model.u[k](i, j) = warm_start_coeff(
+                                layer_start + j * state.model.u[k].rows() + i);
+                        }
+                    }
+                    layer_start += state.model.u[k].rows() * state.model.u[k].cols();
+                }
+            } else {
+                // initialize the model with appropriate coefficients
+                state.model.initialize(
+                    numberOfStages,
+                    reinterpret_cast<const double *>(numbersOfUnits.ptr()));
             }
+
+            state.lambda = args[10].getAs<double>();
+            MLPTask::lambda = state.lambda;
+            state.batchSize = args[11].getAs<int>();
+            state.nEpochs = args[12].getAs<int>();
         }
         // resetting in either case
         state.reset();
@@ -234,30 +272,9 @@ mlp_minibatch_transition::run(AnyType &args) {
         both computed in one function now.
     */
     MLPMiniBatchAlgorithm::transitionInMiniBatch(state, tuple);
-    state.algo.numRows += tuple.indVar.rows();
+    state.numRows += tuple.indVar.rows();
     return state;
 }
-
-/**
- * @brief Perform the perliminary aggregation function: Merge transition states
- */
-AnyType
-mlp_igd_merge::run(AnyType &args) {
-    MLPIGDState<MutableArrayHandle<double> > stateLeft = args[0];
-    MLPIGDState<ArrayHandle<double> > stateRight = args[1];
-
-    if (stateLeft.algo.numRows == 0) { return stateRight; }
-    else if (stateRight.algo.numRows == 0) { return stateLeft; }
-
-    MLPIGDAlgorithm::merge(stateLeft, stateRight);
-    MLPLossAlgorithm::merge(stateLeft, stateRight);
-
-    // The following numRows update cannot be put above, because the model
-    // averaging depends on their original values
-    stateLeft.algo.numRows += stateRight.algo.numRows;
-
-    return stateLeft;
-}
 /**
  * @brief Perform the preliminary aggregation function: Merge transition states
  */
@@ -266,15 +283,15 @@ mlp_minibatch_merge::run(AnyType &args) {
     MLPMiniBatchState<MutableArrayHandle<double> > stateLeft = args[0];
     MLPMiniBatchState<ArrayHandle<double> > stateRight = args[1];
 
-    if (stateLeft.algo.numRows == 0) { return stateRight; }
-    else if (stateRight.algo.numRows == 0) { return stateLeft; }
+    if (stateLeft.numRows == 0) { return stateRight; }
+    else if (stateRight.numRows == 0) { return stateLeft; }
 
     MLPMiniBatchAlgorithm::mergeInPlace(stateLeft, stateRight);
 
     // The following numRows update cannot be put above, because the model
     // averaging depends on their original values
-    stateLeft.algo.numRows += stateRight.algo.numRows;
-    stateLeft.algo.loss += stateRight.algo.loss;
+    stateLeft.numRows += stateRight.numRows;
+    stateLeft.loss += stateRight.loss;
 
     return stateLeft;
 }
@@ -283,35 +300,16 @@ mlp_minibatch_merge::run(AnyType &args) {
  * @brief Perform the multilayer perceptron final step
  */
 AnyType
-mlp_igd_final::run(AnyType &args) {
-    // We request a mutable object. Depending on the backend, this might perform
-    // a deep copy.
-    MLPIGDState<MutableArrayHandle<double> > state = args[0];
-
-    if (state.algo.numRows == 0) { return Null(); }
-
-    L2<MLPModelType>::lambda = state.task.lambda;
-    state.algo.loss = state.algo.loss/static_cast<double>(state.algo.numRows);
-    state.algo.loss += L2<MLPModelType>::loss(state.task.model);
-    MLPIGDAlgorithm::final(state);
-    return state;
-}
-
-
-/**
- * @brief Perform the multilayer perceptron final step
- */
-AnyType
 mlp_minibatch_final::run(AnyType &args) {
     // We request a mutable object. Depending on the backend, this might perform
     // a deep copy.
     MLPMiniBatchState<MutableArrayHandle<double> > state = args[0];
     // Aggregates that haven't seen any data just return Null.
-    if (state.algo.numRows == 0) { return Null(); }
+    if (state.numRows == 0) { return Null(); }
 
-    L2<MLPModelType>::lambda = state.task.lambda;
-    state.algo.loss = state.algo.loss/static_cast<double>(state.algo.numRows);
-    state.algo.loss += L2<MLPModelType>::loss(state.task.model);
+    L2<MLPModelType>::lambda = state.lambda;
+    state.loss = state.loss/static_cast<double>(state.numRows);
+    state.loss += L2<MLPModelType>::loss(state.model);
     return state;
 }
 
@@ -331,7 +329,7 @@ internal_mlp_minibatch_distance::run(AnyType &args) {
     MLPMiniBatchState<ArrayHandle<double> > stateLeft = args[0];
     MLPMiniBatchState<ArrayHandle<double> > stateRight = args[1];
 
-    return std::abs(stateLeft.algo.loss - stateRight.algo.loss);
+    return std::abs(stateLeft.loss - stateRight.loss);
 }
 
 /**
@@ -345,11 +343,9 @@ internal_mlp_igd_result::run(AnyType &args) {
     flattenU.rebind(&state.task.model.u[0](0, 0),
                     state.task.model.arraySize(state.task.numberOfStages,
                                                state.task.numbersOfUnits));
-    double loss = state.algo.loss;
-
     AnyType tuple;
     tuple << flattenU
-          << loss;
+          << static_cast<double>(state.algo.loss);
     return tuple;
 }
 
@@ -360,14 +356,12 @@ AnyType
 internal_mlp_minibatch_result::run(AnyType &args) {
     MLPMiniBatchState<ArrayHandle<double> > state = args[0];
     HandleTraits<ArrayHandle<double> >::ColumnVectorTransparentHandleMap flattenU;
-    flattenU.rebind(&state.task.model.u[0](0, 0),
-                    state.task.model.arraySize(state.task.numberOfStages,
-                                               state.task.numbersOfUnits));
-    double loss = state.algo.loss;
-
+    flattenU.rebind(&state.model.u[0](0, 0),
+                    state.model.arraySize(state.numberOfStages,
+                                          state.numbersOfUnits));
     AnyType tuple;
     tuple << flattenU
-          << loss;
+          << static_cast<double>(state.loss);
     return tuple;
 }
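
Taken together, the relocated functions above follow the usual
transition/merge/final aggregate protocol: mlp_igd_transition sets up the
state on the first tuple (warm-start copy or in-model initialization) and
folds rows in, mlp_igd_merge averages the partial models from parallel
segments weighted by the rows each has seen, and mlp_igd_final divides the
accumulated loss by numRows and adds the L2 term before emitting the state.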
 

http://git-wip-us.apache.org/repos/asf/madlib/blob/e3d2eee9/src/modules/convex/type/model.hpp
----------------------------------------------------------------------
diff --git a/src/modules/convex/type/model.hpp b/src/modules/convex/type/model.hpp
index 4f534e4..d77e562 100644
--- a/src/modules/convex/type/model.hpp
+++ b/src/modules/convex/type/model.hpp
@@ -164,7 +164,7 @@ struct MLPModel {
         for (k =0; k < N; ++k){
             // Initialize according to Glorot and Bengio (2010)
             // See design doc for more info
-            span = sqrt(6.0 / (n[k] + n[k+1]));
+            span = 0.5 * sqrt(6.0 / (n[k] + n[k+1]));
             u[k] << span * Matrix::Random(u[k].rows(), u[k].cols());
         }
     }
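
The added 0.5 factor keeps this C++ initializer equivalent to the Python
initializer deleted later in this commit: Eigen's Matrix::Random draws
entries uniformly from [-1, 1], so 0.5 * span * Random covers
(-span/2, span/2), which matches the removed Python code's
span * (random() - 0.5) with span = sqrt(6.0 / (fan_in + fan_out)).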

http://git-wip-us.apache.org/repos/asf/madlib/blob/e3d2eee9/src/modules/convex/type/state.hpp
----------------------------------------------------------------------
diff --git a/src/modules/convex/type/state.hpp b/src/modules/convex/type/state.hpp
index f40d966..aaf0015 100644
--- a/src/modules/convex/type/state.hpp
+++ b/src/modules/convex/type/state.hpp
@@ -732,8 +732,8 @@ public:
      * @brief Reset the intra-iteration fields.
      */
     inline void reset() {
-        algo.numRows = 0;
-        algo.loss = 0.;
+        numRows = 0;
+        loss = 0.;
     }
 
     /**
@@ -760,11 +760,11 @@ public:
         // effect. I can also do something like "mStorage[0] = N",
         // but I am not clear about the type binding/alignment
         rebind();
-        task.numberOfStages = inNumberOfStages;
+        numberOfStages = inNumberOfStages;
         uint16_t N = inNumberOfStages;
         uint16_t k;
         for (k = 0; k <= N; k ++) {
-            task.numbersOfUnits[k] = inNumbersOfUnits[k];
+            numbersOfUnits[k] = inNumbersOfUnits[k];
         }
 
         // This time all the member fields are correctly binded
@@ -823,23 +823,22 @@ private:
      * - N + 9 + sizeOfModel: loss (loss value, the sum of squared errors)
      */
     void rebind() {
+        numberOfStages.rebind(&mStorage[0]);
+        size_t N = numberOfStages;
 
-        task.numberOfStages.rebind(&mStorage[0]);
-        size_t N = task.numberOfStages;
-
-        task.numbersOfUnits =
+        numbersOfUnits =
             reinterpret_cast<dimension_pointer_type>(&mStorage[1]);
-        task.stepsize.rebind(&mStorage[N + 2]);
-        task.lambda.rebind(&mStorage[N + 3]);
-        size_t sizeOfModel = task.model.rebind(&mStorage[N + 4],
+        stepsize.rebind(&mStorage[N + 2]);
+        lambda.rebind(&mStorage[N + 3]);
+        size_t sizeOfModel = model.rebind(&mStorage[N + 4],
                                                &mStorage[N + 5],
                                                &mStorage[N + 6],
-                                               task.numberOfStages,
-                                               task.numbersOfUnits);
-        algo.numRows.rebind(&mStorage[N + 6 + sizeOfModel]);
-        algo.batchSize.rebind(&mStorage[N + 7 + sizeOfModel]);
-        algo.nEpochs.rebind(&mStorage[N + 8 + sizeOfModel]);
-        algo.loss.rebind(&mStorage[N + 9 + sizeOfModel]);
+                                               numberOfStages,
+                                               numbersOfUnits);
+        numRows.rebind(&mStorage[N + 6 + sizeOfModel]);
+        batchSize.rebind(&mStorage[N + 7 + sizeOfModel]);
+        nEpochs.rebind(&mStorage[N + 8 + sizeOfModel]);
+        loss.rebind(&mStorage[N + 9 + sizeOfModel]);
     }
 
 
@@ -849,20 +848,17 @@ private:
     typedef typename HandleTraits<Handle>::ReferenceToDouble numeric_type;
 
 public:
-    struct TaskState {
-        dimension_type numberOfStages;
-        dimension_pointer_type numbersOfUnits;
-        numeric_type stepsize;
-        numeric_type lambda;
-        MLPModel<Handle> model;
-    } task;
+    dimension_type numberOfStages;
+    dimension_pointer_type numbersOfUnits;
+    numeric_type stepsize;
+    numeric_type lambda;
+    MLPModel<Handle> model;
+
+    count_type numRows;
+    dimension_type batchSize;
+    dimension_type nEpochs;
+    numeric_type loss;
 
-    struct AlgoState {
-        count_type numRows;
-        dimension_type batchSize;
-        dimension_type nEpochs;
-        numeric_type loss;
-    } algo;
 };
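
Reading the offsets off the new rebind() above, the flattened
MLPMiniBatchState buffer lays out roughly as follows (a reconstruction
from this diff, assuming model.rebind() returns the coefficient count):

    mStorage[0]                     numberOfStages (N)
    mStorage[1 .. N+1]              numbersOfUnits
    mStorage[N+2]                   stepsize
    mStorage[N+3]                   lambda
    mStorage[N+4], mStorage[N+5]    model.is_classification, model.activation
    mStorage[N+6 ..]                model coefficients (sizeOfModel slots)
    mStorage[N+6 + sizeOfModel]     numRows
    mStorage[N+7 + sizeOfModel]     batchSize
    mStorage[N+8 + sizeOfModel]     nEpochs
    mStorage[N+9 + sizeOfModel]     loss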
 
 

http://git-wip-us.apache.org/repos/asf/madlib/blob/e3d2eee9/src/ports/postgres/modules/convex/mlp.sql_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/convex/mlp.sql_in b/src/ports/postgres/modules/convex/mlp.sql_in
index f153722..7ccec2d 100644
--- a/src/ports/postgres/modules/convex/mlp.sql_in
+++ b/src/ports/postgres/modules/convex/mlp.sql_in
@@ -1277,7 +1277,6 @@ CREATE FUNCTION MADLIB_SCHEMA.mlp_igd_transition(
         activation         INTEGER,
         is_classification  INTEGER,
         weight             DOUBLE PRECISION,
-        warm_start         BOOLEAN,
         warm_start_coeff   DOUBLE PRECISION[],
         lambda             DOUBLE PRECISION
     )
@@ -1295,7 +1294,6 @@ CREATE FUNCTION MADLIB_SCHEMA.mlp_minibatch_transition(
         activation         INTEGER,
         is_classification  INTEGER,
         weight             DOUBLE PRECISION,
-        warm_start         BOOLEAN,
         warm_start_coeff   DOUBLE PRECISION[],
         lambda             DOUBLE PRECISION,
         batch_size         INTEGER,
@@ -1344,7 +1342,6 @@ CREATE AGGREGATE MADLIB_SCHEMA.mlp_igd_step(
         /* activation */          INTEGER,
         /* is_classification */   INTEGER,
         /* weight */              DOUBLE PRECISION,
-        /* warm_start */          BOOLEAN,
         /* warm_start_coeff */    DOUBLE PRECISION[],
         /* lambda */              DOUBLE PRECISION
         )(
@@ -1369,7 +1366,6 @@ CREATE AGGREGATE MADLIB_SCHEMA.mlp_minibatch_step(
         /* activation */          INTEGER,
         /* is_classification */   INTEGER,
         /* weight */              DOUBLE PRECISION,
-        /* warm_start */          BOOLEAN,
         /* warm_start_coeff */    DOUBLE PRECISION[],
         /* lambda */              DOUBLE PRECISION,
         /* batch_size */          INTEGER,

http://git-wip-us.apache.org/repos/asf/madlib/blob/e3d2eee9/src/ports/postgres/modules/convex/mlp_igd.py_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/convex/mlp_igd.py_in b/src/ports/postgres/modules/convex/mlp_igd.py_in
index 687011c..9039ebe 100644
--- a/src/ports/postgres/modules/convex/mlp_igd.py_in
+++ b/src/ports/postgres/modules/convex/mlp_igd.py_in
@@ -81,7 +81,7 @@ def mlp(schema_madlib, source_table, output_table, independent_varname,
     step_size = step_size_init
     n_tries = optimizer_params["n_tries"]
     # lambda is a reserved word in python
-    lmbda = optimizer_params["lambda"]
+    lambda_ = optimizer_params["lambda"]
     batch_size = optimizer_params['batch_size']
     n_epochs = optimizer_params['n_epochs']
 
@@ -212,6 +212,10 @@ def mlp(schema_madlib, source_table, output_table, independent_varname,
                 """.format(**locals())
         else:
             start_coeff = PY2SQL(coeff, array_type="DOUBLE PRECISION")
+    else:
+        # If warm start is not enabled, initialization is the
+        # responsibility of the model (done in the C++ code).
+        start_coeff = "NULL::DOUBLE PRECISION[]"
 
     if grouping_col:
         group_by_clause = "GROUP BY {0}, {1}".format(grouping_col, col_grp_key)
@@ -236,12 +240,13 @@ def mlp(schema_madlib, source_table, output_table, independent_varname,
         "warm_start": warm_start,
         "n_iterations": n_iterations,
         "tolerance": tolerance,
-        "lmbda": lmbda,
+        "lambda_": lambda_,
         "grouping_col": grouping_col,
         "grouping_str": grouping_str,
         "x_mean_table": x_mean_table,
         "batch_size": batch_size,
-        "n_epochs": n_epochs
+        "n_epochs": n_epochs,
+        "start_coeff": start_coeff
     }
     # variables to be used by GroupIterationController
     it_args.update({
@@ -269,17 +274,6 @@ def mlp(schema_madlib, source_table, output_table, independent_varname,
 
     for _ in range(n_tries):
         prev_state = None
-        if not warm_start:
-            coeff = []
-            for fan_in, fan_out in zip(layer_sizes, layer_sizes[1:]):
-                # Initalize according to Glorot and Bengio (2010)
-                # See design doc for more info
-                span = math.sqrt(6.0 / (fan_in + fan_out))
-                dim = (fan_in + 1) * fan_out
-                rand = [span * (random() - 0.5) for _ in range(dim)]
-                coeff += rand
-            start_coeff = PY2SQL(coeff, "double precision")
-        it_args['start_coeff'] = start_coeff
         iterationCtrl = GroupIterationController(it_args)
         with iterationCtrl as it:
             it.iteration = 0
@@ -303,9 +297,8 @@ def mlp(schema_madlib, source_table, output_table, independent_varname,
                             {activation},
                             {is_classification},
                             ({weights})::DOUBLE PRECISION,
-                            {warm_start},
                             ({start_coeff})::DOUBLE PRECISION[],
-                            {lmbda},
+                            {lambda_},
                             {batch_size}::integer,
                             {n_epochs}::integer
                         )
@@ -321,9 +314,8 @@ def mlp(schema_madlib, source_table, output_table, independent_varname,
                             {activation},
                             {is_classification},
                             ({weights})::DOUBLE PRECISION,
-                            {warm_start},
                             ({start_coeff})::DOUBLE PRECISION[],
-                            {lmbda}
+                            {lambda_}
                         )
                         """
                 it.update(train_sql)
@@ -539,10 +531,13 @@ def _create_output_table(output_table, temp_output_table,
         plpy.execute("DROP TABLE IF EXISTS {0}".format(output_table))
     build_output_query = """
         CREATE TABLE {output_table} AS
-        SELECT {grouping_col_comma} coeff, loss, num_iterations FROM
-        (SELECT {temp_output_table}.*, row_number()
-        OVER ({partition_by} ORDER BY loss)
-        AS rank FROM {temp_output_table}) x WHERE x.rank=1;
+            SELECT {grouping_col_comma} coeff, loss, num_iterations
+            FROM (
+                SELECT {temp_output_table}.*,
+                        row_number() OVER ({partition_by} ORDER BY loss) AS rank
+                FROM {temp_output_table}
+            ) x
+            WHERE x.rank = 1;
     """.format(**locals())
     plpy.execute(build_output_query)