Posted to commits@madlib.apache.org by ri...@apache.org on 2018/02/03 00:47:20 UTC
madlib git commit: SVM: Add minibatch as a new solver [Forced Update!]
Repository: madlib
Updated Branches:
refs/heads/master 657cf4aa4 -> a8bbe082c (forced update)
SVM: Add minibatch as a new solver
This is based on original work by
Xiaocheng Tang <xi...@gmail.com> in #75.
This PR adds two main features:
- A Minibatch solver that takes as input a batch of data
- SVM code that takes advantage of the minibatch solver
Closes #229
Co-authored-by: Nikhil Kak <nk...@pivotal.io>
Co-authored-by: Xiaocheng Tang <xi...@gmail.com>
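For reference, a minimal usage sketch of the new solver follows. The parameter
values mirror the tests added in this commit; the table and column names are
hypothetical. With the minibatch path, each row of the source table packs one
batch: the independent variable is a 2-D DOUBLE PRECISION array and the
dependent variable is an array of labels.

    SELECT madlib.svm_classification(
        'my_train_table',   -- hypothetical: x FLOAT8[][], label TEXT[]
        'my_model',         -- output model table
        'label',            -- dependent variable (one array per batch row)
        'x',                -- independent variables (one matrix per batch row)
        'linear',
        NULL,               -- kernel_params
        NULL,               -- grouping_col
        'max_iter=10, init_stepsize=0.2, batch_size=3, n_epochs=2'
    );

Leaving batch_size at its default of 1 retains the original per-tuple IGD
solver.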
Project: http://git-wip-us.apache.org/repos/asf/madlib/repo
Commit: http://git-wip-us.apache.org/repos/asf/madlib/commit/a8bbe082
Tree: http://git-wip-us.apache.org/repos/asf/madlib/tree/a8bbe082
Diff: http://git-wip-us.apache.org/repos/asf/madlib/diff/a8bbe082
Branch: refs/heads/master
Commit: a8bbe082ca60f87e006eba164ea69b159e1875fc
Parents: 53db736
Author: Rahul Iyer <ri...@apache.org>
Authored: Fri Feb 2 14:15:01 2018 -0800
Committer: Rahul Iyer <ri...@apache.org>
Committed: Fri Feb 2 16:46:48 2018 -0800
----------------------------------------------------------------------
.gitignore | 1 +
src/dbal/EigenIntegration/HandleMap_proto.hpp | 3 +-
src/modules/convex/algo/igd.hpp | 81 ++++++++-
src/modules/convex/linear_svm_igd.cpp | 159 +++++++++++++++++
src/modules/convex/linear_svm_igd.hpp | 6 +
src/modules/convex/task/linear_svm.hpp | 67 +++++++-
src/modules/convex/type/model.hpp | 71 +++++---
src/modules/convex/type/state.hpp | 79 +++++++++
src/modules/convex/type/tuple.hpp | 3 +
src/ports/postgres/modules/svm/svm.py_in | 170 +++++++++++++------
src/ports/postgres/modules/svm/svm.sql_in | 117 +++++++++++--
src/ports/postgres/modules/svm/test/svm.sql_in | 104 ++++++++++++
.../modules/utilities/validate_args.py_in | 19 +--
13 files changed, 774 insertions(+), 106 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/madlib/blob/a8bbe082/.gitignore
----------------------------------------------------------------------
diff --git a/.gitignore b/.gitignore
index 00dc016..a073fbd 100644
--- a/.gitignore
+++ b/.gitignore
@@ -25,6 +25,7 @@ auto
*.swp
*.fdb_latexmk
*.swo # vim swap file
+\#*\# # emacs backup file
# Biblatex temporary files
*-blx.bib
http://git-wip-us.apache.org/repos/asf/madlib/blob/a8bbe082/src/dbal/EigenIntegration/HandleMap_proto.hpp
----------------------------------------------------------------------
diff --git a/src/dbal/EigenIntegration/HandleMap_proto.hpp b/src/dbal/EigenIntegration/HandleMap_proto.hpp
index 4bfe7c5..c7b6cb2 100644
--- a/src/dbal/EigenIntegration/HandleMap_proto.hpp
+++ b/src/dbal/EigenIntegration/HandleMap_proto.hpp
@@ -19,6 +19,7 @@ namespace eigen_integration {
template <class EigenType, class Handle, int MapOptions = Eigen::Unaligned>
class HandleMap : public Eigen::Map<EigenType, MapOptions> {
public:
+ typedef EigenType PlainEigenType;
typedef Eigen::Map<EigenType, MapOptions> Base;
typedef typename Base::Scalar Scalar;
typedef typename Base::Index Index;
@@ -57,7 +58,7 @@ public:
*
* For example, this allows construction of MappedColumnVector from
* MappedMatrix::col(int) or NativeColumnVector, etc.
- */
+ */
template <class Derived>
HandleMap(const Eigen::MapBase<Derived>& inMappedData,
typename boost::enable_if_c<Derived::IsVectorAtCompileTime>::type* = 0)
http://git-wip-us.apache.org/repos/asf/madlib/blob/a8bbe082/src/modules/convex/algo/igd.hpp
----------------------------------------------------------------------
diff --git a/src/modules/convex/algo/igd.hpp b/src/modules/convex/algo/igd.hpp
index cd17e64..3ae4c13 100644
--- a/src/modules/convex/algo/igd.hpp
+++ b/src/modules/convex/algo/igd.hpp
@@ -34,7 +34,9 @@ public:
typedef typename Task::model_type model_type;
static void transition(state_type &state, const tuple_type &tuple);
+ static void transitionInMiniBatch(state_type &state, const tuple_type &tuple);
static void merge(state_type &state, const_state_type &otherState);
+ static void mergeInPlace(state_type &state, const_state_type &otherState);
static void final(state_type &state);
};
@@ -56,6 +58,62 @@ IGD<State, ConstState, Task>::transition(state_type &state,
state.task.stepsize * tuple.weight);
}
+/**
+ * @brief Update the transition state in mini-batches
+ *
+ * Note: We assume that
+ * 1. Task defines a model_eigen_type
+ * 2. A batch of tuple.indVar is a Matrix
+ * 3. A batch of tuple.depVar is a ColumnVector
+ * 4. Task defines a getLossAndUpdateModel method
+ *
+ */
+ template <class State, class ConstState, class Task>
+ void
+ IGD<State, ConstState, Task>::transitionInMiniBatch(
+ state_type &state,
+ const tuple_type &tuple) {
+
+ madlib_assert(tuple.indVar.rows() == tuple.depVar.rows(),
+ std::runtime_error("Invalid data. Independent and dependent "
+ "batches don't have same number of rows."));
+
+ int batch_size = state.algo.batchSize;
+ int n_epochs = state.algo.nEpochs;
+
+ // n_rows/n_ind_cols are the rows/cols in a transition tuple.
+ int n_rows = tuple.indVar.rows();
+ int n_ind_cols = tuple.indVar.cols();
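+ // Compute the number of batches; the last one may be smaller than
+ // batch_size. E.g., n_rows = 10 and batch_size = 3 give n_batches = 4,
+ // with batch sizes 3, 3, 3 and 1.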
+ int n_batches = n_rows < batch_size ? 1 :
+ n_rows / batch_size +
+ int(n_rows%batch_size > 0);
+
+ for (int curr_epoch=0; curr_epoch < n_epochs; curr_epoch++) {
+ double loss = 0.0;
+ for (int curr_batch=0, curr_batch_row_index=0; curr_batch < n_batches;
+ curr_batch++, curr_batch_row_index += batch_size) {
+ Matrix X_batch;
+ ColumnVector y_batch;
+ if (curr_batch == n_batches-1) {
+ // last batch
+ X_batch = tuple.indVar.bottomRows(n_rows-curr_batch_row_index);
+ y_batch = tuple.depVar.tail(n_rows-curr_batch_row_index);
+ } else {
+ X_batch = tuple.indVar.block(curr_batch_row_index, 0, batch_size, n_ind_cols);
+ y_batch = tuple.depVar.segment(curr_batch_row_index, batch_size);
+ }
+ loss += Task::getLossAndUpdateModel(
+ state.task.model, X_batch, y_batch, state.task.stepsize);
+ }
+
+ // The first epoch will most likely have the highest loss.
+ // Being pessimistic, use the total loss only from the first epoch.
+ if (curr_epoch==0) state.algo.loss += loss;
+ }
+ return;
+ }
+
+
template <class State, class ConstState, class Task>
void
IGD<State, ConstState, Task>::merge(state_type &state,
@@ -86,11 +144,32 @@ IGD<State, ConstState, Task>::merge(state_type &state,
template <class State, class ConstState, class Task>
void
+IGD<State, ConstState, Task>::mergeInPlace(state_type &state,
+ const_state_type &otherState) {
+ // avoid division by zero
+ if (state.algo.numRows == 0) {
+ state.task.model = otherState.task.model;
+ return;
+ } else if (otherState.algo.numRows == 0) {
+ return;
+ }
+
+ // model averaging, weighted by rows seen
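+ // With L = rows seen by this state and R = rows seen by the other, the
+ // three operations below compute the row-weighted average
+ // w = (L*w_left + R*w_right) / (L + R) in place.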
+ double leftRows = static_cast<double>(state.algo.numRows);
+ double rightRows = static_cast<double>(otherState.algo.numRows);
+ double totalNumRows = leftRows + rightRows;
+ state.task.model *= leftRows / rightRows;
+ state.task.model += otherState.task.model;
+ state.task.model *= rightRows / totalNumRows;
+}
+
+template <class State, class ConstState, class Task>
+void
IGD<State, ConstState, Task>::final(state_type &state) {
// The reason that we have to keep the task.model untouched in the transition
// function: the loss computation needs the model from the last iteration cleanly
-
state.task.model = state.algo.incrModel;
+
}
} // namespace convex
http://git-wip-us.apache.org/repos/asf/madlib/blob/a8bbe082/src/modules/convex/linear_svm_igd.cpp
----------------------------------------------------------------------
diff --git a/src/modules/convex/linear_svm_igd.cpp b/src/modules/convex/linear_svm_igd.cpp
index f396250..90882a3 100644
--- a/src/modules/convex/linear_svm_igd.cpp
+++ b/src/modules/convex/linear_svm_igd.cpp
@@ -32,6 +32,10 @@ typedef IGD<GLMIGDState<MutableArrayHandle<double> >,
GLMIGDState<ArrayHandle<double> >,
LinearSVM<GLMModel, GLMTuple > > LinearSVMIGDAlgorithm;
+typedef IGD<SVMMinibatchState<MutableArrayHandle<double> >,
+ SVMMinibatchState<ArrayHandle<double> >,
+ LinearSVM<GLMModel, SVMMiniBatchTuple > > LinearSVMIGDAlgoMiniBatch;
+
typedef Loss<GLMIGDState<MutableArrayHandle<double> >,
GLMIGDState<ArrayHandle<double> >,
LinearSVM<GLMModel, GLMTuple > > LinearSVMLossAlgorithm;
@@ -121,6 +125,98 @@ linear_svm_igd_transition::run(AnyType &args) {
}
/**
+ * @brief Perform the linear support vector machine minibatch transition step
+ *
+ * Called for each tuple.
+ */
+AnyType
+linear_svm_igd_minibatch_transition::run(AnyType &args) {
+ // The real state.
+ // For the first tuple: args[0] is nothing more than a marker that
+ // indicates that we should do some initial operations.
+ // For other tuples: args[0] holds the computation state until last tuple
+ SVMMinibatchState<MutableArrayHandle<double> > state = args[0];
+
+ // initialize the state if first tuple
+ if (state.algo.numRows == 0) {
+
+ LinearSVM<GLMModel, GLMTuple >::epsilon = args[9].getAs<double>();
+ LinearSVM<GLMModel, GLMTuple >::is_svc = args[10].getAs<bool>();
+ if (!args[3].isNull()) {
+ SVMMinibatchState<ArrayHandle<double> > previousState = args[3];
+ state.allocate(*this, previousState.task.nFeatures);
+ state = previousState;
+ } else {
+ // configuration parameters
+ uint32_t dimension = args[4].getAs<uint32_t>();
+ state.allocate(*this, dimension); // with zeros
+ }
+ // resetting in either case
+ // state.reset();
+ state.task.stepsize = args[5].getAs<double>();
+ const double lambda = args[6].getAs<double>();
+ const bool isL2 = args[7].getAs<bool>();
+ const int nTuples = args[8].getAs<int>();
+
+ // The regularization operations called below (scaling and clipping)
+ // need these class variables to be set.
+ L1<GLMModel>::n_tuples = nTuples;
+ L2<GLMModel>::n_tuples = nTuples;
+ if (isL2)
+ L2<GLMModel>::lambda = lambda;
+ else
+ L1<GLMModel>::lambda = lambda;
+ }
+
+ state.algo.nEpochs = args[12].getAs<int>();
+ state.algo.batchSize = args[13].getAs<int>();
+
+ // Skip the current record if args[1] (features) contains NULL values,
+ // or args[2] is NULL
+ try {
+ args[1].getAs<MappedMatrix>();
+ } catch (const ArrayWithNullException &e) {
+ return args[0];
+ }
+ if (args[2].isNull())
+ return args[0];
+
+ // tuple
+ using madlib::dbal::eigen_integration::MappedColumnVector;
+
+ MappedMatrix x(NULL);
+ MappedColumnVector y(NULL);
+ try {
+ new (&x) MappedMatrix(args[1].getAs<MappedMatrix>());
+ new (&y) MappedColumnVector(args[2].getAs<MappedColumnVector>());
+ } catch (const ArrayWithNullException &e) {
+ return args[0];
+ }
+ SVMMiniBatchTuple tuple;
+ tuple.indVar = trans(x);
+ tuple.depVar = y;
+
+ // Each tuple can be weighted - this can be a combination of the sample weight
+ // and the class weight. The calling function is responsible for combining the
+ // two into a single tuple weight. The default value for this parameter is 1,
+ // set in the definition of "tuple".
+ // The weight is used to increase the value of a particular tuple for the
+ // online learning. The weight is not used for the loss computation.
+ tuple.weight = args[11].getAs<double>();
+
+
+ // Now do the transition step: L2 scaling, then the mini-batch updates,
+ // then L1 clipping
+ L2<GLMModel>::scaling(state.task.model, state.task.stepsize);
+ LinearSVMIGDAlgoMiniBatch::transitionInMiniBatch(state, tuple);
+ L1<GLMModel>::clipping(state.task.model, state.task.stepsize);
+
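+ // tuple.indVar is trans(x), so the number of data points contributed by
+ // this tuple's batch is x.cols().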
+ state.algo.numRows += x.cols();
+ return state;
+}
+
+
+/**
* @brief Perform the preliminary aggregation function: Merge transition states
*/
AnyType
@@ -146,6 +242,30 @@ linear_svm_igd_merge::run(AnyType &args) {
}
/**
+ * @brief Perform the preliminary aggregation function: Merge minibatch transition states
+ */
+AnyType
+linear_svm_igd_minibatch_merge::run(AnyType &args) {
+ SVMMinibatchState<MutableArrayHandle<double> > stateLeft = args[0];
+ SVMMinibatchState<ArrayHandle<double> > stateRight = args[1];
+
+ // We first handle the trivial case where this function is called with one
+ // of the states being the initial state
+ if (stateLeft.algo.numRows == 0) { return stateRight; }
+ else if (stateRight.algo.numRows == 0) { return stateLeft; }
+
+ // Merge states together
+ LinearSVMIGDAlgoMiniBatch::mergeInPlace(stateLeft, stateRight);
+
+ // The following numRows update cannot be put above, because the model
+ // averaging depends on the original values
+ stateLeft.algo.numRows += stateRight.algo.numRows;
+ stateLeft.algo.loss += stateRight.algo.loss;
+
+ return stateLeft;
+}
+
+/**
* @brief Perform the linear support vector machine final step
*/
AnyType
@@ -172,6 +292,29 @@ linear_svm_igd_final::run(AnyType &args) {
}
/**
+ * @brief Perform the linear support vector machine minibatch final step
+ */
+AnyType
+linear_svm_igd_minibatch_final::run(AnyType &args) {
+ // We request a mutable object. Depending on the backend, this might perform
+ // a deep copy.
+ SVMMinibatchState<MutableArrayHandle<double> > state = args[0];
+ // Aggregates that haven't seen any data just return Null.
+ if (state.algo.numRows == 0) { return Null(); }
+ state.algo.loss = state.algo.loss / state.algo.numRows;
+ return state;
+}
+
+AnyType
+internal_linear_svm_igd_minibatch_distance::run(AnyType &args) {
+ SVMMinibatchState<ArrayHandle<double> > stateLeft = args[0];
+ SVMMinibatchState<ArrayHandle<double> > stateRight = args[1];
+
+ return std::abs((stateLeft.algo.loss - stateRight.algo.loss)
+ / stateLeft.algo.loss);
+}
+
+/**
* @brief Return the difference in RMSE between two states
*/
AnyType
@@ -199,6 +342,22 @@ internal_linear_svm_igd_result::run(AnyType &args) {
return tuple;
}
+/**
+ * @brief Return the coefficients and diagnostic statistics of the state
+ */
+AnyType
+internal_linear_svm_igd_minibatch_result::run(AnyType &args) {
+ SVMMinibatchState<ArrayHandle<double> > state = args[0];
+
+ AnyType tuple;
+ tuple << state.task.model
+ << static_cast<double>(state.algo.loss)
+ << 0.
+ << static_cast<int64_t>(state.algo.numRows);
+
+ return tuple;
+}
+
} // namespace convex
} // namespace modules
http://git-wip-us.apache.org/repos/asf/madlib/blob/a8bbe082/src/modules/convex/linear_svm_igd.hpp
----------------------------------------------------------------------
diff --git a/src/modules/convex/linear_svm_igd.hpp b/src/modules/convex/linear_svm_igd.hpp
index afe169e..0958b0f 100644
--- a/src/modules/convex/linear_svm_igd.hpp
+++ b/src/modules/convex/linear_svm_igd.hpp
@@ -8,26 +8,32 @@
* @brief Linear support vector machine (incremental gradient): Transition function
*/
DECLARE_UDF(convex, linear_svm_igd_transition)
+DECLARE_UDF(convex, linear_svm_igd_minibatch_transition)
/**
* @brief Linear support vector machine (incremental gradient): State merge function
*/
DECLARE_UDF(convex, linear_svm_igd_merge)
+DECLARE_UDF(convex, linear_svm_igd_minibatch_merge)
/**
* @brief Linear support vector machine (incremental gradient): Final function
*/
DECLARE_UDF(convex, linear_svm_igd_final)
+DECLARE_UDF(convex, linear_svm_igd_minibatch_final)
/**
* @brief Linear support vector machine (incremental gradient): Difference in
* log-likelihood between two transition states
*/
DECLARE_UDF(convex, internal_linear_svm_igd_distance)
+DECLARE_UDF(convex, internal_linear_svm_igd_minibatch_distance)
+
/**
* @brief Linear support vector machine (incremental gradient): Convert
* transition state to result tuple
*/
DECLARE_UDF(convex, internal_linear_svm_igd_result)
+DECLARE_UDF(convex, internal_linear_svm_igd_minibatch_result)
http://git-wip-us.apache.org/repos/asf/madlib/blob/a8bbe082/src/modules/convex/task/linear_svm.hpp
----------------------------------------------------------------------
diff --git a/src/modules/convex/task/linear_svm.hpp b/src/modules/convex/task/linear_svm.hpp
index 136d73b..7146432 100644
--- a/src/modules/convex/task/linear_svm.hpp
+++ b/src/modules/convex/task/linear_svm.hpp
@@ -22,10 +22,16 @@ class LinearSVM {
public:
typedef Model model_type;
typedef Tuple tuple_type;
- typedef typename Tuple::independent_variables_type
- independent_variables_type;
+
+ typedef typename Tuple::independent_variables_type independent_variables_type;
typedef typename Tuple::dependent_variable_type dependent_variable_type;
+ // Model is assumed to be a plain Eigen type or an Eigen map; the
+ // 'PlainEigenType' typedef infers the actual type from the Model definition.
+ // E.g., SVMModel is defined as a ColumnVectorTransparentHandleMap, which
+ // has ColumnVector as its PlainEigenType.
+ typedef typename model_type::PlainEigenType coefficient_type;
+
static double epsilon;
static bool is_svc;
@@ -41,6 +47,12 @@ public:
const dependent_variable_type &y,
const double &stepsize);
+ static double getLossAndUpdateModel(
+ model_type &model,
+ const independent_variables_type &x,
+ const dependent_variable_type &y,
+ const double &stepsize);
+
static double loss(
const model_type &model,
const independent_variables_type &x,
@@ -101,6 +113,57 @@ LinearSVM<Model, Tuple>::gradientInPlace(
}
}
+/**
+* @brief This function will update the model for a single batch and return the loss
+* @param model Model to update
+* @param x Batch of independent variables
+* @param y Batch of dependent variables
+* @param stepsize Learning rate for model update
+* @return Total loss in the batch
+*/
+template <class Model, class Tuple>
+double
+LinearSVM<Model, Tuple>::getLossAndUpdateModel(
+ model_type &model,
+ const independent_variables_type &x,
+ const dependent_variable_type &y,
+ const double &stepsize){
+
+ // This function is called by the minibatch transition function to update
+ // the model for each batch. x and y in the function signature are defined
+ // as generic variables to ensure a consistent interface across all modules.
+
+ // ASSUMPTION: 'gradient' will always be of the same type as the
+ // coefficients. In SVM, the model is just the coefficients, but can be
+ // more complex with other modules like MLP.
+ coefficient_type gradient = model;
+ gradient.setZero();
+ coefficient_type w_transpose_x = x * model;
+ double loss = 0.0;
+ int batch_size = x.rows();
+ double dist_from_hyperplane = 0.0;
+ double c = 0.0;
+ int n_points_with_positive_dist = 0;
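+ // Per point i: for classification (is_svc), the hinge loss
+ // max(0, 1 - y_i * w^T x_i) with subgradient -y_i * x_i on margin
+ // violation; for regression, the epsilon-insensitive loss
+ // max(0, |w^T x_i - y_i| - epsilon) with subgradient sign(w^T x_i - y_i) * x_i.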
+ for (int i = 0; i < batch_size; i++) {
+ if (is_svc) {
+ c = -y(i); // minus for "-loglik"
+ dist_from_hyperplane = 1.0 - w_transpose_x(i) * y(i);
+ } else {
+ double wx_y = w_transpose_x(i) - y(i);
+ c = wx_y > 0 ? 1.0 : -1.0;
+ dist_from_hyperplane = c * wx_y - epsilon;
+ }
+ if (dist_from_hyperplane > 0.) {
+ gradient += c * x.row(i);
+ loss += dist_from_hyperplane;
+ n_points_with_positive_dist++;
+ }
+ }
+ if (n_points_with_positive_dist > 0) { // avoid 0/0 when no point violates the margin
+ gradient.array() /= n_points_with_positive_dist;
+ model -= stepsize * gradient;
+ }
+ return loss;
+}
+
template <class Model, class Tuple>
double
LinearSVM<Model, Tuple>::loss(
http://git-wip-us.apache.org/repos/asf/madlib/blob/a8bbe082/src/modules/convex/type/model.hpp
----------------------------------------------------------------------
diff --git a/src/modules/convex/type/model.hpp b/src/modules/convex/type/model.hpp
index 679dab4..4f534e4 100644
--- a/src/modules/convex/type/model.hpp
+++ b/src/modules/convex/type/model.hpp
@@ -93,17 +93,20 @@ struct LMFModel {
}
};
-// Generalized Linear Models (GLMs): Logistic regression, Linear SVM
typedef HandleTraits<MutableArrayHandle<double> >::ColumnVectorTransparentHandleMap
GLMModel;
+typedef HandleTraits<MutableArrayHandle<double> >::ColumnVectorTransparentHandleMap
+ SVMModel;
+
// The necessity of this wrapper is to allow classes in algo/ and task/ to
// have a type that they can template over
template <class Handle>
struct MLPModel {
- typename HandleTraits<Handle>::ReferenceToUInt16 is_classification;
- typename HandleTraits<Handle>::ReferenceToUInt16 activation;
- std::vector<Eigen::Map<Matrix > > u;
+ typename HandleTraits<Handle>::ReferenceToDouble is_classification;
+ typename HandleTraits<Handle>::ReferenceToDouble activation;
+ std::vector<MutableMappedMatrix> u;
/**
* @brief Space needed.
@@ -120,8 +123,8 @@ struct MLPModel {
size_t N = inNumberOfStages;
const double *n = inNumbersOfUnits;
size_t k;
- for (k = 1; k <= N; k ++) {
- size += (n[k-1] + 1) * (n[k]);
+ for (k = 0; k < N; k ++) {
+ size += (n[k] + 1) * (n[k+1]);
}
return size; // weights (u)
}
@@ -140,71 +143,87 @@ struct MLPModel {
uint32_t sizeOfU = 0;
u.clear();
- for (k = 1; k <= N; k ++) {
- u.push_back(Eigen::Map<Matrix >(
- const_cast<double*>(data + sizeOfU),
- n[k-1] + 1, n[k]));
- sizeOfU += (n[k-1] + 1) * (n[k]);
+ for (k = 0; k < N; k ++) {
+ u.push_back(MutableMappedMatrix());
+ u[k].rebind(const_cast<double *>(data + sizeOfU), n[k] + 1, n[k+1]);
+ sizeOfU += (n[k] + 1) * (n[k+1]);
}
return sizeOfU;
}
+ void initialize(const uint16_t &inNumberOfStages,
+ const double *inNumbersOfUnits){
+ size_t N = inNumberOfStages;
+ const double *n = inNumbersOfUnits;
+ size_t k;
+ double span;
+ for (k = 0; k < N; ++k) {
+ // Initialize according to Glorot and Bengio (2010)
+ // See design doc for more info
+ span = sqrt(6.0 / (n[k] + n[k+1]));
+ u[k] << span * Matrix::Random(u[k].rows(), u[k].cols());
+ }
+ }
+
double norm() const {
double norm = 0.;
size_t k;
for (k = 0; k < u.size(); k ++) {
- norm+=u[k].bottomRows(u[k].rows()-1).squaredNorm();
+ norm += u[k].bottomRows(u[k].rows()-1).squaredNorm();
}
return std::sqrt(norm);
}
void setZero(){
size_t k;
- for (k = 1; k <= u.size(); k ++) {
- u[k-1].setZero();
+ for (k = 0; k < u.size(); k ++) {
+ u[k].setZero();
}
}
/*
* Some operator wrappers for u.
*/
- MLPModel &operator*=(const double &c) {
+ MLPModel& operator*=(const double &c) {
// Note that when scaling the model, you should
// not update the bias.
size_t k;
- for (k = 1; k <= u.size(); k ++) {
- u[k-1] *= c;
+ for (k = 0; k < u.size(); k ++) {
+ u[k] *= c;
}
return *this;
}
template<class OtherHandle>
- MLPModel &operator-=(const MLPModel<OtherHandle> &inOtherModel) {
+ MLPModel& operator-=(const MLPModel<OtherHandle> &inOtherModel) {
size_t k;
- for (k = 1; k <= u.size() && k <= inOtherModel.u.size(); k ++) {
- u[k-1] -= inOtherModel.u[k-1];
+ for (k = 0; k < u.size() && k < inOtherModel.u.size(); k ++) {
+ u[k] -= inOtherModel.u[k];
}
return *this;
}
template<class OtherHandle>
- MLPModel &operator+=(const MLPModel<OtherHandle> &inOtherModel) {
+ MLPModel& operator+=(const MLPModel<OtherHandle> &inOtherModel) {
size_t k;
- for (k = 1; k <= u.size() && k <= inOtherModel.u.size(); k ++) {
- u[k-1] += inOtherModel.u[k-1];
+ for (k = 0; k < u.size() && k < inOtherModel.u.size(); k ++) {
+ u[k] += inOtherModel.u[k];
}
return *this;
}
template<class OtherHandle>
- MLPModel &operator=(const MLPModel<OtherHandle> &inOtherModel) {
+ MLPModel& operator=(const MLPModel<OtherHandle> &inOtherModel) {
size_t k;
- for (k = 1; k <= u.size() && k <= inOtherModel.u.size(); k ++) {
- u[k-1] = inOtherModel.u[k-1];
+ for (k = 0; k < u.size() && k < inOtherModel.u.size(); k ++) {
+ u[k] = inOtherModel.u[k];
}
is_classification = inOtherModel.is_classification;
activation = inOtherModel.activation;
http://git-wip-us.apache.org/repos/asf/madlib/blob/a8bbe082/src/modules/convex/type/state.hpp
----------------------------------------------------------------------
diff --git a/src/modules/convex/type/state.hpp b/src/modules/convex/type/state.hpp
index 2cb2643..f846e8f 100644
--- a/src/modules/convex/type/state.hpp
+++ b/src/modules/convex/type/state.hpp
@@ -295,6 +295,85 @@ public:
} algo;
};
+template <class Handle>
+class SVMMinibatchState {
+ template <class OtherHandle>
+ friend class SVMMinibatchState;
+
+public:
+ SVMMinibatchState(const AnyType &inArray) : mStorage(inArray.getAs<Handle>()) {
+ rebind();
+ }
+
+ /**
+ * @brief Convert to backend representation
+ *
+ * We define this function so that we can use State in the
+ * argument list and as a return type.
+ */
+ inline operator AnyType() const {
+ return mStorage;
+ }
+
+ /**
+ * @brief Allocating the state.
+ */
+ inline void allocate(const Allocator &inAllocator, uint32_t nFeatures) {
+ mStorage = inAllocator.allocateArray<double, dbal::AggregateContext,
+ dbal::DoZero, dbal::ThrowBadAlloc>(arraySize(nFeatures));
+
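+ // Rebind so that task.nFeatures points into the new storage, set it,
+ // then rebind again so that task.model is mapped with the correct length.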
+ rebind();
+ task.nFeatures = nFeatures;
+ rebind();
+ }
+
+ /**
+ * @brief We need to support assigning the previous state
+ */
+ template <class OtherHandle>
+ SVMMinibatchState &operator=(const SVMMinibatchState<OtherHandle> &inOtherState) {
+ for (size_t i = 0; i < mStorage.size(); i++) {
+ mStorage[i] = inOtherState.mStorage[i];
+ }
+
+ return *this;
+ }
+
+ static inline uint32_t arraySize(const uint32_t nFeatures) {
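+ // Layout: 8 header doubles (indices 0-7; index 7 is currently unused,
+ // see rebind() below) followed by the nFeatures model coefficients.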
+ return 8 + nFeatures;
+ }
+
+protected:
+ void rebind() {
+ task.nFeatures.rebind(&mStorage[0]);
+ task.stepsize.rebind(&mStorage[1]);
+ algo.numRows.rebind(&mStorage[2]);
+ algo.loss.rebind(&mStorage[3]);
+ task.reg.rebind(&mStorage[4]);
+ algo.batchSize.rebind(&mStorage[5]);
+ algo.nEpochs.rebind(&mStorage[6]);
+ task.model.rebind(&mStorage[8], task.nFeatures);
+ }
+
+ Handle mStorage;
+
+public:
+ struct TaskState {
+ typename HandleTraits<Handle>::ReferenceToUInt32 nFeatures;
+ typename HandleTraits<Handle>::ReferenceToDouble stepsize;
+ typename HandleTraits<Handle>::ReferenceToDouble reg;
+ typename HandleTraits<Handle>::ColumnVectorTransparentHandleMap model;
+ } task;
+
+ struct AlgoState {
+ typename HandleTraits<Handle>::ReferenceToUInt64 numRows;
+ typename HandleTraits<Handle>::ReferenceToDouble loss;
+ typename HandleTraits<Handle>::ReferenceToUInt32 batchSize;
+ typename HandleTraits<Handle>::ReferenceToUInt32 nEpochs;
+ } algo;
+};
+
+
/**
* @brief Inter- (Task State) and intra-iteration (Algo State) state of
* Conjugate Gradient for generalized linear models
http://git-wip-us.apache.org/repos/asf/madlib/blob/a8bbe082/src/modules/convex/type/tuple.hpp
----------------------------------------------------------------------
diff --git a/src/modules/convex/type/tuple.hpp b/src/modules/convex/type/tuple.hpp
index 824ed90..ac070b6 100644
--- a/src/modules/convex/type/tuple.hpp
+++ b/src/modules/convex/type/tuple.hpp
@@ -61,6 +61,9 @@ using madlib::dbal::eigen_integration::MappedColumnVector;
// Generalized Linear Models (GLMs): Logistic regression, Linear SVM
typedef ExampleTuple<MappedColumnVector, double> GLMTuple;
+typedef ExampleTuple<MappedColumnVector, double> SVMTuple;
+typedef ExampleTuple<Matrix, ColumnVector> SVMMiniBatchTuple;
+
// madlib::modules::convex::MatrixIndex
typedef ExampleTuple<MatrixIndex, double> LMFTuple;
http://git-wip-us.apache.org/repos/asf/madlib/blob/a8bbe082/src/ports/postgres/modules/svm/svm.py_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/svm/svm.py_in b/src/ports/postgres/modules/svm/svm.py_in
index 4760f36..a57a135 100644
--- a/src/ports/postgres/modules/svm/svm.py_in
+++ b/src/ports/postgres/modules/svm/svm.py_in
@@ -36,36 +36,60 @@ def _compute_svm(args):
"""
init_stepsize = args['init_stepsize']
args['stepsize'] = init_stepsize
+ batch_size = args['batch_size']
+ args['dist_func'] = ('internal_linear_svm_igd_distance' if batch_size == 1
+ else 'internal_linear_svm_igd_minibatch_distance')
iterationCtrl = GroupIterationController(args)
with iterationCtrl as it:
it.iteration = 0
has_converged = False
while not has_converged:
- it.update(
- """
- {schema_madlib}.linear_svm_igd_step(
- ({col_ind_var})::FLOAT8[],
- ({col_dep_var_trans})::FLOAT8,
- {rel_state}.{col_grp_state},
- {n_features}::INT4,
- {stepsize}::FLOAT8,
- {lambda}::FLOAT8,
- {is_l2}::BOOLEAN,
- {col_n_tuples},
- ({select_epsilon})::FLOAT8,
- {is_svc}::BOOLEAN,
- {class_weight_sql}::FLOAT8
- )
- """)
+ if batch_size == 1:
+ it.update(
+ """
+ {schema_madlib}.linear_svm_igd_step(
+ ({col_ind_var})::FLOAT8[],
+ ({col_dep_var_trans})::FLOAT8,
+ {rel_state}.{col_grp_state},
+ {n_features}::INT4,
+ {stepsize}::FLOAT8,
+ {lambda}::FLOAT8,
+ {is_l2}::BOOLEAN,
+ {col_n_tuples},
+ ({select_epsilon})::FLOAT8,
+ {is_svc}::BOOLEAN,
+ {class_weight_sql}::FLOAT8
+ )
+ """)
+ else:
+ it.update(
+ """
+ {schema_madlib}.linear_svm_igd_minibatch_step(
+ ({col_ind_var})::FLOAT8[][],
+ ({col_dep_var_trans})::FLOAT8[],
+ {rel_state}.{col_grp_state},
+ {n_features}::INT4,
+ {stepsize}::FLOAT8,
+ {lambda}::FLOAT8,
+ {is_l2}::BOOLEAN,
+ {col_n_tuples},
+ ({select_epsilon})::FLOAT8,
+ {is_svc}::BOOLEAN,
+ {class_weight_sql}::FLOAT8,
+ {n_epochs}::INTEGER,
+ {batch_size}::INTEGER
+ )
+ """)
it.info()
if it.kwargs['decay_factor'] > 0:
it.kwargs['stepsize'] *= it.kwargs['decay_factor']
else:
it.kwargs['stepsize'] = init_stepsize / (it.iteration + 1)
+
has_converged = it.test(
"""
{iteration} >= {max_iter}
- OR {schema_madlib}.internal_linear_svm_igd_distance(
+ OR {schema_madlib}.{dist_func}(
_state_previous, _state_current) < {tolerance}
""")
it.final()
@@ -89,9 +113,9 @@ def _verify_table(source_table, model_table, dependent_varname,
"('{dependent_varname}') for source_table "
"({source_table})!".format(dependent_varname=dependent_varname,
source_table=source_table))
- dep_type = get_expr_type(dependent_varname, source_table)
- if '[]' in dep_type:
- plpy.error("SVM error: dependent_varname cannot be of array type!")
+ # dep_type = get_expr_type(dependent_varname, source_table)
+ # if '[]' in dep_type:
+ # plpy.error("SVM error: dependent_varname cannot be of array type!")
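+ # (check disabled: with the minibatch solver the dependent variable is
+ # an array per tuple)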
# validate output tables
output_tbl_valid(model_table, 'SVM')
@@ -157,6 +181,8 @@ def _build_output_tables(n_iters_run, args, **kwargs):
else:
groupby_str, grouping_str1, using_str = "", "", "ON TRUE"
# organizing results
+ result_func = ("internal_linear_svm_igd_result" if args['batch_size'] == 1
+ else "internal_linear_svm_igd_minibatch_result")
args.update(locals())
model_table_query = """
CREATE TABLE {model_table} AS
@@ -173,7 +199,7 @@ def _build_output_tables(n_iters_run, args, **kwargs):
FROM
(
SELECT
- {schema_madlib}.internal_linear_svm_igd_result(
+ {schema_madlib}.{result_func}(
{col_grp_state}
) AS result,
{col_grp_key}
@@ -952,9 +978,9 @@ def svm(schema_madlib, source_table, model_table,
_verify_table(source_table, model_table,
dependent_varname, independent_varname)
reserved_cols =['coef', 'random_feature_data',
- 'random_feature_data', 'loss'
- 'num_rows_processed', 'num_rows_skipped',
- 'norm_of_gradient', 'num_iterations']
+ 'random_feature_data', 'loss',
+ 'num_rows_processed', 'num_rows_skipped',
+ 'norm_of_gradient', 'num_iterations']
grouping_str, grouping_col = \
get_grouping_col_str(schema_madlib, 'SVM', reserved_cols,
source_table, grouping_col)
@@ -1151,7 +1177,15 @@ def _svm_parsed_params(schema_madlib, source_table, model_table,
datasets.
"""
- n_features = num_features(source_table, independent_varname)
+ # n_features = num_features(source_table, independent_varname)
+
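+ # For minibatch, the independent variable is a 2-D array per tuple
+ # (points x features), so the feature count is the upper bound of its
+ # second dimension.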
+ upper_dim = 1 if params_dict['batch_size'] == 1 else 2
+ n_features = plpy.execute("SELECT array_upper({0}, {2}) AS dim "
+ "FROM {1} LIMIT 1".
+ format(independent_varname,
+ source_table,
+ upper_dim))[0]['dim']
+
if update_source_for_one_class:
# This block is run only when the caller is svm_one_class
@@ -1204,7 +1238,9 @@ def _svm_parsed_params(schema_madlib, source_table, model_table,
args.update(_verify_get_params_dict(params_dict))
args.update(_process_epsilon(is_svc, args))
- args.update(_svc_or_svr(is_svc, source_table, dependent_varname))
+
+ is_sgd = params_dict['batch_size'] <= 1
+ args.update(_svc_or_svr(is_svc, source_table, dependent_varname, is_sgd))
# place holder for compatibility
plpy.execute("CREATE TABLE pg_temp.{0} AS SELECT 1".format(args['rel_args']))
@@ -1254,13 +1290,13 @@ def svm_predict(schema_madlib, model_table, new_data_table, id_col_name,
input_tbl_valid(new_data_table, 'SVM')
reserved_cols =['coef', 'random_feature_data',
- 'random_feature_data', 'loss'
- 'num_rows_processed', 'num_rows_skipped',
- 'norm_of_gradient', 'num_iterations']
+ 'random_feature_data', 'loss',
+ 'num_rows_processed', 'num_rows_skipped',
+ 'norm_of_gradient', 'num_iterations']
grouping_str, grouping_col = get_grouping_col_str(schema_madlib,
- 'SVM', reserved_cols,
- new_data_table,
- grouping_col)
+ 'SVM', reserved_cols,
+ new_data_table,
+ grouping_col)
_assert(is_var_valid(new_data_table, independent_varname),
"SVM Error: independent_varname ('" + independent_varname +
"') is invalid for new_data_table (" + new_data_table + ")!")
@@ -1341,43 +1377,67 @@ def svm_predict(schema_madlib, model_table, new_data_table, id_col_name,
# -----------------------------------------------------------------------------
-def _svc_or_svr(is_svc, source_table, dependent_varname):
+def _svc_or_svr(is_svc, source_table, dependent_varname, is_sgd):
# transform col_dep_var to binary (1`or -1) if classification
_args = {'col_dep_var_trans': dependent_varname,
'mapping': 'NULL',
'method': 'SVR'}
if is_svc:
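+ # For minibatch, the dependent variable is an array per tuple, so
+ # unnest it when enumerating the distinct class labels.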
+ if is_sgd:
+ src = source_table
+ else:
+ src = "(SELECT unnest({0}) as {0} FROM {1}) q".format(dependent_varname, source_table)
# dependent variable mapping
dep_labels = plpy.execute("""
SELECT {dependent_varname} AS y
- FROM {source_table}
+ FROM {src}
WHERE ({dependent_varname}) IS NOT NULL
GROUP BY ({dependent_varname})
ORDER BY ({dependent_varname})
- """.format(source_table=source_table,
+ """.format(src=src,
dependent_varname=dependent_varname))
- dep_var_mapping = ["'{0}'".format(d['y'])
- if isinstance(d['y'], basestring)
- else str(d['y']) for d in dep_labels]
-
- _assert(1 <= len(dep_var_mapping) <= 2,
+ _assert(1 <= len(dep_labels) <= 2,
"SVM Error: Classification currently "
"only supports unary or binary output!. Found values {0}".
- format(dep_var_mapping))
-
- col_dep_var_trans = ("""
- CASE WHEN ({col_dep_var}) IS NULL THEN NULL
- WHEN ({col_dep_var}) = {mapped_value_for_negative} THEN -1.0
- ELSE 1.0
- END
- """.format(col_dep_var=dependent_varname,
- mapped_value_for_negative=dep_var_mapping[0]))
+ format(dep_labels))
+
+ dep_labels_str = ["'{0}'".format(d['y'])
+ if isinstance(d['y'], basestring)
+ else str(d['y']) for d in dep_labels]
+
+ # map the dependent variable labels to -1 and 1 to represent the two
+ # sides of the hyperplane (only supporting unary/binary for now)
+ if is_sgd:
+ col_dep_var_trans = """
+ CASE WHEN ({dependent_varname}) IS NULL THEN NULL
+ WHEN ({dependent_varname}) = {mapped_value_for_negative} THEN -1.0
+ ELSE 1.0
+ END
+ """
+ else:
+ # For minibatch, the dependent_varname is an array. So unnest the
+ # array before mapping to 1/-1.
+ col_dep_var_trans = """
+ ARRAY(SELECT
+ CASE WHEN ({dependent_varname}) IS NULL THEN NULL
+ WHEN ({dependent_varname}) = {mapped_value_for_negative} THEN -1.0
+ ELSE 1.0
+ END
+ FROM UNNEST({dependent_varname}) as {dependent_varname}
+ )
+ """
+
+ # col_dep_var_trans is used by the update query in _compute_svm to
+ # transform dependent variable labels to -1 and 1.
+ col_dep_var_trans = col_dep_var_trans.format(
+ dependent_varname=dependent_varname,
+ mapped_value_for_negative=dep_labels_str[0])
_args.update({
- 'mapped_value_for_negative': dep_var_mapping[0],
+ 'mapped_value_for_negative': dep_labels_str[0],
'col_dep_var_trans': col_dep_var_trans,
- 'mapping': dep_var_mapping[0] + "," + dep_var_mapping[1],
+ 'mapping': dep_labels_str[0] + "," + dep_labels_str[1],
'method': 'SVC'})
return _args
# -----------------------------------------------------------------------------
@@ -1489,7 +1549,9 @@ def _extract_params(schema_madlib, params, module='SVM'):
'validation_result': '',
'epsilon': [0.01],
'eps_table': '',
- 'class_weight': ''}
+ 'class_weight': '',
+ 'n_epochs': 1,
+ 'batch_size': 1}
params_types = {
'init_stepsize': list,
@@ -1502,7 +1564,9 @@ def _extract_params(schema_madlib, params, module='SVM'):
'validation_result': str,
'epsilon': list,
'eps_table': str,
- 'class_weight': str}
+ 'class_weight': str,
+ 'n_epochs': int,
+ 'batch_size': int}
params_vals = extract_keyvalue_params(params, params_types, params_default)
if params_vals['n_folds'] < 0:
http://git-wip-us.apache.org/repos/asf/madlib/blob/a8bbe082/src/ports/postgres/modules/svm/svm.sql_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/svm/svm.sql_in b/src/ports/postgres/modules/svm/svm.sql_in
index f3948a8..78d7b76 100644
--- a/src/ports/postgres/modules/svm/svm.sql_in
+++ b/src/ports/postgres/modules/svm/svm.sql_in
@@ -79,7 +79,7 @@ svm_classification(
<DD>TEXT. Expression list to evaluate for the
independent variables. An intercept variable should not be included as part
of this expression. See 'fit_intercept' in the kernel params for info on
- intercepts. Please note that expression should be able to be cast
+ intercepts. Please note that expression should be able to be cast
to DOUBLE PRECISION[].
<DT>kernel_func (optional)</DT>
@@ -339,7 +339,7 @@ the parameter is ignored.
Hyperparameter optimization can be carried out using the built-in cross
validation mechanism, which is activated by assigning a value greater than 1 to
-the parameter \e n_folds in \e params.
+the parameter \e n_folds in \e params.
Please note that cross validation is not
supported if grouping is used.
@@ -442,7 +442,7 @@ while the other k - 1 folds form the training set.
</DD>
<DT>class_weight</dt>
-<DD>Default: 1 for classification, 'balanced' for one-class novelty detection,
+<DD>Default: 1 for classification, 'balanced' for one-class novelty detection,
n/a for regression.
Set the weight for the positive and negative classes. If not given, all classes
@@ -495,8 +495,8 @@ table name is already in use, then an error is returned. Table contains:</DD>
</tr>
<tr>
<th>prediction</th>
- <td>Provides the prediction for each row in new_data_table.
- For regression this would be the same as decision_function. For classification,
+ <td>Provides the prediction for each row in new_data_table.
+ For regression this would be the same as decision_function. For classification,
this will be one of the dependent variable values.</td>
</tr>
<tr>
@@ -645,9 +645,9 @@ num_rows_skipped | -1
dep_var_mapping | {-1,1}
</pre>
-# Now let's look at the prediction functions. We want to predict if house price
-is less than $100,000. In the following examples we will
+is less than $100,000. In the following examples we will
use the training data set for prediction as well, which is not usual but serves to
-show the syntax. The predicted results are in the \e prediction column and the
+show the syntax. The predicted results are in the \e prediction column and the
actual data is in the \e target column.
For the linear model:
<pre class="example">
@@ -657,7 +657,7 @@ SELECT *, price < 100000 AS target FROM houses JOIN houses_pred USING (id) ORDER
</pre>
Result:
<pre class="result">
- id | tax | bedroom | bath | price | size | lot | prediction | decision_function | target
+ id | tax | bedroom | bath | price | size | lot | prediction | decision_function | target
----+------+---------+------+--------+------+-------+------------+--------------------+--------
1 | 590 | 2 | 1 | 50000 | 770 | 22100 | t | 104.685894748292 | t
2 | 1050 | 3 | 2 | 85000 | 1410 | 12000 | t | 200.592436923938 | t
@@ -683,7 +683,7 @@ SELECT *, price < 100000 AS target FROM houses JOIN houses_pred_gaussian USING (
</pre>
This produces a more accurate result than the linear case for this small data set:
<pre class="result">
- id | tax | bedroom | bath | price | size | lot | prediction | decision_function | target
+ id | tax | bedroom | bath | price | size | lot | prediction | decision_function | target
----+------+---------+------+--------+------+-------+------------+-------------------+--------
1 | 590 | 2 | 1 | 50000 | 770 | 22100 | t | 1.00338548176312 | t
2 | 1050 | 3 | 2 | 85000 | 1410 | 12000 | t | 1.00000000098154 | t
@@ -709,7 +709,7 @@ SELECT * FROM houses JOIN houses_regr USING (id) ORDER BY id;
</pre>
Result for the linear regression model:
<pre class="result">
- id | tax | bedroom | bath | price | size | lot | prediction | decision_function
+ id | tax | bedroom | bath | price | size | lot | prediction | decision_function
----+------+---------+------+--------+------+-------+------------------+-------------------
1 | 590 | 2 | 1 | 50000 | 770 | 22100 | 55288.6992755623 | 55288.6992755623
2 | 1050 | 3 | 2 | 85000 | 1410 | 12000 | 99978.8137019119 | 99978.8137019119
@@ -733,7 +733,7 @@ DROP TABLE IF EXISTS houses_gaussian_regr;
SELECT madlib.svm_predict('houses_svm_gaussian_regression', 'houses', 'id', 'houses_gaussian_regr');
SELECT * FROM houses JOIN houses_gaussian_regr USING (id) ORDER BY id;
</pre>
--# For the novelty detection using one-class, let's create a test data set using
+-# For the novelty detection using one-class, let's create a test data set using
the last 3 values from the training set plus an outlier at the end (10x price):
<pre class="example">
DROP TABLE IF EXISTS houses_one_class_test;
@@ -754,7 +754,7 @@ SELECT * FROM houses_one_class_test JOIN houses_one_class_pred USING (id) ORDER
</pre>
Result showing the last row predicted to be novel:
<pre class="result">
- id | tax | bedroom | bath | price | size | lot | prediction | decision_function
+ id | tax | bedroom | bath | price | size | lot | prediction | decision_function
----+------+---------+------+--------+------+-------+------------+---------------------
1 | 3100 | 3 | 2 | 140000 | 1760 | 38000 | 1 | 0.111497008121437
2 | 2070 | 2 | 3 | 148000 | 1550 | 14000 | 1 | 0.0996021345169148
@@ -938,6 +938,86 @@ CREATE AGGREGATE MADLIB_SCHEMA.linear_svm_igd_step(
INITCOND='{0,0,0,0,0,0,0}'
);
+--------------------------------------------------------------------------
+-- create SQL functions for the minibatch IGD optimizer
+--------------------------------------------------------------------------
+-- cannot be labeled as STRICT because we set previous_state NULL initially
+CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.linear_svm_igd_minibatch_transition(
+ state double precision[],
+ ind_var double precision[][],
+ dep_var double precision[],
+ previous_state double precision[],
+ dimension integer,
+ stepsize double precision,
+ reg double precision,
+ is_l2 boolean,
+ n_tuples integer,
+ epsilon double precision,
+ is_svc boolean,
+ tuple_weight double precision,
+ n_epochs integer,
+ batch_size integer
+)
+RETURNS double precision[] AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE
+m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL');
+
+CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.linear_svm_igd_minibatch_merge(
+ state1 double precision[],
+ state2 double precision[])
+RETURNS double precision[] AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE STRICT
+m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL');
+
+CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.linear_svm_igd_minibatch_final(
+ state double precision[])
+RETURNS double precision[] AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE STRICT
+m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL');
+
+/**
+ * @internal
+ * @brief Perform one iteration of the minibatch incremental gradient
+ * method for computing the linear support vector machine
+ */
+DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.linear_svm_igd_minibatch_step(
+ /*+ ind_var */ double precision[][],
+ /*+ dep_var */ double precision[],
+ /*+ previous_state */ double precision[],
+ /*+ dimension */ integer,
+ /*+ stepsize */ double precision,
+ /*+ reg */ double precision,
+ /*+ is_l2 */ boolean,
+ /*+ n_tuples */ integer,
+ /*+ epsilon */ double precision,
+ /*+ is_svc */ boolean,
+ /*+ tuple_weight */ double precision,
+ /*+ n_epochs */ integer,
+ /*+ batch_size */ integer
+);
+CREATE AGGREGATE MADLIB_SCHEMA.linear_svm_igd_minibatch_step(
+ /*+ ind_var */ double precision[][],
+ /*+ dep_var */ double precision[],
+ /*+ previous_state */ double precision[],
+ /*+ dimension */ integer,
+ /*+ stepsize */ double precision,
+ /*+ reg */ double precision,
+ /*+ is_l2 */ boolean,
+ /*+ n_tuples */ integer,
+ /*+ epsilon */ double precision,
+ /*+ is_svc */ boolean,
+ /*+ tuple_weight */ double precision,
+ /*+ n_epochs */ integer,
+ /*+ batch_size */ integer
+ ) (
+ STYPE=double precision[],
+ SFUNC=MADLIB_SCHEMA.linear_svm_igd_minibatch_transition,
+ m4_ifdef(`__POSTGRESQL__', `', `prefunc=MADLIB_SCHEMA.linear_svm_igd_minibatch_merge,')
+ FINALFUNC=MADLIB_SCHEMA.linear_svm_igd_minibatch_final,
+ INITCOND='{0,0,0,0,0,0,0,0,0}'
+);
+
+
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.internal_linear_svm_igd_distance(
/*+ state1 */ double precision[],
/*+ state2 */ double precision[])
@@ -945,12 +1025,25 @@ RETURNS double precision AS 'MODULE_PATHNAME'
LANGUAGE c IMMUTABLE STRICT
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL');
+CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.internal_linear_svm_igd_minibatch_distance(
+ /*+ state1 */ double precision[],
+ /*+ state2 */ double precision[])
+RETURNS double precision AS 'MODULE_PATHNAME'
+LANGUAGE c IMMUTABLE STRICT
+m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL');
+
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.internal_linear_svm_igd_result(
/*+ state */ double precision[])
RETURNS MADLIB_SCHEMA.linear_svm_result AS 'MODULE_PATHNAME'
LANGUAGE c IMMUTABLE STRICT
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL');
+CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.internal_linear_svm_igd_minibatch_result(
+ /*+ state */ double precision[])
+RETURNS MADLIB_SCHEMA.linear_svm_result AS 'MODULE_PATHNAME'
+LANGUAGE c IMMUTABLE STRICT
+m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL');
+
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svm_regression(
source_table text,
http://git-wip-us.apache.org/repos/asf/madlib/blob/a8bbe082/src/ports/postgres/modules/svm/test/svm.sql_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/svm/test/svm.sql_in b/src/ports/postgres/modules/svm/test/svm.sql_in
index 60d280e..d9e9383 100644
--- a/src/ports/postgres/modules/svm/test/svm.sql_in
+++ b/src/ports/postgres/modules/svm/test/svm.sql_in
@@ -903,3 +903,107 @@ SELECT
'The dimension of the coefficients must be equal to n_components (3)!')
FROM m9;
*/
+
+
+-- minibatch -----------------------------------------------------------
+drop table if exists svm_minibatch_train;
+CREATE TABLE svm_minibatch_train (
+ id integer,
+ x double precision[][],
+ rings integer[],
+ sex text[]
+);
+
+COPY svm_minibatch_train (id, x, rings, sex) FROM stdin DELIMITER '|';
+0|{{0.53,0.42,0.17,0.828,0.41,0.208,0.1505},{0.27,0.195,0.06,0.073,0.0285,0.0235,0.03},{0.31,0.23,0.07,0.1245,0.0505,0.0265,0.038},{0.36,0.27,0.085,0.2185,0.1065,0.038,0.062},{0.32,0.24,0.08,0.18,0.08,0.0385,0.055},{0.3,0.22,0.08,0.121,0.0475,0.042,0.035},{0.5,0.39,0.135,0.6595,0.3145,0.1535,0.1565},{0.295,0.215,0.07,0.121,0.047,0.0155,0.0405},{0.375,0.28,0.08,0.226,0.105,0.047,0.065}}|{6,5,6,6,6,5,6,6,6}|{F,M,F,F,F,M,F,F,F}
+1|{{0.415,0.31,0.105,0.3595,0.167,0.083,0.0915},{0.35,0.25,0.07,0.1605,0.0715,0.0335,0.046},{0.415,0.33,0.09,0.3595,0.17,0.081,0.09},{0.66,0.475,0.18,1.3695,0.641,0.294,0.335},{0.415,0.31,0.09,0.2815,0.1245,0.0615,0.085},{0.35,0.265,0.09,0.2265,0.0995,0.0575,0.065},{0.215,0.155,0.06,0.0525,0.021,0.0165,0.015},{0.35,0.27,0.075,0.215,0.1,0.036,0.065},{0.255,0.18,0.065,0.079,0.034,0.014,0.025},{0.28,0.22,0.08,0.1315,0.066,0.024,0.03}}|{6,6,6,6,6,6,5,6,5,5}|{F,F,F,F,F,F,M,F,M,M}
+2|{{0.27,0.19,0.08,0.081,0.0265,0.0195,0.03},{0.375,0.29,0.095,0.2875,0.123,0.0605,0.08},{0.27,0.2,0.08,0.1205,0.0465,0.028,0.04},{0.235,0.175,0.065,0.0615,0.0205,0.02,0.019},{0.24,0.17,0.05,0.0545,0.0205,0.016,0.0155},{0.34,0.255,0.085,0.204,0.097,0.021,0.05},{0.275,0.22,0.08,0.1365,0.0565,0.0285,0.042},{0.385,0.28,0.09,0.228,0.1025,0.042,0.0655},{0.355,0.27,0.075,0.1775,0.079,0.0315,0.054},{0.27,0.205,0.05,0.084,0.03,0.0185,0.029}}|{6,6,6,6,5,6,6,5,6,6}|{F,F,F,F,M,F,F,M,F,F}
+3|{{0.335,0.26,0.085,0.192,0.097,0.03,0.054},{0.26,0.215,0.08,0.099,0.037,0.0255,0.045},{0.315,0.21,0.06,0.125,0.06,0.0375,0.035},{0.585,0.45,0.125,0.874,0.3545,0.2075,0.225},{0.44,0.345,0.13,0.4495,0.209,0.0835,0.134},{0.315,0.23,0.08,0.1375,0.0545,0.031,0.0445},{0.35,0.25,0.07,0.18,0.0655,0.048,0.054},{0.31,0.24,0.105,0.2885,0.118,0.065,0.083},{0.325,0.23,0.09,0.147,0.06,0.034,0.045},{0.28,0.21,0.075,0.1195,0.053,0.0265,0.03}}|{6,5,5,6,6,5,6,6,4,6}|{F,M,M,F,F,M,F,F,F,F}
+4|{{0.41,0.31,0.09,0.3335,0.1635,0.061,0.091},{0.595,0.475,0.16,1.1405,0.547,0.231,0.271},{0.385,0.3,0.09,0.247,0.1225,0.044,0.0675},{0.455,0.335,0.105,0.422,0.229,0.0865,0.1},{0.245,0.18,0.065,0.0635,0.0245,0.0135,0.02},{0.155,0.115,0.025,0.024,0.009,0.005,0.0075},{0.28,0.215,0.08,0.132,0.072,0.022,0.033},{0.335,0.25,0.08,0.1695,0.0695,0.044,0.0495},{0.175,0.125,0.05,0.0235,0.008,0.0035,0.008},{0.275,0.205,0.075,0.1105,0.045,0.0285,0.035}}|{6,6,5,6,4,5,5,6,5,6}|{F,F,M,F,F,M,M,F,M,F}
+5|{{0.41,0.325,0.1,0.394,0.208,0.0655,0.106},{0.4,0.295,0.095,0.252,0.1105,0.0575,0.066},{0.26,0.2,0.07,0.092,0.037,0.02,0.03},{0.445,0.335,0.11,0.4355,0.2025,0.1095,0.1195},{0.255,0.185,0.07,0.075,0.028,0.018,0.025},{0.385,0.3,0.115,0.3435,0.1645,0.085,0.1025},{0.325,0.27,0.1,0.185,0.08,0.0435,0.065},{0.28,0.205,0.1,0.1165,0.0545,0.0285,0.03},{0.275,0.2,0.065,0.092,0.0385,0.0235,0.027},{0.38,0.275,0.095,0.2505,0.0945,0.0655,0.075}}|{6,6,6,6,6,6,6,5,5,6}|{F,F,F,F,F,F,F,M,M,F}
+6|{{0.365,0.255,0.08,0.1985,0.0785,0.0345,0.053},{0.175,0.135,0.04,0.0305,0.011,0.0075,0.01},{0.515,0.375,0.11,0.6065,0.3005,0.131,0.15},{0.23,0.18,0.05,0.064,0.0215,0.0135,0.02},{0.185,0.135,0.04,0.027,0.0105,0.0055,0.009},{0.33,0.24,0.075,0.163,0.0745,0.033,0.048},{0.37,0.265,0.075,0.214,0.09,0.051,0.07},{0.325,0.245,0.07,0.161,0.0755,0.0255,0.045},{0.19,0.13,0.045,0.0265,0.009,0.005,0.009},{0.325,0.245,0.075,0.1495,0.0605,0.033,0.045}}|{5,5,6,5,5,6,6,6,5,5}|{M,M,F,M,M,F,F,F,M,M}
+7|{{0.44,0.34,0.105,0.369,0.164,0.08,0.1015},{0.27,0.195,0.08,0.1,0.0385,0.0195,0.03},{0.32,0.235,0.08,0.1485,0.064,0.031,0.045},{0.53,0.41,0.14,0.681,0.3095,0.1415,0.1835},{0.405,0.285,0.09,0.2645,0.1265,0.0505,0.075},{0.45,0.33,0.11,0.3685,0.16,0.0885,0.102},{0.245,0.175,0.055,0.0785,0.04,0.018,0.02},{0.38,0.275,0.095,0.2425,0.106,0.0485,0.21},{0.47,0.36,0.11,0.4965,0.237,0.127,0.13},{0.37,0.27,0.095,0.2175,0.097,0.046,0.065}}|{5,6,6,6,6,6,5,6,6,6}|{M,F,F,F,F,F,M,F,F,F}
+8|{{0.35,0.265,0.08,0.192,0.081,0.0465,0.053},{0.28,0.2,0.075,0.1225,0.0545,0.0115,0.035},{0.385,0.3,0.1,0.2725,0.1115,0.057,0.08},{0.335,0.245,0.09,0.1665,0.0595,0.04,0.06},{0.285,0.21,0.07,0.109,0.044,0.0265,0.033},{0.415,0.305,0.1,0.325,0.156,0.0505,0.091},{0.43,0.335,0.105,0.378,0.188,0.0785,0.09},{0.175,0.125,0.04,0.024,0.0095,0.006,0.005},{0.4,0.315,0.085,0.2675,0.116,0.0585,0.0765},{0.375,0.285,0.09,0.2545,0.119,0.0595,0.0675}}|{6,5,6,6,5,6,6,4,6,6}|{F,M,F,F,M,F,F,F,F,F}
+9|{{0.365,0.27,0.105,0.2155,0.0915,0.0475,0.063},{0.36,0.27,0.09,0.2075,0.098,0.039,0.062},{0.33,0.23,0.085,0.1695,0.079,0.026,0.0505},{0.34,0.26,0.085,0.1885,0.0815,0.0335,0.06},{0.285,0.215,0.075,0.106,0.0415,0.023,0.035},{0.195,0.145,0.05,0.032,0.01,0.008,0.012},{0.32,0.24,0.07,0.133,0.0585,0.0255,0.041},{0.495,0.4,0.155,0.8085,0.2345,0.1155,0.35},{0.475,0.36,0.12,0.5915,0.3245,0.11,0.127},{0.395,0.27,0.1,0.2985,0.1445,0.061,0.082}}|{6,6,6,6,5,4,6,6,6,5}|{F,F,F,F,M,F,F,F,F,M}
+\.
+
+DROP TABLE IF EXISTS svm_minibatch_test;
+CREATE TABLE svm_minibatch_test AS
+SELECT id,
+ ARRAY[round(length::numeric, 4),
+ round(diameter::numeric, 4),
+ round(height::numeric, 4),
+ round(whole::numeric, 4),
+ round(shucked::numeric, 4),
+ round(viscera::numeric, 4),
+ round(shell::numeric, 4)] as x,
+ rings,
+ CASE WHEN sex = 'I' then 'F' else sex end as sex
+FROM abalone_train_small_tmp;
+
+------ Regression-------
+DROP TABLE IF EXISTS svm_minibatch_reg_out, svm_minibatch_reg_out_summary;
+SELECT svm_regression(
+ 'svm_minibatch_train',
+ 'svm_minibatch_reg_out',
+ 'rings',
+ 'x',
+ 'linear',
+ NULL,
+ NULL,
+ 'max_iter=10, init_stepsize=0.2, batch_size=3, n_epochs=3'
+);
+
+DROP TABLE IF EXISTS svm_predict_reg_minibatch_out;
+SELECT svm_predict('svm_minibatch_reg_out', 'svm_minibatch_test', 'id', 'svm_predict_reg_minibatch_out');
+
+SELECT assert(error < 1, 'Training error ' || error || ' with SVM regression minibatch is too high (>1)')
+FROM (SELECT avg((rings - prediction)^2) AS error
+FROM svm_minibatch_test JOIN svm_predict_reg_minibatch_out
+USING (id)) q;
+
+
+-- testing for batch_size bigger than the number of data points --------------
+-- batch_size = 30 exceeds the number of data points in any tuple of svm_minibatch_train;
+-- this should not error and should treat the whole matrix as a single batch
+DROP TABLE IF EXISTS svm_minibatch_reg_out, svm_minibatch_reg_out_summary;
+SELECT svm_regression(
+ 'svm_minibatch_train',
+ 'svm_minibatch_reg_out',
+ 'rings',
+ 'x',
+ 'linear',
+ NULL,
+ NULL,
+ 'max_iter=10, init_stepsize=0.2, batch_size=30, n_epochs=1'
+);
+
+------ Classification -------
+DROP TABLE IF EXISTS svm_minibatch_classification_out, svm_minibatch_classification_out_summary;
+\timing on
+SELECT svm_classification(
+ 'svm_minibatch_train',
+ 'svm_minibatch_classification_out',
+ 'sex',
+ 'x',
+ 'linear',
+ NULL,
+ NULL,
+ 'max_iter=10, init_stepsize=0.2, batch_size=3, n_epochs=2'
+);
+\timing off
+
+DROP TABLE IF EXISTS svm_predict_classification_minibatch_out;
+SELECT svm_predict('svm_minibatch_classification_out',
+ 'svm_minibatch_test',
+ 'id',
+ 'svm_predict_classification_minibatch_out');
+
+SELECT assert(accuracy >= 0.70,
+ 'Training accuracy '|| accuracy ||
+ ' with SVM classification minibatch is too low (<0.7)')
+FROM (SELECT count(*)/99. as accuracy
+ FROM svm_minibatch_test JOIN svm_predict_classification_minibatch_out
+ USING (id)
+ WHERE sex = prediction
+ ) q;
http://git-wip-us.apache.org/repos/asf/madlib/blob/a8bbe082/src/ports/postgres/modules/utilities/validate_args.py_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/utilities/validate_args.py_in b/src/ports/postgres/modules/utilities/validate_args.py_in
index 2b9c6d7..a2f43fd 100644
--- a/src/ports/postgres/modules/utilities/validate_args.py_in
+++ b/src/ports/postgres/modules/utilities/validate_args.py_in
@@ -475,9 +475,9 @@ def array_col_dimension(tbl, col):
if col is None:
plpy.error('Input error: Column name is invalid')
dim = plpy.execute("""
- SELECT max(array_upper({col}, 1)) AS dim
- FROM {tbl}
- """.format(col=col, tbl=tbl))[0]["dim"]
+ SELECT max(array_upper({col}, 1)) AS dim
+ FROM {tbl}
+ """.format(col=col, tbl=tbl))[0]["dim"]
return dim
# ------------------------------------------------------------------------
@@ -491,15 +491,12 @@ def array_col_has_same_dimension(tbl, col):
if col is None or col.lower() == 'null':
plpy.error('Input error: Column name is invalid')
- max_dim = plpy.execute("""
- SELECT max(array_upper({col}, 1)) AS max_dim
- FROM {tbl}
- """.format(col=col, tbl=tbl))[0]["max_dim"]
- min_dim = plpy.execute("""
- SELECT min(array_upper({col}, 1)) AS min_dim
+ results = plpy.execute("""
+ SELECT min(array_upper({col}, 1)) AS min_dim,
+ max(array_upper({col}, 1)) AS max_dim
FROM {tbl}
- """.format(col=col, tbl=tbl))[0]["min_dim"]
- return max_dim == min_dim
+ """.format(col=col, tbl=tbl))[0]
+ return results['max_dim'] == results['min_dim']
# ------------------------------------------------------------------------