You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by ra...@apache.org on 2017/02/01 03:23:36 UTC

mahout git commit: MAHOUT-1856 Add Framework for Models, Fitters, and Tests closes apache/mahout#246

Repository: mahout
Updated Branches:
  refs/heads/master f8596b866 -> 9a31923ea


MAHOUT-1856 Add Framework for Models, Fitters, and Tests closes apache/mahout#246


Project: http://git-wip-us.apache.org/repos/asf/mahout/repo
Commit: http://git-wip-us.apache.org/repos/asf/mahout/commit/9a31923e
Tree: http://git-wip-us.apache.org/repos/asf/mahout/tree/9a31923e
Diff: http://git-wip-us.apache.org/repos/asf/mahout/diff/9a31923e

Branch: refs/heads/master
Commit: 9a31923eae3727d9d91bd2c2ed8df12a616a577e
Parents: f8596b8
Author: rawkintrevo <tr...@gmail.com>
Authored: Tue Jan 31 21:23:10 2017 -0600
Committer: rawkintrevo <tr...@gmail.com>
Committed: Tue Jan 31 21:23:10 2017 -0600

----------------------------------------------------------------------
 .gitignore                                      |   1 +
 .../standard/PreprocessorSuite.scala            |  26 ++++
 .../standard/RegressionSuite.scala              |  27 ++++
 .../standard/RegressionTestsSuite.scala         |  26 ++++
 .../math/algorithms/PreprocessorSuite.scala     |  24 ++++
 .../math/algorithms/RegressionSuite.scala       |  25 ++++
 .../math/algorithms/RegressionTestsSuite.scala  |  24 ++++
 .../apache/mahout/math/algorithms/Fitter.scala  |  27 ++++
 .../apache/mahout/math/algorithms/Model.scala   |  26 ++++
 .../math/algorithms/SupervisedFitter.scala      |  29 +++++
 .../math/algorithms/SupervisedModel.scala       |  26 ++++
 .../math/algorithms/UnsupervisedFitter.scala    |  28 ++++
 .../math/algorithms/UnsupervisedModel.scala     |  24 ++++
 .../algorithms/preprocessing/AsFactor.scala     | 127 +++++++++++++++++++
 .../algorithms/preprocessing/MeanCenter.scala   |  91 +++++++++++++
 .../preprocessing/PreprocessorModel.scala       |  58 +++++++++
 .../preprocessing/StandardScaler.scala          |  97 ++++++++++++++
 .../regression/CochraneOrcuttModel.scala        | 100 +++++++++++++++
 .../regression/LinearRegressorModel.scala       | 124 ++++++++++++++++++
 .../regression/OrdinaryLeastSquaresModel.scala  |  66 ++++++++++
 .../algorithms/regression/RegressorModel.scala  |  58 +++++++++
 .../regression/tests/AutocorrelationTests.scala |  57 +++++++++
 .../regression/tests/FittnessTests.scala        |  56 ++++++++
 .../math/algorithms/PreprocessorSuiteBase.scala |  59 +++++++++
 .../math/algorithms/RegressionSuiteBase.scala   |  81 ++++++++++++
 .../algorithms/RegressionTestsSuiteBase.scala   |  87 +++++++++++++
 .../math/algorithms/PreprocessorSuite.scala     |  24 ++++
 .../math/algorithms/RegressionSuite.scala       |  25 ++++
 .../math/algorithms/RegressionTestsSuite.scala  |  25 ++++
 29 files changed, 1448 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mahout/blob/9a31923e/.gitignore
----------------------------------------------------------------------
diff --git a/.gitignore b/.gitignore
index 63490bf..3aee83a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -20,3 +20,4 @@ mr/temp
 temp
 foo
 math-tests/
+metastore_db/*
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/mahout/blob/9a31923e/flink/src/test/scala/org/apache/mahout/flinkbindings/standard/PreprocessorSuite.scala
----------------------------------------------------------------------
diff --git a/flink/src/test/scala/org/apache/mahout/flinkbindings/standard/PreprocessorSuite.scala b/flink/src/test/scala/org/apache/mahout/flinkbindings/standard/PreprocessorSuite.scala
new file mode 100644
index 0000000..5e2b4ee
--- /dev/null
+++ b/flink/src/test/scala/org/apache/mahout/flinkbindings/standard/PreprocessorSuite.scala
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.flinkbindings.standard
+
+
+import org.apache.mahout.flinkbindings.DistributedFlinkSuite
+import org.apache.mahout.math.algorithms.PreprocessorSuiteBase
+import org.scalatest.FunSuite
+
+class PreprocessorSuite extends FunSuite // no body: all behaviour is inherited from FunSuite and the mixed-in traits
+  with DistributedFlinkSuite with PreprocessorSuiteBase

http://git-wip-us.apache.org/repos/asf/mahout/blob/9a31923e/flink/src/test/scala/org/apache/mahout/flinkbindings/standard/RegressionSuite.scala
----------------------------------------------------------------------
diff --git a/flink/src/test/scala/org/apache/mahout/flinkbindings/standard/RegressionSuite.scala b/flink/src/test/scala/org/apache/mahout/flinkbindings/standard/RegressionSuite.scala
new file mode 100644
index 0000000..5cb6183
--- /dev/null
+++ b/flink/src/test/scala/org/apache/mahout/flinkbindings/standard/RegressionSuite.scala
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.flinkbindings.standard
+
+
+import org.apache.mahout.flinkbindings.DistributedFlinkSuite
+import org.apache.mahout.math.algorithms.RegressionSuiteBase
+import org.scalatest.FunSuite
+
+class RegressionSuite extends FunSuite // no body: all behaviour is inherited from FunSuite and the mixed-in traits
+  with DistributedFlinkSuite with RegressionSuiteBase
+

http://git-wip-us.apache.org/repos/asf/mahout/blob/9a31923e/flink/src/test/scala/org/apache/mahout/flinkbindings/standard/RegressionTestsSuite.scala
----------------------------------------------------------------------
diff --git a/flink/src/test/scala/org/apache/mahout/flinkbindings/standard/RegressionTestsSuite.scala b/flink/src/test/scala/org/apache/mahout/flinkbindings/standard/RegressionTestsSuite.scala
new file mode 100644
index 0000000..8ddab41
--- /dev/null
+++ b/flink/src/test/scala/org/apache/mahout/flinkbindings/standard/RegressionTestsSuite.scala
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.flinkbindings.standard
+
+import org.apache.mahout.flinkbindings.DistributedFlinkSuite
+import org.apache.mahout.math.algorithms.RegressionTestsSuiteBase
+import org.scalatest.FunSuite
+
+class RegressionTestsSuite extends FunSuite // no body: all behaviour is inherited from FunSuite and the mixed-in traits
+  with DistributedFlinkSuite with RegressionTestsSuiteBase
+

http://git-wip-us.apache.org/repos/asf/mahout/blob/9a31923e/h2o/src/test/scala/org/apache/mahout/math/algorithms/PreprocessorSuite.scala
----------------------------------------------------------------------
diff --git a/h2o/src/test/scala/org/apache/mahout/math/algorithms/PreprocessorSuite.scala b/h2o/src/test/scala/org/apache/mahout/math/algorithms/PreprocessorSuite.scala
new file mode 100644
index 0000000..e777f8b
--- /dev/null
+++ b/h2o/src/test/scala/org/apache/mahout/math/algorithms/PreprocessorSuite.scala
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.math.algorithms
+
+import org.apache.mahout.h2obindings.test.DistributedH2OSuite
+import org.scalatest.FunSuite
+
+class PreprocessorSuite extends FunSuite // no body: all behaviour is inherited from FunSuite and the mixed-in traits
+  with DistributedH2OSuite with PreprocessorSuiteBase
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/mahout/blob/9a31923e/h2o/src/test/scala/org/apache/mahout/math/algorithms/RegressionSuite.scala
----------------------------------------------------------------------
diff --git a/h2o/src/test/scala/org/apache/mahout/math/algorithms/RegressionSuite.scala b/h2o/src/test/scala/org/apache/mahout/math/algorithms/RegressionSuite.scala
new file mode 100644
index 0000000..503eb06
--- /dev/null
+++ b/h2o/src/test/scala/org/apache/mahout/math/algorithms/RegressionSuite.scala
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.math.algorithms
+
+import org.apache.mahout.h2obindings.test.DistributedH2OSuite
+import org.scalatest.FunSuite
+
+class RegressionSuite extends FunSuite // no body: all behaviour is inherited from FunSuite and the mixed-in traits
+  with DistributedH2OSuite with RegressionSuiteBase
+

http://git-wip-us.apache.org/repos/asf/mahout/blob/9a31923e/h2o/src/test/scala/org/apache/mahout/math/algorithms/RegressionTestsSuite.scala
----------------------------------------------------------------------
diff --git a/h2o/src/test/scala/org/apache/mahout/math/algorithms/RegressionTestsSuite.scala b/h2o/src/test/scala/org/apache/mahout/math/algorithms/RegressionTestsSuite.scala
new file mode 100644
index 0000000..864b045
--- /dev/null
+++ b/h2o/src/test/scala/org/apache/mahout/math/algorithms/RegressionTestsSuite.scala
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.math.algorithms
+
+import org.apache.mahout.h2obindings.test.DistributedH2OSuite
+import org.scalatest.FunSuite
+
+class RegressionTestsSuite extends FunSuite // no body: all behaviour is inherited from FunSuite and the mixed-in traits
+  with DistributedH2OSuite with RegressionTestsSuiteBase

http://git-wip-us.apache.org/repos/asf/mahout/blob/9a31923e/math-scala/src/main/scala/org/apache/mahout/math/algorithms/Fitter.scala
----------------------------------------------------------------------
diff --git a/math-scala/src/main/scala/org/apache/mahout/math/algorithms/Fitter.scala b/math-scala/src/main/scala/org/apache/mahout/math/algorithms/Fitter.scala
new file mode 100644
index 0000000..244cefc
--- /dev/null
+++ b/math-scala/src/main/scala/org/apache/mahout/math/algorithms/Fitter.scala
@@ -0,0 +1,27 @@
+/**
+  * Licensed to the Apache Software Foundation (ASF) under one
+  * or more contributor license agreements. See the NOTICE file
+  * distributed with this work for additional information
+  * regarding copyright ownership. The ASF licenses this file
+  * to you under the Apache License, Version 2.0 (the
+  * "License"); you may not use this file except in compliance
+  * with the License. You may obtain a copy of the License at
+  *
+  * http://www.apache.org/licenses/LICENSE-2.0
+  *
+  * Unless required by applicable law or agreed to in writing,
+  * software distributed under the License is distributed on an
+  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  * KIND, either express or implied. See the License for the
+  * specific language governing permissions and limitations
+  * under the License.
+  */
+
+package org.apache.mahout.math.algorithms
+
+trait Fitter {
+  // Marker trait: it declares no members.
+  // Every fitter needs a `fit` method, but the signatures differ between fitters, so none is declared here.
+  // Kept as a placeholder in case we decide there are some things all fitters must have in common.
+
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/9a31923e/math-scala/src/main/scala/org/apache/mahout/math/algorithms/Model.scala
----------------------------------------------------------------------
diff --git a/math-scala/src/main/scala/org/apache/mahout/math/algorithms/Model.scala b/math-scala/src/main/scala/org/apache/mahout/math/algorithms/Model.scala
new file mode 100644
index 0000000..0fbe8ac
--- /dev/null
+++ b/math-scala/src/main/scala/org/apache/mahout/math/algorithms/Model.scala
@@ -0,0 +1,26 @@
+/**
+  * Licensed to the Apache Software Foundation (ASF) under one
+  * or more contributor license agreements. See the NOTICE file
+  * distributed with this work for additional information
+  * regarding copyright ownership. The ASF licenses this file
+  * to you under the Apache License, Version 2.0 (the
+  * "License"); you may not use this file except in compliance
+  * with the License. You may obtain a copy of the License at
+  *
+  * http://www.apache.org/licenses/LICENSE-2.0
+  *
+  * Unless required by applicable law or agreed to in writing,
+  * software distributed under the License is distributed on an
+  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  * KIND, either express or implied. See the License for the
+  * specific language governing permissions and limitations
+  * under the License.
+  */
+
+package org.apache.mahout.math.algorithms
+
+trait Model extends Serializable {
+  // Mutable free-text summary of the model; starts out as the empty string.
+  var summary: String = ""
+
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/9a31923e/math-scala/src/main/scala/org/apache/mahout/math/algorithms/SupervisedFitter.scala
----------------------------------------------------------------------
diff --git a/math-scala/src/main/scala/org/apache/mahout/math/algorithms/SupervisedFitter.scala b/math-scala/src/main/scala/org/apache/mahout/math/algorithms/SupervisedFitter.scala
new file mode 100644
index 0000000..bf85dee
--- /dev/null
+++ b/math-scala/src/main/scala/org/apache/mahout/math/algorithms/SupervisedFitter.scala
@@ -0,0 +1,29 @@
+/**
+  * Licensed to the Apache Software Foundation (ASF) under one
+  * or more contributor license agreements. See the NOTICE file
+  * distributed with this work for additional information
+  * regarding copyright ownership. The ASF licenses this file
+  * to you under the Apache License, Version 2.0 (the
+  * "License"); you may not use this file except in compliance
+  * with the License. You may obtain a copy of the License at
+  *
+  * http://www.apache.org/licenses/LICENSE-2.0
+  *
+  * Unless required by applicable law or agreed to in writing,
+  * software distributed under the License is distributed on an
+  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  * KIND, either express or implied. See the License for the
+  * specific language governing permissions and limitations
+  * under the License.
+  */
+
+package org.apache.mahout.math.algorithms
+
+import org.apache.mahout.math.drm.DrmLike
+
+trait SupervisedFitter[K, M <: SupervisedModel[K]] extends Fitter {
+  /** Returns a model of type `M` built from `drmX` and `drmTarget`; `hyperparameters` are optional (name, value) pairs. */
+  def fit(drmX  : DrmLike[K],
+          drmTarget: DrmLike[K],
+          hyperparameters: (Symbol, Any)*): M
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/9a31923e/math-scala/src/main/scala/org/apache/mahout/math/algorithms/SupervisedModel.scala
----------------------------------------------------------------------
diff --git a/math-scala/src/main/scala/org/apache/mahout/math/algorithms/SupervisedModel.scala b/math-scala/src/main/scala/org/apache/mahout/math/algorithms/SupervisedModel.scala
new file mode 100644
index 0000000..57c20e7
--- /dev/null
+++ b/math-scala/src/main/scala/org/apache/mahout/math/algorithms/SupervisedModel.scala
@@ -0,0 +1,26 @@
+/**
+  * Licensed to the Apache Software Foundation (ASF) under one
+  * or more contributor license agreements. See the NOTICE file
+  * distributed with this work for additional information
+  * regarding copyright ownership. The ASF licenses this file
+  * to you under the Apache License, Version 2.0 (the
+  * "License"); you may not use this file except in compliance
+  * with the License. You may obtain a copy of the License at
+  *
+  * http://www.apache.org/licenses/LICENSE-2.0
+  *
+  * Unless required by applicable law or agreed to in writing,
+  * software distributed under the License is distributed on an
+  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  * KIND, either express or implied. See the License for the
+  * specific language governing permissions and limitations
+  * under the License.
+  */
+
+package org.apache.mahout.math.algorithms
+
+import scala.collection.mutable
+
+trait SupervisedModel[K] extends Model {
+  var testResults: mutable.Map[Symbol, Any] = mutable.Map[Symbol, Any]() // results keyed by Symbol; starts empty
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/9a31923e/math-scala/src/main/scala/org/apache/mahout/math/algorithms/UnsupervisedFitter.scala
----------------------------------------------------------------------
diff --git a/math-scala/src/main/scala/org/apache/mahout/math/algorithms/UnsupervisedFitter.scala b/math-scala/src/main/scala/org/apache/mahout/math/algorithms/UnsupervisedFitter.scala
new file mode 100644
index 0000000..5c191d1
--- /dev/null
+++ b/math-scala/src/main/scala/org/apache/mahout/math/algorithms/UnsupervisedFitter.scala
@@ -0,0 +1,28 @@
+/**
+  * Licensed to the Apache Software Foundation (ASF) under one
+  * or more contributor license agreements. See the NOTICE file
+  * distributed with this work for additional information
+  * regarding copyright ownership. The ASF licenses this file
+  * to you under the Apache License, Version 2.0 (the
+  * "License"); you may not use this file except in compliance
+  * with the License. You may obtain a copy of the License at
+  *
+  * http://www.apache.org/licenses/LICENSE-2.0
+  *
+  * Unless required by applicable law or agreed to in writing,
+  * software distributed under the License is distributed on an
+  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  * KIND, either express or implied. See the License for the
+  * specific language governing permissions and limitations
+  * under the License.
+  */
+
+package org.apache.mahout.math.algorithms
+
+import org.apache.mahout.math.drm.DrmLike
+
+trait UnsupervisedFitter extends Fitter {
+  /** Returns an UnsupervisedModel built from `input` alone; `hyperparameters` are optional (name, value) pairs. */
+  def fit[K](input: DrmLike[K],
+             hyperparameters: (Symbol, Any)*): UnsupervisedModel
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/9a31923e/math-scala/src/main/scala/org/apache/mahout/math/algorithms/UnsupervisedModel.scala
----------------------------------------------------------------------
diff --git a/math-scala/src/main/scala/org/apache/mahout/math/algorithms/UnsupervisedModel.scala b/math-scala/src/main/scala/org/apache/mahout/math/algorithms/UnsupervisedModel.scala
new file mode 100644
index 0000000..f8ff341
--- /dev/null
+++ b/math-scala/src/main/scala/org/apache/mahout/math/algorithms/UnsupervisedModel.scala
@@ -0,0 +1,24 @@
+/**
+  * Licensed to the Apache Software Foundation (ASF) under one
+  * or more contributor license agreements. See the NOTICE file
+  * distributed with this work for additional information
+  * regarding copyright ownership. The ASF licenses this file
+  * to you under the Apache License, Version 2.0 (the
+  * "License"); you may not use this file except in compliance
+  * with the License. You may obtain a copy of the License at
+  *
+  * http://www.apache.org/licenses/LICENSE-2.0
+  *
+  * Unless required by applicable law or agreed to in writing,
+  * software distributed under the License is distributed on an
+  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  * KIND, either express or implied. See the License for the
+  * specific language governing permissions and limitations
+  * under the License.
+  */
+
+package org.apache.mahout.math.algorithms
+
+trait UnsupervisedModel extends Model {
+  // Marker trait: adds no members of its own to Model.
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/9a31923e/math-scala/src/main/scala/org/apache/mahout/math/algorithms/preprocessing/AsFactor.scala
----------------------------------------------------------------------
diff --git a/math-scala/src/main/scala/org/apache/mahout/math/algorithms/preprocessing/AsFactor.scala b/math-scala/src/main/scala/org/apache/mahout/math/algorithms/preprocessing/AsFactor.scala
new file mode 100644
index 0000000..9d8e10f
--- /dev/null
+++ b/math-scala/src/main/scala/org/apache/mahout/math/algorithms/preprocessing/AsFactor.scala
@@ -0,0 +1,127 @@
+/**
+  * Licensed to the Apache Software Foundation (ASF) under one
+  * or more contributor license agreements. See the NOTICE file
+  * distributed with this work for additional information
+  * regarding copyright ownership. The ASF licenses this file
+  * to you under the Apache License, Version 2.0 (the
+  * "License"); you may not use this file except in compliance
+  * with the License. You may obtain a copy of the License at
+  *
+  * http://www.apache.org/licenses/LICENSE-2.0
+  *
+  * Unless required by applicable law or agreed to in writing,
+  * software distributed under the License is distributed on an
+  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  * KIND, either express or implied. See the License for the
+  * specific language governing permissions and limitations
+  * under the License.
+  */
+
+package org.apache.mahout.math.algorithms.preprocessing
+
+
+
+import collection._
+import JavaConversions._
+import org.apache.mahout.math._
+import org.apache.mahout.math.drm._
+import org.apache.mahout.math.{Vector => MahoutVector}
+import org.apache.mahout.math.drm.RLikeDrmOps._
+import org.apache.mahout.math.scalabindings._
+import org.apache.mahout.math.scalabindings.RLikeOps._
+import MahoutCollections._
+
+class AsFactor extends PreprocessorFitter {
+  /** Builds an AsFactorModel from per-column maxima of `input`; `hyperparameters` is not read by this method. */
+  def fit[K](input: DrmLike[K],
+             hyperparameters: (Symbol, Any)*): AsFactorModel = {
+    // Row 0 of the all-reduced matrix is kept as a vector (presumably one entry per input column).
+    import org.apache.mahout.math.function.VectorFunction
+    val factorMap = input.allreduceBlock(
+      { case (keys, block: Matrix) =>
+        // someday we'll replace this with block.max: Vector
+        // or better yet- block.distinct
+        dense(block.aggregateColumns( new VectorFunction {
+            def apply(f: Vector): Double = f.max
+        }))
+      })(0, ::)
+    /*
+    Worked example left by the original author (not verified here):
+    val A = drmParallelize(dense(
+      (3, 2, 1), (0, 0, 0), (1, 1, 1)))
+      -> (4,2,2),  now 4,3,2
+    NOTE(review): each block yields its column max; the block reducer is not visible here -- confirm the sizes.
+     */
+    new AsFactorModel(factorMap.sum.toInt, // first argument: sum of the per-column entries
+      dvec(factorMap.toArray.scanLeft(0.0)((l, r) => l + r ).take(factorMap.length)) // running totals starting at 0
+    //  factorMap
+    )
+  }
+
+}
+
+/** Indicator ("one-hot" style) encoder: `factorVec(m)` is the first output column belonging to input column m,
+  * and `cardinality` is the total number of output columns. Cell values are truncated to Int when encoding. */
+class AsFactorModel(cardinality: Int, factorVec: MahoutVector) extends PreprocessorModel {
+
+  val factorMap: MahoutVector = factorVec
+
+  /**
+    * Writes 1.0 at column `factorMap(m) + value` for the m-th element of each row.
+    * @param input DRM whose cells hold the (integer-valued) category codes
+    * @return a DRM declared with `cardinality` columns, built from sparse blocks
+    */
+  def transform[K](input: DrmLike[K]): DrmLike[K] = {
+    implicit val ctx = input.context
+
+    val bcastK = drmBroadcast(dvec(cardinality))
+    val bcastFactorMap = drmBroadcast(factorMap)
+
+    implicit val ktag =  input.keyClassTag
+
+    input.mapBlock(cardinality) {
+      case (keys, block: Matrix) =>
+        val width: Int = bcastK.value.get(0).toInt
+        val output = new SparseMatrix(block.nrow, width)
+        val offsets = bcastFactorMap.value
+        for (n <- 0 until output.nrow) {
+          var m = 0 // position of `e` within the row, i.e. which offset applies
+          for (e <- block(n, ::).all()) {
+            output(n, offsets.get(m).toInt + e.get().toInt) = 1.0
+            m += 1
+          }
+        }
+        (keys, output)
+    }
+  }
+
+  /**
+    * Inverse of `transform`: maps each non-zero column index back to an (input column, value) pair.
+    * The owning column is the last offset in `factorMap` not exceeding the index; the value is index minus offset.
+    * @param input DRM in the layout produced by `transform`
+    * @return a DRM with one column per entry of `factorMap`
+    */
+  override def invTransform[K](input: DrmLike[K]): DrmLike[K] = {
+    implicit val ctx = input.context
+
+    val bcastFactorMap = drmBroadcast(factorMap)
+
+    implicit val ktag =  input.keyClassTag
+
+    input.mapBlock(factorMap.length) {
+      case (keys, block: Matrix) =>
+        val offsets = bcastFactorMap.value
+        val output = new DenseMatrix(block.nrow, offsets.length)
+        for (n <- 0 until output.nrow) {
+          // nonZeroes() is on the Vector interface, so no cast to a concrete sparse class is needed
+          for (e <- block(n, ::).nonZeroes()) {
+            var m = offsets.length - 1
+            while (m > 0 && offsets.get(m) > e.index) m -= 1
+            output(n, m) = e.index - offsets.get(m)
+          }
+        }
+        (keys, output)
+    }
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/9a31923e/math-scala/src/main/scala/org/apache/mahout/math/algorithms/preprocessing/MeanCenter.scala
----------------------------------------------------------------------
diff --git a/math-scala/src/main/scala/org/apache/mahout/math/algorithms/preprocessing/MeanCenter.scala b/math-scala/src/main/scala/org/apache/mahout/math/algorithms/preprocessing/MeanCenter.scala
new file mode 100644
index 0000000..258ad1b
--- /dev/null
+++ b/math-scala/src/main/scala/org/apache/mahout/math/algorithms/preprocessing/MeanCenter.scala
@@ -0,0 +1,91 @@
+/**
+  * Licensed to the Apache Software Foundation (ASF) under one
+  * or more contributor license agreements. See the NOTICE file
+  * distributed with this work for additional information
+  * regarding copyright ownership. The ASF licenses this file
+  * to you under the Apache License, Version 2.0 (the
+  * "License"); you may not use this file except in compliance
+  * with the License. You may obtain a copy of the License at
+  *
+  * http://www.apache.org/licenses/LICENSE-2.0
+  *
+  * Unless required by applicable law or agreed to in writing,
+  * software distributed under the License is distributed on an
+  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  * KIND, either express or implied. See the License for the
+  * specific language governing permissions and limitations
+  * under the License.
+  */
+
+package org.apache.mahout.math.algorithms.preprocessing
+
+import collection._
+import JavaConversions._
+import org.apache.mahout.math.drm._
+import org.apache.mahout.math.drm.RLikeDrmOps._
+import org.apache.mahout.math.Matrix
+import org.apache.mahout.math.scalabindings.RLikeOps._
+import org.apache.mahout.math.{Vector => MahoutVector}
+
+
+
+class MeanCenter extends PreprocessorFitter {
+  // Fitter that produces a MeanCenterModel from the column means of a DRM.
+  /**
+    * Computes the column means of `input` and wraps them in a [[MeanCenterModel]].
+    * @param input            the DRM whose column means are taken
+    * @param hyperparameters  not read by this method
+    */
+  def fit[K](input: DrmLike[K],
+             hyperparameters: (Symbol, Any)*): MeanCenterModel = {
+    new MeanCenterModel(input.colMeans()) // could add centers here
+  }
+
+}
+
+/**
+  * Shifts every column by a stored vector: `transform` subtracts it and `invTransform` adds it back.
+  * @param means column means from the fitted input; the stored vector starts out equal to this
+  */
+class MeanCenterModel(means: MahoutVector) extends PreprocessorModel {
+
+  var colCentersV: MahoutVector = means
+  /** Sets the stored vector to `means + centers`; throws IllegalArgumentException when the lengths differ. */
+  def setCenters(centers: MahoutVector): Unit = {
+    if (means.length != centers.length) {
+      throw new IllegalArgumentException(s"Length of centers vector (${centers.length}) must equal " +
+        s"length of means vector (${means.length}) (i.e. the number of columns in the originally fit input).")
+    }
+    // NOTE(review): transform subtracts this vector, so mean-`means` columns end up at -centers; confirm the sign.
+    colCentersV = means + centers
+  }
+
+  /** Returns a DRM in which the stored vector has been subtracted from every row of `input`. */
+  def transform[K](input: DrmLike[K]): DrmLike[K] = {
+    implicit val ctx = input.context
+    implicit val ktag =  input.keyClassTag
+    val bcastV = drmBroadcast(colCentersV)
+
+    input.mapBlock(input.ncol) {
+      case (keys, block: Matrix) =>
+        val copy: Matrix = block.cloned // the shift is applied to a clone, not to `block` itself
+        copy.foreach(row => row -= bcastV.value)
+        (keys, copy)
+    }
+  }
+
+  /** Returns a DRM in which the stored vector has been added to every row of `input`; undoes `transform`. */
+  def invTransform[K](input: DrmLike[K]): DrmLike[K] = {
+    implicit val ctx = input.context
+    implicit val ktag =  input.keyClassTag
+    val bcastV = drmBroadcast(colCentersV)
+
+    input.mapBlock(input.ncol) {
+      case (keys, block: Matrix) =>
+        val copy: Matrix = block.cloned
+        copy.foreach(row => row += bcastV.value)
+        (keys, copy)
+    }
+  }
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/mahout/blob/9a31923e/math-scala/src/main/scala/org/apache/mahout/math/algorithms/preprocessing/PreprocessorModel.scala
----------------------------------------------------------------------
diff --git a/math-scala/src/main/scala/org/apache/mahout/math/algorithms/preprocessing/PreprocessorModel.scala b/math-scala/src/main/scala/org/apache/mahout/math/algorithms/preprocessing/PreprocessorModel.scala
new file mode 100644
index 0000000..5adb87d
--- /dev/null
+++ b/math-scala/src/main/scala/org/apache/mahout/math/algorithms/preprocessing/PreprocessorModel.scala
@@ -0,0 +1,58 @@
+/**
+  * Licensed to the Apache Software Foundation (ASF) under one
+  * or more contributor license agreements. See the NOTICE file
+  * distributed with this work for additional information
+  * regarding copyright ownership. The ASF licenses this file
+  * to you under the Apache License, Version 2.0 (the
+  * "License"); you may not use this file except in compliance
+  * with the License. You may obtain a copy of the License at
+  *
+  * http://www.apache.org/licenses/LICENSE-2.0
+  *
+  * Unless required by applicable law or agreed to in writing,
+  * software distributed under the License is distributed on an
+  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  * KIND, either express or implied. See the License for the
+  * specific language governing permissions and limitations
+  * under the License.
+  */
+
+package org.apache.mahout.math.algorithms.preprocessing
+
+import org.apache.mahout.math.algorithms.{UnsupervisedFitter, UnsupervisedModel}
+import org.apache.mahout.math.drm.DrmLike
+
+trait PreprocessorModel extends UnsupervisedModel {
+
+  /**
+    * Applies this preprocessing step to a distributed matrix.
+    * @param input the matrix to transform
+    * @tparam K row key type of the input
+    * @return the transformed matrix, keyed like the input
+    */
+  def transform[K](input: DrmLike[K]): DrmLike[K]
+
+  /**
+    * Convenience counterpart of `transform`: maps transformed data back to its original form.
+    * @param input a matrix in the transformed representation
+    * @tparam K row key type of the input
+    * @return the matrix in the original representation, keyed like the input
+    */
+  def invTransform[K](input: DrmLike[K]): DrmLike[K]
+
+}
+
+trait PreprocessorFitter extends UnsupervisedFitter {
+
+  // Keeps the model produced by the most recent `fitTransform` call.
+  var model: PreprocessorModel = _
+
+  /**
+    * Fits a preprocessing model to the input.
+    * @param input the matrix to fit on
+    * @param hyperparameters optional (symbol, value) settings for the fitter
+    * @tparam K row key type of the input
+    * @return the fitted model
+    */
+  def fit[K](input: DrmLike[K],
+             hyperparameters: (Symbol, Any)*): PreprocessorModel
+
+  /**
+    * Fits a model on the input, stores it in `model`, and returns the input transformed by it.
+    * @param input the matrix to fit on and transform
+    * @param hyperparameters optional (symbol, value) settings forwarded to `fit`
+    * @tparam K row key type of the input
+    * @return the transformed input
+    */
+  def fitTransform[K](input: DrmLike[K],
+                      hyperparameters: (Symbol, Any)*): DrmLike[K] = {
+    val fitted = fit(input, hyperparameters: _*)
+    model = fitted
+    fitted.transform(input)
+  }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/mahout/blob/9a31923e/math-scala/src/main/scala/org/apache/mahout/math/algorithms/preprocessing/StandardScaler.scala
----------------------------------------------------------------------
diff --git a/math-scala/src/main/scala/org/apache/mahout/math/algorithms/preprocessing/StandardScaler.scala b/math-scala/src/main/scala/org/apache/mahout/math/algorithms/preprocessing/StandardScaler.scala
new file mode 100644
index 0000000..98d0be1
--- /dev/null
+++ b/math-scala/src/main/scala/org/apache/mahout/math/algorithms/preprocessing/StandardScaler.scala
@@ -0,0 +1,97 @@
+/**
+  * Licensed to the Apache Software Foundation (ASF) under one
+  * or more contributor license agreements. See the NOTICE file
+  * distributed with this work for additional information
+  * regarding copyright ownership. The ASF licenses this file
+  * to you under the Apache License, Version 2.0 (the
+  * "License"); you may not use this file except in compliance
+  * with the License. You may obtain a copy of the License at
+  *
+  * http://www.apache.org/licenses/LICENSE-2.0
+  *
+  * Unless required by applicable law or agreed to in writing,
+  * software distributed under the License is distributed on an
+  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  * KIND, either express or implied. See the License for the
+  * specific language governing permissions and limitations
+  * under the License.
+  */
+
+package org.apache.mahout.math.algorithms.preprocessing
+
+import collection._
+import JavaConversions._
+
+import org.apache.mahout.math.drm._
+import org.apache.mahout.math.drm.RLikeDrmOps._
+import org.apache.mahout.math.scalabindings.RLikeOps._
+import org.apache.mahout.math.{Vector => MahoutVector, Matrix}
+
+/**
+  * Scales columns to mean 0 and unit variance
+  */
+class StandardScaler extends PreprocessorFitter {
+
+  /**
+    * Computes the column means and variances of the input and wraps them in a model.
+    * @param input the matrix to fit on
+    * @param hyperparameters accepted for interface compatibility; not read here
+    * @tparam K row key type of the input
+    * @return a model holding the column means and the square roots of the column variances
+    */
+  def fit[K](input: DrmLike[K],
+             hyperparameters: (Symbol, Any)*): StandardScalerModel = {
+    val (colMeans, colVariances) = dcolMeanVars(input)
+    new StandardScalerModel(colMeans, colVariances.sqrt)
+  }
+
+}
+
+class StandardScalerModel(meanVec: MahoutVector,
+                          stdev: MahoutVector
+                         ) extends PreprocessorModel {
+
+  /**
+    * Rewrites every row as `(row - meanVec) / stdev`.
+    *
+    * @param input the matrix to scale
+    * @tparam K row key type of the input
+    * @return the scaled matrix, keyed like the input
+    */
+  def transform[K](input: DrmLike[K]): DrmLike[K] = {
+    implicit val ctx = input.context
+
+    // Some mapBlock() calls need it
+    implicit val ktag = input.keyClassTag
+
+    val bcastMu = drmBroadcast(meanVec)
+    val bcastSigma = drmBroadcast(stdev)
+
+    input.mapBlock(input.ncol) {
+      case (keys, block: Matrix) =>
+        // work on a clone of the block rather than the block itself
+        val scaled: Matrix = block.cloned
+        scaled.foreach(row => row := (row - bcastMu) / bcastSigma)
+        (keys, scaled)
+    }
+  }
+
+  /**
+    * Undoes `transform`: rewrites every row as `(row * stdev) + meanVec`,
+    * e.g. turning a normalized column back into its original values.
+    *
+    * @param input a matrix in the scaled representation
+    * @tparam K row key type of the input
+    * @return the matrix in the original representation, keyed like the input
+    */
+  def invTransform[K](input: DrmLike[K]): DrmLike[K] = {
+    implicit val ctx = input.context
+
+    // Some mapBlock() calls need it
+    implicit val ktag = input.keyClassTag
+
+    val bcastMu = drmBroadcast(meanVec)
+    val bcastSigma = drmBroadcast(stdev)
+
+    input.mapBlock(input.ncol) {
+      case (keys, block: Matrix) =>
+        // work on a clone of the block rather than the block itself
+        val restored: Matrix = block.cloned
+        restored.foreach(row => row := (row * bcastSigma) + bcastMu)
+        (keys, restored)
+    }
+  }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/mahout/blob/9a31923e/math-scala/src/main/scala/org/apache/mahout/math/algorithms/regression/CochraneOrcuttModel.scala
----------------------------------------------------------------------
diff --git a/math-scala/src/main/scala/org/apache/mahout/math/algorithms/regression/CochraneOrcuttModel.scala b/math-scala/src/main/scala/org/apache/mahout/math/algorithms/regression/CochraneOrcuttModel.scala
new file mode 100644
index 0000000..844e72f
--- /dev/null
+++ b/math-scala/src/main/scala/org/apache/mahout/math/algorithms/regression/CochraneOrcuttModel.scala
@@ -0,0 +1,100 @@
+/**
+  * Licensed to the Apache Software Foundation (ASF) under one
+  * or more contributor license agreements. See the NOTICE file
+  * distributed with this work for additional information
+  * regarding copyright ownership. The ASF licenses this file
+  * to you under the Apache License, Version 2.0 (the
+  * "License"); you may not use this file except in compliance
+  * with the License. You may obtain a copy of the License at
+  *
+  * http://www.apache.org/licenses/LICENSE-2.0
+  *
+  * Unless required by applicable law or agreed to in writing,
+  * software distributed under the License is distributed on an
+  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  * KIND, either express or implied. See the License for the
+  * specific language governing permissions and limitations
+  * under the License.
+  */
+
+package org.apache.mahout.math.algorithms.regression
+
+import org.apache.mahout.math.{Vector => MahoutVector}
+import org.apache.mahout.math.drm.{CacheHint, DrmLike, safeToNonNegInt}
+import org.apache.mahout.math.drm.RLikeDrmOps._
+import org.apache.mahout.math.scalabindings.RLikeOps._
+
+class CochraneOrcuttModel[K](regressor: LinearRegressorModel[K]) extends LinearRegressorModel[K] {
+  // https://en.wikipedia.org/wiki/Cochrane%E2%80%93Orcutt_estimation
+
+  // Coefficient vectors, one slot per iteration; assigned by the fitter after construction.
+  var betas: Array[MahoutVector] = _
+
+  /**
+    * Predicts by delegating to the wrapped regression model.
+    * @param drmPredictors the predictor matrix
+    * @return whatever the wrapped model's `predict` returns for these predictors
+    */
+  def predict(drmPredictors: DrmLike[K]): DrmLike[K] =
+    regressor.predict(drmPredictors)
+
+}
+
+class CochraneOrcutt[K](hyperparameters: (Symbol, Any)*)  extends LinearRegressorFitter[K] {
+
+  // Plain defaults. The constructor's varargs are a Seq, not a Map, so they must not be cast to
+  // Map here (that throws ClassCastException); the `setHyperparameters(hyperparameters.toMap)`
+  // call further down applies any user-supplied values on top of these defaults.
+  var regressor: LinearRegressorFitter[K] = new OrdinaryLeastSquares[K]()
+  var iterations: Int = 3
+  var cacheHint: CacheHint.CacheHint = CacheHint.MEMORY_ONLY
+  // For larger inputs, a memory-and-disk cache hint is recommended.
+
+  /**
+    * Applies hyperparameters, falling back to defaults for missing keys.
+    *
+    * Recognised keys: `'regressor` (the inner fitter, default ordinary least squares),
+    * `'iterations` (default 3), `'cacheHint` (default `CacheHint.MEMORY_ONLY`), plus the keys
+    * read by `setStandardHyperparameters`. The inner regressor's standard-error and
+    * common-statistics flags are switched off here.
+    *
+    * @param hyperparameters map of hyperparameter symbol to value
+    */
+  def setHyperparameters(hyperparameters: Map[Symbol, Any] = Map('foo -> None)): Unit = {
+    setStandardHyperparameters(hyperparameters)
+    regressor = hyperparameters.asInstanceOf[Map[Symbol, LinearRegressorFitter[K]]]
+      .getOrElse('regressor, new OrdinaryLeastSquares[K]())
+    regressor.calcStandardErrors = false
+    regressor.calcCommonStatistics = false
+    iterations = hyperparameters.asInstanceOf[Map[Symbol, Int]].getOrElse('iterations, 3)
+    cacheHint = hyperparameters.asInstanceOf[Map[Symbol, CacheHint.CacheHint]].getOrElse('cacheHint, CacheHint.MEMORY_ONLY)
+  }
+
+  setHyperparameters(hyperparameters.toMap)
+
+  /**
+    * Runs the iterative fit and records the coefficient vector of every iteration.
+    *
+    * @param drmFeatures the feature matrix
+    * @param drmTarget the target column
+    * @param hyperparameters accepted for interface compatibility; not read here (the values given
+    *                        to the constructor / `setHyperparameters` are used)
+    * @return a model wrapping the last fitted regression model, with `betas` and `summary` set
+    * @throws IllegalArgumentException if `iterations` is less than 1
+    */
+  def fit(drmFeatures: DrmLike[K], drmTarget: DrmLike[K], hyperparameters: (Symbol, Any)*): CochraneOrcuttModel[K] = {
+
+    // betas(0) is always written below, so at least one slot is required
+    require(iterations >= 1, s"'iterations must be at least 1 but was $iterations")
+
+    val betas = new Array[MahoutVector](iterations)
+    var regressionModel: LinearRegressorModel[K] = regressor.fit(drmFeatures, drmTarget)
+    betas(0) = regressionModel.beta
+    // todo add dw test option on each iteration
+
+    val n = safeToNonNegInt(drmTarget.nrow)
+    val Y = drmTarget(1 until n, 0 until 1).checkpoint(cacheHint)
+    val Y_lag = drmTarget(0 until n - 1, 0 until 1).checkpoint(cacheHint)
+    val X = drmFeatures(1 until n, 0 until 1).checkpoint(cacheHint)
+    val X_lag = drmFeatures(0 until n - 1, 0 until 1).checkpoint(cacheHint)
+
+    // NOTE(review): the loop below is kept as originally written, but several things look
+    // unfinished and should be confirmed against the Cochrane-Orcutt procedure:
+    //   - `error`, `drmYprime` and `drmXprime` are built but never passed to a fit; every refit
+    //     uses the untransformed `drmFeatures` / `drmTarget`;
+    //   - `rho` is read from the refit's first coefficient rather than from a regression of the
+    //     residuals on their lag;
+    //   - `X` / `X_lag` keep only the first feature column (`0 until 1`);
+    //   - `betaPrime(0)` is rescaled as if it were the intercept, while OrdinaryLeastSquares
+    //     appends its intercept column last.
+    for (i <- 1 until iterations){
+      val error = drmTarget - regressionModel.predict(drmFeatures)
+      regressionModel = regressor.fit(drmFeatures, drmTarget)
+      val rho = regressionModel.beta.get(0)
+
+      val drmYprime = Y - Y_lag * rho
+      val drmXprime = X - X_lag * rho
+
+      if (i == iterations - 1 ){
+        // calculate common stats and SE on last iteration only
+        // todo make this optional- but if you don't care then why are you even bothering to do this?
+        regressor.calcStandardErrors = true
+        regressor.calcCommonStatistics = true
+      }
+      regressionModel = regressor.fit(drmFeatures, drmTarget)
+      val betaPrime = regressionModel.beta
+      val b0 = betaPrime(0) / (1 - rho)
+      betaPrime(0) = b0
+      betas(i) = betaPrime
+    }
+
+    val model = new CochraneOrcuttModel[K](regressionModel)
+    model.betas = betas
+    // print the coefficients of iteration i, not the array reference
+    model.summary = (0 until iterations).map(i => s"Beta estimates on iteration " + i + ": "
+      + model.betas(i).toString + "\n").mkString("") + "\n\n" + "Final Model:\n\n" + regressionModel.summary
+
+    model
+  }
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/mahout/blob/9a31923e/math-scala/src/main/scala/org/apache/mahout/math/algorithms/regression/LinearRegressorModel.scala
----------------------------------------------------------------------
diff --git a/math-scala/src/main/scala/org/apache/mahout/math/algorithms/regression/LinearRegressorModel.scala b/math-scala/src/main/scala/org/apache/mahout/math/algorithms/regression/LinearRegressorModel.scala
new file mode 100644
index 0000000..555ee6c
--- /dev/null
+++ b/math-scala/src/main/scala/org/apache/mahout/math/algorithms/regression/LinearRegressorModel.scala
@@ -0,0 +1,124 @@
+/**
+  * Licensed to the Apache Software Foundation (ASF) under one
+  * or more contributor license agreements. See the NOTICE file
+  * distributed with this work for additional information
+  * regarding copyright ownership. The ASF licenses this file
+  * to you under the Apache License, Version 2.0 (the
+  * "License"); you may not use this file except in compliance
+  * with the License. You may obtain a copy of the License at
+  *
+  * http://www.apache.org/licenses/LICENSE-2.0
+  *
+  * Unless required by applicable law or agreed to in writing,
+  * software distributed under the License is distributed on an
+  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  * KIND, either express or implied. See the License for the
+  * specific language governing permissions and limitations
+  * under the License.
+  */
+
+package org.apache.mahout.math.algorithms.regression
+
+import org.apache.mahout.math.algorithms.regression.tests.FittnessTests
+import org.apache.mahout.math.drm._
+import org.apache.mahout.math.drm.DrmLike
+import org.apache.mahout.math.drm.RLikeDrmOps._
+import org.apache.mahout.math.scalabindings.dvec
+import org.apache.mahout.math.{Matrix, Vector => MahoutVector}
+import org.apache.mahout.math.scalabindings.RLikeOps._
+import scala.language.higherKinds
+
+trait LinearRegressorModel[K] extends RegressorModel[K] {
+
+  var beta: MahoutVector = _  // coefficient estimates; OrdinaryLeastSquaresModel.predict multiplies the predictors by this vector
+  var se: MahoutVector = _  // standard errors of the coefficients; set by LinearRegressorFitter.calculateStandardError
+  var tScore: MahoutVector = _  // beta / se; set by LinearRegressorFitter.calculateStandardError
+  var pval: MahoutVector = _  // p-values derived from tScore; set by LinearRegressorFitter.calculateStandardError
+  var degreesFreedom: Int = _  // set to the column count of the design matrix by LinearRegressorFitter.calculateStandardError
+
+}
+
+trait LinearRegressorFitter[K] extends RegressorFitter[K] {
+
+  // Flags read during fitting / post-processing; assigned by `setStandardHyperparameters`.
+  var addIntercept: Boolean = _
+  var calcStandardErrors: Boolean = _
+  var calcCommonStatistics: Boolean = _
+
+  /**
+    * Fits a linear regression model.
+    * @param drmX the feature matrix
+    * @param drmTarget the target column
+    * @param hyperparameters optional (symbol, value) settings
+    * @return the fitted model
+    */
+  def fit(drmX: DrmLike[K],
+          drmTarget: DrmLike[K],
+          hyperparameters: (Symbol, Any)*): LinearRegressorModel[K]
+
+
+  /**
+    * Reads `'calcCommonStatistics`, `'calcStandardErrors` and `'addIntercept` from the map;
+    * each defaults to `true` when absent.
+    * @param hyperparameters map of hyperparameter symbol to value
+    */
+  def setStandardHyperparameters(hyperparameters: Map[Symbol, Any] = Map('foo -> None)): Unit = {
+    calcCommonStatistics = hyperparameters.asInstanceOf[Map[Symbol, Boolean]].getOrElse('calcCommonStatistics, true)
+    calcStandardErrors = hyperparameters.asInstanceOf[Map[Symbol, Boolean]].getOrElse('calcStandardErrors, true)
+    addIntercept = hyperparameters.asInstanceOf[Map[Symbol, Boolean]].getOrElse('addIntercept, true)
+  }
+
+  /**
+    * Computes standard errors, t-scores and two-sided p-values for the model's coefficients,
+    * stores them on the model and writes a tabular summary. Also runs
+    * `calculateCommonStatistics` when `calcCommonStatistics` is set, reusing the residuals.
+    *
+    * @param X the design matrix the model was fitted on
+    * @param drmTarget the target column
+    * @param drmXtXinv the inverse of X'X
+    * @param model the fitted model; it is updated in place
+    * @return the same model with `se`, `tScore`, `pval`, `degreesFreedom` and `summary` set
+    */
+  def calculateStandardError[M[K] <: LinearRegressorModel[K]](X: DrmLike[K],
+                             drmTarget: DrmLike[K],
+                             drmXtXinv: Matrix,
+                             model: M[K]): M[K] = {
+    import org.apache.mahout.math.function.Functions.SQRT
+    import org.apache.mahout.math.scalabindings.MahoutCollections._
+    var modelOut = model
+    val yhat = X %*% model.beta
+    val residuals = drmTarget - yhat
+    val ete = (residuals.t %*% residuals).collect // 1x1
+    val n = drmTarget.nrow
+    val k = safeToNonNegInt(X.ncol)
+    val invDegFreedomKindOf = 1.0 / (n - k)
+    val varCovarMatrix = invDegFreedomKindOf * ete(0,0) * drmXtXinv
+    val se = varCovarMatrix.viewDiagonal.assign(SQRT)
+    val tScore = model.beta / se
+    val tDist = new org.apache.commons.math3.distribution.TDistribution(n-k)
+    // Two-sided p-value: use |t| so that negative t-scores do not yield values above 1.
+    val pval = dvec(tScore.toArray.map(t => 2 * (1.0 - tDist.cumulativeProbability(math.abs(t))) ))
+    modelOut.summary = "Coef.\t\tEstimate\t\tStd. Error\t\tt-score\t\t\tPr(Beta=0)\n" +
+      (0 until k).map(i => s"X${i}\t${model.beta(i)}\t${se(i)}\t${tScore(i)}\t${pval(i)}").mkString("\n")
+
+    modelOut.se = se
+    modelOut.tScore = tScore
+    modelOut.pval = pval
+    // NOTE(review): this stores the number of columns, while the t-distribution above uses n - k;
+    // confirm which value `degreesFreedom` is meant to hold.
+    modelOut.degreesFreedom = X.ncol
+
+    if (calcCommonStatistics){
+      modelOut = calculateCommonStatistics(modelOut, drmTarget, residuals)
+    }
+    modelOut
+  }
+
+  /**
+    * Adds the coefficient of determination and the mean squared error to the model.
+    *
+    * @param model the fitted model; it is updated in place
+    * @param drmTarget the target column
+    * @param residuals target minus prediction
+    * @return the model after both tests have been applied
+    */
+  def calculateCommonStatistics[M[K] <: LinearRegressorModel[K]](model: M[K],
+                                                                 drmTarget: DrmLike[K],
+                                                                 residuals: DrmLike[K]): M[K] ={
+    val withR2 = FittnessTests.CoefficientOfDetermination(model, drmTarget, residuals)
+    FittnessTests.MeanSquareError(withR2, residuals)
+  }
+
+  /**
+    * Finishes a freshly fitted model: computes standard errors and/or common statistics according
+    * to the fitter's flags, builds the summary, and labels the last coefficient as the intercept
+    * when `addIntercept` is set.
+    *
+    * @param model the fitted model (its `beta` must already be set); it is updated in place
+    * @param X the design matrix the model was fitted on
+    * @param drmTarget the target column
+    * @param drmXtXinv the inverse of X'X
+    * @return the post-processed model
+    */
+  def modelPostprocessing[M[K] <: LinearRegressorModel[K]](model: M[K],
+                     X: DrmLike[K],
+                     drmTarget: DrmLike[K],
+                     drmXtXinv: Matrix): M[K] = {
+    var modelOut = model
+    if (calcStandardErrors) {
+      modelOut = calculateStandardError(X, drmTarget, drmXtXinv, model )
+    } else {
+      modelOut.summary = "Coef.\t\tEstimate\n" +
+        (0 until X.ncol).map(i => s"X${i}\t${modelOut.beta(i)}").mkString("\n")
+      if (calcCommonStatistics) { // we do this in calcStandard errors to avoid calculating residuals twice
+        val residuals = drmTarget - (X %*% modelOut.beta)
+        modelOut = calculateCommonStatistics(modelOut, drmTarget, residuals)
+      }
+    }
+
+    if (addIntercept) {
+      // String.replace returns a new string, so the result has to be stored back on the model
+      modelOut.summary = modelOut.summary.replace(s"X${X.ncol - 1}", "(Intercept)")
+    }
+    modelOut
+  }
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/9a31923e/math-scala/src/main/scala/org/apache/mahout/math/algorithms/regression/OrdinaryLeastSquaresModel.scala
----------------------------------------------------------------------
diff --git a/math-scala/src/main/scala/org/apache/mahout/math/algorithms/regression/OrdinaryLeastSquaresModel.scala b/math-scala/src/main/scala/org/apache/mahout/math/algorithms/regression/OrdinaryLeastSquaresModel.scala
new file mode 100644
index 0000000..d59701a
--- /dev/null
+++ b/math-scala/src/main/scala/org/apache/mahout/math/algorithms/regression/OrdinaryLeastSquaresModel.scala
@@ -0,0 +1,66 @@
+/**
+  * Licensed to the Apache Software Foundation (ASF) under one
+  * or more contributor license agreements. See the NOTICE file
+  * distributed with this work for additional information
+  * regarding copyright ownership. The ASF licenses this file
+  * to you under the Apache License, Version 2.0 (the
+  * "License"); you may not use this file except in compliance
+  * with the License. You may obtain a copy of the License at
+  *
+  * http://www.apache.org/licenses/LICENSE-2.0
+  *
+  * Unless required by applicable law or agreed to in writing,
+  * software distributed under the License is distributed on an
+  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  * KIND, either express or implied. See the License for the
+  * specific language governing permissions and limitations
+  * under the License.
+  */
+
+package org.apache.mahout.math.algorithms.regression
+
+import org.apache.mahout.math.drm.RLikeDrmOps._
+import org.apache.mahout.math.drm.DrmLike
+import org.apache.mahout.math.scalabindings._
+import org.apache.mahout.math.scalabindings.RLikeOps._
+
+class OrdinaryLeastSquaresModel[K]
+  extends LinearRegressorModel[K] {
+  // https://en.wikipedia.org/wiki/Ordinary_least_squares
+
+  /**
+    * Predicts by multiplying the predictor matrix with the coefficient vector `beta`.
+    * @param drmPredictors the predictor matrix
+    * @return the product of the predictors and `beta`
+    */
+  def predict(drmPredictors: DrmLike[K]): DrmLike[K] =
+    drmPredictors %*% beta
+
+}
+
+class OrdinaryLeastSquares[K] extends LinearRegressorFitter[K] {
+
+
+  /**
+    * Fits ordinary least squares via the normal equations: beta = (X'X)^-1 X'y.
+    *
+    * @param drmFeatures the feature matrix
+    * @param drmTarget the target column; must have as many rows as `drmFeatures`
+    * @param hyperparameters optional settings read by `setStandardHyperparameters`
+    * @return the fitted model after `modelPostprocessing`
+    * @throws Exception if features and target differ in row count
+    */
+  def fit(drmFeatures: DrmLike[K],
+          drmTarget: DrmLike[K],
+          hyperparameters: (Symbol, Any)*): OrdinaryLeastSquaresModel[K] = {
+
+    val model = new OrdinaryLeastSquaresModel[K]()
+    setStandardHyperparameters(hyperparameters.toMap)
+
+
+    if (drmFeatures.nrow != drmTarget.nrow){
+      throw new Exception(s"${drmFeatures.nrow} observations in features, ${drmTarget.nrow} observations in target, must be equal.")
+    }
+
+    // the intercept, when requested, is appended as a trailing column of ones
+    val X: DrmLike[K] = if (addIntercept) drmFeatures cbind 1 else drmFeatures
+
+    // X'X is collected once and reused; this fails when a (number of columns)^2 size matrix
+    // won't fit in the driver
+    val XtX = (X.t %*% X).collect
+    val drmXtXinv = solve(XtX)
+    val drmXty = (X.t %*% drmTarget).collect
+    model.beta = (drmXtXinv %*% drmXty)(::, 0)
+
+
+    this.modelPostprocessing(model, X, drmTarget, drmXtXinv)
+  }
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/9a31923e/math-scala/src/main/scala/org/apache/mahout/math/algorithms/regression/RegressorModel.scala
----------------------------------------------------------------------
diff --git a/math-scala/src/main/scala/org/apache/mahout/math/algorithms/regression/RegressorModel.scala b/math-scala/src/main/scala/org/apache/mahout/math/algorithms/regression/RegressorModel.scala
new file mode 100644
index 0000000..bdddb29
--- /dev/null
+++ b/math-scala/src/main/scala/org/apache/mahout/math/algorithms/regression/RegressorModel.scala
@@ -0,0 +1,58 @@
+/**
+  * Licensed to the Apache Software Foundation (ASF) under one
+  * or more contributor license agreements. See the NOTICE file
+  * distributed with this work for additional information
+  * regarding copyright ownership. The ASF licenses this file
+  * to you under the Apache License, Version 2.0 (the
+  * "License"); you may not use this file except in compliance
+  * with the License. You may obtain a copy of the License at
+  *
+  * http://www.apache.org/licenses/LICENSE-2.0
+  *
+  * Unless required by applicable law or agreed to in writing,
+  * software distributed under the License is distributed on an
+  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  * KIND, either express or implied. See the License for the
+  * specific language governing permissions and limitations
+  * under the License.
+  */
+
+package org.apache.mahout.math.algorithms.regression
+
+import org.apache.mahout.math.algorithms.{SupervisedFitter, SupervisedModel}
+import org.apache.mahout.math.drm.DrmLike
+
+trait RegressorModel[K] extends SupervisedModel[K] {
+
+  /**
+    * Predicts target values for the given predictors.
+    * @param drmPredictors the predictor matrix
+    * @return the predictions
+    */
+  def predict(drmPredictors: DrmLike[K]): DrmLike[K]
+
+  // Common Applicable Tests- here only for convenience.
+  var mse: Double = _
+  var r2: Double = _
+
+  /**
+    * Syntactic sugar for fetching test results. Returns the stored result wrapped in `Some` if it
+    * exists, otherwise `None`.
+    * @param testSymbol - symbol of the test result to fetch, e.g. `'mse`
+    * @tparam T - the type the caller expects the stored result to have (unchecked cast)
+    * @return the stored result, or `None` when no result is stored under `testSymbol`
+    */
+  def getTestResult[T](testSymbol: Symbol): Option[T] = {
+    // map over the lookup instead of wrapping the Option itself, so a missing key yields None
+    testResults.get(testSymbol).map(_.asInstanceOf[T])
+  }
+}
+
+trait RegressorFitter[K] extends SupervisedFitter[K, RegressorModel[K]] {
+
+  // Keeps the model produced by the most recent `fitPredict` call.
+  var model: RegressorModel[K] = _
+
+  /**
+    * Fits a model on the given data, stores it in `model`, and returns its predictions for the
+    * same features.
+    * @param drmX the feature matrix
+    * @param drmTarget the target column
+    * @param hyperparameters optional (symbol, value) settings forwarded to `fit`
+    * @return the fitted model's predictions for `drmX`
+    */
+  def fitPredict(drmX: DrmLike[K],
+                 drmTarget: DrmLike[K],
+                 hyperparameters: (Symbol, Any)* ): DrmLike[K] = {
+    val fitted = fit(drmX, drmTarget, hyperparameters: _*)
+    model = fitted
+    fitted.predict(drmX)
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/9a31923e/math-scala/src/main/scala/org/apache/mahout/math/algorithms/regression/tests/AutocorrelationTests.scala
----------------------------------------------------------------------
diff --git a/math-scala/src/main/scala/org/apache/mahout/math/algorithms/regression/tests/AutocorrelationTests.scala b/math-scala/src/main/scala/org/apache/mahout/math/algorithms/regression/tests/AutocorrelationTests.scala
new file mode 100644
index 0000000..2b16b74
--- /dev/null
+++ b/math-scala/src/main/scala/org/apache/mahout/math/algorithms/regression/tests/AutocorrelationTests.scala
@@ -0,0 +1,57 @@
+/**
+  * Licensed to the Apache Software Foundation (ASF) under one
+  * or more contributor license agreements. See the NOTICE file
+  * distributed with this work for additional information
+  * regarding copyright ownership. The ASF licenses this file
+  * to you under the Apache License, Version 2.0 (the
+  * "License"); you may not use this file except in compliance
+  * with the License. You may obtain a copy of the License at
+  *
+  * http://www.apache.org/licenses/LICENSE-2.0
+  *
+  * Unless required by applicable law or agreed to in writing,
+  * software distributed under the License is distributed on an
+  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  * KIND, either express or implied. See the License for the
+  * specific language governing permissions and limitations
+  * under the License.
+  */
+
+package org.apache.mahout.math.algorithms.regression.tests
+
+import org.apache.mahout.math.algorithms.regression.RegressorModel
+import org.apache.mahout.math.drm._
+import org.apache.mahout.math.drm.DrmLike
+import org.apache.mahout.math.drm.RLikeDrmOps._
+import org.apache.mahout.math.function.Functions.SQUARE
+import org.apache.mahout.math.scalabindings.RLikeOps._
+import scala.language.higherKinds
+
+object AutocorrelationTests {
+
+  /**
+    * Durbin-Watson test statistic, see https://en.wikipedia.org/wiki/Durbin%E2%80%93Watson_statistic
+    *
+    * The statistic d is the sum of squared differences between consecutive residuals divided by
+    * the sum of squared residuals. To test for positive autocorrelation at significance alpha,
+    * d is compared to lower and upper critical values (dL,alpha and dU,alpha):
+    *   - d < dL,alpha: statistical evidence that the error terms are positively autocorrelated
+    *   - d > dU,alpha: no statistical evidence that the error terms are positively autocorrelated
+    *   - dL,alpha < d < dU,alpha: the test is inconclusive
+    *
+    * Rule of thumb:
+    *   - d < 2 : positive auto-correlation
+    *   - d = 2 : no auto-correlation
+    *   - d > 2 : negative auto-correlation
+    *
+    * @param model the model to annotate; its `testResults` and `summary` are updated in place
+    * @param residuals the residuals; only the first column is used for the lagged difference
+    * @return the same model, with `'durbinWatsonTestStatistic` added to its test results
+    */
+  def DurbinWatson[R[K] <: RegressorModel[K], K](model: R[K], residuals: DrmLike[K]): R[K] = {
+
+    val rowCount = safeToNonNegInt(residuals.nrow)
+    // residuals from the second row on, and the same series shifted back by one row
+    val current: DrmLike[K] = residuals(1 until rowCount, 0 until 1)
+    val lagged: DrmLike[K] = residuals(0 until rowCount - 1, 0 until 1)
+    val sumSquaredDiffs = (current - lagged).assign(SQUARE).colSums()
+    val sumSquaredResiduals = residuals.assign(SQUARE).colSums()
+    val dw = sumSquaredDiffs / sumSquaredResiduals
+    model.testResults += ('durbinWatsonTestStatistic -> dw.get(0))
+    model.summary += s"\nDurbin Watson Test Statistic: ${dw.toString}"
+    model
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/9a31923e/math-scala/src/main/scala/org/apache/mahout/math/algorithms/regression/tests/FittnessTests.scala
----------------------------------------------------------------------
diff --git a/math-scala/src/main/scala/org/apache/mahout/math/algorithms/regression/tests/FittnessTests.scala b/math-scala/src/main/scala/org/apache/mahout/math/algorithms/regression/tests/FittnessTests.scala
new file mode 100644
index 0000000..d1dd3bb
--- /dev/null
+++ b/math-scala/src/main/scala/org/apache/mahout/math/algorithms/regression/tests/FittnessTests.scala
@@ -0,0 +1,56 @@
+/**
+  * Licensed to the Apache Software Foundation (ASF) under one
+  * or more contributor license agreements. See the NOTICE file
+  * distributed with this work for additional information
+  * regarding copyright ownership. The ASF licenses this file
+  * to you under the Apache License, Version 2.0 (the
+  * "License"); you may not use this file except in compliance
+  * with the License. You may obtain a copy of the License at
+  *
+  * http://www.apache.org/licenses/LICENSE-2.0
+  *
+  * Unless required by applicable law or agreed to in writing,
+  * software distributed under the License is distributed on an
+  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  * KIND, either express or implied. See the License for the
+  * specific language governing permissions and limitations
+  * under the License.
+  */
+
+package org.apache.mahout.math.algorithms.regression.tests
+
+import org.apache.mahout.math.algorithms.regression.RegressorModel
+import org.apache.mahout.math.algorithms.preprocessing.MeanCenter
+import org.apache.mahout.math.drm.DrmLike
+import org.apache.mahout.math.function.Functions.SQUARE
+import org.apache.mahout.math.scalabindings.RLikeOps._
+
+import scala.language.higherKinds
+import scala.reflect.ClassTag
+
+object FittnessTests {
+
+  /**
+    * Coefficient of determination, see https://en.wikipedia.org/wiki/Coefficient_of_determination
+    *
+    * Computes 1 - (sum of squared residuals / sum of squares of the mean-centered target), then
+    * stores it in `model.r2`, in `model.testResults` under `'r2`, and appends it to the summary.
+    *
+    * @param model the model to annotate; updated in place
+    * @param drmTarget the target column
+    * @param residuals target minus prediction
+    * @return the same model
+    */
+  def CoefficientOfDetermination[R[K] <: RegressorModel[K], K](model: R[K],
+                                                               drmTarget: DrmLike[K],
+                                                               residuals: DrmLike[K]): R[K] = {
+    val residualSumSquares = residuals.assign(SQUARE).sum
+    val centeredTarget = new MeanCenter().fitTransform(drmTarget)
+    val totalSumSquares = centeredTarget.assign(SQUARE).sum
+    val rSquared = 1 - (residualSumSquares / totalSumSquares)
+    model.r2 = rSquared
+    // need setResult and setSummary method incase you change in future, also to initialize map if non exists or update value if it does
+    model.testResults += ('r2 -> rSquared)
+    model.summary += s"\nR^2: ${rSquared}"
+    model
+  }
+
+  /**
+    * Mean squared error, see https://en.wikipedia.org/wiki/Mean_squared_error
+    *
+    * Computes the sum of squared residuals divided by the number of residual rows, then stores it
+    * in `model.mse`, in `model.testResults` under `'mse`, and appends it to the summary.
+    *
+    * @param model the model to annotate; updated in place
+    * @param residuals target minus prediction
+    * @return the same model
+    */
+  def MeanSquareError[R[K] <: RegressorModel[K], K](model: R[K], residuals: DrmLike[K]): R[K] = {
+    val meanSquaredError = residuals.assign(SQUARE).sum / residuals.nrow
+    model.mse = meanSquaredError
+    model.testResults += ('mse -> meanSquaredError)
+    model.summary += s"\nMean Squared Error: ${meanSquaredError}"
+    model
+  }
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/9a31923e/math-scala/src/test/scala/org/apache/mahout/math/algorithms/PreprocessorSuiteBase.scala
----------------------------------------------------------------------
diff --git a/math-scala/src/test/scala/org/apache/mahout/math/algorithms/PreprocessorSuiteBase.scala b/math-scala/src/test/scala/org/apache/mahout/math/algorithms/PreprocessorSuiteBase.scala
new file mode 100644
index 0000000..9e8f029
--- /dev/null
+++ b/math-scala/src/test/scala/org/apache/mahout/math/algorithms/PreprocessorSuiteBase.scala
@@ -0,0 +1,59 @@
+/**
+  * Licensed to the Apache Software Foundation (ASF) under one
+  * or more contributor license agreements. See the NOTICE file
+  * distributed with this work for additional information
+  * regarding copyright ownership. The ASF licenses this file
+  * to you under the Apache License, Version 2.0 (the
+  * "License"); you may not use this file except in compliance
+  * with the License. You may obtain a copy of the License at
+  *
+  * http://www.apache.org/licenses/LICENSE-2.0
+  *
+  * Unless required by applicable law or agreed to in writing,
+  * software distributed under the License is distributed on an
+  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  * KIND, either express or implied. See the License for the
+  * specific language governing permissions and limitations
+  * under the License.
+  */
+
+package org.apache.mahout.math.algorithms
+
+import org.apache.mahout.math.algorithms.preprocessing.{AsFactor, AsFactorModel}
+import org.apache.mahout.math.drm.drmParallelize
+import org.apache.mahout.math.scalabindings.{dense, sparse, svec}
+import org.apache.mahout.math.scalabindings.RLikeOps._
+import org.apache.mahout.test.DistributedMahoutSuite
+import org.scalatest.{FunSuite, Matchers}
+
+trait PreprocessorSuiteBase extends DistributedMahoutSuite with Matchers {
+
+  this: FunSuite =>
+
+  // Fits AsFactor on a small 3 x 4 matrix and checks the transformed output element-wise
+  // against a hand-built expected matrix.
+  test("asfactor test") {
+    val A = drmParallelize(dense(
+      (3, 2, 1, 2),
+      (0, 0, 0, 0),
+      (1, 1, 1, 1)), numPartitions = 2)
+
+    // Expected output positions per input column (as encoded in correctAnswer below):
+    // column 0 -> 0..3, column 1 -> 4..6, column 2 -> 7..8, column 3 -> 9..11
+    val factorizer: AsFactorModel = new AsFactor().fit(A)
+
+    val factoredA = factorizer.transform(A)
+
+    println(factoredA)
+    println(factorizer.factorMap)
+    val correctAnswer = sparse(
+      svec((3 -> 1.0) :: (6 -> 1.0) :: (8 -> 1.0) :: (11 -> 1.0) :: Nil, cardinality = 12),
+      svec((0 -> 1.0) :: (4 -> 1.0) :: (7 -> 1.0) :: ( 9 -> 1.0) :: Nil, cardinality = 12),
+      svec((1 -> 1.0) :: (5 -> 1.0) :: (8 -> 1.0) :: (10 -> 1.0) :: Nil, cardinality = 12)
+    )
+
+    val myAnswer = factoredA.collect
+
+    val epsilon = 1E-6
+    // Compare element-wise: the norm of the difference is zero only when the matrices agree,
+    // whereas comparing the two norms passes for any matrix with the same (or a smaller) norm.
+    (myAnswer - correctAnswer).norm should be <= epsilon
+
+  }
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/9a31923e/math-scala/src/test/scala/org/apache/mahout/math/algorithms/RegressionSuiteBase.scala
----------------------------------------------------------------------
diff --git a/math-scala/src/test/scala/org/apache/mahout/math/algorithms/RegressionSuiteBase.scala b/math-scala/src/test/scala/org/apache/mahout/math/algorithms/RegressionSuiteBase.scala
new file mode 100644
index 0000000..2bb0343
--- /dev/null
+++ b/math-scala/src/test/scala/org/apache/mahout/math/algorithms/RegressionSuiteBase.scala
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.math.algorithms
+
+import org.apache.mahout.math.algorithms.regression.OrdinaryLeastSquares
+import org.apache.mahout.math.drm._
+import org.apache.mahout.math.drm.RLikeDrmOps._
+import org.apache.mahout.math.scalabindings._
+import org.apache.mahout.math.scalabindings.RLikeOps._
+import org.apache.mahout.test.DistributedMahoutSuite
+import org.scalatest.{FunSuite, Matchers}
+
+/** Shared regression tests, meant to be mixed into a concrete `FunSuite` (see the self-type). */
+trait RegressionSuiteBase extends DistributedMahoutSuite with Matchers {
+  this: FunSuite =>
+
+  test("ordinary least squares") {
+    /*
+    R Prototype:
+    dataM <- matrix( c(2, 2, 10.5, 10, 29.509541,
+      1, 2, 12,   12, 18.042851,
+      1, 1, 12,   13, 22.736446,
+      2, 1, 11,   13, 32.207582,
+      1, 2, 12,   11, 21.871292,
+      2, 1, 16,   8,  36.187559,
+      6, 2, 17,   1,  50.764999,
+      3, 2, 13,   7,  40.400208,
+      3, 3, 13,   4,  45.811716), nrow=9, ncol=5, byrow=TRUE)
+
+    X = dataM[, c(1,2,3,4)]
+    y = dataM[, c(5)]
+
+    model <- lm(y ~ X )
+    summary(model)
+     */
+
+    val drmData = drmParallelize(dense(
+      (2, 2, 10.5, 10, 29.509541),  // Apple Cinnamon Cheerios
+      (1, 2, 12,   12, 18.042851),  // Cap'n'Crunch
+      (1, 1, 12,   13, 22.736446),  // Cocoa Puffs
+      (2, 1, 11,   13, 32.207582),  // Froot Loops
+      (1, 2, 12,   11, 21.871292),  // Honey Graham Ohs
+      (2, 1, 16,   8,  36.187559),  // Wheaties Honey Gold
+      (6, 2, 17,   1,  50.764999),  // Cheerios
+      (3, 2, 13,   7,  40.400208),  // Clusters
+      (3, 3, 13,   4,  45.811716)), numPartitions = 2)
+
+    // The first four columns are the predictors; the last column is the response.
+    val drmX = drmData(::, 0 until 4)
+    val drmY = drmData(::, 4 until 5)
+
+    val model = new OrdinaryLeastSquares[Int]().fit(drmX, drmY, 'calcCommonStatistics -> false)
+
+    val estimate = model.beta
+    // Coefficients reported by the R prototype above (five values for four predictors;
+    // presumably the intercept is the last one -- confirm against OrdinaryLeastSquares).
+    val Ranswers = dvec(-1.336265, -13.157702, -4.152654, -5.679908, 163.179329)
+
+    // Check the largest absolute coefficient error (infinity norm). A signed sum would let
+    // positive and negative errors cancel and would accept any arbitrarily negative total.
+    val epsilon = 1E-6
+    (estimate - Ranswers).norm(Double.PositiveInfinity) should be < epsilon
+
+    // TODO add test for S.E / pvalue
+  }
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/9a31923e/math-scala/src/test/scala/org/apache/mahout/math/algorithms/RegressionTestsSuiteBase.scala
----------------------------------------------------------------------
diff --git a/math-scala/src/test/scala/org/apache/mahout/math/algorithms/RegressionTestsSuiteBase.scala b/math-scala/src/test/scala/org/apache/mahout/math/algorithms/RegressionTestsSuiteBase.scala
new file mode 100644
index 0000000..1178a9b
--- /dev/null
+++ b/math-scala/src/test/scala/org/apache/mahout/math/algorithms/RegressionTestsSuiteBase.scala
@@ -0,0 +1,87 @@
+/**
+  * Licensed to the Apache Software Foundation (ASF) under one
+  * or more contributor license agreements. See the NOTICE file
+  * distributed with this work for additional information
+  * regarding copyright ownership. The ASF licenses this file
+  * to you under the Apache License, Version 2.0 (the
+  * "License"); you may not use this file except in compliance
+  * with the License. You may obtain a copy of the License at
+  *
+  * http://www.apache.org/licenses/LICENSE-2.0
+  *
+  * Unless required by applicable law or agreed to in writing,
+  * software distributed under the License is distributed on an
+  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  * KIND, either express or implied. See the License for the
+  * specific language governing permissions and limitations
+  * under the License.
+  */
+
+package org.apache.mahout.math.algorithms
+
+import org.apache.mahout.math.algorithms.regression.OrdinaryLeastSquares
+import org.apache.mahout.math.drm.drmParallelize
+import org.apache.mahout.math.drm.RLikeDrmOps._
+import org.apache.mahout.math.scalabindings.{`::`, dense}
+import org.apache.mahout.test.DistributedMahoutSuite
+import org.scalatest.{FunSuite, Matchers}
+
+
+/** Shared tests of regression fitness statistics, meant to be mixed into a concrete `FunSuite`. */
+trait RegressionTestsSuiteBase extends DistributedMahoutSuite with Matchers {
+  this: FunSuite =>
+
+  /** Absolute tolerance used when comparing against the reference values obtained from R. */
+  val epsilon = 1E-4
+
+  test("fittness tests") {
+    /*
+    R Prototype:
+    dataM <- matrix( c(2, 2, 10.5, 10, 29.509541,
+      1, 2, 12,   12, 18.042851,
+      1, 1, 12,   13, 22.736446,
+      2, 1, 11,   13, 32.207582,
+      1, 2, 12,   11, 21.871292,
+      2, 1, 16,   8,  36.187559,
+      6, 2, 17,   1,  50.764999,
+      3, 2, 13,   7,  40.400208,
+      3, 3, 13,   4,  45.811716), nrow=9, ncol=5, byrow=TRUE)
+
+    X = dataM[, c(1,2,3,4)]
+    y = dataM[, c(5)]
+
+    model <- lm(y ~ X)
+    summary(model)
+     */
+
+    val drmData = drmParallelize(dense(
+      (2, 2, 10.5, 10, 29.509541),  // Apple Cinnamon Cheerios
+      (1, 2, 12,   12, 18.042851),  // Cap'n'Crunch
+      (1, 1, 12,   13, 22.736446),  // Cocoa Puffs
+      (2, 1, 11,   13, 32.207582),  // Froot Loops
+      (1, 2, 12,   11, 21.871292),  // Honey Graham Ohs
+      (2, 1, 16,   8,  36.187559),  // Wheaties Honey Gold
+      (6, 2, 17,   1,  50.764999),  // Cheerios
+      (3, 2, 13,   7,  40.400208),  // Clusters
+      (3, 3, 13,   4,  45.811716)), numPartitions = 2)
+
+    val drmX = drmData(::, 0 until 4)
+    val drmY = drmData(::, 4 until 5)
+
+    val model = new OrdinaryLeastSquares[Int]().fit(drmX, drmY)
+    println(model.summary)
+
+    // Answers from running similar algorithm in R
+    val rR2 = 0.9425
+    val rMSE = 6.457157
+
+    val r2: Double = model.r2
+    val mse: Double = model.mse
+    println("R2: " + r2)
+    println("MSE: " + mse)
+
+    // Compare magnitudes: a signed difference would pass whenever the estimate overshoots R's value.
+    (rR2 - r2).abs should be < epsilon
+    (rMSE - mse).abs should be < epsilon
+  }
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/9a31923e/spark/src/test/scala/org/apache/mahout/math/algorithms/PreprocessorSuite.scala
----------------------------------------------------------------------
diff --git a/spark/src/test/scala/org/apache/mahout/math/algorithms/PreprocessorSuite.scala b/spark/src/test/scala/org/apache/mahout/math/algorithms/PreprocessorSuite.scala
new file mode 100644
index 0000000..4a1f074
--- /dev/null
+++ b/spark/src/test/scala/org/apache/mahout/math/algorithms/PreprocessorSuite.scala
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.math.algorithms
+
+import org.apache.mahout.sparkbindings.test.DistributedSparkSuite
+import org.scalatest.FunSuite
+
+/** Runs the tests defined in `PreprocessorSuiteBase` with the `DistributedSparkSuite` fixture. */
+class PreprocessorSuite extends FunSuite with DistributedSparkSuite with PreprocessorSuiteBase
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/mahout/blob/9a31923e/spark/src/test/scala/org/apache/mahout/math/algorithms/RegressionSuite.scala
----------------------------------------------------------------------
diff --git a/spark/src/test/scala/org/apache/mahout/math/algorithms/RegressionSuite.scala b/spark/src/test/scala/org/apache/mahout/math/algorithms/RegressionSuite.scala
new file mode 100644
index 0000000..bb99d61
--- /dev/null
+++ b/spark/src/test/scala/org/apache/mahout/math/algorithms/RegressionSuite.scala
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.math.algorithms
+
+import org.apache.mahout.sparkbindings.test.DistributedSparkSuite
+import org.scalatest.FunSuite
+
+/** Runs the tests defined in `RegressionSuiteBase` with the `DistributedSparkSuite` fixture. */
+class RegressionSuite extends FunSuite with DistributedSparkSuite with RegressionSuiteBase
+

http://git-wip-us.apache.org/repos/asf/mahout/blob/9a31923e/spark/src/test/scala/org/apache/mahout/math/algorithms/RegressionTestsSuite.scala
----------------------------------------------------------------------
diff --git a/spark/src/test/scala/org/apache/mahout/math/algorithms/RegressionTestsSuite.scala b/spark/src/test/scala/org/apache/mahout/math/algorithms/RegressionTestsSuite.scala
new file mode 100644
index 0000000..07864f8
--- /dev/null
+++ b/spark/src/test/scala/org/apache/mahout/math/algorithms/RegressionTestsSuite.scala
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.math.algorithms
+
+import org.apache.mahout.sparkbindings.test.DistributedSparkSuite
+import org.scalatest.FunSuite
+
+/** Runs the tests defined in `RegressionTestsSuiteBase` with the `DistributedSparkSuite` fixture. */
+class RegressionTestsSuite extends FunSuite with DistributedSparkSuite with RegressionTestsSuiteBase
+