You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by du...@apache.org on 2016/09/13 04:16:09 UTC
incubator-systemml git commit: [SYSTEMML-912] Fix Broken Python Tests Due to Index Changes

Repository: incubator-systemml
Updated Branches:
  refs/heads/master fb3ba987e -> 194a80f26


[SYSTEMML-912] Fix Broken Python Tests Due to Index Changes


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/194a80f2
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/194a80f2
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/194a80f2

Branch: refs/heads/master
Commit: 194a80f26aad728341190fe9d0a005d5ffcfe474
Parents: fb3ba98
Author: Mike Dusenberry <mw...@us.ibm.com>
Authored: Mon Sep 12 21:15:04 2016 -0700
Committer: Mike Dusenberry <mw...@us.ibm.com>
Committed: Mon Sep 12 21:15:04 2016 -0700

----------------------------------------------------------------------
 src/main/python/systemml/mllearn/estimators.py  |  4 +--
 src/main/python/tests/test_mllearn.py           | 34 ++++++++++----------
 .../apache/sysml/api/ml/PredictionUtils.scala   |  5 ++-
 .../sysml/api/ml/ScalaAutomatedTestBase.scala   |  4 +--
 4 files changed, 23 insertions(+), 24 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/194a80f2/src/main/python/systemml/mllearn/estimators.py
----------------------------------------------------------------------
diff --git a/src/main/python/systemml/mllearn/estimators.py b/src/main/python/systemml/mllearn/estimators.py
index 97ab6bb..ceead4d 100644
--- a/src/main/python/systemml/mllearn/estimators.py
+++ b/src/main/python/systemml/mllearn/estimators.py
@@ -125,7 +125,7 @@ class BaseSystemMLEstimator(Estimator):
                 df = assemble(self.sqlCtx, pdfX, pdfX.columns, self.featuresCol).select(self.featuresCol)
                 retjDF = self.model.transform(df._jdf)
                 retDF = DataFrame(retjDF, self.sqlCtx)
-                retPDF = retDF.sort('ID').select('prediction').toPandas()
+                retPDF = retDF.sort('__INDEX').select('prediction').toPandas()
                 if isinstance(X, np.ndarray):
                     return retPDF.as_matrix().flatten()
                 else:
@@ -146,7 +146,7 @@ class BaseSystemMLEstimator(Estimator):
             retjDF = self.model.transform(df._jdf)
             retDF = DataFrame(retjDF, self.sqlCtx)
             # Return DF
-            return retDF.sort('ID')
+            return retDF.sort('__INDEX')
         else:
             raise Exception('Unsupported input type')
 

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/194a80f2/src/main/python/tests/test_mllearn.py
----------------------------------------------------------------------
diff --git a/src/main/python/tests/test_mllearn.py b/src/main/python/tests/test_mllearn.py
index 1a1a6cb..ae7b936 100644
--- a/src/main/python/tests/test_mllearn.py
+++ b/src/main/python/tests/test_mllearn.py
@@ -67,29 +67,29 @@ class TestMLLearn(unittest.TestCase):
 
     def testLogisticMLPipeline1(self):
         training = sqlCtx.createDataFrame([
-            (0, "a b c d e spark", 1.0),
-            (1, "b d", 2.0),
-            (2, "spark f g h", 1.0),
-            (3, "hadoop mapreduce", 2.0),
-            (4, "b spark who", 1.0),
-            (5, "g d a y", 2.0),
-            (6, "spark fly", 1.0),
-            (7, "was mapreduce", 2.0),
-            (8, "e spark program", 1.0),
-            (9, "a e c l", 2.0),
-            (10, "spark compile", 1.0),
-            (11, "hadoop software", 2.0)
-            ], ["__INDEX", "text", "label"])
+            ("a b c d e spark", 1.0),
+            ("b d", 2.0),
+            ("spark f g h", 1.0),
+            ("hadoop mapreduce", 2.0),
+            ("b spark who", 1.0),
+            ("g d a y", 2.0),
+            ("spark fly", 1.0),
+            ("was mapreduce", 2.0),
+            ("e spark program", 1.0),
+            ("a e c l", 2.0),
+            ("spark compile", 1.0),
+            ("hadoop software", 2.0)
+            ], ["text", "label"])
         tokenizer = Tokenizer(inputCol="text", outputCol="words")
         hashingTF = HashingTF(inputCol="words", outputCol="features", numFeatures=20)
         lr = LogisticRegression(sqlCtx)
         pipeline = Pipeline(stages=[tokenizer, hashingTF, lr])
         model = pipeline.fit(training)
         test = sqlCtx.createDataFrame([
-            (12, "spark i j k", 1.0),
-            (13, "l m n", 2.0),
-            (14, "mapreduce spark", 1.0),
-            (15, "apache hadoop", 2.0)], ["__INDEX", "text", "label"])
+            ("spark i j k", 1.0),
+            ("l m n", 2.0),
+            ("mapreduce spark", 1.0),
+            ("apache hadoop", 2.0)], ["text", "label"])
         result = model.transform(test)
         predictionAndLabels = result.select("prediction", "label")
         evaluator = MulticlassClassificationEvaluator()

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/194a80f2/src/main/scala/org/apache/sysml/api/ml/PredictionUtils.scala
----------------------------------------------------------------------
diff --git a/src/main/scala/org/apache/sysml/api/ml/PredictionUtils.scala b/src/main/scala/org/apache/sysml/api/ml/PredictionUtils.scala
index 895fcd6..e2a1c0d 100644
--- a/src/main/scala/org/apache/sysml/api/ml/PredictionUtils.scala
+++ b/src/main/scala/org/apache/sysml/api/ml/PredictionUtils.scala
@@ -131,8 +131,7 @@ object PredictionUtils {
   }
   
   def joinUsingID(df1:DataFrame, df2:DataFrame):DataFrame = {
-    val tempDF1 = df1.withColumnRenamed("__INDEX", "ID1")
-    tempDF1.join(df2, tempDF1.col("ID1").equalTo(df2.col("__INDEX"))).drop("ID1")
+    df1.join(df2, "__INDEX")
   }
   
   def computePredictedClassLabelsFromProbability(mlscoreoutput:MLResults, isSingleNode:Boolean, sc:SparkContext, inProbVar:String): MLResults = {
@@ -151,4 +150,4 @@ object PredictionUtils {
       ml.execute(script.in("Prob", probVar))
     }
   }
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/194a80f2/src/test/scala/org/apache/sysml/api/ml/ScalaAutomatedTestBase.scala
----------------------------------------------------------------------
diff --git a/src/test/scala/org/apache/sysml/api/ml/ScalaAutomatedTestBase.scala b/src/test/scala/org/apache/sysml/api/ml/ScalaAutomatedTestBase.scala
index 5e63de0..cb1a223 100644
--- a/src/test/scala/org/apache/sysml/api/ml/ScalaAutomatedTestBase.scala
+++ b/src/test/scala/org/apache/sysml/api/ml/ScalaAutomatedTestBase.scala
@@ -49,6 +49,6 @@ object ScalaAutomatedTestBase {
 	}
 
 	// This ensures that MLPipeline wrappers get appropriate paths to the scripts
-	ScriptsUtils.setSystemmlHome(System.getProperty("user.dir") + File.separator + "scripts")
+	ScriptsUtils.setSystemmlHome(System.getProperty("user.dir"))
 	// *** END HACK ***
-}
\ No newline at end of file
+}