You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by du...@apache.org on 2016/09/13 04:16:09 UTC
incubator-systemml git commit: [SYSTEMML-912] Fix Broken Python Tests Due to Index Changes
Repository: incubator-systemml
Updated Branches:
refs/heads/master fb3ba987e -> 194a80f26
[SYSTEMML-912] Fix Broken Python Tests Due to Index Changes
Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/194a80f2
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/194a80f2
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/194a80f2
Branch: refs/heads/master
Commit: 194a80f26aad728341190fe9d0a005d5ffcfe474
Parents: fb3ba98
Author: Mike Dusenberry <mw...@us.ibm.com>
Authored: Mon Sep 12 21:15:04 2016 -0700
Committer: Mike Dusenberry <mw...@us.ibm.com>
Committed: Mon Sep 12 21:15:04 2016 -0700
----------------------------------------------------------------------
src/main/python/systemml/mllearn/estimators.py | 4 +--
src/main/python/tests/test_mllearn.py | 34 ++++++++++----------
.../apache/sysml/api/ml/PredictionUtils.scala | 5 ++-
.../sysml/api/ml/ScalaAutomatedTestBase.scala | 4 +--
4 files changed, 23 insertions(+), 24 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/194a80f2/src/main/python/systemml/mllearn/estimators.py
----------------------------------------------------------------------
diff --git a/src/main/python/systemml/mllearn/estimators.py b/src/main/python/systemml/mllearn/estimators.py
index 97ab6bb..ceead4d 100644
--- a/src/main/python/systemml/mllearn/estimators.py
+++ b/src/main/python/systemml/mllearn/estimators.py
@@ -125,7 +125,7 @@ class BaseSystemMLEstimator(Estimator):
df = assemble(self.sqlCtx, pdfX, pdfX.columns, self.featuresCol).select(self.featuresCol)
retjDF = self.model.transform(df._jdf)
retDF = DataFrame(retjDF, self.sqlCtx)
- retPDF = retDF.sort('ID').select('prediction').toPandas()
+ retPDF = retDF.sort('__INDEX').select('prediction').toPandas()
if isinstance(X, np.ndarray):
return retPDF.as_matrix().flatten()
else:
@@ -146,7 +146,7 @@ class BaseSystemMLEstimator(Estimator):
retjDF = self.model.transform(df._jdf)
retDF = DataFrame(retjDF, self.sqlCtx)
# Return DF
- return retDF.sort('ID')
+ return retDF.sort('__INDEX')
else:
raise Exception('Unsupported input type')
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/194a80f2/src/main/python/tests/test_mllearn.py
----------------------------------------------------------------------
diff --git a/src/main/python/tests/test_mllearn.py b/src/main/python/tests/test_mllearn.py
index 1a1a6cb..ae7b936 100644
--- a/src/main/python/tests/test_mllearn.py
+++ b/src/main/python/tests/test_mllearn.py
@@ -67,29 +67,29 @@ class TestMLLearn(unittest.TestCase):
def testLogisticMLPipeline1(self):
training = sqlCtx.createDataFrame([
- (0, "a b c d e spark", 1.0),
- (1, "b d", 2.0),
- (2, "spark f g h", 1.0),
- (3, "hadoop mapreduce", 2.0),
- (4, "b spark who", 1.0),
- (5, "g d a y", 2.0),
- (6, "spark fly", 1.0),
- (7, "was mapreduce", 2.0),
- (8, "e spark program", 1.0),
- (9, "a e c l", 2.0),
- (10, "spark compile", 1.0),
- (11, "hadoop software", 2.0)
- ], ["__INDEX", "text", "label"])
+ ("a b c d e spark", 1.0),
+ ("b d", 2.0),
+ ("spark f g h", 1.0),
+ ("hadoop mapreduce", 2.0),
+ ("b spark who", 1.0),
+ ("g d a y", 2.0),
+ ("spark fly", 1.0),
+ ("was mapreduce", 2.0),
+ ("e spark program", 1.0),
+ ("a e c l", 2.0),
+ ("spark compile", 1.0),
+ ("hadoop software", 2.0)
+ ], ["text", "label"])
tokenizer = Tokenizer(inputCol="text", outputCol="words")
hashingTF = HashingTF(inputCol="words", outputCol="features", numFeatures=20)
lr = LogisticRegression(sqlCtx)
pipeline = Pipeline(stages=[tokenizer, hashingTF, lr])
model = pipeline.fit(training)
test = sqlCtx.createDataFrame([
- (12, "spark i j k", 1.0),
- (13, "l m n", 2.0),
- (14, "mapreduce spark", 1.0),
- (15, "apache hadoop", 2.0)], ["__INDEX", "text", "label"])
+ ("spark i j k", 1.0),
+ ("l m n", 2.0),
+ ("mapreduce spark", 1.0),
+ ("apache hadoop", 2.0)], ["text", "label"])
result = model.transform(test)
predictionAndLabels = result.select("prediction", "label")
evaluator = MulticlassClassificationEvaluator()
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/194a80f2/src/main/scala/org/apache/sysml/api/ml/PredictionUtils.scala
----------------------------------------------------------------------
diff --git a/src/main/scala/org/apache/sysml/api/ml/PredictionUtils.scala b/src/main/scala/org/apache/sysml/api/ml/PredictionUtils.scala
index 895fcd6..e2a1c0d 100644
--- a/src/main/scala/org/apache/sysml/api/ml/PredictionUtils.scala
+++ b/src/main/scala/org/apache/sysml/api/ml/PredictionUtils.scala
@@ -131,8 +131,7 @@ object PredictionUtils {
}
def joinUsingID(df1:DataFrame, df2:DataFrame):DataFrame = {
- val tempDF1 = df1.withColumnRenamed("__INDEX", "ID1")
- tempDF1.join(df2, tempDF1.col("ID1").equalTo(df2.col("__INDEX"))).drop("ID1")
+ df1.join(df2, "__INDEX")
}
def computePredictedClassLabelsFromProbability(mlscoreoutput:MLResults, isSingleNode:Boolean, sc:SparkContext, inProbVar:String): MLResults = {
@@ -151,4 +150,4 @@ object PredictionUtils {
ml.execute(script.in("Prob", probVar))
}
}
-}
\ No newline at end of file
+}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/194a80f2/src/test/scala/org/apache/sysml/api/ml/ScalaAutomatedTestBase.scala
----------------------------------------------------------------------
diff --git a/src/test/scala/org/apache/sysml/api/ml/ScalaAutomatedTestBase.scala b/src/test/scala/org/apache/sysml/api/ml/ScalaAutomatedTestBase.scala
index 5e63de0..cb1a223 100644
--- a/src/test/scala/org/apache/sysml/api/ml/ScalaAutomatedTestBase.scala
+++ b/src/test/scala/org/apache/sysml/api/ml/ScalaAutomatedTestBase.scala
@@ -49,6 +49,6 @@ object ScalaAutomatedTestBase {
}
// This ensures that MLPipeline wrappers get appropriate paths to the scripts
- ScriptsUtils.setSystemmlHome(System.getProperty("user.dir") + File.separator + "scripts")
+ ScriptsUtils.setSystemmlHome(System.getProperty("user.dir"))
// *** END HACK ***
-}
\ No newline at end of file
+}