You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by me...@apache.org on 2015/06/23 22:12:36 UTC
spark git commit: [SPARK-8525] [MLLIB] fix LabeledPoint parser when
there is a whitespace between label and features vector
Repository: spark
Updated Branches:
refs/heads/master f2fb0285a -> a8031183a
[SPARK-8525] [MLLIB] fix LabeledPoint parser when there is a whitespace between label and features vector
Fix the LabeledPoint parser so that it accepts whitespace between the label and the features vector, e.g.
(y, [x1, x2, x3])
Author: Oleksiy Dyagilev <ol...@epam.com>
Closes #6954 from fe2s/SPARK-8525 and squashes the following commits:
0755b9d [Oleksiy Dyagilev] [SPARK-8525][MLLIB] addressing comment, removing dep on commons-lang
c1abc2b [Oleksiy Dyagilev] [SPARK-8525][MLLIB] fix LabeledPoint parser when there is a whitespace on specific position
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a8031183
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a8031183
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a8031183
Branch: refs/heads/master
Commit: a8031183aff2e23de9204ddfc7e7f5edbf052a7e
Parents: f2fb028
Author: Oleksiy Dyagilev <ol...@epam.com>
Authored: Tue Jun 23 13:12:19 2015 -0700
Committer: Xiangrui Meng <me...@databricks.com>
Committed: Tue Jun 23 13:12:19 2015 -0700
----------------------------------------------------------------------
.../scala/org/apache/spark/mllib/util/NumericParser.scala | 2 ++
.../org/apache/spark/mllib/regression/LabeledPointSuite.scala | 5 +++++
.../org/apache/spark/mllib/util/NumericParserSuite.scala | 7 +++++++
3 files changed, 14 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/a8031183/mllib/src/main/scala/org/apache/spark/mllib/util/NumericParser.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/NumericParser.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/NumericParser.scala
index 308f7f3..a841c5c 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/util/NumericParser.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/util/NumericParser.scala
@@ -98,6 +98,8 @@ private[mllib] object NumericParser {
}
} else if (token == ")") {
parsing = false
+ } else if (token.trim.isEmpty){
+ // ignore whitespaces between delim chars, e.g. ", ["
} else {
// expecting a number
items.append(parseDouble(token))
http://git-wip-us.apache.org/repos/asf/spark/blob/a8031183/mllib/src/test/scala/org/apache/spark/mllib/regression/LabeledPointSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/regression/LabeledPointSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/regression/LabeledPointSuite.scala
index d8364a0..f8d0af8 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/regression/LabeledPointSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/regression/LabeledPointSuite.scala
@@ -31,6 +31,11 @@ class LabeledPointSuite extends SparkFunSuite {
}
}
+ test("parse labeled points with whitespaces") {
+ val point = LabeledPoint.parse("(0.0, [1.0, 2.0])")
+ assert(point === LabeledPoint(0.0, Vectors.dense(1.0, 2.0)))
+ }
+
test("parse labeled points with v0.9 format") {
val point = LabeledPoint.parse("1.0,1.0 0.0 -2.0")
assert(point === LabeledPoint(1.0, Vectors.dense(1.0, 0.0, -2.0)))
http://git-wip-us.apache.org/repos/asf/spark/blob/a8031183/mllib/src/test/scala/org/apache/spark/mllib/util/NumericParserSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/util/NumericParserSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/util/NumericParserSuite.scala
index 8dcb9ba..fa4f74d 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/util/NumericParserSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/util/NumericParserSuite.scala
@@ -37,4 +37,11 @@ class NumericParserSuite extends SparkFunSuite {
}
}
}
+
+ test("parser with whitespaces") {
+ val s = "(0.0, [1.0, 2.0])"
+ val parsed = NumericParser.parse(s).asInstanceOf[Seq[_]]
+ assert(parsed(0).asInstanceOf[Double] === 0.0)
+ assert(parsed(1).asInstanceOf[Array[Double]] === Array(1.0, 2.0))
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org