You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by sr...@apache.org on 2016/08/10 09:14:46 UTC
spark git commit: [SPARK-16324][SQL] regexp_extract should doc that
it returns empty string when match fails
Repository: spark
Updated Branches:
refs/heads/master eca58755f -> 0578ff968
[SPARK-16324][SQL] regexp_extract should doc that it returns empty string when match fails
## What changes were proposed in this pull request?
Document that regexp_extract returns an empty string when the regex, or the specified group, does not match.
## How was this patch tested?
Jenkins tests, plus a few new test cases.
Author: Sean Owen <so...@cloudera.com>
Closes #14525 from srowen/SPARK-16324.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0578ff96
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0578ff96
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0578ff96
Branch: refs/heads/master
Commit: 0578ff9681edbaab4ae68f67272dc3d4d890d53b
Parents: eca5875
Author: Sean Owen <so...@cloudera.com>
Authored: Wed Aug 10 10:14:43 2016 +0100
Committer: Sean Owen <so...@cloudera.com>
Committed: Wed Aug 10 10:14:43 2016 +0100
----------------------------------------------------------------------
python/pyspark/sql/functions.py | 6 +++++-
sql/core/src/main/scala/org/apache/spark/sql/functions.scala | 3 ++-
.../test/scala/org/apache/spark/sql/StringFunctionsSuite.scala | 4 ++++
3 files changed, 11 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/0578ff96/python/pyspark/sql/functions.py
----------------------------------------------------------------------
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index 8a01805..4ea83e2 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -1440,11 +1440,15 @@ def split(str, pattern):
@ignore_unicode_prefix
@since(1.5)
def regexp_extract(str, pattern, idx):
- """Extract a specific(idx) group identified by a java regex, from the specified string column.
+ """Extract a specific group matched by a Java regex, from the specified string column.
+ If the regex did not match, or the specified group did not match, an empty string is returned.
>>> df = spark.createDataFrame([('100-200',)], ['str'])
>>> df.select(regexp_extract('str', '(\d+)-(\d+)', 1).alias('d')).collect()
[Row(d=u'100')]
+ >>> df = spark.createDataFrame([('foo',)], ['str'])
+ >>> df.select(regexp_extract('str', '(\d+)', 1).alias('d')).collect()
+ [Row(d=u'')]
>>> df = spark.createDataFrame([('aaaac',)], ['str'])
>>> df.select(regexp_extract('str', '(a+)(b)?(c)', 2).alias('d')).collect()
[Row(d=u'')]
http://git-wip-us.apache.org/repos/asf/spark/blob/0578ff96/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index 81f6ed7..18e736a 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -2175,7 +2175,8 @@ object functions {
def ltrim(e: Column): Column = withExpr {StringTrimLeft(e.expr) }
/**
- * Extract a specific(idx) group identified by a java regex, from the specified string column.
+ * Extract a specific group matched by a Java regex, from the specified string column.
+ * If the regex did not match, or the specified group did not match, an empty string is returned.
*
* @group string_funcs
* @since 1.5.0
http://git-wip-us.apache.org/repos/asf/spark/blob/0578ff96/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala
index 64b4718..1cc7746 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala
@@ -97,6 +97,10 @@ class StringFunctionsSuite extends QueryTest with SharedSQLContext {
test("non-matching optional group") {
val df = Seq(Tuple1("aaaac")).toDF("s")
checkAnswer(
+ df.select(regexp_extract($"s", "(foo)", 1)),
+ Row("")
+ )
+ checkAnswer(
df.select(regexp_extract($"s", "(a+)(b)?(c)", 2)),
Row("")
)
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org