You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2018/01/25 22:50:52 UTC
spark git commit: [SPARK-23081][PYTHON] Add colRegex API to PySpark
Repository: spark
Updated Branches:
refs/heads/master 8532e26f3 -> 8480c0c57
[SPARK-23081][PYTHON] Add colRegex API to PySpark
## What changes were proposed in this pull request?
Add colRegex API to PySpark
## How was this patch tested?
Added a doctest for `colRegex` in python/pyspark/sql/dataframe.py (the diffstat below lists no change to sql/tests.py).
Author: Huaxin Gao <hu...@us.ibm.com>
Closes #20390 from huaxingao/spark-23081.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8480c0c5
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8480c0c5
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8480c0c5
Branch: refs/heads/master
Commit: 8480c0c57698b7dcccec5483d67b17cf2c7527ed
Parents: 8532e26
Author: Huaxin Gao <hu...@us.ibm.com>
Authored: Fri Jan 26 07:50:48 2018 +0900
Committer: hyukjinkwon <gu...@gmail.com>
Committed: Fri Jan 26 07:50:48 2018 +0900
----------------------------------------------------------------------
python/pyspark/sql/dataframe.py | 23 ++++++++++++++++++++
.../scala/org/apache/spark/sql/Dataset.scala | 8 +++----
2 files changed, 27 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/8480c0c5/python/pyspark/sql/dataframe.py
----------------------------------------------------------------------
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index 2d5e9b9..ac40308 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -819,6 +819,29 @@ class DataFrame(object):
"""
return [f.name for f in self.schema.fields]
+ @since(2.3)
+ def colRegex(self, colName):
+ """
+ Selects column based on the column name specified as a regex and returns it
+ as :class:`Column`.
+
+ :param colName: string, column name specified as a regex.
+
+ >>> df = spark.createDataFrame([("a", 1), ("b", 2), ("c", 3)], ["Col1", "Col2"])
+ >>> df.select(df.colRegex("`(Col1)?+.+`")).show()
+ +----+
+ |Col2|
+ +----+
+ | 1|
+ | 2|
+ | 3|
+ +----+
+ """
+ if not isinstance(colName, basestring):
+ raise ValueError("colName should be provided as string")
+ jc = self._jdf.colRegex(colName)
+ return Column(jc)
+
@ignore_unicode_prefix
@since(1.3)
def alias(self, alias):
http://git-wip-us.apache.org/repos/asf/spark/blob/8480c0c5/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
index 912f411..edb6644 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -1194,7 +1194,7 @@ class Dataset[T] private[sql](
def orderBy(sortExprs: Column*): Dataset[T] = sort(sortExprs : _*)
/**
- * Selects column based on the column name and return it as a [[Column]].
+ * Selects column based on the column name and returns it as a [[Column]].
*
* @note The column name can also reference to a nested column like `a.b`.
*
@@ -1220,7 +1220,7 @@ class Dataset[T] private[sql](
}
/**
- * Selects column based on the column name and return it as a [[Column]].
+ * Selects column based on the column name and returns it as a [[Column]].
*
* @note The column name can also reference to a nested column like `a.b`.
*
@@ -1240,7 +1240,7 @@ class Dataset[T] private[sql](
}
/**
- * Selects column based on the column name specified as a regex and return it as [[Column]].
+ * Selects column based on the column name specified as a regex and returns it as [[Column]].
* @group untypedrel
* @since 2.3.0
*/
@@ -2729,7 +2729,7 @@ class Dataset[T] private[sql](
}
/**
- * Return an iterator that contains all rows in this Dataset.
+ * Returns an iterator that contains all rows in this Dataset.
*
* The iterator will consume as much memory as the largest partition in this Dataset.
*
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org