You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by rx...@apache.org on 2015/06/29 09:22:47 UTC
spark git commit: [SPARK-8698] partitionBy in Python DataFrame reader/writer interface should not default to empty tuple.
Repository: spark
Updated Branches:
refs/heads/master ac2e17b01 -> 660c6cec7
[SPARK-8698] partitionBy in Python DataFrame reader/writer interface should not default to empty tuple.
Author: Reynold Xin <rx...@databricks.com>
Closes #7079 from rxin/SPARK-8698 and squashes the following commits:
8513e1c [Reynold Xin] [SPARK-8698] partitionBy in Python DataFrame reader/writer interface should not default to empty tuple.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/660c6cec
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/660c6cec
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/660c6cec
Branch: refs/heads/master
Commit: 660c6cec75dc165cf5d62cdc1b0951bdb93df365
Parents: ac2e17b
Author: Reynold Xin <rx...@databricks.com>
Authored: Mon Jun 29 00:22:44 2015 -0700
Committer: Reynold Xin <rx...@databricks.com>
Committed: Mon Jun 29 00:22:44 2015 -0700
----------------------------------------------------------------------
python/pyspark/sql/readwriter.py | 21 +++++++++++++--------
1 file changed, 13 insertions(+), 8 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/660c6cec/python/pyspark/sql/readwriter.py
----------------------------------------------------------------------
diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py
index c4cc62e..882a030 100644
--- a/python/pyspark/sql/readwriter.py
+++ b/python/pyspark/sql/readwriter.py
@@ -270,12 +270,11 @@ class DataFrameWriter(object):
"""
if len(cols) == 1 and isinstance(cols[0], (list, tuple)):
cols = cols[0]
- if len(cols) > 0:
- self._jwrite = self._jwrite.partitionBy(_to_seq(self._sqlContext._sc, cols))
+ self._jwrite = self._jwrite.partitionBy(_to_seq(self._sqlContext._sc, cols))
return self
@since(1.4)
- def save(self, path=None, format=None, mode=None, partitionBy=(), **options):
+ def save(self, path=None, format=None, mode=None, partitionBy=None, **options):
"""Saves the contents of the :class:`DataFrame` to a data source.
The data source is specified by the ``format`` and a set of ``options``.
@@ -295,7 +294,9 @@ class DataFrameWriter(object):
>>> df.write.mode('append').parquet(os.path.join(tempfile.mkdtemp(), 'data'))
"""
- self.partitionBy(partitionBy).mode(mode).options(**options)
+ self.mode(mode).options(**options)
+ if partitionBy is not None:
+ self.partitionBy(partitionBy)
if format is not None:
self.format(format)
if path is None:
@@ -315,7 +316,7 @@ class DataFrameWriter(object):
self._jwrite.mode("overwrite" if overwrite else "append").insertInto(tableName)
@since(1.4)
- def saveAsTable(self, name, format=None, mode=None, partitionBy=(), **options):
+ def saveAsTable(self, name, format=None, mode=None, partitionBy=None, **options):
"""Saves the content of the :class:`DataFrame` as the specified table.
In the case the table already exists, behavior of this function depends on the
@@ -334,7 +335,9 @@ class DataFrameWriter(object):
:param partitionBy: names of partitioning columns
:param options: all other string options
"""
- self.partitionBy(partitionBy).mode(mode).options(**options)
+ self.mode(mode).options(**options)
+ if partitionBy is not None:
+ self.partitionBy(partitionBy)
if format is not None:
self.format(format)
self._jwrite.saveAsTable(name)
@@ -356,7 +359,7 @@ class DataFrameWriter(object):
self.mode(mode)._jwrite.json(path)
@since(1.4)
- def parquet(self, path, mode=None, partitionBy=()):
+ def parquet(self, path, mode=None, partitionBy=None):
"""Saves the content of the :class:`DataFrame` in Parquet format at the specified path.
:param path: the path in any Hadoop supported file system
@@ -370,7 +373,9 @@ class DataFrameWriter(object):
>>> df.write.parquet(os.path.join(tempfile.mkdtemp(), 'data'))
"""
- self.partitionBy(partitionBy).mode(mode)
+ self.mode(mode)
+ if partitionBy is not None:
+ self.partitionBy(partitionBy)
self._jwrite.parquet(path)
@since(1.4)
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org