You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2019/07/18 04:38:20 UTC
[spark] branch master updated: [SPARK-28411][PYTHON][SQL]
InsertInto with overwrite is not honored
This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 971e832 [SPARK-28411][PYTHON][SQL] InsertInto with overwrite is not honored
971e832 is described below
commit 971e832e0eb720e346e9863ccfd41304d0f8f3dc
Author: Huaxin Gao <hu...@us.ibm.com>
AuthorDate: Thu Jul 18 13:37:59 2019 +0900
[SPARK-28411][PYTHON][SQL] InsertInto with overwrite is not honored
## What changes were proposed in this pull request?
In the following Python code
```
df.write.mode("overwrite").insertInto("table")
```
```insertInto``` ignores ```mode("overwrite")``` and appends by default.
## How was this patch tested?
Added a unit test.
Closes #25175 from huaxingao/spark-28411.
Authored-by: Huaxin Gao <hu...@us.ibm.com>
Signed-off-by: HyukjinKwon <gu...@apache.org>
---
python/pyspark/sql/readwriter.py | 6 ++++--
python/pyspark/sql/tests/test_readwriter.py | 21 +++++++++++++++++++++
2 files changed, 25 insertions(+), 2 deletions(-)
diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py
index 7596b02..f9bc2ff 100644
--- a/python/pyspark/sql/readwriter.py
+++ b/python/pyspark/sql/readwriter.py
@@ -745,7 +745,7 @@ class DataFrameWriter(OptionUtils):
self._jwrite.save(path)
@since(1.4)
- def insertInto(self, tableName, overwrite=False):
+ def insertInto(self, tableName, overwrite=None):
"""Inserts the content of the :class:`DataFrame` to the specified table.
It requires that the schema of the class:`DataFrame` is the same as the
@@ -753,7 +753,9 @@ class DataFrameWriter(OptionUtils):
Optionally overwriting any existing data.
"""
- self._jwrite.mode("overwrite" if overwrite else "append").insertInto(tableName)
+ if overwrite is not None:
+ self.mode("overwrite" if overwrite else "append")
+ self._jwrite.insertInto(tableName)
@since(1.4)
def saveAsTable(self, name, format=None, mode=None, partitionBy=None, **options):
diff --git a/python/pyspark/sql/tests/test_readwriter.py b/python/pyspark/sql/tests/test_readwriter.py
index a708072..2530cc2 100644
--- a/python/pyspark/sql/tests/test_readwriter.py
+++ b/python/pyspark/sql/tests/test_readwriter.py
@@ -141,6 +141,27 @@ class ReadwriterTests(ReusedSQLTestCase):
.mode("overwrite").saveAsTable("pyspark_bucket"))
self.assertSetEqual(set(data), set(self.spark.table("pyspark_bucket").collect()))
+ def test_insert_into(self):
+ df = self.spark.createDataFrame([("a", 1), ("b", 2)], ["C1", "C2"])
+ with self.table("test_table"):
+ df.write.saveAsTable("test_table")
+ self.assertEqual(2, self.spark.sql("select * from test_table").count())
+
+ df.write.insertInto("test_table")
+ self.assertEqual(4, self.spark.sql("select * from test_table").count())
+
+ df.write.mode("overwrite").insertInto("test_table")
+ self.assertEqual(2, self.spark.sql("select * from test_table").count())
+
+ df.write.insertInto("test_table", True)
+ self.assertEqual(2, self.spark.sql("select * from test_table").count())
+
+ df.write.insertInto("test_table", False)
+ self.assertEqual(4, self.spark.sql("select * from test_table").count())
+
+ df.write.mode("overwrite").insertInto("test_table", False)
+ self.assertEqual(6, self.spark.sql("select * from test_table").count())
+
if __name__ == "__main__":
import unittest
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org