Posted to commits@spark.apache.org by gu...@apache.org on 2019/07/18 04:38:20 UTC

[spark] branch master updated: [SPARK-28411][PYTHON][SQL] InsertInto with overwrite is not honored

This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 971e832  [SPARK-28411][PYTHON][SQL] InsertInto with overwrite is not honored
971e832 is described below

commit 971e832e0eb720e346e9863ccfd41304d0f8f3dc
Author: Huaxin Gao <hu...@us.ibm.com>
AuthorDate: Thu Jul 18 13:37:59 2019 +0900

    [SPARK-28411][PYTHON][SQL] InsertInto with overwrite is not honored
    
    ## What changes were proposed in this pull request?
    In the following Python code:
    ```
    df.write.mode("overwrite").insertInto("table")
    ```
    ```insertInto``` ignores ```mode("overwrite")``` and appends to the table instead of overwriting it.
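    
    With this change applied, the save mode set on the writer should be honored. A minimal sketch of the expected behavior (the table name ```example_table``` and the sample data are illustrative; an active ```spark``` session is assumed):
    ```
    df = spark.createDataFrame([("a", 1), ("b", 2)], ["C1", "C2"])
    df.write.saveAsTable("example_table")                    # seed a catalog table with df's schema
    
    df.write.insertInto("example_table")                     # no mode, no argument: appends
    df.write.mode("overwrite").insertInto("example_table")   # mode is now honored: replaces existing rows
    df.write.insertInto("example_table", overwrite=True)     # explicit argument still overwrites
    ```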
    
    ## How was this patch tested?
    
    Added a unit test.
    
    Closes #25175 from huaxingao/spark-28411.
    
    Authored-by: Huaxin Gao <hu...@us.ibm.com>
    Signed-off-by: HyukjinKwon <gu...@apache.org>
---
 python/pyspark/sql/readwriter.py            |  6 ++++--
 python/pyspark/sql/tests/test_readwriter.py | 21 +++++++++++++++++++++
 2 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py
index 7596b02..f9bc2ff 100644
--- a/python/pyspark/sql/readwriter.py
+++ b/python/pyspark/sql/readwriter.py
@@ -745,7 +745,7 @@ class DataFrameWriter(OptionUtils):
             self._jwrite.save(path)
 
     @since(1.4)
-    def insertInto(self, tableName, overwrite=False):
+    def insertInto(self, tableName, overwrite=None):
         """Inserts the content of the :class:`DataFrame` to the specified table.
 
         It requires that the schema of the class:`DataFrame` is the same as the
@@ -753,7 +753,9 @@ class DataFrameWriter(OptionUtils):
 
         Optionally overwriting any existing data.
         """
-        self._jwrite.mode("overwrite" if overwrite else "append").insertInto(tableName)
+        if overwrite is not None:
+            self.mode("overwrite" if overwrite else "append")
+        self._jwrite.insertInto(tableName)
 
     @since(1.4)
     def saveAsTable(self, name, format=None, mode=None, partitionBy=None, **options):
diff --git a/python/pyspark/sql/tests/test_readwriter.py b/python/pyspark/sql/tests/test_readwriter.py
index a708072..2530cc2 100644
--- a/python/pyspark/sql/tests/test_readwriter.py
+++ b/python/pyspark/sql/tests/test_readwriter.py
@@ -141,6 +141,27 @@ class ReadwriterTests(ReusedSQLTestCase):
                 .mode("overwrite").saveAsTable("pyspark_bucket"))
             self.assertSetEqual(set(data), set(self.spark.table("pyspark_bucket").collect()))
 
+    def test_insert_into(self):
+        df = self.spark.createDataFrame([("a", 1), ("b", 2)], ["C1", "C2"])
+        with self.table("test_table"):
+            df.write.saveAsTable("test_table")
+            self.assertEqual(2, self.spark.sql("select * from test_table").count())
+
+            df.write.insertInto("test_table")
+            self.assertEqual(4, self.spark.sql("select * from test_table").count())
+
+            df.write.mode("overwrite").insertInto("test_table")
+            self.assertEqual(2, self.spark.sql("select * from test_table").count())
+
+            df.write.insertInto("test_table", True)
+            self.assertEqual(2, self.spark.sql("select * from test_table").count())
+
+            df.write.insertInto("test_table", False)
+            self.assertEqual(4, self.spark.sql("select * from test_table").count())
+
+            df.write.mode("overwrite").insertInto("test_table", False)
+            self.assertEqual(6, self.spark.sql("select * from test_table").count())
+
 
 if __name__ == "__main__":
     import unittest

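As the last case in the new test above shows, an explicit ```overwrite``` argument re-sets the writer's save mode inside ```insertInto```, so it takes precedence over a previously chained ```mode(...)``` call. A minimal sketch of that interaction (reusing the illustrative ```example_table``` and ```df``` from the sketch above; an active ```spark``` session is assumed):

```
# Both a chained mode and an explicit argument: the argument wins, because
# insertInto(..., overwrite=False) re-sets the writer's mode to "append".
df.write.mode("overwrite").insertInto("example_table", False)

# Chained mode only: honored, so existing rows are replaced with df's rows.
df.write.mode("overwrite").insertInto("example_table")
```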
