You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2018/12/01 02:37:14 UTC
spark git commit: [SPARK-23647][PYTHON][SQL] Adds more types for hint in pyspark

Repository: spark
Updated Branches:
  refs/heads/master 6be272b75 -> 28d337440


[SPARK-23647][PYTHON][SQL] Adds more types for hint in pyspark

Signed-off-by: DylanGuedes <djmgguedes@gmail.com>

## What changes were proposed in this pull request?

Adds support for float, int, and list parameters (in addition to strings) to `DataFrame.hint` in `pyspark.sql`.

## How was this patch tested?

I did manual tests following the same principles used in the Scala version, and also added unit tests.

Closes #20788 from DylanGuedes/jira-21030.

Authored-by: DylanGuedes <dj...@gmail.com>
Signed-off-by: Hyukjin Kwon <gu...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/28d33744
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/28d33744
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/28d33744

Branch: refs/heads/master
Commit: 28d33744076abd8bf7955eefcbdeef4849a99c40
Parents: 6be272b
Author: DylanGuedes <dj...@gmail.com>
Authored: Sat Dec 1 10:37:03 2018 +0800
Committer: Hyukjin Kwon <gu...@apache.org>
Committed: Sat Dec 1 10:37:03 2018 +0800

----------------------------------------------------------------------
 python/pyspark/sql/dataframe.py            |  6 ++++--
 python/pyspark/sql/tests/test_dataframe.py | 13 +++++++++++++
 2 files changed, 17 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/28d33744/python/pyspark/sql/dataframe.py
----------------------------------------------------------------------
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index b8833a3..1b1092c 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -485,10 +485,12 @@ class DataFrame(object):
         if not isinstance(name, str):
             raise TypeError("name should be provided as str, got {0}".format(type(name)))
 
+        allowed_types = (basestring, list, float, int)
         for p in parameters:
-            if not isinstance(p, str):
+            if not isinstance(p, allowed_types):
                 raise TypeError(
-                    "all parameters should be str, got {0} of type {1}".format(p, type(p)))
+                    "all parameters should be in {0}, got {1} of type {2}".format(
+                        allowed_types, p, type(p)))
 
         jdf = self._jdf.hint(name, self._jseq(parameters))
         return DataFrame(jdf, self.sql_ctx)

http://git-wip-us.apache.org/repos/asf/spark/blob/28d33744/python/pyspark/sql/tests/test_dataframe.py
----------------------------------------------------------------------
diff --git a/python/pyspark/sql/tests/test_dataframe.py b/python/pyspark/sql/tests/test_dataframe.py
index 908d400..65edf59 100644
--- a/python/pyspark/sql/tests/test_dataframe.py
+++ b/python/pyspark/sql/tests/test_dataframe.py
@@ -375,6 +375,19 @@ class DataFrameTests(ReusedSQLTestCase):
         plan = df1.join(df2.hint("broadcast"), "id")._jdf.queryExecution().executedPlan()
         self.assertEqual(1, plan.toString().count("BroadcastHashJoin"))
 
+    # add tests for SPARK-23647 (test more types for hint)
+    def test_extended_hint_types(self):
+        from pyspark.sql import DataFrame
+
+        df = self.spark.range(10e10).toDF("id")
+        such_a_nice_list = ["itworks1", "itworks2", "itworks3"]
+        hinted_df = df.hint("my awesome hint", 1.2345, "what", such_a_nice_list)
+        logical_plan = hinted_df._jdf.queryExecution().logical()
+
+        self.assertEqual(1, logical_plan.toString().count("1.2345"))
+        self.assertEqual(1, logical_plan.toString().count("what"))
+        self.assertEqual(3, logical_plan.toString().count("itworks"))
+
     def test_sample(self):
         self.assertRaisesRegexp(
             TypeError,


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org