You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2018/12/01 02:37:14 UTC
spark git commit: [SPARK-23647][PYTHON][SQL] Adds more types for hint in pyspark
Repository: spark
Updated Branches:
refs/heads/master 6be272b75 -> 28d337440
[SPARK-23647][PYTHON][SQL] Adds more types for hint in pyspark
Signed-off-by: DylanGuedes <djmgguedes@gmail.com>
## What changes were proposed in this pull request?
Allows `float`, `int`, and `list` values, in addition to strings, as parameters to `DataFrame.hint` in `pyspark.sql`.
## How was this patch tested?
I tested the change manually, following the same approach used for the Scala version, and also added unit tests.
Closes #20788 from DylanGuedes/jira-21030.
Authored-by: DylanGuedes <dj...@gmail.com>
Signed-off-by: Hyukjin Kwon <gu...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/28d33744
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/28d33744
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/28d33744
Branch: refs/heads/master
Commit: 28d33744076abd8bf7955eefcbdeef4849a99c40
Parents: 6be272b
Author: DylanGuedes <dj...@gmail.com>
Authored: Sat Dec 1 10:37:03 2018 +0800
Committer: Hyukjin Kwon <gu...@apache.org>
Committed: Sat Dec 1 10:37:03 2018 +0800
----------------------------------------------------------------------
python/pyspark/sql/dataframe.py | 6 ++++--
python/pyspark/sql/tests/test_dataframe.py | 13 +++++++++++++
2 files changed, 17 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/28d33744/python/pyspark/sql/dataframe.py
----------------------------------------------------------------------
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index b8833a3..1b1092c 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -485,10 +485,12 @@ class DataFrame(object):
if not isinstance(name, str):
raise TypeError("name should be provided as str, got {0}".format(type(name)))
+ allowed_types = (basestring, list, float, int)
for p in parameters:
- if not isinstance(p, str):
+ if not isinstance(p, allowed_types):
raise TypeError(
- "all parameters should be str, got {0} of type {1}".format(p, type(p)))
+ "all parameters should be in {0}, got {1} of type {2}".format(
+ allowed_types, p, type(p)))
jdf = self._jdf.hint(name, self._jseq(parameters))
return DataFrame(jdf, self.sql_ctx)
http://git-wip-us.apache.org/repos/asf/spark/blob/28d33744/python/pyspark/sql/tests/test_dataframe.py
----------------------------------------------------------------------
diff --git a/python/pyspark/sql/tests/test_dataframe.py b/python/pyspark/sql/tests/test_dataframe.py
index 908d400..65edf59 100644
--- a/python/pyspark/sql/tests/test_dataframe.py
+++ b/python/pyspark/sql/tests/test_dataframe.py
@@ -375,6 +375,19 @@ class DataFrameTests(ReusedSQLTestCase):
plan = df1.join(df2.hint("broadcast"), "id")._jdf.queryExecution().executedPlan()
self.assertEqual(1, plan.toString().count("BroadcastHashJoin"))
+ # add tests for SPARK-23647 (test more types for hint)
+ def test_extended_hint_types(self):
+ from pyspark.sql import DataFrame
+
+ df = self.spark.range(10e10).toDF("id")
+ such_a_nice_list = ["itworks1", "itworks2", "itworks3"]
+ hinted_df = df.hint("my awesome hint", 1.2345, "what", such_a_nice_list)
+ logical_plan = hinted_df._jdf.queryExecution().logical()
+
+ self.assertEqual(1, logical_plan.toString().count("1.2345"))
+ self.assertEqual(1, logical_plan.toString().count("what"))
+ self.assertEqual(3, logical_plan.toString().count("itworks"))
+
def test_sample(self):
self.assertRaisesRegexp(
TypeError,
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org