You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by do...@apache.org on 2020/11/25 23:05:45 UTC
[spark] branch branch-2.4 updated: [SPARK-26645][PYTHON][2.4]
Support decimals with negative scale when parsing datatype
This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch branch-2.4
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-2.4 by this push:
new 3eb6e38 [SPARK-26645][PYTHON][2.4] Support decimals with negative scale when parsing datatype
3eb6e38 is described below
commit 3eb6e389afb2435476bedca2e88590aa38421480
Author: Marco Gaido <ma...@gmail.com>
AuthorDate: Wed Nov 25 15:00:39 2020 -0800
[SPARK-26645][PYTHON][2.4] Support decimals with negative scale when parsing datatype
## What changes were proposed in this pull request?
This is a backport of PR #23575 (originally merged to master) to branch-2.4.
When parsing datatypes from the json internal representation, PySpark doesn't support decimals with negative scales. Since they are allowed and can actually happen, PySpark should be able to successfully parse them.
## How was this patch tested?
Added a test verifying that a `DecimalType` with a negative scale round-trips through the datatype parser.
Closes #30503 from dongjoon-hyun/SPARK-26645.
Authored-by: Marco Gaido <ma...@gmail.com>
Signed-off-by: Dongjoon Hyun <do...@apache.org>
---
python/pyspark/sql/tests.py | 8 +++++++-
python/pyspark/sql/types.py | 4 +++-
2 files changed, 10 insertions(+), 2 deletions(-)
diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py
index b995227..70f3882 100644
--- a/python/pyspark/sql/tests.py
+++ b/python/pyspark/sql/tests.py
@@ -88,7 +88,7 @@ from pyspark.sql.types import _array_signed_int_typecode_ctype_mappings, _array_
from pyspark.sql.types import _array_unsigned_int_typecode_ctype_mappings
from pyspark.sql.types import _merge_type
from pyspark.tests import QuietTest, ReusedPySparkTestCase, PySparkTestCase, SparkSubmitTests
-from pyspark.sql.functions import UserDefinedFunction, sha2, lit, input_file_name, udf
+from pyspark.sql.functions import UserDefinedFunction, sha2, lit, input_file_name, udf, col
from pyspark.sql.window import Window
from pyspark.sql.utils import AnalysisException, ParseException, IllegalArgumentException
@@ -1134,6 +1134,12 @@ class SQLTests(ReusedSQLTestCase):
df = self.spark.createDataFrame([{'a': 1}], ["b"])
self.assertEqual(df.columns, ['b'])
+ def test_negative_decimal(self):
+ df = self.spark.createDataFrame([(1, ), (11, )], ["value"])
+ ret = df.select(col("value").cast(DecimalType(1, -1))).collect()
+ actual = list(map(lambda r: int(r.value), ret))
+ self.assertEqual(actual, [0, 10])
+
def test_create_dataframe_from_objects(self):
data = [MyObject(1, "1"), MyObject(2, "2")]
df = self.spark.createDataFrame(data)
diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py
index 0d73963..c10da35 100644
--- a/python/pyspark/sql/types.py
+++ b/python/pyspark/sql/types.py
@@ -752,7 +752,7 @@ _all_complex_types = dict((v.typeName(), v)
for v in [ArrayType, MapType, StructType])
-_FIXED_DECIMAL = re.compile(r"decimal\(\s*(\d+)\s*,\s*(\d+)\s*\)")
+_FIXED_DECIMAL = re.compile(r"decimal\(\s*(\d+)\s*,\s*(-?\d+)\s*\)")
def _parse_datatype_string(s):
@@ -865,6 +865,8 @@ def _parse_datatype_json_string(json_string):
>>> complex_maptype = MapType(complex_structtype,
... complex_arraytype, False)
>>> check_datatype(complex_maptype)
+ >>> # Decimal with negative scale.
+ >>> check_datatype(DecimalType(1,-1))
"""
return _parse_datatype_json_value(json.loads(json_string))
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org