Posted to commits@spark.apache.org by do...@apache.org on 2020/11/25 23:05:45 UTC

[spark] branch branch-2.4 updated: [SPARK-26645][PYTHON][2.4] Support decimals with negative scale when parsing datatype

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-2.4
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-2.4 by this push:
     new 3eb6e38  [SPARK-26645][PYTHON][2.4] Support decimals with negative scale when parsing datatype
3eb6e38 is described below

commit 3eb6e389afb2435476bedca2e88590aa38421480
Author: Marco Gaido <ma...@gmail.com>
AuthorDate: Wed Nov 25 15:00:39 2020 -0800

    [SPARK-26645][PYTHON][2.4] Support decimals with negative scale when parsing datatype
    
    ## What changes were proposed in this pull request?
    
    This is a backport of #23575
    
    When parsing data types from their JSON internal representation, PySpark does not support decimals with a negative scale. Since such decimals are valid and can actually occur, PySpark should be able to parse them successfully.
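    
    For illustration (an editor's sketch, not part of the original commit message): a negative scale rounds to the left of the decimal point, so DecimalType(1, -1) keeps one significant digit in the tens place. The snippet below mirrors the test added in this patch and assumes a local PySpark installation:
    
        from pyspark.sql import SparkSession
        from pyspark.sql.functions import col
        from pyspark.sql.types import DecimalType
    
        spark = SparkSession.builder.master("local[1]").getOrCreate()
    
        # Casting to DecimalType(1, -1) rounds each value to a multiple of 10.
        df = spark.createDataFrame([(1,), (11,)], ["value"])
        ret = df.select(col("value").cast(DecimalType(1, -1))).collect()
        print([int(r.value) for r in ret])  # [0, 10]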
    
    ## How was this patch tested?
    
    Added a test.
    
    Closes #30503 from dongjoon-hyun/SPARK-26645.
    
    Authored-by: Marco Gaido <ma...@gmail.com>
    Signed-off-by: Dongjoon Hyun <do...@apache.org>
---
 python/pyspark/sql/tests.py | 8 +++++++-
 python/pyspark/sql/types.py | 4 +++-
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py
index b995227..70f3882 100644
--- a/python/pyspark/sql/tests.py
+++ b/python/pyspark/sql/tests.py
@@ -88,7 +88,7 @@ from pyspark.sql.types import _array_signed_int_typecode_ctype_mappings, _array_
 from pyspark.sql.types import _array_unsigned_int_typecode_ctype_mappings
 from pyspark.sql.types import _merge_type
 from pyspark.tests import QuietTest, ReusedPySparkTestCase, PySparkTestCase, SparkSubmitTests
-from pyspark.sql.functions import UserDefinedFunction, sha2, lit, input_file_name, udf
+from pyspark.sql.functions import UserDefinedFunction, sha2, lit, input_file_name, udf, col
 from pyspark.sql.window import Window
 from pyspark.sql.utils import AnalysisException, ParseException, IllegalArgumentException
 
@@ -1134,6 +1134,12 @@ class SQLTests(ReusedSQLTestCase):
         df = self.spark.createDataFrame([{'a': 1}], ["b"])
         self.assertEqual(df.columns, ['b'])
 
+    def test_negative_decimal(self):
+        df = self.spark.createDataFrame([(1, ), (11, )], ["value"])
+        ret = df.select(col("value").cast(DecimalType(1, -1))).collect()
+        actual = list(map(lambda r: int(r.value), ret))
+        self.assertEqual(actual, [0, 10])
+
     def test_create_dataframe_from_objects(self):
         data = [MyObject(1, "1"), MyObject(2, "2")]
         df = self.spark.createDataFrame(data)
diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py
index 0d73963..c10da35 100644
--- a/python/pyspark/sql/types.py
+++ b/python/pyspark/sql/types.py
@@ -752,7 +752,7 @@ _all_complex_types = dict((v.typeName(), v)
                           for v in [ArrayType, MapType, StructType])
 
 
-_FIXED_DECIMAL = re.compile(r"decimal\(\s*(\d+)\s*,\s*(\d+)\s*\)")
+_FIXED_DECIMAL = re.compile(r"decimal\(\s*(\d+)\s*,\s*(-?\d+)\s*\)")
 
 
 def _parse_datatype_string(s):
@@ -865,6 +865,8 @@ def _parse_datatype_json_string(json_string):
     >>> complex_maptype = MapType(complex_structtype,
     ...                           complex_arraytype, False)
     >>> check_datatype(complex_maptype)
+    >>> # Decimal with negative scale.
+    >>> check_datatype(DecimalType(1,-1))
     """
     return _parse_datatype_json_value(json.loads(json_string))
 
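For context, a short sketch (an editor's illustration, not part of the patch) of why the one-character regex change matters. The old pattern required a non-negative scale, so parsing the JSON representation of a negative-scale decimal failed; the fixed pattern adds an optional minus sign. Note that _parse_datatype_json_string is a PySpark-internal helper, as shown in the diff above.

    import re
    from pyspark.sql.types import DecimalType, _parse_datatype_json_string

    # Old pattern: the scale must be non-negative digits, so this fails to match.
    old = re.compile(r"decimal\(\s*(\d+)\s*,\s*(\d+)\s*\)")
    print(old.match("decimal(1,-1)"))           # None

    # Fixed pattern: an optional leading '-' lets negative scales through.
    new = re.compile(r"decimal\(\s*(\d+)\s*,\s*(-?\d+)\s*\)")
    print(new.match("decimal(1,-1)").groups())  # ('1', '-1')

    # With the fix, a negative-scale decimal round-trips through its JSON form.
    dt = DecimalType(1, -1)
    assert _parse_datatype_json_string(dt.json()) == dt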

