You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@spark.apache.org by "Sandeep Singh (Jira)" <ji...@apache.org> on 2023/01/05 17:48:00 UTC

[jira] [Updated] (SPARK-41902) Fix String representation of maps created by `map_from_arrays`

     [ https://issues.apache.org/jira/browse/SPARK-41902?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Sandeep Singh updated SPARK-41902:
----------------------------------
    Description: 
{code:python}
expected = {"a": 1, "b": 2}
expected2 = {"c": 3, "d": 4}
df = self.spark.createDataFrame(
    [(list(expected.keys()), list(expected.values()))], ["k", "v"]
)
actual = (
    df.select(
        expr("map('c', 3, 'd', 4) as dict2"),
        map_from_arrays(df.k, df.v).alias("dict"),
        "*",
    )
    .select(
        map_contains_key("dict", "a").alias("one"),
        map_contains_key("dict", "d").alias("not_exists"),
        map_keys("dict").alias("keys"),
        map_values("dict").alias("values"),
        map_entries("dict").alias("items"),
        "*",
    )
    .select(
        map_concat("dict", "dict2").alias("merged"),
        map_from_entries(arrays_zip("keys", "values")).alias("from_items"),
        "*",
    )
    .first()
)
self.assertEqual(expected, actual["dict"]){code}
{code:java}
Traceback (most recent call last):
  File "/Users/s.singh/personal/spark-oss/python/pyspark/sql/tests/test_functions.py", line 1142, in test_map_functions
    self.assertEqual(expected, actual["dict"])
AssertionError: {'a': 1, 'b': 2} != [('a', 1), ('b', 2)]{code}

  was:
{code:python}
from pyspark.sql import functions

funs = [
    (functions.acosh, "ACOSH"),
    (functions.asinh, "ASINH"),
    (functions.atanh, "ATANH"),
]

cols = ["a", functions.col("a")]

for f, alias in funs:
    for c in cols:
        self.assertIn(f"{alias}(a)", repr(f(c))){code}
{code:java}
Traceback (most recent call last):
  File "/Users/s.singh/personal/spark-oss/python/pyspark/sql/tests/test_functions.py", line 271, in test_inverse_trig_functions
    self.assertIn(f"{alias}(a)", repr(f(c)))
AssertionError: 'ACOSH(a)' not found in "Column<'acosh(ColumnReference(a))'>"{code}
 

 
{code:python}
from pyspark.sql.functions import col, lit, overlay
from itertools import chain
import re

actual = list(
    chain.from_iterable(
        [
            re.findall("(overlay\\(.*\\))", str(x))
            for x in [
                overlay(col("foo"), col("bar"), 1),
                overlay("x", "y", 3),
                overlay(col("x"), col("y"), 1, 3),
                overlay("x", "y", 2, 5),
                overlay("x", "y", lit(11)),
                overlay("x", "y", lit(2), lit(5)),
            ]
        ]
    )
)

expected = [
    "overlay(foo, bar, 1, -1)",
    "overlay(x, y, 3, -1)",
    "overlay(x, y, 1, 3)",
    "overlay(x, y, 2, 5)",
    "overlay(x, y, 11, -1)",
    "overlay(x, y, 2, 5)",
]

self.assertListEqual(actual, expected)

df = self.spark.createDataFrame([("SPARK_SQL", "CORE", 7, 0)], ("x", "y", "pos", "len"))

exp = [Row(ol="SPARK_CORESQL")]
self.assertTrue(
    all(
        [
            df.select(overlay(df.x, df.y, 7, 0).alias("ol")).collect() == exp,
            df.select(overlay(df.x, df.y, lit(7), lit(0)).alias("ol")).collect() == exp,
            df.select(overlay("x", "y", "pos", "len").alias("ol")).collect() == exp,
        ]
    )
) {code}
{code:java}
Traceback (most recent call last):
  File "/Users/s.singh/personal/spark-oss/python/pyspark/sql/tests/test_functions.py", line 675, in test_overlay
    self.assertListEqual(actual, expected)
AssertionError: Lists differ: ['overlay(ColumnReference(foo), ColumnReference(bar[402 chars]5))'] != ['overlay(foo, bar, 1, -1)', 'overlay(x, y, 3, -1)'[90 chars] 5)']

First differing element 0:
'overlay(ColumnReference(foo), ColumnReference(bar), Literal(1), Literal(-1))'
'overlay(foo, bar, 1, -1)'

- ['overlay(ColumnReference(foo), ColumnReference(bar), Literal(1), Literal(-1))',
-  'overlay(ColumnReference(x), ColumnReference(y), Literal(3), Literal(-1))',
-  'overlay(ColumnReference(x), ColumnReference(y), Literal(1), Literal(3))',
-  'overlay(ColumnReference(x), ColumnReference(y), Literal(2), Literal(5))',
-  'overlay(ColumnReference(x), ColumnReference(y), Literal(11), Literal(-1))',
-  'overlay(ColumnReference(x), ColumnReference(y), Literal(2), Literal(5))']
+ ['overlay(foo, bar, 1, -1)',
+  'overlay(x, y, 3, -1)',
+  'overlay(x, y, 1, 3)',
+  'overlay(x, y, 2, 5)',
+  'overlay(x, y, 11, -1)',
+  'overlay(x, y, 2, 5)']
 {code}


> Fix String representation of maps created by `map_from_arrays`
> --------------------------------------------------------------
>
>                 Key: SPARK-41902
>                 URL: https://issues.apache.org/jira/browse/SPARK-41902
>             Project: Spark
>          Issue Type: Sub-task
>          Components: Connect
>    Affects Versions: 3.4.0
>            Reporter: Sandeep Singh
>            Priority: Major
>
> {code:python}
> expected = {"a": 1, "b": 2}
> expected2 = {"c": 3, "d": 4}
> df = self.spark.createDataFrame(
>     [(list(expected.keys()), list(expected.values()))], ["k", "v"]
> )
> actual = (
>     df.select(
>         expr("map('c', 3, 'd', 4) as dict2"),
>         map_from_arrays(df.k, df.v).alias("dict"),
>         "*",
>     )
>     .select(
>         map_contains_key("dict", "a").alias("one"),
>         map_contains_key("dict", "d").alias("not_exists"),
>         map_keys("dict").alias("keys"),
>         map_values("dict").alias("values"),
>         map_entries("dict").alias("items"),
>         "*",
>     )
>     .select(
>         map_concat("dict", "dict2").alias("merged"),
>         map_from_entries(arrays_zip("keys", "values")).alias("from_items"),
>         "*",
>     )
>     .first()
> )
> self.assertEqual(expected, actual["dict"]){code}
> {code:java}
> Traceback (most recent call last):
>   File "/Users/s.singh/personal/spark-oss/python/pyspark/sql/tests/test_functions.py", line 1142, in test_map_functions
>     self.assertEqual(expected, actual["dict"])
> AssertionError: {'a': 1, 'b': 2} != [('a', 1), ('b', 2)]{code}



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org
For additional commands, e-mail: issues-help@spark.apache.org