You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@superset.apache.org by mi...@apache.org on 2022/08/30 12:41:41 UTC

[superset] 03/13: fix(csv): Ensure df_to_escaped_csv handles NULL (#20151)

This is an automated email from the ASF dual-hosted git repository.

michaelsmolina pushed a commit to branch 1.5
in repository https://gitbox.apache.org/repos/asf/superset.git

commit d512e89aa9d0f2b5c8eeda49733efc198735e310
Author: John Bodley <45...@users.noreply.github.com>
AuthorDate: Tue May 31 09:56:25 2022 -0700

    fix(csv): Ensure df_to_escaped_csv handles NULL (#20151)
    
    Co-authored-by: John Bodley <jo...@airbnb.com>
    (cherry picked from commit 97ce920d493d126ddcff93b9e46cdde1c5c8bb69)
---
 superset/utils/csv.py                      | 9 +++++++--
 tests/integration_tests/utils/csv_tests.py | 4 ++++
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/superset/utils/csv.py b/superset/utils/csv.py
index 42d2c55783..cf73c99dfa 100644
--- a/superset/utils/csv.py
+++ b/superset/utils/csv.py
@@ -19,6 +19,7 @@ import urllib.request
 from typing import Any, Dict, Optional
 from urllib.error import URLError
 
+import numpy as np
 import pandas as pd
 import simplejson
 
@@ -64,8 +65,12 @@ def df_to_escaped_csv(df: pd.DataFrame, **kwargs: Any) -> Any:
     # Escape csv headers
     df = df.rename(columns=escape_values)
 
-    # Escape csv rows
-    df = df.applymap(escape_values)
+    # Escape csv values
+    for name, column in df.items():
+        if column.dtype == np.dtype(object):
+            for idx, value in enumerate(column.values):
+                if isinstance(value, str):
+                    df.at[idx, name] = escape_value(value)
 
     return df.to_csv(**kwargs)
 
diff --git a/tests/integration_tests/utils/csv_tests.py b/tests/integration_tests/utils/csv_tests.py
index bf6110c639..e514efb1d2 100644
--- a/tests/integration_tests/utils/csv_tests.py
+++ b/tests/integration_tests/utils/csv_tests.py
@@ -17,6 +17,7 @@
 import io
 
 import pandas as pd
+import pyarrow as pa
 import pytest
 
 from superset.utils import csv
@@ -77,3 +78,6 @@ def test_df_to_escaped_csv():
         ["a", "'=b"],  # pandas seems to be removing the leading ""
         ["' =a", "b"],
     ]
+
+    df = pa.array([1, None]).to_pandas(integer_object_nulls=True).to_frame()
+    assert csv.df_to_escaped_csv(df, encoding="utf8", index=False) == '0\n1\n""\n'