You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@superset.apache.org by mi...@apache.org on 2022/08/30 12:41:41 UTC
[superset] 03/13: fix(csv): Ensure df_to_escaped_csv handles NULL (#20151)
This is an automated email from the ASF dual-hosted git repository.
michaelsmolina pushed a commit to branch 1.5
in repository https://gitbox.apache.org/repos/asf/superset.git
commit d512e89aa9d0f2b5c8eeda49733efc198735e310
Author: John Bodley <45...@users.noreply.github.com>
AuthorDate: Tue May 31 09:56:25 2022 -0700
fix(csv): Ensure df_to_escaped_csv handles NULL (#20151)
Co-authored-by: John Bodley <jo...@airbnb.com>
(cherry picked from commit 97ce920d493d126ddcff93b9e46cdde1c5c8bb69)
---
superset/utils/csv.py | 9 +++++++--
tests/integration_tests/utils/csv_tests.py | 4 ++++
2 files changed, 11 insertions(+), 2 deletions(-)
diff --git a/superset/utils/csv.py b/superset/utils/csv.py
index 42d2c55783..cf73c99dfa 100644
--- a/superset/utils/csv.py
+++ b/superset/utils/csv.py
@@ -19,6 +19,7 @@ import urllib.request
from typing import Any, Dict, Optional
from urllib.error import URLError
+import numpy as np
import pandas as pd
import simplejson
@@ -64,8 +65,12 @@ def df_to_escaped_csv(df: pd.DataFrame, **kwargs: Any) -> Any:
# Escape csv headers
df = df.rename(columns=escape_values)
- # Escape csv rows
- df = df.applymap(escape_values)
+ # Escape csv values
+ for name, column in df.items():
+ if column.dtype == np.dtype(object):
+ for idx, value in enumerate(column.values):
+ if isinstance(value, str):
+ df.at[idx, name] = escape_value(value)
return df.to_csv(**kwargs)
diff --git a/tests/integration_tests/utils/csv_tests.py b/tests/integration_tests/utils/csv_tests.py
index bf6110c639..e514efb1d2 100644
--- a/tests/integration_tests/utils/csv_tests.py
+++ b/tests/integration_tests/utils/csv_tests.py
@@ -17,6 +17,7 @@
import io
import pandas as pd
+import pyarrow as pa
import pytest
from superset.utils import csv
@@ -77,3 +78,6 @@ def test_df_to_escaped_csv():
["a", "'=b"], # pandas seems to be removing the leading ""
["' =a", "b"],
]
+
+ df = pa.array([1, None]).to_pandas(integer_object_nulls=True).to_frame()
+ assert csv.df_to_escaped_csv(df, encoding="utf8", index=False) == '0\n1\n""\n'