You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@superset.apache.org by yj...@apache.org on 2021/02/07 20:02:25 UTC

[superset] branch master updated: refactor: speed up conversion from dataframe to list of records (#12806)

This is an automated email from the ASF dual-hosted git repository.

yjc pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/superset.git


The following commit(s) were added to refs/heads/master by this push:
     new b56aec7  refactor: speed up conversion from dataframe to list of records (#12806)
b56aec7 is described below

commit b56aec763dbe00f4f706d434ef6420e489e082b7
Author: Tom <38...@users.noreply.github.com>
AuthorDate: Sun Feb 7 20:01:28 2021 +0000

    refactor: speed up conversion from dataframe to list of records (#12806)
---
 superset/dataframe.py | 38 +++++++++++++++++++++++++++++---------
 1 file changed, 29 insertions(+), 9 deletions(-)

diff --git a/superset/dataframe.py b/superset/dataframe.py
index e8cd0d1..b0e7a0e 100644
--- a/superset/dataframe.py
+++ b/superset/dataframe.py
@@ -16,6 +16,7 @@
 # under the License.
 """ Superset utilities for pandas.DataFrame.
 """
+import warnings
 from typing import Any, Dict, List
 
 import pandas as pd
@@ -23,13 +24,32 @@ import pandas as pd
 from superset.utils.core import JS_MAX_INTEGER
 
 
+def _convert_big_integers(val: Any) -> Any:
+    """
+    Cast integers larger than ``JS_MAX_INTEGER`` to strings.
+
+    :param val: the value to process
+    :returns: the same value but recast as a string if it was an integer over
+        ``JS_MAX_INTEGER``
+    """
+    return str(val) if isinstance(val, int) and abs(val) > JS_MAX_INTEGER else val
+
+
 def df_to_records(dframe: pd.DataFrame) -> List[Dict[str, Any]]:
-    data: List[Dict[str, Any]] = dframe.to_dict(orient="records")
-    # TODO: refactor this
-    for row in data:
-        for key, value in list(row.items()):
-            # if an int is too big for JavaScript to handle
-            # convert it to a string
-            if isinstance(value, int) and abs(value) > JS_MAX_INTEGER:
-                row[key] = str(value)
-    return data
+    """
+    Convert a DataFrame to a list of records.
+
+    :param dframe: the DataFrame to convert
+    :returns: a list of dictionaries reflecting each single row of the DataFrame
+    """
+    if not dframe.columns.is_unique:
+        warnings.warn(
+            "DataFrame columns are not unique, some columns will be omitted.",
+            UserWarning,
+            stacklevel=2,
+        )
+    columns = dframe.columns
+    return list(
+        dict(zip(columns, map(_convert_big_integers, row)))
+        for row in zip(*[dframe[col] for col in columns])
+    )