You are viewing a plain text version of this content. The canonical link for it is here.
Posted to notifications@superset.apache.org by GitBox <gi...@apache.org> on 2021/08/17 20:04:02 UTC

[GitHub] [superset] eschutho commented on a change in pull request #16289: fix: improve pivot post-processing

eschutho commented on a change in pull request #16289:
URL: https://github.com/apache/superset/pull/16289#discussion_r690683154



##########
File path: superset/charts/post_processing.py
##########
@@ -27,60 +27,151 @@
 """
 
 from io import StringIO
-from typing import Any, Callable, Dict, Optional, Union
+from typing import Any, Dict, List, Optional, Tuple
 
 import pandas as pd
 
 from superset.utils.core import DTTM_ALIAS, extract_dataframe_dtypes, get_metric_name
 
 
-def sql_like_sum(series: pd.Series) -> pd.Series:
+def get_column_key(label: Tuple[str, ...], metrics: List[str]) -> Tuple[Any, ...]:
     """
-    A SUM aggregation function that mimics the behavior from SQL.
-    """
-    return series.sum(min_count=1)
-
+    Sort columns when combining metrics.
 
-def pivot_table(df: pd.DataFrame, form_data: Dict[str, Any]) -> pd.DataFrame:
+    MultiIndex labels have the metric name as the last element in the
+    tuple. We want to sort these according to the list of passed metrics.
     """
-    Pivot table.
-    """
-    if form_data.get("granularity") == "all" and DTTM_ALIAS in df:
-        del df[DTTM_ALIAS]
-
-    metrics = [get_metric_name(m) for m in form_data["metrics"]]
-    aggfuncs: Dict[str, Union[str, Callable[[Any], Any]]] = {}
-    for metric in metrics:
-        aggfunc = form_data.get("pandas_aggfunc") or "sum"
-        if pd.api.types.is_numeric_dtype(df[metric]):
-            if aggfunc == "sum":
-                aggfunc = sql_like_sum
-        elif aggfunc not in {"min", "max"}:
-            aggfunc = "max"
-        aggfuncs[metric] = aggfunc
-
-    groupby = form_data.get("groupby") or []
-    columns = form_data.get("columns") or []
-    if form_data.get("transpose_pivot"):
-        groupby, columns = columns, groupby
-
-    df = df.pivot_table(
-        index=groupby,
-        columns=columns,
-        values=metrics,
-        aggfunc=aggfuncs,
-        margins=form_data.get("pivot_margins"),
-    )
-
-    # Display metrics side by side with each column
-    if form_data.get("combine_metric"):
-        df = df.stack(0).unstack().reindex(level=-1, columns=metrics)
-
-    # flatten column names
-    df.columns = [
-        " ".join(str(name) for name in column) if isinstance(column, tuple) else column
-        for column in df.columns
-    ]
+    parts: List[Any] = list(label)
+    metric = parts[-1]
+    parts[-1] = metrics.index(metric)

Review comment:
       Would it be more or less Pythonic to pop the last element and then append (push) the metric's index instead?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@superset.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: notifications-unsubscribe@superset.apache.org
For additional commands, e-mail: notifications-help@superset.apache.org