You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@superset.apache.org by ma...@apache.org on 2024/03/27 00:59:53 UTC

(superset) 01/01: fix: row limits & row count labels are confusing

This is an automated email from the ASF dual-hosted git repository.

maximebeauchemin pushed a commit to branch sql_rowcount
in repository https://gitbox.apache.org/repos/asf/superset.git

commit 6e53fbb07695c75de0e3a085db15521fbd814235
Author: Maxime Beauchemin <ma...@gmail.com>
AuthorDate: Tue Mar 26 14:31:56 2024 -0700

    fix: row limits & row count labels are confusing
    
    Currently, in explore & dashboard, we usually apply a row limit
    on the query we issue against the database.
    
    Following this, some visualization backend code does some
    post-processing on data frames using pandas, typically pivoting
    some things, which affects the result set's row count in
    intricate capacities.
    
    Now from a UX standpoint the user is exposed with:
    - row limits in the control panels
    - row count in various areas of the UI where visualiztion, preview,
      samples and raw results are shown
    
    Currently we show the rowcount that's from row-processing. So maybe
    a hard limit was applied at 1000 rows on the database, but we
    pivot and it goes does to say 532, and we show the user that
    we haven't hit the limit when we actually did.
    
    Also note that the component that shows rowcount is supposed
    to turn red if limit is hit, letting the user know they are looking at
    truncated data.
---
 superset-frontend/src/explore/components/ChartPills.tsx | 2 +-
 superset/common/query_actions.py                        | 2 ++
 superset/common/query_context_processor.py              | 1 +
 superset/common/utils/query_cache_manager.py            | 5 +++++
 superset/models/helpers.py                              | 1 +
 superset/views/core.py                                  | 2 ++
 superset/viz.py                                         | 2 ++
 7 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/superset-frontend/src/explore/components/ChartPills.tsx b/superset-frontend/src/explore/components/ChartPills.tsx
index 99f7c5aad5..489ae146aa 100644
--- a/superset-frontend/src/explore/components/ChartPills.tsx
+++ b/superset-frontend/src/explore/components/ChartPills.tsx
@@ -66,7 +66,7 @@ export const ChartPills = forwardRef(
         >
           {!isLoading && firstQueryResponse && (
             <RowCountLabel
-              rowcount={Number(firstQueryResponse.rowcount) || 0}
+              rowcount={Number(firstQueryResponse.sql_rowcount) || 0}
               limit={Number(rowLimit) || 0}
             />
           )}
diff --git a/superset/common/query_actions.py b/superset/common/query_actions.py
index d73a99d027..bdbccc78db 100644
--- a/superset/common/query_actions.py
+++ b/superset/common/query_actions.py
@@ -135,6 +135,8 @@ def _get_full(
             "data": payload.get("data"),
             "colnames": payload.get("colnames"),
             "coltypes": payload.get("coltypes"),
+            "rowcount": payload.get("rowcount"),
+            "sql_rowcount": payload.get("sql_rowcount"),
         }
     return payload
 
diff --git a/superset/common/query_context_processor.py b/superset/common/query_context_processor.py
index d8b5bea4bb..f003f8ed30 100644
--- a/superset/common/query_context_processor.py
+++ b/superset/common/query_context_processor.py
@@ -194,6 +194,7 @@ class QueryContextProcessor:
             "status": cache.status,
             "stacktrace": cache.stacktrace,
             "rowcount": len(cache.df.index),
+            "sql_rowcount": cache.sql_rowcount,
             "from_dttm": query_obj.from_dttm,
             "to_dttm": query_obj.to_dttm,
             "label_map": label_map,
diff --git a/superset/common/utils/query_cache_manager.py b/superset/common/utils/query_cache_manager.py
index a0fb65b20d..d2e6e07437 100644
--- a/superset/common/utils/query_cache_manager.py
+++ b/superset/common/utils/query_cache_manager.py
@@ -64,6 +64,7 @@ class QueryCacheManager:
         is_cached: bool | None = None,
         cache_dttm: str | None = None,
         cache_value: dict[str, Any] | None = None,
+        sql_rowcount: int | None = None,
     ) -> None:
         self.df = df
         self.query = query
@@ -79,6 +80,7 @@ class QueryCacheManager:
         self.is_cached = is_cached
         self.cache_dttm = cache_dttm
         self.cache_value = cache_value
+        self.sql_rowcount = sql_rowcount
 
     # pylint: disable=too-many-arguments
     def set_query_result(
@@ -102,6 +104,7 @@ class QueryCacheManager:
             self.rejected_filter_columns = query_result.rejected_filter_columns
             self.error_message = query_result.error_message
             self.df = query_result.df
+            self.sql_rowcount = query_result.sql_rowcount
             self.annotation_data = {} if annotation_data is None else annotation_data
 
             if self.status != QueryStatus.FAILED:
@@ -117,6 +120,7 @@ class QueryCacheManager:
                 "applied_filter_columns": self.applied_filter_columns,
                 "rejected_filter_columns": self.rejected_filter_columns,
                 "annotation_data": self.annotation_data,
+                "sql_rowcount": self.sql_rowcount,
             }
             if self.is_loaded and key and self.status != QueryStatus.FAILED:
                 self.set(
@@ -167,6 +171,7 @@ class QueryCacheManager:
                 query_cache.status = QueryStatus.SUCCESS
                 query_cache.is_loaded = True
                 query_cache.is_cached = cache_value is not None
+                query_cache.sql_rowcount = cache_value.get("sql_rowcount", None)
                 query_cache.cache_dttm = (
                     cache_value["dttm"] if cache_value is not None else None
                 )
diff --git a/superset/models/helpers.py b/superset/models/helpers.py
index 684ef51efa..3e34ee054a 100644
--- a/superset/models/helpers.py
+++ b/superset/models/helpers.py
@@ -583,6 +583,7 @@ class QueryResult:  # pylint: disable=too-few-public-methods
         self.errors = errors or []
         self.from_dttm = from_dttm
         self.to_dttm = to_dttm
+        self.sql_rowcount = len(self.df.index) if not self.df.empty else 0
 
 
 class ExtraJSONMixin:
diff --git a/superset/views/core.py b/superset/views/core.py
index 4faede0f34..287f71e548 100755
--- a/superset/views/core.py
+++ b/superset/views/core.py
@@ -167,6 +167,8 @@ class Superset(BaseSupersetView):
                 "data": payload["df"].to_dict("records"),
                 "colnames": payload.get("colnames"),
                 "coltypes": payload.get("coltypes"),
+                "rowcount": payload.get("rowcount"),
+                "sql_rowcount": payload.get("sql_rowcount"),
             },
         )
 
diff --git a/superset/viz.py b/superset/viz.py
index 7c78f84228..447b36d53d 100644
--- a/superset/viz.py
+++ b/superset/viz.py
@@ -262,6 +262,8 @@ class BaseViz:  # pylint: disable=too-many-public-methods
             "data": payload["df"].to_dict(orient="records"),
             "colnames": payload.get("colnames"),
             "coltypes": payload.get("coltypes"),
+            "rowcount": payload.get("rowcount"),
+            "sql_rowcount": payload.get("sql_rowcount"),
         }
 
     @deprecated(deprecated_in="3.0")