You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@superset.apache.org by mi...@apache.org on 2024/02/15 16:23:04 UTC

(superset) branch master updated: refactor: Updates some database columns to MediumText (#27119)

This is an automated email from the ASF dual-hosted git repository.

michaelsmolina pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/superset.git


The following commit(s) were added to refs/heads/master by this push:
     new 3d645fd8e9 refactor: Updates some database columns to MediumText (#27119)
3d645fd8e9 is described below

commit 3d645fd8e9da757cdcf1c04c0fdccf69169b4d21
Author: Michael S. Molina <70...@users.noreply.github.com>
AuthorDate: Thu Feb 15 11:22:57 2024 -0500

    refactor: Updates some database columns to MediumText (#27119)
---
 UPDATING.md                                        |  2 +
 superset/connectors/sqla/models.py                 |  2 +-
 ...14-43_17fcea065655_change_text_to_mediumtext.py | 87 ++++++++++++++++++++++
 superset/models/annotations.py                     |  5 +-
 superset/models/core.py                            |  4 +-
 superset/models/dashboard.py                       |  2 +-
 superset/models/helpers.py                         |  3 +-
 superset/models/slice.py                           |  4 +-
 superset/models/sql_lab.py                         | 12 +--
 superset/reports/models.py                         | 13 ++--
 10 files changed, 111 insertions(+), 23 deletions(-)

diff --git a/UPDATING.md b/UPDATING.md
index fec78be67d..8f5785ac8c 100644
--- a/UPDATING.md
+++ b/UPDATING.md
@@ -24,6 +24,8 @@ assists people when migrating to a new version.
 
 ## Next
 
+- [27119](https://github.com/apache/superset/pull/27119): Updates various database columns to use the `MediumText` type. The migration only alters columns on MySQL, where it may require table locks and take some time to complete on large deployments.
+
 - [26450](https://github.com/apache/superset/pull/26450): Deprecates the `KV_STORE` feature flag and its related assets such as the API endpoint and `keyvalue` table. The main dependency of this feature is the `SHARE_QUERIES_VIA_KV_STORE` feature flag which allows sharing SQL Lab queries without the necessity of saving the query. Our intention is to use the permalink feature to implement this use case before 5.0 and that's why we are deprecating the feature flag now.
 
 ### Breaking Changes
diff --git a/superset/connectors/sqla/models.py b/superset/connectors/sqla/models.py
index 2552740695..089b9c2f28 100644
--- a/superset/connectors/sqla/models.py
+++ b/superset/connectors/sqla/models.py
@@ -2116,4 +2116,4 @@ class RowLevelSecurityFilter(Model, AuditMixinNullable):
         secondary=RLSFilterTables,
         backref="row_level_security_filters",
     )
-    clause = Column(Text, nullable=False)
+    clause = Column(MediumText(), nullable=False)
diff --git a/superset/migrations/versions/2024-02-14_14-43_17fcea065655_change_text_to_mediumtext.py b/superset/migrations/versions/2024-02-14_14-43_17fcea065655_change_text_to_mediumtext.py
new file mode 100644
index 0000000000..e63ab6ac56
--- /dev/null
+++ b/superset/migrations/versions/2024-02-14_14-43_17fcea065655_change_text_to_mediumtext.py
@@ -0,0 +1,87 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""change_text_to_mediumtext
+
+Revision ID: 17fcea065655
+Revises: 87d38ad83218
+Create Date: 2024-02-14 14:43:39.898093
+
+"""
+
+# revision identifiers, used by Alembic.
+revision = "17fcea065655"
+down_revision = "87d38ad83218"
+
+import sqlalchemy as sa
+from alembic import op
+from sqlalchemy.dialects.mysql.base import MySQLDialect
+
+from superset.utils.core import MediumText
+
+TABLE_COLUMNS = [  # "<table>.<column>" entries altered by upgrade()/downgrade()
+    "annotation.json_metadata",
+    "css_templates.css",
+    "dashboards.css",
+    "keyvalue.value",
+    "query.extra_json",
+    "query.executed_sql",
+    "query.select_sql",
+    "report_execution_log.value_row_json",
+    "report_recipient.recipient_config_json",
+    "report_schedule.sql",
+    "report_schedule.last_value_row_json",
+    "report_schedule.validator_config_json",
+    "report_schedule.extra_json",
+    "row_level_security_filters.clause",
+    "saved_query.sql",
+    "saved_query.extra_json",
+    "sl_columns.extra_json",
+    "sl_datasets.extra_json",
+    "sl_tables.extra_json",
+    "slices.params",
+    "slices.query_context",
+    "ssh_tunnels.extra_json",
+    "tab_state.extra_json",
+    "tab_state.sql",
+    "table_schema.extra_json",
+]
+
+NOT_NULL_COLUMNS = ["keyvalue.value", "row_level_security_filters.clause"]  # altered with existing_nullable=False
+
+
+def upgrade():  # Alter every TABLE_COLUMNS entry from sa.Text to MediumText.
+    if isinstance(op.get_bind().dialect, MySQLDialect):  # any other dialect: no-op
+        for column in TABLE_COLUMNS:
+            with op.batch_alter_table(column.split(".")[0]) as batch_op:  # table part
+                batch_op.alter_column(
+                    column.split(".")[1],  # column part
+                    existing_type=sa.Text(),
+                    type_=MediumText(),
+                    existing_nullable=column not in NOT_NULL_COLUMNS,  # False for NOT NULL columns
+                )
+
+
+def downgrade():  # Reverse of upgrade(): alter each entry from MediumText back to sa.Text.
+    if isinstance(op.get_bind().dialect, MySQLDialect):  # any other dialect: no-op
+        for column in TABLE_COLUMNS:
+            with op.batch_alter_table(column.split(".")[0]) as batch_op:  # table part
+                batch_op.alter_column(
+                    column.split(".")[1],  # column part
+                    existing_type=MediumText(),
+                    type_=sa.Text(),
+                    existing_nullable=column not in NOT_NULL_COLUMNS,  # False for NOT NULL columns
+                )
diff --git a/superset/models/annotations.py b/superset/models/annotations.py
index 54de94e7f6..d8b6f8b1fa 100644
--- a/superset/models/annotations.py
+++ b/superset/models/annotations.py
@@ -22,10 +22,10 @@ from sqlalchemy import Column, DateTime, ForeignKey, Index, Integer, String, Tex
 from sqlalchemy.orm import relationship
 
 from superset.models.helpers import AuditMixinNullable
+from superset.utils.core import MediumText
 
 
 class AnnotationLayer(Model, AuditMixinNullable):
-
     """A logical namespace for a set of annotations"""
 
     __tablename__ = "annotation_layer"
@@ -38,7 +38,6 @@ class AnnotationLayer(Model, AuditMixinNullable):
 
 
 class Annotation(Model, AuditMixinNullable):
-
     """Time-related annotation"""
 
     __tablename__ = "annotation"
@@ -49,7 +48,7 @@ class Annotation(Model, AuditMixinNullable):
     short_descr = Column(String(500))
     long_descr = Column(Text)
     layer = relationship(AnnotationLayer, backref="annotation")
-    json_metadata = Column(Text)
+    json_metadata = Column(MediumText())
 
     __table_args__ = (Index("ti_dag_state", layer_id, start_dttm, end_dttm),)
 
diff --git a/superset/models/core.py b/superset/models/core.py
index a8d0cdeb51..71a6e9d042 100755
--- a/superset/models/core.py
+++ b/superset/models/core.py
@@ -96,7 +96,7 @@ class KeyValue(Model):  # pylint: disable=too-few-public-methods
 
     __tablename__ = "keyvalue"
     id = Column(Integer, primary_key=True)
-    value = Column(Text, nullable=False)
+    value = Column(utils.MediumText(), nullable=False)
 
 
 class CssTemplate(Model, AuditMixinNullable):
@@ -105,7 +105,7 @@ class CssTemplate(Model, AuditMixinNullable):
     __tablename__ = "css_templates"
     id = Column(Integer, primary_key=True)
     template_name = Column(String(250))
-    css = Column(Text, default="")
+    css = Column(utils.MediumText(), default="")
 
 
 class ConfigurationMethod(StrEnum):
diff --git a/superset/models/dashboard.py b/superset/models/dashboard.py
index 01b1bf9624..5570e892ff 100644
--- a/superset/models/dashboard.py
+++ b/superset/models/dashboard.py
@@ -137,7 +137,7 @@ class Dashboard(AuditMixinNullable, ImportExportMixin, Model):
     dashboard_title = Column(String(500))
     position_json = Column(utils.MediumText())
     description = Column(Text)
-    css = Column(Text)
+    css = Column(utils.MediumText())
     certified_by = Column(Text)
     certification_details = Column(Text)
     json_metadata = Column(utils.MediumText())
diff --git a/superset/models/helpers.py b/superset/models/helpers.py
index fb2f959f31..8d3ed36c46 100644
--- a/superset/models/helpers.py
+++ b/superset/models/helpers.py
@@ -89,6 +89,7 @@ from superset.utils.core import (
     get_column_name,
     get_user_id,
     is_adhoc_column,
+    MediumText,
     remove_duplicates,
 )
 from superset.utils.dates import datetime_to_epoch
@@ -584,7 +585,7 @@ class QueryResult:  # pylint: disable=too-few-public-methods
 class ExtraJSONMixin:
     """Mixin to add an `extra` column (JSON) and utility methods"""
 
-    extra_json = sa.Column(sa.Text, default="{}")
+    extra_json = sa.Column(MediumText(), default="{}")
 
     @property
     def extra(self) -> dict[str, Any]:
diff --git a/superset/models/slice.py b/superset/models/slice.py
index b41bb72a85..eb2b220c8f 100644
--- a/superset/models/slice.py
+++ b/superset/models/slice.py
@@ -78,8 +78,8 @@ class Slice(  # pylint: disable=too-many-public-methods
     datasource_type = Column(String(200))
     datasource_name = Column(String(2000))
     viz_type = Column(String(250))
-    params = Column(Text)
-    query_context = Column(Text)
+    params = Column(utils.MediumText())
+    query_context = Column(utils.MediumText())
     description = Column(Text)
     cache_timeout = Column(Integer)
     perm = Column(String(1000))
diff --git a/superset/models/sql_lab.py b/superset/models/sql_lab.py
index a0e9fa6b6e..f4724d6dab 100644
--- a/superset/models/sql_lab.py
+++ b/superset/models/sql_lab.py
@@ -55,7 +55,7 @@ from superset.models.helpers import (
 )
 from superset.sql_parse import CtasMethod, ParsedQuery, Table
 from superset.sqllab.limiting_factor import LimitingFactor
-from superset.utils.core import get_column_name, QueryStatus, user_label
+from superset.utils.core import get_column_name, MediumText, QueryStatus, user_label
 
 if TYPE_CHECKING:
     from superset.connectors.sqla.models import TableColumn
@@ -88,11 +88,11 @@ class Query(
     tab_name = Column(String(256))
     sql_editor_id = Column(String(256))
     schema = Column(String(256))
-    sql = Column(Text)
+    sql = Column(MediumText())
     # Query to retrieve the results,
     # used only in case of select_as_cta_used is true.
-    select_sql = Column(Text)
-    executed_sql = Column(Text)
+    select_sql = Column(MediumText())
+    executed_sql = Column(MediumText())
     # Could be configured in the superset config.
     limit = Column(Integer)
     limiting_factor = Column(
@@ -365,7 +365,7 @@ class SavedQuery(AuditMixinNullable, ExtraJSONMixin, ImportExportMixin, Model):
     schema = Column(String(128))
     label = Column(String(256))
     description = Column(Text)
-    sql = Column(Text)
+    sql = Column(MediumText())
     template_parameters = Column(Text)
     user = relationship(
         security_manager.user_model,
@@ -467,7 +467,7 @@ class TabState(AuditMixinNullable, ExtraJSONMixin, Model):
     )
 
     # the query in the textarea, and results (if any)
-    sql = Column(Text)
+    sql = Column(MediumText())
     query_limit = Column(Integer)
 
     # latest query that was run
diff --git a/superset/reports/models.py b/superset/reports/models.py
index 59135cda6c..022db5dc6f 100644
--- a/superset/reports/models.py
+++ b/superset/reports/models.py
@@ -41,6 +41,7 @@ from superset.models.helpers import AuditMixinNullable, ExtraJSONMixin
 from superset.models.slice import Slice
 from superset.reports.types import ReportScheduleExtra
 from superset.utils.backports import StrEnum
+from superset.utils.core import MediumText
 
 metadata = Model.metadata  # pylint: disable=no-member
 
@@ -108,7 +109,6 @@ report_schedule_user = Table(
 
 
 class ReportSchedule(AuditMixinNullable, ExtraJSONMixin, Model):
-
     """
     Report Schedules, supports alerts and reports
     """
@@ -128,7 +128,7 @@ class ReportSchedule(AuditMixinNullable, ExtraJSONMixin, Model):
     )
     timezone = Column(String(100), default="UTC", nullable=False)
     report_format = Column(String(50), default=ReportDataFormat.VISUALIZATION)
-    sql = Column(Text())
+    sql = Column(MediumText())
     # (Alerts/Reports) M-O to chart
     chart_id = Column(Integer, ForeignKey("slices.id"), nullable=True)
     chart = relationship(Slice, backref="report_schedules", foreign_keys=[chart_id])
@@ -150,11 +150,11 @@ class ReportSchedule(AuditMixinNullable, ExtraJSONMixin, Model):
     last_eval_dttm = Column(DateTime)
     last_state = Column(String(50), default=ReportState.NOOP)
     last_value = Column(Float)
-    last_value_row_json = Column(Text)
+    last_value_row_json = Column(MediumText())
 
     # (Alerts) Observed value validation related columns
     validator_type = Column(String(100))
-    validator_config_json = Column(Text, default="{}")
+    validator_config_json = Column(MediumText(), default="{}")
 
     # Log retention
     log_retention = Column(Integer, default=90)
@@ -187,7 +187,7 @@ class ReportRecipients(Model, AuditMixinNullable):
     __tablename__ = "report_recipient"
     id = Column(Integer, primary_key=True)
     type = Column(String(50), nullable=False)
-    recipient_config_json = Column(Text, default="{}")
+    recipient_config_json = Column(MediumText(), default="{}")
     report_schedule_id = Column(
         Integer, ForeignKey("report_schedule.id"), nullable=False
     )
@@ -203,7 +203,6 @@ class ReportRecipients(Model, AuditMixinNullable):
 
 
 class ReportExecutionLog(Model):  # pylint: disable=too-few-public-methods
-
     """
     Report Execution Log, hold the result of the report execution with timestamps,
     last observation and possible error messages
@@ -220,7 +219,7 @@ class ReportExecutionLog(Model):  # pylint: disable=too-few-public-methods
 
     # (Alerts) Observed values
     value = Column(Float)
-    value_row_json = Column(Text)
+    value_row_json = Column(MediumText())
 
     state = Column(String(50), nullable=False)
     error_message = Column(Text)