You are viewing a plain text version of this content. The canonical link for it is here.
Posted to notifications@superset.apache.org by GitBox <gi...@apache.org> on 2021/11/10 13:46:15 UTC

[GitHub] [superset] dpgaspar commented on a change in pull request #17337: feat: Adds a key-value store endpoint for Superset

dpgaspar commented on a change in pull request #17337:
URL: https://github.com/apache/superset/pull/17337#discussion_r746576108



##########
File path: superset/key_value/api.py
##########
@@ -0,0 +1,324 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+import logging
+
+from flask import g, request, Response
+from flask_appbuilder.api import expose, permission_name, protect, safe
+from flask_appbuilder.models.sqla.interface import SQLAInterface
+
+from superset.constants import MODEL_API_RW_METHOD_PERMISSION_MAP, RouteMethod
+from superset.extensions import event_logger
+from superset.key_value.commands.create import CreateKeyValueCommand
+from superset.key_value.commands.delete import DeleteKeyValueCommand
+from superset.key_value.commands.exceptions import (
+    KeyValueCreateFailedError,
+    KeyValueDeleteFailedError,
+    KeyValueGetFailedError,
+    KeyValueUpdateFailedError,
+)
+from superset.key_value.commands.get import GetKeyValueCommand
+from superset.key_value.commands.update import UpdateKeyValueCommand
+from superset.key_value.dao import KeyValueDAO
+from superset.key_value.schemas import KeyValueSchema
+from superset.models.key_value import KeyValue
+from superset.views.base_api import BaseSupersetModelRestApi, statsd_metrics
+
+logger = logging.getLogger(__name__)
+
+
+class KeyValueRestApi(BaseSupersetModelRestApi):
+    datamodel = SQLAInterface(KeyValue)
+    schema = KeyValueSchema()

Review comment:
       nit: let's use the `add_model_schema` and `edit_model_schema` attrs, they already exist and their goal is exactly this

##########
File path: superset/key_value/api.py
##########
@@ -0,0 +1,324 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+import logging
+
+from flask import g, request, Response
+from flask_appbuilder.api import expose, permission_name, protect, safe
+from flask_appbuilder.models.sqla.interface import SQLAInterface
+
+from superset.constants import MODEL_API_RW_METHOD_PERMISSION_MAP, RouteMethod
+from superset.extensions import event_logger
+from superset.key_value.commands.create import CreateKeyValueCommand
+from superset.key_value.commands.delete import DeleteKeyValueCommand
+from superset.key_value.commands.exceptions import (
+    KeyValueCreateFailedError,
+    KeyValueDeleteFailedError,
+    KeyValueGetFailedError,
+    KeyValueUpdateFailedError,
+)
+from superset.key_value.commands.get import GetKeyValueCommand
+from superset.key_value.commands.update import UpdateKeyValueCommand
+from superset.key_value.dao import KeyValueDAO
+from superset.key_value.schemas import KeyValueSchema
+from superset.models.key_value import KeyValue
+from superset.views.base_api import BaseSupersetModelRestApi, statsd_metrics
+
+logger = logging.getLogger(__name__)
+
+
+class KeyValueRestApi(BaseSupersetModelRestApi):
+    datamodel = SQLAInterface(KeyValue)
+    schema = KeyValueSchema()
+    class_permission_name = "KeyValue"
+    resource_name = "key_value_store"
+    method_permission_name = MODEL_API_RW_METHOD_PERMISSION_MAP
+    include_route_methods = {
+        RouteMethod.POST,
+        RouteMethod.PUT,
+        RouteMethod.GET,
+        RouteMethod.DELETE,
+    }
+    allow_browser_login = True
+    openapi_spec_tag = "Key Value Store"
+
+    @expose("/", methods=["POST"])
+    @protect()
+    @safe
+    @statsd_metrics
+    @event_logger.log_this_with_context(
+        action=lambda self, *args, **kwargs: f"{self.__class__.__name__}.post",
+        log_to_statsd=False,
+    )
+    def post(self) -> Response:
+        """Stores a new value.
+        ---
+        post:
+          description: >-
+            Stores a new value.
+          requestBody:
+            description: Key value schema
+            required: true
+            content:
+              application/json:
+                schema:
+                    type: object
+                    properties:
+                      value:
+                        type: string
+                        description: Any type of JSON supported value.
+                        required: true
+                      duration_ms:
+                        type: number
+                        description: The duration of the value on the key store. If no duration is specified the value won't expire.
+                        required: false
+                        default: null
+                      reset_duration_on_retrieval:
+                        type: boolean
+                        description: If the duration should be reset when the value is retrieved. This is useful if you wish to expire unused values but keep the ones that are actively retrieved.
+                        required: false
+                        default: true
+          responses:
+            201:
+              description: The value was stored successfully. It returns the key to retrieve the value.
+              content:
+                application/json:

Review comment:
       Create a marshmallow schema for the response and reference it here, you can register new marshmallow schemas like this: https://github.com/apache/superset/blob/master/superset/dashboards/api.py#L202
   and then reference them like: https://github.com/apache/superset/blob/master/superset/dashboards/api.py#L323

##########
File path: superset/models/key_value.py
##########
@@ -0,0 +1,34 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from datetime import datetime
+
+from flask_appbuilder import Model
+from sqlalchemy import Boolean, Column, DateTime, String, ForeignKey, Integer, Text
+
+
+class KeyValue(Model):
+
+    """Key value store entity"""
+
+    __tablename__ = "key_value"
+    key = Column(String(256), primary_key=True)
+    value = Column(Text, nullable=False)

Review comment:
       if `value` is potentially sensitive, we should encrypt this field

##########
File path: superset/key_value/commands/cleanup.py
##########
@@ -0,0 +1,47 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+import logging
+from datetime import datetime, timedelta
+
+from superset.commands.base import BaseCommand
+from superset.models.key_value import KeyValue
+from superset.key_value.utils import is_expired
+from superset.utils.celery import session_scope
+
+logger = logging.getLogger(__name__)
+
+
+class CleanupCommand(BaseCommand):
+    """
+    Expiration cleanup command.
+    """
+
+    def __init__(self, worker_context: bool = True):
+        self._worker_context = worker_context
+
+    def run(self) -> None:
+        logger.info("Key value store cleanup starting")
+        with session_scope(nullpool=True) as session:
+           for keyValue in session.query(KeyValue).all():
+                if is_expired(keyValue):

Review comment:
       There must be some way to restrict this full table scan `session.query(KeyValue).all()`. What if, when we create/update a key we timestamp an expiration date time then filter by it?

##########
File path: superset/key_value/api.py
##########
@@ -0,0 +1,324 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+import logging
+
+from flask import g, request, Response
+from flask_appbuilder.api import expose, permission_name, protect, safe
+from flask_appbuilder.models.sqla.interface import SQLAInterface
+
+from superset.constants import MODEL_API_RW_METHOD_PERMISSION_MAP, RouteMethod
+from superset.extensions import event_logger
+from superset.key_value.commands.create import CreateKeyValueCommand
+from superset.key_value.commands.delete import DeleteKeyValueCommand
+from superset.key_value.commands.exceptions import (
+    KeyValueCreateFailedError,
+    KeyValueDeleteFailedError,
+    KeyValueGetFailedError,
+    KeyValueUpdateFailedError,
+)
+from superset.key_value.commands.get import GetKeyValueCommand
+from superset.key_value.commands.update import UpdateKeyValueCommand
+from superset.key_value.dao import KeyValueDAO
+from superset.key_value.schemas import KeyValueSchema
+from superset.models.key_value import KeyValue
+from superset.views.base_api import BaseSupersetModelRestApi, statsd_metrics
+
+logger = logging.getLogger(__name__)
+
+
+class KeyValueRestApi(BaseSupersetModelRestApi):
+    datamodel = SQLAInterface(KeyValue)
+    schema = KeyValueSchema()
+    class_permission_name = "KeyValue"
+    resource_name = "key_value_store"
+    method_permission_name = MODEL_API_RW_METHOD_PERMISSION_MAP
+    include_route_methods = {
+        RouteMethod.POST,
+        RouteMethod.PUT,
+        RouteMethod.GET,
+        RouteMethod.DELETE,
+    }
+    allow_browser_login = True
+    openapi_spec_tag = "Key Value Store"
+
+    @expose("/", methods=["POST"])
+    @protect()
+    @safe
+    @statsd_metrics
+    @event_logger.log_this_with_context(
+        action=lambda self, *args, **kwargs: f"{self.__class__.__name__}.post",
+        log_to_statsd=False,
+    )
+    def post(self) -> Response:
+        """Stores a new value.
+        ---
+        post:
+          description: >-
+            Stores a new value.
+          requestBody:
+            description: Key value schema
+            required: true
+            content:
+              application/json:
+                schema:
+                    type: object

Review comment:
       it's shorter to just use a reference to the marshmallow schema like this: https://github.com/apache/superset/blob/master/superset/dashboards/api.py#L427
   

##########
File path: superset/key_value/commands/create.py
##########
@@ -0,0 +1,61 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+import logging
+from hashlib import sha3_256
+from typing import Any, Dict, List
+from uuid import uuid4
+
+from flask import current_app
+from flask_appbuilder.models.sqla import Model
+from flask_appbuilder.security.sqla.models import User
+from marshmallow import ValidationError
+
+from superset.commands.base import BaseCommand
+from superset.dao.exceptions import DAOException
+from superset.key_value.commands.exceptions import KeyValueCreateFailedError
+from superset.key_value.dao import KeyValueDAO
+from superset.models.key_value import KeyValue
+
+logger = logging.getLogger(__name__)
+
+
+class CreateKeyValueCommand(BaseCommand):
+    def __init__(self, user: User, data: Dict[str, Any]):
+        self._actor = user
+        self._properties = data.copy()
+
+    def run(self) -> Model:
+        try:
+            secretKey = current_app.config["SECRET_KEY"]
+            key = sha3_256(secretKey.encode() + str(uuid4()).encode()).hexdigest()

Review comment:
       probably does not make any diference to the solution itself, but I would prefer to keep the `SECRET_KEY` out of this, think secret key rotation, something that some organisations are already implementing (no impact to the solution but). I think UUID4 over sha3_256 is good enough, UUID4 possible combinations 2^128. We can increase this by using sha3_512 for example 

##########
File path: superset/key_value/utils.py
##########
@@ -0,0 +1,27 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from datetime import datetime, timedelta
+from superset.models.key_value import KeyValue
+from sqlalchemy import Boolean
+
+
+def is_expired(keyValue: KeyValue) -> Boolean:

Review comment:
       Highly optional, even debatable, but we could implement this on the model itself

##########
File path: superset/key_value/commands/get.py
##########
@@ -0,0 +1,50 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+import logging
+from typing import Any, Dict, List
+from datetime import datetime
+from flask_appbuilder.models.sqla import Model
+from flask_appbuilder.security.sqla.models import User
+from marshmallow import ValidationError
+
+from superset.key_value.commands.exceptions import KeyValueGetFailedError
+from superset.key_value.dao import KeyValueDAO
+from superset.commands.base import BaseCommand
+from superset.dao.exceptions import DAOException
+
+logger = logging.getLogger(__name__)
+
+
+class GetKeyValueCommand(BaseCommand):
+    def __init__(self, user: User, key: str):
+        self._actor = user
+        self._key = key
+
+    def run(self) -> Model:
+        try:
+            model = KeyValueDAO.find_by_id(self._key)
+            if not model:
+                return None
+            setattr(model, 'retrieved_on', datetime.utcnow())

Review comment:
       curious, are you avoiding auditmixin?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@superset.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: notifications-unsubscribe@superset.apache.org
For additional commands, e-mail: notifications-help@superset.apache.org