You are viewing a plain text version of this content. The canonical link for it is here.
Posted to notifications@superset.apache.org by GitBox <gi...@apache.org> on 2019/11/20 09:36:17 UTC

[GitHub] [incubator-superset] Sascha-Gschwind commented on a change in pull request #8556: [WIP] [SIP-27] Proposal for Improvement on CSV Upload

Sascha-Gschwind commented on a change in pull request #8556: [WIP] [SIP-27] Proposal for Improvement on CSV Upload
URL: https://github.com/apache/incubator-superset/pull/8556#discussion_r348376648
 
 

 ##########
 File path: superset/views/core.py
 ##########
 @@ -3054,6 +3083,259 @@ def schemas_access_for_csv_upload(self):
                 stacktrace=utils.get_stacktrace(),
             )
 
+    @api
+    @has_access_api
+    @expose("/csvtodatabase/add", methods=["POST"])
+    def add(self) -> Response:
+        """ import a csv into a Table
+
+        Handles the logic for importing the csv into a Table in a given or newly created Database
+        returns HTTP Status Codes (200 for ok, 400 for errors)
+        Request body contains multipart/form-data
+        Form content:
+        form -- contains the properties for the table to be created
+        file -- csv file to be imported
+        """
+        form_data = request.form
+        csv_file = request.files["file"]
+        csv_filename = secure_filename(csv_file.filename)
+        if len(csv_filename) == 0:
+            return json_error_response("Filename is not allowed", status=400)
+        database_id = form_data["connectionId"]
+        # check for possible SQL-injection, filter_by does not sanitize the input therefore we have to check
+        # this beforehand
+        try:
+            database_id = int(database_id)
+        except ValueError:
+            message = _(
+                "Possible tampering detected, non-numeral character in database-id"
+            )
+            return json_error_response(message, status=400)
+
+        try:
+            if database_id != -1:
+                schema = None if not form_data["schema"] else form_data["schema"]
+                database = self._get_existing_database(database_id, schema)
+                db_name = database.database_name
+            else:
+                db_name = (
+                    csv_filename[:-4]
+                    if not form_data["databaseName"]
+                    else form_data["databaseName"]
+                )
+                db_name = secure_filename(db_name)
+                if len(db_name) == 0:
+                    return json_error_response(
+                        "Database name is not allowed", status=400
+                    )
+                database = self._create_database(db_name)
+        except Exception as e:
+            return json_error_response(e.args[0], status=400)
+
+        try:
+            path = self._check_and_save_csv(csv_file, csv_filename)
+            self._create_table(form_data, database, csv_filename)
+        except Exception as e:
+            try:
+                os.remove(os.getcwd() + "/" + db_name + ".db")
+            except OSError:
+                pass
+            try:
+                if database_id == -1:
+                    db.session.rollback()
+                    db.session.delete(database)
+                    db.session.commit()
+            except Exception:
+                pass
+            if hasattr(e, "orig"):
+                # pylint: disable=no-member
+                if isinstance(e.orig, IntegrityError):  # type: ignore
+                    message = "Table {0} could not be created".format(
+                        form_data["tableName"]
+                    )
+                # pylint: disable:no-member
+                elif isinstance(e.orig, OperationalError):  # type: ignore
+                    message = "Schema {0} is not allowed in a SQLite database".format(
+                        form_data["schema"]
+                    )
+                else:
+                    message = str(e)
+            return json_error_response(message, status=400)
+        finally:
+            try:
+                os.remove(path)
+            except OSError:
+                pass
+
+        stats_logger.incr("successful_csv_upload")
+        message = "{} imported into database {}".format(form_data["tableName"], db_name)
+        flash(message, "success")
+        return json_success(
+            '"{} imported into database {}"'.format(form_data["tableName"], db_name)
+        )
+
+    def _create_database(self, db_name: str):
+        """ Creates the Database itself as well as the Superset Connection to it
+
+        Keyword arguments:
+        db_name -- the name for the database to be created
+
+        Raises:
+            ValueError: If a file with the database name already exists in the folder
+            Exception: If the Database could not be created
+        """
+        db_path = os.getcwd() + "/" + db_name + ".db"
+        if os.path.isfile(db_path):
+            message = "Database file for {0} already exists, please choose a different name".format(
+                db_name
+            )
+            raise ValueError(message)
+        try:
+            item = SQLAInterface(models.Database).obj()
+            item.database_name = db_name
+            item.sqlalchemy_uri = "sqlite:///" + db_path
+            item.allow_csv_upload = True
+            # TODO check if SQL-injection is possible through add()
+            db.session.add(item)
+            db.session.commit()
+            return item
+        except Exception as e:
+            message = (
+                "Error when trying to create Database"
+                if isinstance(e, IntegrityError)
+                else str(e)
+            )
+            try:
+                if os.path.isfile(db_path):
+                    os.remove(db_path)
+                db.session.delete(item)
+                db.session.commit()
+            except OSError:
+                message = _(
+                    "Error when trying to create Database and Database-File could not be removed. "
+                    "Please contact your administrator to remove it manually"
+                )
+                pass
+            except Exception:
+                pass
+            stats_logger.incr("failed_csv_upload")
+            raise Exception(message)
+
+    def _get_existing_database(self, database_id: int, schema):
+        """Returns the database object for an existing database
+
+        Keyword arguments:
+        database_id -- the ID used to identify the database
+        schema -- the schema to be used
+
+        Raises:
+            ValueError: 1. If the schema is not allowed for csv upload
+                        2. If the database ID is not valid
+                        3. If there was a problem getting the schema
+        """
+        try:
+            database = self._get_database_by_id(database_id)
+            if not self._is_schema_allowed(database, schema):
+                message = _(
+                    "Database {0} Schema {1} is not allowed for csv uploads. "
+                    "Please contact your Superset administrator".format(
+                        database.database_name, schema
+                    )
+                )
+                raise ValueError(message)
+            return database
+        except Exception as e:
+            raise ValueError(e.args[0])
+
+    def _get_database_by_id(self, database_id: int) -> models.Database:
+        """ Returns the Database for the given ID
+
+        Keyword arguments:
+        database_id -- The ID which is used to identify the Database byo
+
+        Raises:
+            ValueError: If the database ID is not valid, i.e. matches no database
+        """
+
+        dbs = db.session.query(models.Database).filter_by(id=database_id).all()
 
 Review comment:
   Should be fixed with [PR 33](https://github.com/Sascha-Gschwind/incubator-superset/pull/33). We changed it accordingly

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: notifications-unsubscribe@superset.apache.org
For additional commands, e-mail: notifications-help@superset.apache.org