You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@superset.apache.org by GitBox <gi...@apache.org> on 2018/02/28 06:13:08 UTC

[GitHub] graceguo-supercat closed pull request #4488: utf8 fixes to csv -> hive upload

graceguo-supercat closed pull request #4488: utf8 fixes to csv -> hive upload
URL: https://github.com/apache/incubator-superset/pull/4488
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

Because this is a foreign pull request (from a fork), GitHub does not
display its diff after the merge, so the diff is supplied below:

diff --git a/setup.py b/setup.py
index 2c14b90a45..43744a4e10 100644
--- a/setup.py
+++ b/setup.py
@@ -81,6 +81,7 @@ def get_git_sha():
         'thrift>=0.9.3',
         'thrift-sasl>=0.2.1',
         'unidecode>=0.04.21',
+        'unicodecsv==0.14.1',
         'bleach==2.1.2',
     ],
     extras_require={
diff --git a/superset/db_engine_specs.py b/superset/db_engine_specs.py
index f0e7c67620..c40bbe7f14 100644
--- a/superset/db_engine_specs.py
+++ b/superset/db_engine_specs.py
@@ -17,7 +17,6 @@
 from __future__ import unicode_literals
 
 from collections import defaultdict, namedtuple
-import csv
 import inspect
 import logging
 import os
@@ -34,6 +33,7 @@
 from sqlalchemy.engine.url import make_url
 from sqlalchemy.sql import text
 import sqlparse
+import unicodecsv
 from werkzeug.utils import secure_filename
 
 from superset import app, cache_util, conf, db, utils
@@ -849,7 +849,7 @@ def create_table_from_csv(form, table):
         """Uploads a csv file and creates a superset datasource in Hive."""
         def get_column_names(filepath):
             with open(filepath, 'rb') as f:
-                return csv.reader(f).next()
+                return unicodecsv.reader(f, encoding='utf-8-sig').next()
 
         table_name = form.name.data
         filename = form.csv_file.data.filename
@@ -873,11 +873,12 @@ def get_column_names(filepath):
         s3 = boto3.client('s3')
         location = os.path.join('s3a://', bucket_path, upload_prefix, table_name)
         s3.upload_file(
-            upload_path, 'airbnb-superset',
+            upload_path, bucket_path,
             os.path.join(upload_prefix, table_name, filename))
         sql = """CREATE EXTERNAL TABLE {table_name} ( {schema_definition} )
             ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS
-            TEXTFILE LOCATION '{location}'""".format(**locals())
+            TEXTFILE LOCATION '{location}'
+            tblproperties ('skip.header.line.count'='1')""".format(**locals())
         logging.info(form.con.data)
         engine = create_engine(form.con.data.sqlalchemy_uri)
         engine.execute(sql)


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services