You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@superset.apache.org by bk...@apache.org on 2020/06/11 00:08:19 UTC
[incubator-superset] branch master updated: [csv upload][hive] support other delimiters (#9971)
This is an automated email from the ASF dual-hosted git repository.
bkyryliuk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-superset.git
The following commit(s) were added to refs/heads/master by this push:
new 8744dad [csv upload][hive] support other delimiters (#9971)
8744dad is described below
commit 8744dadca8f98a84c7cbdbd098e53e435627063d
Author: serenajiang <se...@berkeley.edu>
AuthorDate: Wed Jun 10 17:08:02 2020 -0700
[csv upload][hive] support other delimiters (#9971)
Co-authored-by: serena-jiang <se...@airbnb.com>
---
superset/db_engine_specs/hive.py | 19 ++++++++++++-------
1 file changed, 12 insertions(+), 7 deletions(-)
diff --git a/superset/db_engine_specs/hive.py b/superset/db_engine_specs/hive.py
index afa1103..f99b180 100644
--- a/superset/db_engine_specs/hive.py
+++ b/superset/db_engine_specs/hive.py
@@ -24,7 +24,7 @@ from urllib import parse
import pandas as pd
from flask import g
-from sqlalchemy import Column
+from sqlalchemy import Column, text
from sqlalchemy.engine.base import Engine
from sqlalchemy.engine.reflection import Inspector
from sqlalchemy.engine.url import make_url, URL
@@ -182,13 +182,18 @@ class HiveEngineSpec(PrestoEngineSpec):
bucket_path,
os.path.join(upload_prefix, table.table, os.path.basename(filename)),
)
-
- # TODO(bkyryliuk): support other delimiters
- sql = f"""CREATE TABLE {str(table)} ( {schema_definition} )
- ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS
- TEXTFILE LOCATION '{location}'
+ sql = text(
+ f"""CREATE TABLE {str(table)} ( {schema_definition} )
+ ROW FORMAT DELIMITED FIELDS TERMINATED BY :delim
+ STORED AS TEXTFILE LOCATION :location
tblproperties ('skip.header.line.count'='1')"""
- engine.execute(sql)
+ )
+ engine = cls.get_engine(database)
+ engine.execute(
+ sql,
+ delim=csv_to_df_kwargs["sep"].encode().decode("unicode_escape"),
+ location=location,
+ )
@classmethod
def convert_dttm(cls, target_type: str, dttm: datetime) -> Optional[str]: