You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by sh...@apache.org on 2017/12/09 14:39:30 UTC
[1/2] kylin git commit: KYLIN-3070 Add a config property for flat
table storage format
Repository: kylin
Updated Branches:
refs/heads/master 7e6aa3471 -> fab517b95
KYLIN-3070 Add a config property for flat table storage format
Signed-off-by: shaofengshi <sh...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/9df8572d
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/9df8572d
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/9df8572d
Branch: refs/heads/master
Commit: 9df8572d787e03577c515b5818f5f11814b321f9
Parents: 7e6aa34
Author: Seva Ostapenko <se...@anovadata.com>
Authored: Wed Dec 6 23:57:48 2017 -0500
Committer: shaofengshi <sh...@apache.org>
Committed: Sat Dec 9 21:58:59 2017 +0800
----------------------------------------------------------------------
.../apache/kylin/common/KylinConfigBase.java | 8 +++++++
.../org/apache/kylin/job/JoinedFlatTable.java | 23 ++++++++++----------
.../test_case_data/sandbox/kylin.properties | 7 ++++++
3 files changed, 27 insertions(+), 11 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/kylin/blob/9df8572d/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
----------------------------------------------------------------------
diff --git a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
index e1a10a8..d0cc3be 100644
--- a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
+++ b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
@@ -663,6 +663,14 @@ abstract public class KylinConfigBase implements Serializable {
return this.getOptional("kylin.source.hive.database-for-flat-table", "default");
}
+ public String getFlatTableStorageFormat() {
+ return this.getOptional("kylin.source.hive.flat-table-storage-format", "SEQUENCEFILE").toUpperCase();
+ }
+
+ public String getFlatTableFieldDelimiter() {
+ return this.getOptional("kylin.source.hive.flat-table-field-delimiter", "\\u001F");
+ }
+
public boolean isHiveRedistributeEnabled() {
return Boolean.parseBoolean(this.getOptional("kylin.source.hive.redistribute-flat-table", "true"));
}
http://git-wip-us.apache.org/repos/asf/kylin/blob/9df8572d/core-job/src/main/java/org/apache/kylin/job/JoinedFlatTable.java
----------------------------------------------------------------------
diff --git a/core-job/src/main/java/org/apache/kylin/job/JoinedFlatTable.java b/core-job/src/main/java/org/apache/kylin/job/JoinedFlatTable.java
index d136ec6..316fc99 100644
--- a/core-job/src/main/java/org/apache/kylin/job/JoinedFlatTable.java
+++ b/core-job/src/main/java/org/apache/kylin/job/JoinedFlatTable.java
@@ -56,11 +56,18 @@ public class JoinedFlatTable {
}
public static String generateCreateTableStatement(IJoinedFlatTableDesc flatDesc, String storageDfsDir) {
- return generateCreateTableStatement(flatDesc, storageDfsDir, "SEQUENCEFILE");
+ String storageFormat = flatDesc.getDataModel().getConfig().getFlatTableStorageFormat();
+ return generateCreateTableStatement(flatDesc, storageDfsDir, storageFormat);
}
public static String generateCreateTableStatement(IJoinedFlatTableDesc flatDesc, String storageDfsDir,
- String format, String filedDelimiter) {
+ String storageFormat) {
+ String fieldDelimiter = flatDesc.getDataModel().getConfig().getFlatTableFieldDelimiter();
+ return generateCreateTableStatement(flatDesc, storageDfsDir, storageFormat, fieldDelimiter);
+ }
+
+ public static String generateCreateTableStatement(IJoinedFlatTableDesc flatDesc, String storageDfsDir,
+ String storageFormat, String filedDelimiter) {
StringBuilder ddl = new StringBuilder();
ddl.append("CREATE EXTERNAL TABLE IF NOT EXISTS " + flatDesc.getTableName() + "\n");
@@ -74,21 +81,15 @@ public class JoinedFlatTable {
ddl.append(colName(col) + " " + getHiveDataType(col.getDatatype()) + "\n");
}
ddl.append(")" + "\n");
- if ("TEXTFILE".equals(format)) {
+ if ("TEXTFILE".equals(storageFormat)) {
ddl.append("ROW FORMAT DELIMITED FIELDS TERMINATED BY '" + filedDelimiter + "'\n");
}
- ddl.append("STORED AS " + format + "\n");
+ ddl.append("STORED AS " + storageFormat + "\n");
ddl.append("LOCATION '" + getTableDir(flatDesc, storageDfsDir) + "';").append("\n");
ddl.append("ALTER TABLE " + flatDesc.getTableName() + " SET TBLPROPERTIES('auto.purge'='true');\n");
return ddl.toString();
}
- public static String generateCreateTableStatement(IJoinedFlatTableDesc flatDesc, String storageDfsDir,
- String format) {
- String fieldDelimiter = flatDesc.getDataModel().getConfig().getSourceFieldDelimiter();
- return generateCreateTableStatement(flatDesc, storageDfsDir, format, fieldDelimiter);
- }
-
public static String generateDropTableStatement(IJoinedFlatTableDesc flatDesc) {
StringBuilder ddl = new StringBuilder();
ddl.append("DROP TABLE IF EXISTS " + flatDesc.getTableName() + ";").append("\n");
@@ -277,4 +278,4 @@ public class JoinedFlatTable {
return sql.toString();
}
-}
\ No newline at end of file
+}
http://git-wip-us.apache.org/repos/asf/kylin/blob/9df8572d/examples/test_case_data/sandbox/kylin.properties
----------------------------------------------------------------------
diff --git a/examples/test_case_data/sandbox/kylin.properties b/examples/test_case_data/sandbox/kylin.properties
index 90c0b95..7271e90 100644
--- a/examples/test_case_data/sandbox/kylin.properties
+++ b/examples/test_case_data/sandbox/kylin.properties
@@ -94,6 +94,13 @@ kylin.engine.mr.yarn-check-interval-seconds=10
# Hive database name for putting the intermediate flat tables
kylin.source.hive.database-for-flat-table=default
+# Hive flat table storage format, defaults to sequencefile
+#kylin.source.hive.flat-table-storage-format=textfile
+
+# Hive flat table field delimiter; used only when kylin.source.hive.flat-table-storage-format is set to textfile; defaults to \u001F (unit separator)
+#kylin.source.hive.flat-table-field-delimiter=\\u001F
+
+
#default compression codec for htable,snappy,lzo,gzip,lz4
kylin.storage.hbase.compression-codec=gzip
[2/2] kylin git commit: KYLIN-3070 give a better method name for jdbc
delimiter
Posted by sh...@apache.org.
KYLIN-3070 give a better method name for jdbc delimiter
Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/fab517b9
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/fab517b9
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/fab517b9
Branch: refs/heads/master
Commit: fab517b951fdcc7cd2f74f38f3f2e6991bcfada0
Parents: 9df8572
Author: shaofengshi <sh...@apache.org>
Authored: Sat Dec 9 22:34:57 2017 +0800
Committer: shaofengshi <sh...@apache.org>
Committed: Sat Dec 9 22:34:57 2017 +0800
----------------------------------------------------------------------
.../src/main/java/org/apache/kylin/common/KylinConfigBase.java | 4 ++--
.../main/java/org/apache/kylin/source/jdbc/JdbcHiveMRInput.java | 4 ++--
2 files changed, 4 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/kylin/blob/fab517b9/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
----------------------------------------------------------------------
diff --git a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
index d0cc3be..66805df 100644
--- a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
+++ b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
@@ -664,7 +664,7 @@ abstract public class KylinConfigBase implements Serializable {
}
public String getFlatTableStorageFormat() {
- return this.getOptional("kylin.source.hive.flat-table-storage-format", "SEQUENCEFILE").toUpperCase();
+ return this.getOptional("kylin.source.hive.flat-table-storage-format", "SEQUENCEFILE");
}
public String getFlatTableFieldDelimiter() {
@@ -768,7 +768,7 @@ abstract public class KylinConfigBase implements Serializable {
return Integer.parseInt(getOptional("kylin.source.jdbc.sqoop-mapper-num", "4"));
}
- public String getSourceFieldDelimiter() {
+ public String getJdbcSourceFieldDelimiter() {
return getOptional("kylin.source.jdbc.field-delimiter", "|");
}
http://git-wip-us.apache.org/repos/asf/kylin/blob/fab517b9/source-hive/src/main/java/org/apache/kylin/source/jdbc/JdbcHiveMRInput.java
----------------------------------------------------------------------
diff --git a/source-hive/src/main/java/org/apache/kylin/source/jdbc/JdbcHiveMRInput.java b/source-hive/src/main/java/org/apache/kylin/source/jdbc/JdbcHiveMRInput.java
index 0bfc145..d05f14e 100644
--- a/source-hive/src/main/java/org/apache/kylin/source/jdbc/JdbcHiveMRInput.java
+++ b/source-hive/src/main/java/org/apache/kylin/source/jdbc/JdbcHiveMRInput.java
@@ -67,7 +67,7 @@ public class JdbcHiveMRInput extends HiveMRInput {
private AbstractExecutable createFlatHiveTableFromFiles(String hiveInitStatements, String jobWorkingDir) {
final String dropTableHql = JoinedFlatTable.generateDropTableStatement(flatDesc);
- String filedDelimiter = getConfig().getSourceFieldDelimiter();
+ String filedDelimiter = getConfig().getJdbcSourceFieldDelimiter();
// Sqoop does not support exporting SEQUENSEFILE to Hive now SQOOP-869
final String createTableHql = JoinedFlatTable.generateCreateTableStatement(flatDesc, jobWorkingDir,
"TEXTFILE", filedDelimiter);
@@ -153,7 +153,7 @@ public class JdbcHiveMRInput extends HiveMRInput {
String jdbcUser = config.getJdbcSourceUser();
String jdbcPass = config.getJdbcSourcePass();
String sqoopHome = config.getSqoopHome();
- String filedDelimiter = config.getSourceFieldDelimiter();
+ String filedDelimiter = config.getJdbcSourceFieldDelimiter();
int mapperNum = config.getSqoopMapperNum();
String bquery = String.format("SELECT min(%s), max(%s) FROM %s.%s", splitColumn, splitColumn, splitDatabase,