You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by ya...@apache.org on 2021/12/27 03:30:50 UTC
[kylin] 01/02: configurable hive dict table format
This is an automated email from the ASF dual-hosted git repository.
yaqian pushed a commit to branch kylin3-hadoop3
in repository https://gitbox.apache.org/repos/asf/kylin.git
commit 79cbd08599b655f2cc64e2c8621ffebca0e44baa
Author: fengpod <fe...@gmail.com>
AuthorDate: Wed Dec 8 20:23:14 2021 +0800
configurable hive dict table format
---
.../org/apache/kylin/common/KylinConfigBase.java | 4 ++
.../apache/kylin/source/hive/HiveInputBase.java | 4 +-
.../apache/kylin/source/hive/MRHiveDictUtil.java | 50 ++++++++++++++--------
3 files changed, 38 insertions(+), 20 deletions(-)
diff --git a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
index 4d24fd0..707848f 100644
--- a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
+++ b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
@@ -708,6 +708,10 @@ public abstract class KylinConfigBase implements Serializable {
return getOptional("kylin.dictionary.mr-hive.intermediate.table.suffix", "_distinct_value");
}
+ public String getMrHiveDictTableFormat() { // reads "kylin.dictionary.mr-hive.table.format", passing "TEXTFILE" as the default
+ return getOptional("kylin.dictionary.mr-hive.table.format", "TEXTFILE");
+ }
+
// ============================================================================
// CUBE
// ============================================================================
diff --git a/source-hive/src/main/java/org/apache/kylin/source/hive/HiveInputBase.java b/source-hive/src/main/java/org/apache/kylin/source/hive/HiveInputBase.java
index 193990d..90239d9 100644
--- a/source-hive/src/main/java/org/apache/kylin/source/hive/HiveInputBase.java
+++ b/source-hive/src/main/java/org/apache/kylin/source/hive/HiveInputBase.java
@@ -168,11 +168,11 @@ public class HiveInputBase {
final String distinctValueTable = MRHiveDictUtil.distinctValueTable(flatDesc);
final String segmentLevelDictTableName = MRHiveDictUtil.segmentLevelDictTableName(flatDesc);
- final String createGlobalDictTableHql = MRHiveDictUtil.generateDictionaryDdl(globalDictDatabase, globalDictTable);
+ final String createGlobalDictTableHql = MRHiveDictUtil.generateDictionaryDdl(flatDesc, globalDictDatabase, globalDictTable);
final String dropDistinctValueTableHql = MRHiveDictUtil.generateDropTableStatement(distinctValueTable);
final String createDistinctValueTableHql = MRHiveDictUtil.generateDistinctValueTableStatement(flatDesc);
final String dropSegmentLevelDictTableHql = MRHiveDictUtil.generateDropTableStatement(segmentLevelDictTableName);
- final String createSegmentLevelDictTableHql = MRHiveDictUtil.generateDictTableStatement(segmentLevelDictTableName);
+ final String createSegmentLevelDictTableHql = MRHiveDictUtil.generateDictTableStatement(flatDesc, segmentLevelDictTableName);
String maxAndDistinctCountSql = MRHiveDictUtil.generateDictStatisticsSql(distinctValueTable, globalDictTable, globalDictDatabase);
diff --git a/source-hive/src/main/java/org/apache/kylin/source/hive/MRHiveDictUtil.java b/source-hive/src/main/java/org/apache/kylin/source/hive/MRHiveDictUtil.java
index 5fe8a97..62bf03d 100644
--- a/source-hive/src/main/java/org/apache/kylin/source/hive/MRHiveDictUtil.java
+++ b/source-hive/src/main/java/org/apache/kylin/source/hive/MRHiveDictUtil.java
@@ -96,15 +96,23 @@ public class MRHiveDictUtil {
return cubeName + flatDesc.getSegment().getConfig().getMrHiveDictTableSuffix();
}
- public static String generateDictionaryDdl(String db, String tbl) {
- return "CREATE TABLE IF NOT EXISTS " + db + "." + tbl + "\n"
- + " ( dict_key STRING COMMENT '', \n"
- + " dict_val INT COMMENT '' \n"
- + ") \n"
- + "COMMENT 'Hive Global Dictionary' \n"
- + "PARTITIONED BY (dict_column string) \n"
- + "ROW FORMAT DELIMITED FIELDS TERMINATED BY '\\t' \n"
- + "STORED AS TEXTFILE; \n";
+ public static String generateDictionaryDdl(IJoinedFlatTableDesc flatDesc, String db, String tbl) { // builds the CREATE TABLE IF NOT EXISTS statement for db.tbl
+ KylinConfig config = flatDesc.getSegment().getConfig();
+ String tableFormat = config.getMrHiveDictTableFormat(); // storage format comes from the segment's config
+ StringBuilder ddl = new StringBuilder();
+ ddl.append("CREATE TABLE IF NOT EXISTS " + db + "." + tbl + "\n");
+ ddl.append(" ( dict_key STRING COMMENT '', \n");
+ ddl.append(" dict_val INT COMMENT '' \n");
+ ddl.append(") \n");
+ ddl.append("COMMENT 'Hive Global Dictionary' \n");
+ ddl.append("PARTITIONED BY (dict_column string) \n");
+ if ("TEXTFILE".equalsIgnoreCase(tableFormat)) { // the tab field-delimiter clause is emitted only for TEXTFILE (case-insensitive match)
+ ddl.append("ROW FORMAT DELIMITED FIELDS TERMINATED BY '\\t' \n");
+ ddl.append("STORED AS TEXTFILE; \n");
+ } else {
+ ddl.append("STORED AS " +tableFormat+ "; \n"); // any other configured value is spliced verbatim after STORED AS
+ }
+ return ddl.toString();
}
public static String generateDropTableStatement(String tableName) {
@@ -114,24 +122,26 @@ public class MRHiveDictUtil {
}
public static String generateDistinctValueTableStatement(IJoinedFlatTableDesc flatDesc) {
- StringBuilder ddl = new StringBuilder();
- String table = flatDesc.getTableName()
- + flatDesc.getSegment().getConfig().getMrHiveDistinctValueTableSuffix();
+ KylinConfig config = flatDesc.getSegment().getConfig();
+ String table = flatDesc.getTableName() + config.getMrHiveDistinctValueTableSuffix(); // table name is <flat table name> + configured suffix, same as the lines this replaces
+ String tableFormat = config.getMrHiveDictTableFormat(); // storage format comes from the segment's config
+ StringBuilder ddl = new StringBuilder();
ddl.append("CREATE TABLE IF NOT EXISTS " + table + " \n");
ddl.append("( \n ");
ddl.append(" dict_key" + " " + "STRING" + " COMMENT '' \n");
ddl.append(") \n");
ddl.append("COMMENT '' \n");
ddl.append("PARTITIONED BY (dict_column string) \n");
- ddl.append("STORED AS TEXTFILE \n");
- ddl.append(";").append("\n");
+ ddl.append("STORED AS ").append(tableFormat).append(";\n");
return ddl.toString();
}
- public static String generateDictTableStatement(String globalTableName) {
- StringBuilder ddl = new StringBuilder();
+ public static String generateDictTableStatement(IJoinedFlatTableDesc flatDesc, String globalTableName) {
+ KylinConfig config = flatDesc.getSegment().getConfig();
+ String tableFormat = config.getMrHiveDictTableFormat();
+ StringBuilder ddl = new StringBuilder();
ddl.append("CREATE TABLE IF NOT EXISTS " + globalTableName + " \n");
ddl.append("( \n ");
ddl.append(" dict_key" + " " + "STRING" + " COMMENT '' , \n");
@@ -139,8 +149,12 @@ public class MRHiveDictUtil {
ddl.append(") \n");
ddl.append("COMMENT '' \n");
ddl.append("PARTITIONED BY (dict_column string) \n");
- ddl.append("ROW FORMAT DELIMITED FIELDS TERMINATED BY '\\t' \n");
- ddl.append("STORED AS TEXTFILE \n");
+ if ("TEXTFILE".equalsIgnoreCase(tableFormat)) {
+ ddl.append("ROW FORMAT DELIMITED FIELDS TERMINATED BY '\\t' \n");
+ ddl.append("STORED AS TEXTFILE \n");
+ } else {
+ ddl.append("STORED AS ").append(tableFormat).append("\n");
+ }
ddl.append(";").append("\n");
return ddl.toString();
}