You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by ya...@apache.org on 2021/12/27 03:30:50 UTC

[kylin] 01/02: configurable hive dict table format

This is an automated email from the ASF dual-hosted git repository.

yaqian pushed a commit to branch kylin3-hadoop3
in repository https://gitbox.apache.org/repos/asf/kylin.git

commit 79cbd08599b655f2cc64e2c8621ffebca0e44baa
Author: fengpod <fe...@gmail.com>
AuthorDate: Wed Dec 8 20:23:14 2021 +0800

    configurable hive dict table format
---
 .../org/apache/kylin/common/KylinConfigBase.java   |  4 ++
 .../apache/kylin/source/hive/HiveInputBase.java    |  4 +-
 .../apache/kylin/source/hive/MRHiveDictUtil.java   | 50 ++++++++++++++--------
 3 files changed, 38 insertions(+), 20 deletions(-)

diff --git a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
index 4d24fd0..707848f 100644
--- a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
+++ b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
@@ -708,6 +708,10 @@ public abstract class KylinConfigBase implements Serializable {
         return getOptional("kylin.dictionary.mr-hive.intermediate.table.suffix", "_distinct_value");
     }
 
+    public String getMrHiveDictTableFormat() {
+        return getOptional("kylin.dictionary.mr-hive.table.format", "TEXTFILE");
+    }
+
     // ============================================================================
     // CUBE
     // ============================================================================
diff --git a/source-hive/src/main/java/org/apache/kylin/source/hive/HiveInputBase.java b/source-hive/src/main/java/org/apache/kylin/source/hive/HiveInputBase.java
index 193990d..90239d9 100644
--- a/source-hive/src/main/java/org/apache/kylin/source/hive/HiveInputBase.java
+++ b/source-hive/src/main/java/org/apache/kylin/source/hive/HiveInputBase.java
@@ -168,11 +168,11 @@ public class HiveInputBase {
             final String distinctValueTable = MRHiveDictUtil.distinctValueTable(flatDesc);
             final String segmentLevelDictTableName = MRHiveDictUtil.segmentLevelDictTableName(flatDesc);
 
-            final String createGlobalDictTableHql = MRHiveDictUtil.generateDictionaryDdl(globalDictDatabase, globalDictTable);
+            final String createGlobalDictTableHql = MRHiveDictUtil.generateDictionaryDdl(flatDesc, globalDictDatabase, globalDictTable);
             final String dropDistinctValueTableHql = MRHiveDictUtil.generateDropTableStatement(distinctValueTable);
             final String createDistinctValueTableHql = MRHiveDictUtil.generateDistinctValueTableStatement(flatDesc);
             final String dropSegmentLevelDictTableHql = MRHiveDictUtil.generateDropTableStatement(segmentLevelDictTableName);
-            final String createSegmentLevelDictTableHql = MRHiveDictUtil.generateDictTableStatement(segmentLevelDictTableName);
+            final String createSegmentLevelDictTableHql = MRHiveDictUtil.generateDictTableStatement(flatDesc, segmentLevelDictTableName);
 
             String maxAndDistinctCountSql = MRHiveDictUtil.generateDictStatisticsSql(distinctValueTable, globalDictTable, globalDictDatabase);
 
diff --git a/source-hive/src/main/java/org/apache/kylin/source/hive/MRHiveDictUtil.java b/source-hive/src/main/java/org/apache/kylin/source/hive/MRHiveDictUtil.java
index 5fe8a97..62bf03d 100644
--- a/source-hive/src/main/java/org/apache/kylin/source/hive/MRHiveDictUtil.java
+++ b/source-hive/src/main/java/org/apache/kylin/source/hive/MRHiveDictUtil.java
@@ -96,15 +96,23 @@ public class MRHiveDictUtil {
         return cubeName + flatDesc.getSegment().getConfig().getMrHiveDictTableSuffix();
     }
 
-    public static String generateDictionaryDdl(String db, String tbl) {
-        return "CREATE TABLE IF NOT EXISTS " + db + "." + tbl + "\n"
-                + " ( dict_key STRING COMMENT '', \n"
-                + "   dict_val INT COMMENT '' \n"
-                + ") \n"
-                + "COMMENT 'Hive Global Dictionary' \n"
-                + "PARTITIONED BY (dict_column string) \n"
-                + "ROW FORMAT DELIMITED FIELDS TERMINATED BY '\\t' \n"
-                + "STORED AS TEXTFILE; \n";
+    /** Builds the CREATE TABLE IF NOT EXISTS statement for the dictionary table {@code db.tbl}. */
+    public static String generateDictionaryDdl(IJoinedFlatTableDesc flatDesc, String db, String tbl) {
+        // The storage format is read from the segment's config (getMrHiveDictTableFormat).
+        String format = flatDesc.getSegment().getConfig().getMrHiveDictTableFormat();
+        // The tab-delimited ROW FORMAT clause is emitted only for TEXTFILE (case-insensitive match).
+        String storage = "TEXTFILE".equalsIgnoreCase(format)
+                ? "ROW FORMAT DELIMITED FIELDS TERMINATED BY '\\t' \n" + "STORED AS TEXTFILE; \n"
+                : "STORED AS " + format + "; \n";
+        return new StringBuilder()
+                .append("CREATE TABLE IF NOT EXISTS ").append(db).append('.').append(tbl).append('\n')
+                .append(" ( dict_key STRING COMMENT '', \n")
+                .append("   dict_val INT COMMENT '' \n")
+                .append(") \n")
+                .append("COMMENT 'Hive Global Dictionary' \n")
+                .append("PARTITIONED BY (dict_column string) \n")
+                .append(storage)
+                .toString();
     }
 
     public static String generateDropTableStatement(String tableName) {
@@ -114,24 +122,26 @@ public class MRHiveDictUtil {
     }
 
     public static String generateDistinctValueTableStatement(IJoinedFlatTableDesc flatDesc) {
-        StringBuilder ddl = new StringBuilder();
-        String table = flatDesc.getTableName()
-                + flatDesc.getSegment().getConfig().getMrHiveDistinctValueTableSuffix();
+        KylinConfig config = flatDesc.getSegment().getConfig();
+        String table = flatDesc.getTableName() + config.getMrHiveDistinctValueTableSuffix(); // flat table name + suffix, as before this patch
+        String tableFormat = config.getMrHiveDictTableFormat();
 
+        StringBuilder ddl = new StringBuilder();
         ddl.append("CREATE TABLE IF NOT EXISTS " + table + " \n");
         ddl.append("( \n ");
         ddl.append("  dict_key" + " " + "STRING" + " COMMENT '' \n");
         ddl.append(") \n");
         ddl.append("COMMENT '' \n");
         ddl.append("PARTITIONED BY (dict_column string) \n");
-        ddl.append("STORED AS TEXTFILE \n");
-        ddl.append(";").append("\n");
+        ddl.append("STORED AS ").append(tableFormat).append(";\n");
         return ddl.toString();
     }
 
-    public static String generateDictTableStatement(String globalTableName) {
-        StringBuilder ddl = new StringBuilder();
+    public static String generateDictTableStatement(IJoinedFlatTableDesc flatDesc, String globalTableName) {
+        KylinConfig config = flatDesc.getSegment().getConfig();
+        String tableFormat = config.getMrHiveDictTableFormat();
 
+        StringBuilder ddl = new StringBuilder();
         ddl.append("CREATE TABLE IF NOT EXISTS " + globalTableName + " \n");
         ddl.append("( \n ");
         ddl.append("  dict_key" + " " + "STRING" + " COMMENT '' , \n");
@@ -139,8 +149,12 @@ public class MRHiveDictUtil {
         ddl.append(") \n");
         ddl.append("COMMENT '' \n");
         ddl.append("PARTITIONED BY (dict_column string) \n");
-        ddl.append("ROW FORMAT DELIMITED FIELDS TERMINATED BY '\\t' \n");
-        ddl.append("STORED AS TEXTFILE \n");
+        if ("TEXTFILE".equalsIgnoreCase(tableFormat)) {
+            ddl.append("ROW FORMAT DELIMITED FIELDS TERMINATED BY '\\t' \n");
+            ddl.append("STORED AS TEXTFILE \n");
+        } else {
+            ddl.append("STORED AS ").append(tableFormat).append("\n");
+        }
         ddl.append(";").append("\n");
         return ddl.toString();
     }