You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by xx...@apache.org on 2020/05/30 10:25:48 UTC
[kylin] 01/11: KYLIN-4343 Build Global Dict by MR/Hive,
configuration
This is an automated email from the ASF dual-hosted git repository.
xxyu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kylin.git
commit 4a7c568a6b76edc0f3a7a2858f4eca1a70e234ad
Author: wangxiaojing <wa...@didichuxing.com>
AuthorDate: Wed May 6 14:30:34 2020 +0800
KYLIN-4343 Build Global Dict by MR/Hive, configuration
---
.../org/apache/kylin/common/KylinConfigBase.java | 98 ++++++++++++++++++++--
1 file changed, 92 insertions(+), 6 deletions(-)
diff --git a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
index 01fd461..207d8ca 100644
--- a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
+++ b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
@@ -37,10 +37,12 @@ import java.io.IOException;
import java.io.Serializable;
import java.util.Arrays;
import java.util.Collection;
+import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
+import java.util.Objects;
import java.util.Properties;
import java.util.SortedSet;
import java.util.TimeZone;
@@ -63,6 +65,7 @@ public abstract class KylinConfigBase implements Serializable {
private static final String KYLIN_STORAGE_HBASE_COPROCESSOR_LOCAL_JAR = "kylin.storage.hbase.coprocessor-local-jar";
private static final String FILE_SCHEME = "file:";
private static final String MAPRFS_SCHEME = "maprfs:";
+ private static final Integer DEFAULT_MR_HIVE_GLOBAL_DICT_REDUCE_NUM_PER_COLUMN = 2;
/*
* DON'T DEFINE CONSTANTS FOR PROPERTY KEYS!
@@ -580,19 +583,94 @@ public abstract class KylinConfigBase implements Serializable {
// ============================================================================
// mr-hive dict
// ============================================================================
-
- /**
- * @return if mr-hive dict not enabled, return empty array;
- * else return array contains "{TABLE_NAME}_{COLUMN_NAME}"
- */
public String[] getMrHiveDictColumns() {
- String columnStr = getOptional("kylin.dictionary.mr-hive.columns", "");
+ String columnStr = getMrHiveDictColumnsStr();
if (!columnStr.equals("")) {
return columnStr.split(",");
}
return new String[0];
}
+ /**
+  * Lists the MR/Hive global dict columns that are not keys of
+  * {@link #getMrHiveDictRefColumns()}.
+  *
+  * @return the filtered columns in their configured order, or {@code null} when
+  *         {@link #getMrHiveDictColumns()} yields no columns
+  */
+ public String[] getMrHiveDictColumnsExcludeRefColumns() {
+     String[] allDictCols = getMrHiveDictColumns();
+     Map<String, String> refColMap = getMrHiveDictRefColumns();
+
+     // Nothing configured: callers receive null rather than an empty array.
+     if (Objects.isNull(allDictCols) || allDictCols.length == 0) {
+         return null;
+     }
+
+     return Arrays.stream(allDictCols)
+             .filter(col -> !refColMap.containsKey(col))
+             .toArray(String[]::new);
+ }
+
+ /**
+ * set kylin.dictionary.mr-hive.columns in Cube level config , value are the columns which want to use MR/Hive to build global dict ,
+ * Format, tableAliasName_ColumnName, multiple columns separated by commas,eg KYLIN_SALES_BUYER_ID,KYLIN_SALES_SELLER_ID
+ * @return if mr-hive dict not enabled, return "";
+ * else return {TABLE_NAME}_{COLUMN_NAME1},{TABLE_NAME}_{COLUMN_NAME2}"
+ */
+ private String getMrHiveDictColumnsStr() {
+ return getOptional("kylin.dictionary.mr-hive.columns", "");
+ }
+
+ /**
+  * Resolves the global dict reduce number of every column returned by
+  * {@link #getMrHiveDictColumnsExcludeRefColumns()}, in the same order.
+  * Per-column values come from {@code kylin.dictionary.mr-hive.columns.reduce.num}
+  * (comma separated {@code column:number} pairs); a column without an entry gets
+  * {@code DEFAULT_MR_HIVE_GLOBAL_DICT_REDUCE_NUM_PER_COLUMN}.
+  * A malformed pair is logged and parsing stops there, so that pair and every pair
+  * after it fall back to the default.
+  *
+  * @return one reduce number per column, or {@code null} when there is no such column
+  * @throws RuntimeException if any resolved reduce number is smaller than 1
+  */
+ public Integer[] getMrHiveDictColumnsReduceNumExcludeRefCols() {
+     String[] excludeRefCols = getMrHiveDictColumnsExcludeRefColumns();
+     if (Objects.isNull(excludeRefCols) || excludeRefCols.length == 0) {
+         return null;
+     }
+
+     // Read the property once so the parsed value and the logged value cannot differ.
+     String reduceNumStr = getMrHiveDictColumnsReduceNumStr();
+
+     // Turn "col:num,col:num" into a column -> reduce number lookup.
+     Map<String, Integer> colNum = new HashMap<>();
+     try {
+         for (String entry : reduceNumStr.split(",")) {
+             if (!StringUtils.isBlank(entry)) {
+                 String[] arr = entry.split(":");
+                 // Trim so that values such as "A:5, B:3" are honoured.
+                 colNum.put(arr[0].trim(), Integer.parseInt(arr[1].trim()));
+             }
+         }
+     } catch (NumberFormatException | ArrayIndexOutOfBoundsException e) {
+         // Best effort: keep the pairs parsed so far, but log the cause as well.
+         logger.error("set kylin.dictionary.mr-hive.columns.reduce.num error {} , the value should like colAilasName:reduceNum,colAilasName:reduceNum", reduceNumStr, e);
+     }
+
+     Integer[] reduceNumArr = new Integer[excludeRefCols.length];
+     for (int i = 0; i < excludeRefCols.length; i++) {
+         Integer reduceNum = colNum.getOrDefault(excludeRefCols[i], DEFAULT_MR_HIVE_GLOBAL_DICT_REDUCE_NUM_PER_COLUMN);
+         if (reduceNum < 1) {
+             throw new RuntimeException("kylin.dictionary.mr-hive.columns.reduce.num set error ,every column's reduce num should greater than 0");
+         }
+         reduceNumArr[i] = reduceNum;
+     }
+     return reduceNumArr;
+ }
+
+ /**
+ * Set kylin.dictionary.mr-hive.columns.reduce.num in Cube level config , value are the reduce number for global dict columns which are set in kylin.dictionary.mr-hive.columns.
+ * Format, tableAliasName_ColumnName:number, multiple columns separated by commas,eg KYLIN_SALES_BUYER_ID:5,KYLIN_SALES_SELLER_ID:3
+ * @return
+ */
+ private String getMrHiveDictColumnsReduceNumStr() {
+ return getOptional("kylin.dictionary.mr-hive.columns.reduce.num", "");
+ }
+
+ /**
+  * Columns of the MR/Hive global domain dict, i.e. columns that reuse the dict of
+  * another global dict column. The configuration behind it is not implemented
+  * yet, so the map is currently always empty.
+  *
+  * @return a new, empty map on every call
+  */
+ public Map<String, String> getMrHiveDictRefColumns() {
+     // TODO: implement the MR/Hive global domain dict config
+     return new HashMap<>();
+ }
+
/**
 * Reads {@code kylin.dictionary.mr-hive.database}, passing the result of
 * {@link #getHiveDatabaseForIntermediateTable()} to getOptional as the default.
 *
 * @return the value resolved by getOptional
 */
public String getMrHiveDictDB() {
    final String fallbackDb = getHiveDatabaseForIntermediateTable();
    return getOptional("kylin.dictionary.mr-hive.database", fallbackDb);
}
@@ -601,6 +679,10 @@ public abstract class KylinConfigBase implements Serializable {
return getOptional("kylin.dictionary.mr-hive.table.suffix", "_global_dict");
}
+ /**
+  * Reads {@code kylin.dictionary.mr-hive.intermediate.table.suffix}, passing
+  * {@code "__group_by"} to getOptional as the default.
+  *
+  * @return the value resolved by getOptional
+  */
+ public String getMrHiveDictIntermediateTTableSuffix() {
+     final String suffix = getOptional("kylin.dictionary.mr-hive.intermediate.table.suffix", "__group_by");
+     return suffix;
+ }
+
// ============================================================================
// CUBE
// ============================================================================
@@ -1012,6 +1094,10 @@ public abstract class KylinConfigBase implements Serializable {
return this.getOptional("kylin.source.hive.database-for-flat-table", DEFAULT);
}
+ /**
+  * Reads {@code kylin.source.hive.databasedir}, passing {@code ""} to getOptional
+  * as the default.
+  *
+  * @return the value resolved by getOptional
+  */
+ public String getHiveDatabaseDir() {
+     final String databaseDir = getOptional("kylin.source.hive.databasedir", "");
+     return databaseDir;
+ }
+
/**
 * Reads {@code kylin.source.hive.flat-table-storage-format}, passing
 * {@code "SEQUENCEFILE"} to getOptional as the default.
 *
 * @return the value resolved by getOptional
 */
public String getFlatTableStorageFormat() {
    final String storageFormat = getOptional("kylin.source.hive.flat-table-storage-format", "SEQUENCEFILE");
    return storageFormat;
}