You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@kylin.apache.org by 仇同心 <qi...@jd.com> on 2016/11/28 04:52:42 UTC
Cube optimization for help
Hi,all
There is a cube optimization for help.
Cuhe has 15 dimensions, including 14 normal dimensions and 1 derived dimension, and the cardinality of all dimensions is not high;And this cube also has 10 measures, including 1 count expression,2 sum expressions and 7 COUNT_DISTINCT expressions, and the cardinality of all measures’s column is more than one hundred million, the Return Type of 7 COUNT_DISTINCT expressions is hllc(14).
I founded that the step of “Build N - Dimension Cuboid Data” cost most of the time, I hope you can provide me some optimization plan!
The info about the segment: Start Time: 2016-11-10 00:00:00 End Time: 2016-11-24 00:00:00
Source Records: 201,027,127
Cube Size: 1.73 TB
Expansion Rate:14025.91%
The Duration of this job was 1,961.62 mins
The detailed build steps of job :
2016-11-25 11:15:24 GMT+8
#1 Step Name: Create Intermediate Flat Hive Table
Duration: 1.57 mins
2016-11-25 11:16:58 GMT+8
#2 Step Name: Redistribute Flat Hive Table
Duration: 3.32 mins
2016-11-25 11:20:17 GMT+8
#3 Step Name: Extract Fact Table Distinct Columns
Data Size: 8.57 MB
Duration: 19.14 mins
2016-11-25 11:39:26 GMT+8
#4 Step Name: Build Dimension Dictionary
Duration: 0.03 mins
2016-11-25 11:39:27 GMT+8
#5 Step Name: Save Cuboid Statistics
Duration: 0.01 mins
2016-11-25 11:39:28 GMT+8
#6 Step Name: Create HTable
Duration: 0.07 mins
2016-11-25 11:39:32 GMT+8
#7 Step Name: Build Base Cuboid Data
Data Size: 2.53 GB
Duration: 14.59 mins
2016-11-25 11:54:08 GMT+8
#8 Step Name: Build N-Dimension Cuboid Data : 13-Dimension
Data Size: 2.31 GB
Duration: 31.27 mins
2016-11-25 12:25:24 GMT+8
#9 Step Name: Build N-Dimension Cuboid Data : 12-Dimension
Data Size: 17.81 GB
Duration: 430.54 mins
2016-11-25 19:35:56 GMT+8
#10 Step Name: Build N-Dimension Cuboid Data : 11-Dimension
Data Size: 62.58 GB
Duration: 273.30 mins
2016-11-26 00:09:15 GMT+8
#11 Step Name: Build N-Dimension Cuboid Data : 10-Dimension
Data Size: 135.99 GB
Duration: 138.05 mins
2016-11-26 02:27:18 GMT+8
#12 Step Name: Build N-Dimension Cuboid Data : 9-Dimension
Data Size: 211.91 GB
Duration: 313.53 mins
2016-11-26 07:40:49 GMT+8
#13 Step Name: Build N-Dimension Cuboid Data : 8-Dimension
Data Size: 252.22 GB
Duration: 278.18 mins
2016-11-26 12:19:01 GMT+8
#14 Step Name: Build N-Dimension Cuboid Data : 7-Dimension
Data Size: 234.18 GB
Duration: 168.91 mins
2016-11-26 15:07:55 GMT+8
#15 Step Name: Build N-Dimension Cuboid Data : 6-Dimension
Data Size: 169.02 GB
Duration: 96.18 mins
2016-11-26 16:44:06 GMT+8
#16 Step Name: Build N-Dimension Cuboid Data : 5-Dimension
Data Size: 92.81 GB
Duration: 50.24 mins
2016-11-26 17:34:20 GMT+8
#17 Step Name: Build N-Dimension Cuboid Data : 4-Dimension
Data Size: 36.45 GB
Duration: 16.21 mins
2016-11-26 17:50:33 GMT+8
#18 Step Name: Build N-Dimension Cuboid Data : 3-Dimension
Data Size: 8.91 GB
Duration: 7.75 mins
2016-11-26 17:58:18 GMT+8
#19 Step Name: Build N-Dimension Cuboid Data : 2-Dimension
Data Size: 1.00 GB
Duration: 3.08 mins
2016-11-26 18:01:23 GMT+8
#20 Step Name: Build N-Dimension Cuboid Data : 1-Dimension
Data Size: 0.12 KB
Duration: 0.57 mins
2016-11-26 18:01:57 GMT+8
#21 Step Name: Build Cube
Duration: 0.00 mins
2016-11-26 18:01:57 GMT+8
#22 Step Name: Convert Cuboid Data to HFile
Data Size: 1.73 TB
Duration: 114.57 mins
2016-11-26 19:56:31 GMT+8
#23 Step Name: Load HFile to HBase Table
Duration: 0.38 mins
2016-11-26 19:56:54 GMT+8
#24 Step Name: Update Cube Info
Duration: 0.00 mins
2016-11-26 19:56:54 GMT+8
#25 Step Name: Garbage Collection
Duration: 0.12 mins
The following is the json of the cube:
{
"uuid": "ab54fdc8-253d-430e-a193-4c17bbfd0872",
"last_modified": 1480039756561,
"version": "1.6.0",
"name": "cube_pay_syt_order_det_analysis_new",
"model_name": "pay_syt_order_det_analysis",
"description": "",
"null_string": null,
"dimensions": [
{
"name": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D.ETL_DT",
"table": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D",
"column": "ETL_DT",
"derived": null
},
{
"name": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D.PWDTYPE",
"table": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D",
"column": "PWDTYPE",
"derived": null
},
{
"name": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D.REAL_NAME",
"table": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D",
"column": "REAL_NAME",
"derived": null
},
{
"name": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D.IS_NEW_CARD",
"table": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D",
"column": "IS_NEW_CARD",
"derived": null
},
{
"name": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D.REQ_ORDER_YEWU_TYPE",
"table": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D",
"column": "REQ_ORDER_YEWU_TYPE",
"derived": null
},
{
"name": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D.ORDER_YEWU_TYPE",
"table": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D",
"column": "ORDER_YEWU_TYPE",
"derived": null
},
{
"name": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D.ITEM_FIRST_CATE_NAME",
"table": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D",
"column": "ITEM_FIRST_CATE_NAME",
"derived": null
},
{
"name": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D.ITEM_SECOND_CATE_NAME",
"table": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D",
"column": "ITEM_SECOND_CATE_NAME",
"derived": null
},
{
"name": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D.ITEM_THIRD_CATE_NAME",
"table": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D",
"column": "ITEM_THIRD_CATE_NAME",
"derived": null
},
{
"name": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D.ITEM_BRAND_NAME",
"table": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D",
"column": "ITEM_BRAND_NAME",
"derived": null
},
{
"name": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D.COUNTY_NAME",
"table": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D",
"column": "COUNTY_NAME",
"derived": null
},
{
"name": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D.CITY_NAME",
"table": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D",
"column": "CITY_NAME",
"derived": null
},
{
"name": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D.PROVINCE_NAME",
"table": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D",
"column": "PROVINCE_NAME",
"derived": null
},
{
"name": "DIM.DIM_DAY_DERIVED",
"table": "DIM.DIM_DAY",
"column": null,
"derived": [
"DIM_DAY_NAME",
"DIM_DAY_TXDATE"
]
},
{
"name": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D.PAY_TYPE",
"table": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D",
"column": "PAY_TYPE",
"derived": null
}
],
"measures": [
{
"name": "_COUNT_",
"function": {
"expression": "COUNT",
"parameter": {
"type": "constant",
"value": "1",
"next_parameter": null
},
"returntype": "bigint"
},
"dependent_measure_ref": null
},
{
"name": "应付金额",
"function": {
"expression": "SUM",
"parameter": {
"type": "column",
"value": "AMOUNT",
"next_parameter": null
},
"returntype": "decimal(25,4)"
},
"dependent_measure_ref": null
},
{
"name": "商品成功支付金额",
"function": {
"expression": "SUM",
"parameter": {
"type": "column",
"value": "USER_ACTUAL_PAY_AMOUNT",
"next_parameter": null
},
"returntype": "decimal(25,4)"
},
"dependent_measure_ref": null
},
{
"name": "商城APP请求单",
"function": {
"expression": "COUNT_DISTINCT",
"parameter": {
"type": "column",
"value": "SERIAL_NUM",
"next_parameter": null
},
"returntype": "hllc(14)"
},
"dependent_measure_ref": null
},
{
"name": "请求订单数",
"function": {
"expression": "COUNT_DISTINCT",
"parameter": {
"type": "column",
"value": "REQ_OUTBIZNO",
"next_parameter": null
},
"returntype": "hllc(14)"
},
"dependent_measure_ref": null
},
{
"name": "支付订单数",
"function": {
"expression": "COUNT_DISTINCT",
"parameter": {
"type": "column",
"value": "OUTBIZNO",
"next_parameter": null
},
"returntype": "hllc(14)"
},
"dependent_measure_ref": null
},
{
"name": "成功支付订单数",
"function": {
"expression": "COUNT_DISTINCT",
"parameter": {
"type": "column",
"value": "SUC_OUTBIZNO",
"next_parameter": null
},
"returntype": "hllc(14)"
},
"dependent_measure_ref": null
},
{
"name": "支付单数",
"function": {
"expression": "COUNT_DISTINCT",
"parameter": {
"type": "column",
"value": "PAYID",
"next_parameter": null
},
"returntype": "hllc(15)"
},
"dependent_measure_ref": null
},
{
"name": "成功支付单数",
"function": {
"expression": "COUNT_DISTINCT",
"parameter": {
"type": "column",
"value": "SUC_PAYID",
"next_parameter": null
},
"returntype": "hllc(14)"
},
"dependent_measure_ref": null
},
{
"name": "用户数",
"function": {
"expression": "COUNT_DISTINCT",
"parameter": {
"type": "column",
"value": "PIN",
"next_parameter": null
},
"returntype": "hllc(14)"
},
"dependent_measure_ref": null
}
],
"dictionaries": [],
"rowkey": {
"rowkey_columns": [
{
"column": "ETL_DT",
"encoding": "dict",
"isShardBy": false
},
{
"column": "COUNTY_NAME",
"encoding": "dict",
"isShardBy": false
},
{
"column": "ITEM_BRAND_NAME",
"encoding": "dict",
"isShardBy": false
},
{
"column": "ITEM_THIRD_CATE_NAME",
"encoding": "dict",
"isShardBy": false
},
{
"column": "ITEM_SECOND_CATE_NAME",
"encoding": "dict",
"isShardBy": false
},
{
"column": "ITEM_FIRST_CATE_NAME",
"encoding": "dict",
"isShardBy": false
},
{
"column": "CITY_NAME",
"encoding": "dict",
"isShardBy": false
},
{
"column": "PROVINCE_NAME",
"encoding": "dict",
"isShardBy": false
},
{
"column": "REQ_ORDER_YEWU_TYPE",
"encoding": "dict",
"isShardBy": false
},
{
"column": "ORDER_YEWU_TYPE",
"encoding": "dict",
"isShardBy": false
},
{
"column": "PWDTYPE",
"encoding": "dict",
"isShardBy": false
},
{
"column": "IS_NEW_CARD",
"encoding": "dict",
"isShardBy": false
},
{
"column": "REAL_NAME",
"encoding": "dict",
"isShardBy": false
},
{
"column": "PAY_TYPE",
"encoding": "dict",
"isShardBy": false
}
]
},
"hbase_mapping": {
"column_family": [
{
"name": "F1",
"columns": [
{
"qualifier": "M",
"measure_refs": [
"_COUNT_",
"应付金额",
"商品成功支付金额"
]
}
]
},
{
"name": "F2",
"columns": [
{
"qualifier": "M",
"measure_refs": [
"商城APP请求单",
"请求订单数",
"支付订单数",
"成功支付订单数",
"支付单数",
"成功支付单数",
"用户数"
]
}
]
}
]
},
"aggregation_groups": [
{
"includes": [
"ETL_DT",
"PWDTYPE",
"REAL_NAME",
"IS_NEW_CARD",
"REQ_ORDER_YEWU_TYPE",
"ORDER_YEWU_TYPE",
"ITEM_FIRST_CATE_NAME",
"ITEM_SECOND_CATE_NAME",
"ITEM_THIRD_CATE_NAME",
"ITEM_BRAND_NAME",
"COUNTY_NAME",
"CITY_NAME",
"PROVINCE_NAME"
],
"select_rule": {
"hierarchy_dims": [
[
"COUNTY_NAME",
"CITY_NAME",
"PROVINCE_NAME"
],
[
"ITEM_FIRST_CATE_NAME",
"ITEM_SECOND_CATE_NAME",
"ITEM_THIRD_CATE_NAME"
]
],
"mandatory_dims": [
"ETL_DT"
],
"joint_dims": []
}
}
],
"signature": "Ukf/u2JnSO44TPQWDJFhyQ==",
"notify_list": [],
"status_need_notify": [
"ERROR",
"DISCARDED",
"SUCCEED"
],
"partition_date_start": 1478736000000,
"partition_date_end": 3153600000000,
"auto_merge_time_ranges": [
604800000,
2419200000
],
"retention_range": 0,
"engine_type": 2,
"storage_type": 2,
"override_kylin_properties": {}
}