You are viewing a plain text version of this content. The canonical link for it is here.
Posted to user@kylin.apache.org by 仇同心 <qi...@jd.com> on 2017/02/14 03:05:25 UTC
关于kylin查询问题咨询
大家好:
目前在使用中遇到一个kylin查询慢的场景:
Cube 设计如下:
{
"uuid": "dfb77a08-f51d-4088-b559-1da67c28a068",
"last_modified": 1486997875953,
"version": "1.6.0",
"name": "insu_t",
"model_name": "insu_jdmall_model_test",
"description": "",
"null_string": null,
"dimensions": [
{
"name": "BRAND",
"table": "DMT.DMT_KYLIN_JDMALL_ORDR_DTL_I_D",
"column": "BRAND_CD",
"derived": null
},
{
"name": "DT",
"table": "DMT.DMT_KYLIN_JDMALL_ORDR_DTL_I_D",
"column": "DT",
"derived": null
},
{
"name": "DIM.DIM_DAY_DERIVED",
"table": "DIM.DIM_DAY",
"column": null,
"derived": [
"DIM_DAY_NAME",
"DIM_WEEK_NAME",
"DIM_MONTH_NAME"
]
},
{
"name": "FIRST",
"table": "DIM.DIM_ITEM_GEN_THIRD_CATE_D",
"column": "ITEM_FIRST_CATE_NAME",
"derived": null
},
{
"name": "SECOND",
"table": "DIM.DIM_ITEM_GEN_THIRD_CATE_D",
"column": "ITEM_SECOND_CATE_NAME",
"derived": null
},
{
"name": "THIRD",
"table": "DIM.DIM_ITEM_GEN_THIRD_CATE_D",
"column": "ITEM_THIRD_CATE_NAME",
"derived": null
}
],
"measures": [
{
"name": "_COUNT_",
"function": {
"expression": "COUNT",
"parameter": {
"type": "constant",
"value": "1",
"next_parameter": null
},
"returntype": "bigint"
},
"dependent_measure_ref": null
},
{
"name": "QTTY",
"function": {
"expression": "SUM",
"parameter": {
"type": "column",
"value": "SALE_QTTY",
"next_parameter": null
},
"returntype": "bigint"
},
"dependent_measure_ref": null
},
{
"name": "BEFORE",
"function": {
"expression": "SUM",
"parameter": {
"type": "column",
"value": "BEFORE_PREFR_AMOUNT",
"next_parameter": null
},
"returntype": "decimal(25,4)"
},
"dependent_measure_ref": null
},
{
"name": "USER",
"function": {
"expression": "SUM",
"parameter": {
"type": "column",
"value": "USER_ACTUAL_PAY_AMOUNT",
"next_parameter": null
},
"returntype": "decimal(25,4)"
},
"dependent_measure_ref": null
},
{
"name": "SALE",
"function": {
"expression": "COUNT_DISTINCT",
"parameter": {
"type": "column",
"value": "SALE_ORD_ID",
"next_parameter": null
},
"returntype": "bitmap"
},
"dependent_measure_ref": null
}
],
"dictionaries": [],
"rowkey": {
"rowkey_columns": [
{
"column": "BRAND_CD",
"encoding": "dict",
"isShardBy": false
},
{
"column": "DT",
"encoding": "dict",
"isShardBy": false
},
{
"column": "ITEM_FIRST_CATE_NAME",
"encoding": "dict",
"isShardBy": false
},
{
"column": "ITEM_SECOND_CATE_NAME",
"encoding": "dict",
"isShardBy": false
},
{
"column": "ITEM_THIRD_CATE_NAME",
"encoding": "dict",
"isShardBy": false
}
]
},
"hbase_mapping": {
"column_family": [
{
"name": "F1",
"columns": [
{
"qualifier": "M",
"measure_refs": [
"_COUNT_",
"QTTY",
"BEFORE",
"USER"
]
}
]
},
{
"name": "F2",
"columns": [
{
"qualifier": "M",
"measure_refs": [
"SALE"
]
}
]
}
]
},
"aggregation_groups": [
{
"includes": [
"BRAND_CD",
"DT",
"ITEM_FIRST_CATE_NAME",
"ITEM_SECOND_CATE_NAME",
"ITEM_THIRD_CATE_NAME"
],
"select_rule": {
"hierarchy_dims": [],
"mandatory_dims": [],
"joint_dims": []
}
}
],
"signature": "Kl5sPTVN78bEYTGKoUOsWg==",
"notify_list": [],
"status_need_notify": [
"ERROR",
"DISCARDED",
"SUCCEED"
],
"partition_date_start": 1483747200000,
"partition_date_end": 3153600000000,
"auto_merge_time_ranges": [
604800000,
2419200000
],
"retention_range": 0,
"engine_type": 2,
"storage_type": 2,
"override_kylin_properties": {
"kylin.hbase.region.cut": "1"
}
}
数据量是14天的数据,sale_ord_id的基数是1.5亿
Select dt,item_second_cate_name,count(distinct sale_ord_id),sum(sale_qtty)
from DMT.DMT_KYLIN_JDMALL_ORDR_DTL_I_D a
left join dim.dim_day b on a.dt = b.dim_day_txdate
left join DIM.DIM_ITEM_GEN_THIRD_CATE_D c on a.item_third_cate_cd = c.item_third_cate_id
group by dt,item_second_cate_name;
这条语句执行时间是37秒,去掉count(distinct sale_ord_id)后查询时0.07秒
dt,item_second_cate_name 都是normal维度,从结果看是count_distinct度量导致的慢查询,请问这个有什么优化的建议吗?
谢谢!
Re: 关于kylin查询问题咨询
Posted by ShaoFeng Shi <sh...@apache.org>.
Kaisen made several performance improvement on the bitmap measure; You can
check http://mt.sohu.com/20170113/n478613681.shtml if haven't.
BTW, these improvements will be included in the 2.0 release;
2017-02-14 11:05 GMT+08:00 仇同心 <qi...@jd.com>:
> 大家好:
>
> 目前在使用中遇到一个kylin查询慢的场景:
>
> Cube 设计如下:
>
> {
>
> "uuid": "dfb77a08-f51d-4088-b559-1da67c28a068",
>
> "last_modified": 1486997875953,
>
> "version": "1.6.0",
>
> "name": "insu_t",
>
> "model_name": "insu_jdmall_model_test",
>
> "description": "",
>
> "null_string": null,
>
> "dimensions": [
>
> {
>
> "name": "BRAND",
>
> "table": "DMT.DMT_KYLIN_JDMALL_ORDR_DTL_I_D",
>
> "column": "BRAND_CD",
>
> "derived": null
>
> },
>
> {
>
> "name": "DT",
>
> "table": "DMT.DMT_KYLIN_JDMALL_ORDR_DTL_I_D",
>
> "column": "DT",
>
> "derived": null
>
> },
>
> {
>
> "name": "DIM.DIM_DAY_DERIVED",
>
> "table": "DIM.DIM_DAY",
>
> "column": null,
>
> "derived": [
>
> "DIM_DAY_NAME",
>
> "DIM_WEEK_NAME",
>
> "DIM_MONTH_NAME"
>
> ]
>
> },
>
> {
>
> "name": "FIRST",
>
> "table": "DIM.DIM_ITEM_GEN_THIRD_CATE_D",
>
> "column": "ITEM_FIRST_CATE_NAME",
>
> "derived": null
>
> },
>
> {
>
> "name": "SECOND",
>
> "table": "DIM.DIM_ITEM_GEN_THIRD_CATE_D",
>
> "column": "ITEM_SECOND_CATE_NAME",
>
> "derived": null
>
> },
>
> {
>
> "name": "THIRD",
>
> "table": "DIM.DIM_ITEM_GEN_THIRD_CATE_D",
>
> "column": "ITEM_THIRD_CATE_NAME",
>
> "derived": null
>
> }
>
> ],
>
> "measures": [
>
> {
>
> "name": "_COUNT_",
>
> "function": {
>
> "expression": "COUNT",
>
> "parameter": {
>
> "type": "constant",
>
> "value": "1",
>
> "next_parameter": null
>
> },
>
> "returntype": "bigint"
>
> },
>
> "dependent_measure_ref": null
>
> },
>
> {
>
> "name": "QTTY",
>
> "function": {
>
> "expression": "SUM",
>
> "parameter": {
>
> "type": "column",
>
> "value": "SALE_QTTY",
>
> "next_parameter": null
>
> },
>
> "returntype": "bigint"
>
> },
>
> "dependent_measure_ref": null
>
> },
>
> {
>
> "name": "BEFORE",
>
> "function": {
>
> "expression": "SUM",
>
> "parameter": {
>
> "type": "column",
>
> "value": "BEFORE_PREFR_AMOUNT",
>
> "next_parameter": null
>
> },
>
> "returntype": "decimal(25,4)"
>
> },
>
> "dependent_measure_ref": null
>
> },
>
> {
>
> "name": "USER",
>
> "function": {
>
> "expression": "SUM",
>
> "parameter": {
>
> "type": "column",
>
> "value": "USER_ACTUAL_PAY_AMOUNT",
>
> "next_parameter": null
>
> },
>
> "returntype": "decimal(25,4)"
>
> },
>
> "dependent_measure_ref": null
>
> },
>
> {
>
> "name": "SALE",
>
> "function": {
>
> "expression": "COUNT_DISTINCT",
>
> "parameter": {
>
> "type": "column",
>
> "value": "SALE_ORD_ID",
>
> "next_parameter": null
>
> },
>
> "returntype": "bitmap"
>
> },
>
> "dependent_measure_ref": null
>
> }
>
> ],
>
> "dictionaries": [],
>
> "rowkey": {
>
> "rowkey_columns": [
>
> {
>
> "column": "BRAND_CD",
>
> "encoding": "dict",
>
> "isShardBy": false
>
> },
>
> {
>
> "column": "DT",
>
> "encoding": "dict",
>
> "isShardBy": false
>
> },
>
> {
>
> "column": "ITEM_FIRST_CATE_NAME",
>
> "encoding": "dict",
>
> "isShardBy": false
>
> },
>
> {
>
> "column": "ITEM_SECOND_CATE_NAME",
>
> "encoding": "dict",
>
> "isShardBy": false
>
> },
>
> {
>
> "column": "ITEM_THIRD_CATE_NAME",
>
> "encoding": "dict",
>
> "isShardBy": false
>
> }
>
> ]
>
> },
>
> "hbase_mapping": {
>
> "column_family": [
>
> {
>
> "name": "F1",
>
> "columns": [
>
> {
>
> "qualifier": "M",
>
> "measure_refs": [
>
> "_COUNT_",
>
> "QTTY",
>
> "BEFORE",
>
> "USER"
>
> ]
>
> }
>
> ]
>
> },
>
> {
>
> "name": "F2",
>
> "columns": [
>
> {
>
> "qualifier": "M",
>
> "measure_refs": [
>
> "SALE"
>
> ]
>
> }
>
> ]
>
> }
>
> ]
>
> },
>
> "aggregation_groups": [
>
> {
>
> "includes": [
>
> "BRAND_CD",
>
> "DT",
>
> "ITEM_FIRST_CATE_NAME",
>
> "ITEM_SECOND_CATE_NAME",
>
> "ITEM_THIRD_CATE_NAME"
>
> ],
>
> "select_rule": {
>
> "hierarchy_dims": [],
>
> "mandatory_dims": [],
>
> "joint_dims": []
>
> }
>
> }
>
> ],
>
> "signature": "Kl5sPTVN78bEYTGKoUOsWg==",
>
> "notify_list": [],
>
> "status_need_notify": [
>
> "ERROR",
>
> "DISCARDED",
>
> "SUCCEED"
>
> ],
>
> "partition_date_start": 1483747200000,
>
> "partition_date_end": 3153600000000,
>
> "auto_merge_time_ranges": [
>
> 604800000,
>
> 2419200000
>
> ],
>
> "retention_range": 0,
>
> "engine_type": 2,
>
> "storage_type": 2,
>
> "override_kylin_properties": {
>
> "kylin.hbase.region.cut": "1"
>
> }
>
> }
>
>
>
> 数据量是14天的数据,sale_ord_id的基数是1.5亿
>
> Select dt,item_second_cate_name,count(distinct sale_ord_id),sum(sale_qtty)
>
> from DMT.DMT_KYLIN_JDMALL_ORDR_DTL_I_D a
>
> left join dim.dim_day b on a.dt = b.dim_day_txdate
>
> left join DIM.DIM_ITEM_GEN_THIRD_CATE_D c on a.item_third_cate_cd =
> c.item_third_cate_id
>
> group by dt,item_second_cate_name;
>
>
>
> 这条语句执行时间是37秒,去掉count(distinct sale_ord_id)后查询时0.07秒
>
> dt,item_second_cate_name 都是normal维度,从结果看是count_distinct
> 度量导致的慢查询,请问这个有什么优化的建议吗?
>
>
>
> 谢谢!
>
>
>
>
>
>
>
--
Best regards,
Shaofeng Shi 史少锋