You are viewing a plain text version of this content. The canonical link for it is here.
Posted to user@kylin.apache.org by 仇同心 <qi...@jd.com> on 2017/02/14 03:05:25 UTC

关于kylin查询问题咨询

大家好:
目前在使用中遇到一个kylin查询慢的场景:
Cube  设计如下:
{
  "uuid": "dfb77a08-f51d-4088-b559-1da67c28a068",
  "last_modified": 1486997875953,
  "version": "1.6.0",
  "name": "insu_t",
  "model_name": "insu_jdmall_model_test",
  "description": "",
  "null_string": null,
  "dimensions": [
    {
      "name": "BRAND",
      "table": "DMT.DMT_KYLIN_JDMALL_ORDR_DTL_I_D",
      "column": "BRAND_CD",
      "derived": null
    },
    {
      "name": "DT",
      "table": "DMT.DMT_KYLIN_JDMALL_ORDR_DTL_I_D",
      "column": "DT",
      "derived": null
    },
    {
      "name": "DIM.DIM_DAY_DERIVED",
      "table": "DIM.DIM_DAY",
      "column": null,
      "derived": [
        "DIM_DAY_NAME",
        "DIM_WEEK_NAME",
        "DIM_MONTH_NAME"
      ]
    },
    {
      "name": "FIRST",
     "table": "DIM.DIM_ITEM_GEN_THIRD_CATE_D",
      "column": "ITEM_FIRST_CATE_NAME",
      "derived": null
    },
    {
      "name": "SECOND",
      "table": "DIM.DIM_ITEM_GEN_THIRD_CATE_D",
      "column": "ITEM_SECOND_CATE_NAME",
      "derived": null
    },
    {
      "name": "THIRD",
      "table": "DIM.DIM_ITEM_GEN_THIRD_CATE_D",
      "column": "ITEM_THIRD_CATE_NAME",
      "derived": null
    }
  ],
  "measures": [
    {
      "name": "_COUNT_",
      "function": {
        "expression": "COUNT",
        "parameter": {
          "type": "constant",
          "value": "1",
          "next_parameter": null
        },
        "returntype": "bigint"
      },
      "dependent_measure_ref": null
    },
    {
      "name": "QTTY",
      "function": {
        "expression": "SUM",
        "parameter": {
          "type": "column",
          "value": "SALE_QTTY",
          "next_parameter": null
        },
        "returntype": "bigint"
      },
      "dependent_measure_ref": null
    },
    {
      "name": "BEFORE",
      "function": {
        "expression": "SUM",
        "parameter": {
          "type": "column",
          "value": "BEFORE_PREFR_AMOUNT",
          "next_parameter": null
        },
        "returntype": "decimal(25,4)"
      },
      "dependent_measure_ref": null
    },
    {
      "name": "USER",
      "function": {
        "expression": "SUM",
        "parameter": {
          "type": "column",
          "value": "USER_ACTUAL_PAY_AMOUNT",
          "next_parameter": null
        },
       "returntype": "decimal(25,4)"
      },
      "dependent_measure_ref": null
    },
    {
      "name": "SALE",
      "function": {
        "expression": "COUNT_DISTINCT",
        "parameter": {
          "type": "column",
          "value": "SALE_ORD_ID",
          "next_parameter": null
        },
        "returntype": "bitmap"
      },
      "dependent_measure_ref": null
    }
  ],
  "dictionaries": [],
  "rowkey": {
    "rowkey_columns": [
      {
        "column": "BRAND_CD",
        "encoding": "dict",
        "isShardBy": false
      },
      {
        "column": "DT",
        "encoding": "dict",
        "isShardBy": false
      },
      {
        "column": "ITEM_FIRST_CATE_NAME",
        "encoding": "dict",
        "isShardBy": false
      },
      {
        "column": "ITEM_SECOND_CATE_NAME",
        "encoding": "dict",
        "isShardBy": false
      },
      {
        "column": "ITEM_THIRD_CATE_NAME",
        "encoding": "dict",
        "isShardBy": false
      }
    ]
  },
  "hbase_mapping": {
    "column_family": [
      {
        "name": "F1",
        "columns": [
          {
            "qualifier": "M",
            "measure_refs": [
              "_COUNT_",
              "QTTY",
              "BEFORE",
              "USER"
            ]
          }
        ]
      },
      {
        "name": "F2",
        "columns": [
          {
            "qualifier": "M",
            "measure_refs": [
              "SALE"
            ]
          }
        ]
      }
    ]
  },
  "aggregation_groups": [
    {
      "includes": [
        "BRAND_CD",
        "DT",
        "ITEM_FIRST_CATE_NAME",
        "ITEM_SECOND_CATE_NAME",
        "ITEM_THIRD_CATE_NAME"
      ],
      "select_rule": {
        "hierarchy_dims": [],
        "mandatory_dims": [],
        "joint_dims": []
      }
    }
  ],
  "signature": "Kl5sPTVN78bEYTGKoUOsWg==",
  "notify_list": [],
  "status_need_notify": [
    "ERROR",
    "DISCARDED",
    "SUCCEED"
  ],
  "partition_date_start": 1483747200000,
  "partition_date_end": 3153600000000,
  "auto_merge_time_ranges": [
    604800000,
    2419200000
  ],
  "retention_range": 0,
  "engine_type": 2,
  "storage_type": 2,
  "override_kylin_properties": {
    "kylin.hbase.region.cut": "1"
  }
}

数据量是14天的数据,sale_ord_id的基数是1.5亿
Select dt,item_second_cate_name,count(distinct sale_ord_id),sum(sale_qtty)
from DMT.DMT_KYLIN_JDMALL_ORDR_DTL_I_D a
left join dim.dim_day b on a.dt = b.dim_day_txdate
left join DIM.DIM_ITEM_GEN_THIRD_CATE_D c on a.item_third_cate_cd = c.item_third_cate_id
group by dt,item_second_cate_name;

这条语句执行时间是37秒,去掉count(distinct sale_ord_id)后查询时0.07秒
dt,item_second_cate_name 都是normal维度,从结果看是count_distinct度量导致的慢查询,请问这个有什么优化的建议吗?

谢谢!




Re: 关于kylin查询问题咨询

Posted by ShaoFeng Shi <sh...@apache.org>.
Kaisen made several performance improvement on the bitmap measure; You can
check http://mt.sohu.com/20170113/n478613681.shtml if haven't.

BTW, these improvements will be included in the 2.0 release;

2017-02-14 11:05 GMT+08:00 仇同心 <qi...@jd.com>:

> 大家好:
>
> 目前在使用中遇到一个kylin查询慢的场景:
>
> Cube  设计如下:
>
> {
>
>   "uuid": "dfb77a08-f51d-4088-b559-1da67c28a068",
>
>   "last_modified": 1486997875953,
>
>   "version": "1.6.0",
>
>   "name": "insu_t",
>
>   "model_name": "insu_jdmall_model_test",
>
>   "description": "",
>
>   "null_string": null,
>
>   "dimensions": [
>
>     {
>
>       "name": "BRAND",
>
>       "table": "DMT.DMT_KYLIN_JDMALL_ORDR_DTL_I_D",
>
>       "column": "BRAND_CD",
>
>       "derived": null
>
>     },
>
>     {
>
>       "name": "DT",
>
>       "table": "DMT.DMT_KYLIN_JDMALL_ORDR_DTL_I_D",
>
>       "column": "DT",
>
>       "derived": null
>
>     },
>
>     {
>
>       "name": "DIM.DIM_DAY_DERIVED",
>
>       "table": "DIM.DIM_DAY",
>
>       "column": null,
>
>       "derived": [
>
>         "DIM_DAY_NAME",
>
>         "DIM_WEEK_NAME",
>
>         "DIM_MONTH_NAME"
>
>       ]
>
>     },
>
>     {
>
>       "name": "FIRST",
>
>      "table": "DIM.DIM_ITEM_GEN_THIRD_CATE_D",
>
>       "column": "ITEM_FIRST_CATE_NAME",
>
>       "derived": null
>
>     },
>
>     {
>
>       "name": "SECOND",
>
>       "table": "DIM.DIM_ITEM_GEN_THIRD_CATE_D",
>
>       "column": "ITEM_SECOND_CATE_NAME",
>
>       "derived": null
>
>     },
>
>     {
>
>       "name": "THIRD",
>
>       "table": "DIM.DIM_ITEM_GEN_THIRD_CATE_D",
>
>       "column": "ITEM_THIRD_CATE_NAME",
>
>       "derived": null
>
>     }
>
>   ],
>
>   "measures": [
>
>     {
>
>       "name": "_COUNT_",
>
>       "function": {
>
>         "expression": "COUNT",
>
>         "parameter": {
>
>           "type": "constant",
>
>           "value": "1",
>
>           "next_parameter": null
>
>         },
>
>         "returntype": "bigint"
>
>       },
>
>       "dependent_measure_ref": null
>
>     },
>
>     {
>
>       "name": "QTTY",
>
>       "function": {
>
>         "expression": "SUM",
>
>         "parameter": {
>
>           "type": "column",
>
>           "value": "SALE_QTTY",
>
>           "next_parameter": null
>
>         },
>
>         "returntype": "bigint"
>
>       },
>
>       "dependent_measure_ref": null
>
>     },
>
>     {
>
>       "name": "BEFORE",
>
>       "function": {
>
>         "expression": "SUM",
>
>         "parameter": {
>
>           "type": "column",
>
>           "value": "BEFORE_PREFR_AMOUNT",
>
>           "next_parameter": null
>
>         },
>
>         "returntype": "decimal(25,4)"
>
>       },
>
>       "dependent_measure_ref": null
>
>     },
>
>     {
>
>       "name": "USER",
>
>       "function": {
>
>         "expression": "SUM",
>
>         "parameter": {
>
>           "type": "column",
>
>           "value": "USER_ACTUAL_PAY_AMOUNT",
>
>           "next_parameter": null
>
>         },
>
>        "returntype": "decimal(25,4)"
>
>       },
>
>       "dependent_measure_ref": null
>
>     },
>
>     {
>
>       "name": "SALE",
>
>       "function": {
>
>         "expression": "COUNT_DISTINCT",
>
>         "parameter": {
>
>           "type": "column",
>
>           "value": "SALE_ORD_ID",
>
>           "next_parameter": null
>
>         },
>
>         "returntype": "bitmap"
>
>       },
>
>       "dependent_measure_ref": null
>
>     }
>
>   ],
>
>   "dictionaries": [],
>
>   "rowkey": {
>
>     "rowkey_columns": [
>
>       {
>
>         "column": "BRAND_CD",
>
>         "encoding": "dict",
>
>         "isShardBy": false
>
>       },
>
>       {
>
>         "column": "DT",
>
>         "encoding": "dict",
>
>         "isShardBy": false
>
>       },
>
>       {
>
>         "column": "ITEM_FIRST_CATE_NAME",
>
>         "encoding": "dict",
>
>         "isShardBy": false
>
>       },
>
>       {
>
>         "column": "ITEM_SECOND_CATE_NAME",
>
>         "encoding": "dict",
>
>         "isShardBy": false
>
>       },
>
>       {
>
>         "column": "ITEM_THIRD_CATE_NAME",
>
>         "encoding": "dict",
>
>         "isShardBy": false
>
>       }
>
>     ]
>
>   },
>
>   "hbase_mapping": {
>
>     "column_family": [
>
>       {
>
>         "name": "F1",
>
>         "columns": [
>
>           {
>
>             "qualifier": "M",
>
>             "measure_refs": [
>
>               "_COUNT_",
>
>               "QTTY",
>
>               "BEFORE",
>
>               "USER"
>
>             ]
>
>           }
>
>         ]
>
>       },
>
>       {
>
>         "name": "F2",
>
>         "columns": [
>
>           {
>
>             "qualifier": "M",
>
>             "measure_refs": [
>
>               "SALE"
>
>             ]
>
>           }
>
>         ]
>
>       }
>
>     ]
>
>   },
>
>   "aggregation_groups": [
>
>     {
>
>       "includes": [
>
>         "BRAND_CD",
>
>         "DT",
>
>         "ITEM_FIRST_CATE_NAME",
>
>         "ITEM_SECOND_CATE_NAME",
>
>         "ITEM_THIRD_CATE_NAME"
>
>       ],
>
>       "select_rule": {
>
>         "hierarchy_dims": [],
>
>         "mandatory_dims": [],
>
>         "joint_dims": []
>
>       }
>
>     }
>
>   ],
>
>   "signature": "Kl5sPTVN78bEYTGKoUOsWg==",
>
>   "notify_list": [],
>
>   "status_need_notify": [
>
>     "ERROR",
>
>     "DISCARDED",
>
>     "SUCCEED"
>
>   ],
>
>   "partition_date_start": 1483747200000,
>
>   "partition_date_end": 3153600000000,
>
>   "auto_merge_time_ranges": [
>
>     604800000,
>
>     2419200000
>
>   ],
>
>   "retention_range": 0,
>
>   "engine_type": 2,
>
>   "storage_type": 2,
>
>   "override_kylin_properties": {
>
>     "kylin.hbase.region.cut": "1"
>
>   }
>
> }
>
>
>
> 数据量是14天的数据,sale_ord_id的基数是1.5亿
>
> Select dt,item_second_cate_name,count(distinct sale_ord_id),sum(sale_qtty)
>
> from DMT.DMT_KYLIN_JDMALL_ORDR_DTL_I_D a
>
> left join dim.dim_day b on a.dt = b.dim_day_txdate
>
> left join DIM.DIM_ITEM_GEN_THIRD_CATE_D c on a.item_third_cate_cd =
> c.item_third_cate_id
>
> group by dt,item_second_cate_name;
>
>
>
> 这条语句执行时间是37秒,去掉count(distinct sale_ord_id)后查询时0.07秒
>
> dt,item_second_cate_name 都是normal维度,从结果看是count_distinct
> 度量导致的慢查询,请问这个有什么优化的建议吗?
>
>
>
> 谢谢!
>
>
>
>
>
>
>



-- 
Best regards,

Shaofeng Shi 史少锋