You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@kylin.apache.org by 仇同心 <qi...@jd.com> on 2016/11/28 04:52:42 UTC

Cube optimization for help

Hi,all
There is a cube optimization for help.
Cuhe has 15 dimensions, including 14 normal dimensions and 1 derived dimension, and the cardinality of all dimensions is not high;And this cube also has 10 measures, including 1 count expression,2 sum expressions and 7 COUNT_DISTINCT expressions, and the cardinality of all measures’s column is more than one hundred million, the Return Type of 7 COUNT_DISTINCT expressions is hllc(14).

I founded that the step of “Build N - Dimension Cuboid Data” cost most of the time, I hope you can provide me some optimization plan!

The info about the segment: Start Time: 2016-11-10 00:00:00   End Time: 2016-11-24 00:00:00

Source Records: 201,027,127
Cube Size: 1.73 TB
Expansion Rate:14025.91%
The Duration of this job was 1,961.62 mins
The detailed build steps  of job :
      2016-11-25 11:15:24 GMT+8
      #1 Step Name: Create Intermediate Flat Hive Table
      Duration: 1.57 mins

     2016-11-25 11:16:58 GMT+8
      #2 Step Name: Redistribute Flat Hive Table
      Duration: 3.32 mins

     2016-11-25 11:20:17 GMT+8
      #3 Step Name: Extract Fact Table Distinct Columns
      Data Size: 8.57 MB
      Duration: 19.14 mins

      2016-11-25 11:39:26 GMT+8
      #4 Step Name: Build Dimension Dictionary
      Duration: 0.03 mins
     2016-11-25 11:39:27 GMT+8
     #5 Step Name: Save Cuboid Statistics
     Duration: 0.01 mins

   2016-11-25 11:39:28 GMT+8
    #6 Step Name: Create HTable
    Duration: 0.07 mins
    2016-11-25 11:39:32 GMT+8
    #7 Step Name: Build Base Cuboid Data
    Data Size: 2.53 GB
    Duration: 14.59 mins

    2016-11-25 11:54:08 GMT+8
    #8 Step Name: Build N-Dimension Cuboid Data : 13-Dimension
    Data Size: 2.31 GB
    Duration: 31.27 mins

    2016-11-25 12:25:24 GMT+8
    #9 Step Name: Build N-Dimension Cuboid Data : 12-Dimension
    Data Size: 17.81 GB
    Duration: 430.54 mins

   2016-11-25 19:35:56 GMT+8
  #10 Step Name: Build N-Dimension Cuboid Data : 11-Dimension
   Data Size: 62.58 GB
   Duration: 273.30 mins

   2016-11-26 00:09:15 GMT+8
   #11 Step Name: Build N-Dimension Cuboid Data : 10-Dimension
   Data Size: 135.99 GB
   Duration: 138.05 mins

   2016-11-26 02:27:18 GMT+8
   #12 Step Name: Build N-Dimension Cuboid Data : 9-Dimension
   Data Size: 211.91 GB
   Duration: 313.53 mins

  2016-11-26 07:40:49 GMT+8
  #13 Step Name: Build N-Dimension Cuboid Data : 8-Dimension
  Data Size: 252.22 GB
  Duration: 278.18 mins

  2016-11-26 12:19:01 GMT+8
  #14 Step Name: Build N-Dimension Cuboid Data : 7-Dimension
  Data Size: 234.18 GB
  Duration: 168.91 mins

 2016-11-26 15:07:55 GMT+8
#15 Step Name: Build N-Dimension Cuboid Data : 6-Dimension
Data Size: 169.02 GB
 Duration: 96.18 mins

 2016-11-26 16:44:06 GMT+8
#16 Step Name: Build N-Dimension Cuboid Data : 5-Dimension
Data Size: 92.81 GB
 Duration: 50.24 mins

 2016-11-26 17:34:20 GMT+8
#17 Step Name: Build N-Dimension Cuboid Data : 4-Dimension
Data Size: 36.45 GB
Duration: 16.21 mins

 2016-11-26 17:50:33 GMT+8
#18 Step Name: Build N-Dimension Cuboid Data : 3-Dimension
Data Size: 8.91 GB
Duration: 7.75 mins

 2016-11-26 17:58:18 GMT+8
#19 Step Name: Build N-Dimension Cuboid Data : 2-Dimension
Data Size: 1.00 GB
Duration: 3.08 mins

 2016-11-26 18:01:23 GMT+8
#20 Step Name: Build N-Dimension Cuboid Data : 1-Dimension
Data Size: 0.12 KB
Duration: 0.57 mins

 2016-11-26 18:01:57 GMT+8
#21 Step Name: Build Cube
Duration: 0.00 mins
 2016-11-26 18:01:57 GMT+8
#22 Step Name: Convert Cuboid Data to HFile
Data Size: 1.73 TB
Duration: 114.57 mins

 2016-11-26 19:56:31 GMT+8
#23 Step Name: Load HFile to HBase Table
Duration: 0.38 mins
 2016-11-26 19:56:54 GMT+8
#24 Step Name: Update Cube Info
Duration: 0.00 mins

2016-11-26 19:56:54 GMT+8
#25 Step Name: Garbage Collection
Duration: 0.12 mins


The following is the json of the cube:

{
  "uuid": "ab54fdc8-253d-430e-a193-4c17bbfd0872",
  "last_modified": 1480039756561,
  "version": "1.6.0",
  "name": "cube_pay_syt_order_det_analysis_new",
  "model_name": "pay_syt_order_det_analysis",
  "description": "",
  "null_string": null,
  "dimensions": [
    {
      "name": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D.ETL_DT",
      "table": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D",
      "column": "ETL_DT",
      "derived": null
    },
    {
      "name": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D.PWDTYPE",
      "table": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D",
      "column": "PWDTYPE",
      "derived": null
    },
    {
      "name": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D.REAL_NAME",
      "table": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D",
      "column": "REAL_NAME",
      "derived": null
    },
    {
      "name": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D.IS_NEW_CARD",
      "table": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D",
      "column": "IS_NEW_CARD",
      "derived": null
    },
    {
      "name": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D.REQ_ORDER_YEWU_TYPE",
      "table": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D",
      "column": "REQ_ORDER_YEWU_TYPE",
      "derived": null
    },
    {
      "name": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D.ORDER_YEWU_TYPE",
      "table": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D",
      "column": "ORDER_YEWU_TYPE",
      "derived": null
    },
    {
      "name": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D.ITEM_FIRST_CATE_NAME",
      "table": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D",
      "column": "ITEM_FIRST_CATE_NAME",
      "derived": null
    },
    {
      "name": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D.ITEM_SECOND_CATE_NAME",
      "table": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D",
      "column": "ITEM_SECOND_CATE_NAME",
      "derived": null
    },
    {
      "name": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D.ITEM_THIRD_CATE_NAME",
      "table": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D",
      "column": "ITEM_THIRD_CATE_NAME",
      "derived": null
    },
    {
      "name": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D.ITEM_BRAND_NAME",
      "table": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D",
      "column": "ITEM_BRAND_NAME",
      "derived": null
    },
    {
      "name": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D.COUNTY_NAME",
      "table": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D",
      "column": "COUNTY_NAME",
      "derived": null
    },
    {
      "name": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D.CITY_NAME",
      "table": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D",
      "column": "CITY_NAME",
      "derived": null
    },
    {
      "name": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D.PROVINCE_NAME",
      "table": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D",
      "column": "PROVINCE_NAME",
      "derived": null
    },
    {
      "name": "DIM.DIM_DAY_DERIVED",
      "table": "DIM.DIM_DAY",
      "column": null,
      "derived": [
        "DIM_DAY_NAME",
        "DIM_DAY_TXDATE"
      ]
    },
    {
      "name": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D.PAY_TYPE",
      "table": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D",
      "column": "PAY_TYPE",
      "derived": null
    }
  ],
  "measures": [
    {
      "name": "_COUNT_",
      "function": {
        "expression": "COUNT",
        "parameter": {
          "type": "constant",
          "value": "1",
          "next_parameter": null
        },
        "returntype": "bigint"
      },
      "dependent_measure_ref": null
    },
    {
      "name": "应付金额",
      "function": {
        "expression": "SUM",
        "parameter": {
          "type": "column",
          "value": "AMOUNT",
          "next_parameter": null
        },
        "returntype": "decimal(25,4)"
      },
      "dependent_measure_ref": null
    },
    {
      "name": "商品成功支付金额",
      "function": {
        "expression": "SUM",
        "parameter": {
          "type": "column",
          "value": "USER_ACTUAL_PAY_AMOUNT",
         "next_parameter": null
        },
        "returntype": "decimal(25,4)"
      },
      "dependent_measure_ref": null
    },
    {
      "name": "商城APP请求单",
      "function": {
        "expression": "COUNT_DISTINCT",
        "parameter": {
          "type": "column",
          "value": "SERIAL_NUM",
          "next_parameter": null
        },
        "returntype": "hllc(14)"
      },
      "dependent_measure_ref": null
    },
    {
      "name": "请求订单数",
      "function": {
        "expression": "COUNT_DISTINCT",
        "parameter": {
          "type": "column",
          "value": "REQ_OUTBIZNO",
          "next_parameter": null
        },
        "returntype": "hllc(14)"
      },
      "dependent_measure_ref": null
    },
    {
      "name": "支付订单数",
      "function": {
        "expression": "COUNT_DISTINCT",
        "parameter": {
          "type": "column",
          "value": "OUTBIZNO",
          "next_parameter": null
        },
        "returntype": "hllc(14)"
      },
      "dependent_measure_ref": null
    },
    {
      "name": "成功支付订单数",
      "function": {
        "expression": "COUNT_DISTINCT",
        "parameter": {
          "type": "column",
          "value": "SUC_OUTBIZNO",
          "next_parameter": null
        },
        "returntype": "hllc(14)"
      },
      "dependent_measure_ref": null
    },
    {
      "name": "支付单数",
      "function": {
        "expression": "COUNT_DISTINCT",
        "parameter": {
          "type": "column",
          "value": "PAYID",
          "next_parameter": null
        },
        "returntype": "hllc(15)"
      },
      "dependent_measure_ref": null
    },
    {
      "name": "成功支付单数",
      "function": {
        "expression": "COUNT_DISTINCT",
        "parameter": {
          "type": "column",
          "value": "SUC_PAYID",
          "next_parameter": null
        },
        "returntype": "hllc(14)"
      },
      "dependent_measure_ref": null
    },
    {
      "name": "用户数",
      "function": {
        "expression": "COUNT_DISTINCT",
        "parameter": {
          "type": "column",
          "value": "PIN",
          "next_parameter": null
        },
        "returntype": "hllc(14)"
      },
      "dependent_measure_ref": null
    }
  ],
  "dictionaries": [],
  "rowkey": {
    "rowkey_columns": [
      {
        "column": "ETL_DT",
        "encoding": "dict",
        "isShardBy": false
      },
      {
        "column": "COUNTY_NAME",
        "encoding": "dict",
        "isShardBy": false
      },
      {
        "column": "ITEM_BRAND_NAME",
        "encoding": "dict",
        "isShardBy": false
      },
      {
        "column": "ITEM_THIRD_CATE_NAME",
        "encoding": "dict",
        "isShardBy": false
      },
      {
        "column": "ITEM_SECOND_CATE_NAME",
        "encoding": "dict",
        "isShardBy": false
      },
      {
        "column": "ITEM_FIRST_CATE_NAME",
        "encoding": "dict",
        "isShardBy": false
      },
      {
        "column": "CITY_NAME",
        "encoding": "dict",
        "isShardBy": false
      },
      {
        "column": "PROVINCE_NAME",
        "encoding": "dict",
        "isShardBy": false
      },
      {
        "column": "REQ_ORDER_YEWU_TYPE",
        "encoding": "dict",
        "isShardBy": false
      },
      {
        "column": "ORDER_YEWU_TYPE",
        "encoding": "dict",
        "isShardBy": false
      },
      {
        "column": "PWDTYPE",
        "encoding": "dict",
        "isShardBy": false
      },
      {
        "column": "IS_NEW_CARD",
        "encoding": "dict",
        "isShardBy": false
      },
      {
        "column": "REAL_NAME",
        "encoding": "dict",
        "isShardBy": false
      },
      {
        "column": "PAY_TYPE",
        "encoding": "dict",
        "isShardBy": false
      }
    ]
  },
  "hbase_mapping": {
    "column_family": [
      {
        "name": "F1",
        "columns": [
          {
            "qualifier": "M",
            "measure_refs": [
              "_COUNT_",
              "应付金额",
              "商品成功支付金额"
            ]
          }
        ]
      },
      {
        "name": "F2",
        "columns": [
          {
            "qualifier": "M",
            "measure_refs": [
              "商城APP请求单",
              "请求订单数",
              "支付订单数",
              "成功支付订单数",
              "支付单数",
              "成功支付单数",
              "用户数"
            ]
          }
        ]
      }
    ]
  },
  "aggregation_groups": [
    {
      "includes": [
        "ETL_DT",
        "PWDTYPE",
        "REAL_NAME",
        "IS_NEW_CARD",
        "REQ_ORDER_YEWU_TYPE",
        "ORDER_YEWU_TYPE",
        "ITEM_FIRST_CATE_NAME",
        "ITEM_SECOND_CATE_NAME",
        "ITEM_THIRD_CATE_NAME",
        "ITEM_BRAND_NAME",
        "COUNTY_NAME",
        "CITY_NAME",
        "PROVINCE_NAME"
      ],
      "select_rule": {
        "hierarchy_dims": [
          [
            "COUNTY_NAME",
            "CITY_NAME",
            "PROVINCE_NAME"
          ],
          [
            "ITEM_FIRST_CATE_NAME",
            "ITEM_SECOND_CATE_NAME",
            "ITEM_THIRD_CATE_NAME"
          ]
        ],
        "mandatory_dims": [
          "ETL_DT"
        ],
        "joint_dims": []
      }
    }
  ],
  "signature": "Ukf/u2JnSO44TPQWDJFhyQ==",
  "notify_list": [],
  "status_need_notify": [
    "ERROR",
    "DISCARDED",
    "SUCCEED"
  ],
  "partition_date_start": 1478736000000,
  "partition_date_end": 3153600000000,
  "auto_merge_time_ranges": [
    604800000,
    2419200000
  ],
  "retention_range": 0,
  "engine_type": 2,
  "storage_type": 2,
  "override_kylin_properties": {}
}