You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by li...@apache.org on 2022/04/21 08:37:13 UTC

svn commit: r1900099 [1/4] - in /kylin/site: ./ cn/blog/ cn_blog/2022/04/ cn_blog/2022/04/20/ cn_blog/2022/04/20/kylin4-on-cloud-part1/ cn_blog/2022/04/20/kylin4-on-cloud-part2/ images/blog/kylin4_on_cloud/

Author: lidong
Date: Thu Apr 21 08:37:12 2022
New Revision: 1900099

URL: http://svn.apache.org/viewvc?rev=1900099&view=rev
Log:
Add new blog:Kylin on Cloud

Added:
    kylin/site/cn_blog/2022/04/
    kylin/site/cn_blog/2022/04/20/
    kylin/site/cn_blog/2022/04/20/kylin4-on-cloud-part1/
    kylin/site/cn_blog/2022/04/20/kylin4-on-cloud-part1/index.html
    kylin/site/cn_blog/2022/04/20/kylin4-on-cloud-part2/
    kylin/site/cn_blog/2022/04/20/kylin4-on-cloud-part2/index.html
    kylin/site/images/blog/kylin4_on_cloud/
    kylin/site/images/blog/kylin4_on_cloud/0_deploy_kylin.png   (with props)
    kylin/site/images/blog/kylin4_on_cloud/10_full_build_cube.png   (with props)
    kylin/site/images/blog/kylin4_on_cloud/11_kylin_job_complete.png   (with props)
    kylin/site/images/blog/kylin4_on_cloud/12_destroy_job_cluster.png   (with props)
    kylin/site/images/blog/kylin4_on_cloud/13_check_aws_stacks.png   (with props)
    kylin/site/images/blog/kylin4_on_cloud/14_kylin_web_ui.png   (with props)
    kylin/site/images/blog/kylin4_on_cloud/15_query_in_kylin.png   (with props)
    kylin/site/images/blog/kylin4_on_cloud/16_mdx_web_ui.png   (with props)
    kylin/site/images/blog/kylin4_on_cloud/17_connect_to_kylin.png   (with props)
    kylin/site/images/blog/kylin4_on_cloud/18_exit_management.png   (with props)
    kylin/site/images/blog/kylin4_on_cloud/19_kylin_running.png   (with props)
    kylin/site/images/blog/kylin4_on_cloud/1_table_ER.png   (with props)
    kylin/site/images/blog/kylin4_on_cloud/20_import_dataset.png   (with props)
    kylin/site/images/blog/kylin4_on_cloud/21_tableau_connect.png   (with props)
    kylin/site/images/blog/kylin4_on_cloud/22_tableau_server.png   (with props)
    kylin/site/images/blog/kylin4_on_cloud/23_tableau_dataset.png   (with props)
    kylin/site/images/blog/kylin4_on_cloud/24_tableau_covid19_map.png   (with props)
    kylin/site/images/blog/kylin4_on_cloud/25_tableau_province.png   (with props)
    kylin/site/images/blog/kylin4_on_cloud/26_tableau_us_covid19.png   (with props)
    kylin/site/images/blog/kylin4_on_cloud/27_tableau_taxi_1.png   (with props)
    kylin/site/images/blog/kylin4_on_cloud/27_tableau_taxi_2.png   (with props)
    kylin/site/images/blog/kylin4_on_cloud/28_tableau_taxi_3.png   (with props)
    kylin/site/images/blog/kylin4_on_cloud/29_tableau_taxi_4.png   (with props)
    kylin/site/images/blog/kylin4_on_cloud/2_step_overview.jpg   (with props)
    kylin/site/images/blog/kylin4_on_cloud/30_excel_connect.png   (with props)
    kylin/site/images/blog/kylin4_on_cloud/31_excel_server.png   (with props)
    kylin/site/images/blog/kylin4_on_cloud/32_tableau_dataset.png   (with props)
    kylin/site/images/blog/kylin4_on_cloud/33_tableau_covid19_1.png   (with props)
    kylin/site/images/blog/kylin4_on_cloud/34_excel_covid20_2.png   (with props)
    kylin/site/images/blog/kylin4_on_cloud/35_excel_taxi_1.png   (with props)
    kylin/site/images/blog/kylin4_on_cloud/36_excel_taxi_2.png   (with props)
    kylin/site/images/blog/kylin4_on_cloud/37_jdk_8.png   (with props)
    kylin/site/images/blog/kylin4_on_cloud/38_demo_result.png   (with props)
    kylin/site/images/blog/kylin4_on_cloud/39_check_s3_demo.png   (with props)
    kylin/site/images/blog/kylin4_on_cloud/3_kylin_cluster.jpg   (with props)
    kylin/site/images/blog/kylin4_on_cloud/4_deploy_cluster_successfully.png   (with props)
    kylin/site/images/blog/kylin4_on_cloud/5_check_aws_stacks.png   (with props)
    kylin/site/images/blog/kylin4_on_cloud/6_list_cluster_node.png   (with props)
    kylin/site/images/blog/kylin4_on_cloud/7_query_in_spark_sql.png   (with props)
    kylin/site/images/blog/kylin4_on_cloud/8_kylin_web_ui.png   (with props)
    kylin/site/images/blog/kylin4_on_cloud/9_reload_kylin_metadata.png   (with props)
    kylin/site/images/blog/kylin4_on_cloud/kylin_on_cloud.png   (with props)
Modified:
    kylin/site/cn/blog/index.html
    kylin/site/feed.xml

Modified: kylin/site/cn/blog/index.html
URL: http://svn.apache.org/viewvc/kylin/site/cn/blog/index.html?rev=1900099&r1=1900098&r2=1900099&view=diff
==============================================================================
--- kylin/site/cn/blog/index.html (original)
+++ kylin/site/cn/blog/index.html Thu Apr 21 08:37:12 2022
@@ -199,6 +199,26 @@ var _hmt = _hmt || [];
             
             
             <div class="col-md-6 col-lg-6 col-xs-12">
+              <a class="blog-card" href="/cn_blog/2022/04/20/kylin4-on-cloud-part2/">
+                <div class="blog-pic">
+                  <img width="20" src="/assets/images/icon_blog_w.png" />
+                </div>
+                <p class="blog-title">Kylin on Cloud —— 两小时快速搭建云上数据分析平台(下)</p>
+                <p align="left" class="post-meta">posted: Apr 20, 2022</p>
+              </a>
+            </div>
+      
+            <div class="col-md-6 col-lg-6 col-xs-12">
+              <a class="blog-card" href="/cn_blog/2022/04/20/kylin4-on-cloud-part1/">
+                <div class="blog-pic">
+                  <img width="20" src="/assets/images/icon_blog_w.png" />
+                </div>
+                <p class="blog-title">Kylin on Cloud —— 两小时快速搭建云上数据分析平台(上)</p>
+                <p align="left" class="post-meta">posted: Apr 20, 2022</p>
+              </a>
+            </div>
+      
+            <div class="col-md-6 col-lg-6 col-xs-12">
               <a class="blog-card" href="/cn_blog/2022/03/31/how-to-use-excel-to-query-kylin/">
                 <div class="blog-pic">
                   <img width="20" src="/assets/images/icon_blog_w.png" />

Added: kylin/site/cn_blog/2022/04/20/kylin4-on-cloud-part1/index.html
URL: http://svn.apache.org/viewvc/kylin/site/cn_blog/2022/04/20/kylin4-on-cloud-part1/index.html?rev=1900099&view=auto
==============================================================================
--- kylin/site/cn_blog/2022/04/20/kylin4-on-cloud-part1/index.html (added)
+++ kylin/site/cn_blog/2022/04/20/kylin4-on-cloud-part1/index.html Thu Apr 21 08:37:12 2022
@@ -0,0 +1,637 @@
+<!--
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+-->
+<!doctype html>
+<html>
+	<!--
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+-->
+
+<head>
+  <meta charset="utf-8">
+  <meta http-equiv="X-UA-Compatible" content="IE=edge">
+  <meta name="viewport" content="width=device-width, initial-scale=1">
+
+  <title>Apache Kylin | Kylin on Cloud —— 两小时快速搭建云上数据分析平台(上)</title>
+  <meta name="description" content="背景">
+  <meta name="author"      content="Apache Kylin">
+  <link rel="shortcut icon" href="fav.png" type="image/png">
+
+
+
+<link rel="stylesheet" href="/assets/css/animate.css">
+<!-- Bootstrap -->
+<link rel="stylesheet" href="/assets/css/bootstrap.min.css">
+
+<!-- Fonts -->
+<!-- <link rel="stylesheet" href="http://fonts.googleapis.com/css?family=Alice|Open+Sans:400,300,700"> -->
+
+<!-- Icons -->
+<link rel="stylesheet" href="/assets/css/font-awesome.min.css">
+
+  <!-- Custom styles -->
+  <link rel="stylesheet" href="/assets/css/styles.css">
+  <link rel="stylesheet" href="/assets/css/docs.css">
+  <link rel="stylesheet" href="/assets/css/pygments.css">
+
+  <link rel="canonical" href="http://kylin.apache.org/cn_blog/2022/04/20/kylin4-on-cloud-part1/">
+  <link rel="alternate" type="application/rss+xml" title="Apache Kylin" href="http://kylin.apache.org/feed.xml" />
+
+<!--[if lt IE 9]> <script src="assets/js/html5shiv.js"></script> <![endif]-->
+<!-- Global site tag (gtag.js) - Google Analytics -->
+<script async src="https://www.googletagmanager.com/gtag/js?id=UA-120788561-1"></script>
+<script>
+  window.dataLayer = window.dataLayer || [];
+  function gtag(){dataLayer.push(arguments);}
+  gtag('js', new Date());
+
+  gtag('config', 'UA-120788561-1');
+</script>
+<script type="text/javascript" src="/assets/js/jquery-1.9.1.min.js"></script>
+<script type="text/javascript" src="/assets/js/nside.js"></script>
+<script type="text/javascript" src="/assets/js/nnav.js"></script>
+<script>
+var _hmt = _hmt || [];
+(function() {
+  var hm = document.createElement("script");
+  hm.src = "https://hm.baidu.com/hm.js?bdc5e03add430c0b72cc0eb91eabfa99";
+  var s = document.getElementsByTagName("script")[0]; 
+  s.parentNode.insertBefore(hm, s);
+})();
+</script>
+
+</head>
+
+	<body>
+		<!--
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+-->
+
+<header id="header" >
+  
+  <!-- Main Menu -->
+  <nav class="navbar navbar-default" role="navigation" id="nav-wrapper">
+    <div class="container-fluid" id="nav">
+      <!--
+      <img class="img-circle" width="40px" height="40px" id="circlelogo" src="/assets/images/kylin_logo.jpg">
+      -->
+      <!-- Brand and toggle get grouped for better mobile display -->
+      <div class="navbar-header">
+        <img class="navbar-logo" width="46" src="/assets/images/kylin_logo.png" ></img>
+        <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#bs-example-navbar-collapse-1">
+          <span class="sr-only">Toggle navigation</span>
+          <span class="icon-bar"></span>
+          <span class="icon-bar"></span>
+          <span class="icon-bar"></span>
+        </button>
+        <ul class="nav icon-navbar">
+            <li><a href="https://twitter.com/apachekylin" target="_blank" class="fa fa-twitter fa-lg"  title="Twitter: @ApacheKylin" ></a></li>
+            <li><a href="https://github.com/apache/kylin" target="_blank" class="fa fa-github-alt fa-lg" title="Github: apache/kylin" ></a></li>
+            <li><a href="https://www.facebook.com/kylinio" target="_blank" class="fa fa-facebook fa-lg" title="Facebook: kylin.io" ></a></li>
+        </ul>
+      </div>
+
+      <!-- Collect the nav links, forms, and other content for toggling -->
+      <div class="navbar-collapse collapse" id="bs-example-navbar-collapse-1">
+
+        <ul class="nav navbar-nav">
+
+          <li><a href="/">Home</a></li>
+          <li>
+            <a href="/docs" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-haspopup="true" aria-expanded="false">Docs<span class="caret"></span></a>
+            <ul class="dropdown-menu">
+              <li><a href="/docs/">Latest Release(Kylin 4.0.1)</a></li>
+              <li><a href="/docs31/">Kylin 3.1.3</a></li>
+              <li><a href="/docs24/">Kylin 2.4.0</a></li>
+              <li><a href="/archive/">Archive</a></li>
+            </ul>
+          </li>
+          <li><a href="/download">Download</a></li>
+          <li><a href="/community" >Community</a></li>
+          <li>
+            <a href="/development" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-haspopup="true" aria-expanded="false">Development<span class="caret"></span></a>
+            <ul class="dropdown-menu">
+              <li><a href="/development40/">Kylin 4.x</a></li>
+              <li><a href="/development/">Kylin 3.x And Older Versions</a></li>
+            </ul>
+          </li>
+          <li><a href="/blog">Blog</a></li>
+          <li><a href="/cn" >中文版</a></li> 
+        </ul>     
+      </div><!-- /.navbar-collapse -->
+    </div><!-- /.container-fluid -->
+  </nav>
+
+  <div id="head" class="parallax normal-header" >
+    <div class="text-center header-apache">
+      <a href="http://apache.org/foundation/contributing.html" title="Support Apache" style="margin-left: 150px;">
+        <div>
+          <img src="https://www.apache.org/images/SupportApache-small.png" >
+        </div>
+      </a>
+    </div>  
+  </div>
+  
+ </header>
+
+		<div class="page-content main">
+			<header style=" padding:2em 0 0 ">
+			<div class="container" >
+			  <div style=" padding:0 4em">
+                <div class="blog-icon">
+                  <img width="30" src="/assets/images/icon_blog_w.png">
+                </div>
+				<h4 class="index-title" style=" float:left;"><span>Apache Kylin™ Technical Blog</span></h4>
+			  </div>
+			</div>
+		</div>
+
+		<div class="container blog">
+			<div>
+				<article class="post-content" >	
+				<!--
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+-->
+
+<div class="post" style=" padding:2em 4em 4em 4em">
+
+  <header class="post-header">
+    <h1 class="post-title">Kylin on Cloud —— 两小时快速搭建云上数据分析平台(上)</h1>
+    <p class="post-meta" >Apr 20, 2022 • Yaqian Zhang</p>
+  </header>
+
+  <article class="post-content" >
+    <h2 id="section">背景</h2>
+
+<p>Apache Kylin 是基于预计算和多维模型的多维数据库,支持 SQL 标准查询接口,在 Kylin 中用户可以通过创建 Model 定义表关系,通过创建 Cube 定义维度和度量,然后构建 Cube 对需要聚合的数据进行预计算,将预计算好的数据保存起来,用户执行查询时便可以直接在经过预计算的数据上进行进一步的聚合或者直接返回查询结果,成倍提升查询效率。</p>
+
+<p>随着 Kylin 4.0 新架构的版本发布与更新,Kylin 具备了在脱离 Hadoop 的云环境下进行集群部署的能力;为了使用户能够轻松地在云上部署 Kylin,Kylin 社区又于近日开发了云上部署工具,用户使用部署工具只需执行一行命令便可以得到一个完备的 kylin 集群,获得高效快速的分析体验;2022 年1月份,Kylin 社区发布了 mdx for kylin 来加强 Kylin 作为多维数据库的业务表达能力,MDX for Kylin 提供了 MDX 的查询接口,mdx for kylin 可以在 Kylin 已经定义好的多维模型的基础上更进一步的创建业务指标,将 Kylin 中的数据模型转换为业务友好的语言,赋予数据业务价值,方便对接 Excel、Tableau 等 BI 工具进行多维分析。</p>
+
+<p>基于以上一系列的技术支撑,用户不仅可以方便快捷的在云上部署 Kylin 集群,创建多维模型,体验经过预计算的快速查询响应,还能够结合 MDX for Kylin 对业务指标进行定义和管理,将 DW 技术层提升到业务语义层。</p>
+
+<p>用户可以在 Kylin + MDX for Kylin 之上直接对接 BI 工具进行多维数据分析,也可以以此为底座建设指标平台等复杂应用。相比于直接基于 Spark、Hive 等在运行时进行 Join 和聚合查询的计算引擎之上构建指标平台,利用 Kylin 可以依托于多维模型和预计算技术,以及 mdx for kylin 的语义层能力,满足指标平台所需要的海量数据计算、极速查询响应、统一的多维模型、对接多种 BI、基础的业务指标管理等多种关键功能。</p>
+
+<p>本文的以下部分将会带领读者,从一个数据工程师的角度,快速体验在云上搭建基于 Kylin 的数据分析平台(Kylin on Cloud),在亿行级数据之上获得高性能低成本的查询体验,并通过 mdx for kylin 管理业务指标,直接对接 BI 工具快速生成报表。</p>
+
+<p>本教程每一个步骤都有详细说明,并附有配图和检查点,帮助新手上路。读者只需要准备一个 AWS 账号,预计这个过程需要大约 2 小时,花费 ¥100 左右。</p>
+
+<p><img src="/images/blog/kylin4_on_cloud/0_deploy_kylin.png" alt="" /></p>
+
+<h2 id="section-1">业务场景</h2>
+
+<p>自 2020 年初以来 COVID-19 在全世界范围内快速传播,对人们的衣食住行尤其是出行习惯造成极大影响。这次数据分析结合 COVID-19 疫情数据和 2018 年以来纽约出租车出行数据,通过分析疫情指标和各种出行指标,比如确诊人数、病死率、出租车订单数、平均出行距离等,来洞察纽约市出租车行业受疫情影响的变化趋势,以支撑决策。</p>
+
+<h3 id="section-2">业务问题</h3>
+
+<ul>
+  <li>多指标联合分析各个国家地区疫情严重程度</li>
+  <li>纽约市各个街区出行指标对比,比如订单数量、出行里程等</li>
+  <li>疫情对于出租车订单数量有无明显影响</li>
+  <li>疫情之后的出行习惯变化,更偏向远程出行还是近程</li>
+  <li>疫情严重程度与出租车出行次数是否强相关</li>
+</ul>
+
+<h3 id="section-3">数据集</h3>
+
+<h4 id="covid-19-">COVID-19 数据集</h4>
+
+<p>COVID-19 数据集包括一张事实表 <code class="highlighter-rouge">covid_19_activity</code> 和一张维度表 <code class="highlighter-rouge">lookup_calendar</code>。</p>
+
+<p>其中,<code class="highlighter-rouge">covid_19_activity</code> 记录每一天全球范围内不同地区的确诊和死亡数字;<code class="highlighter-rouge">lookup_calendar</code> 为日期维度表,保存了时间的扩展信息,比如每一个日期对应的年始、月始等,<code class="highlighter-rouge">covid_19_activity</code> 和 <code class="highlighter-rouge">lookup_calendar</code> 之间通过日期进行关联。</p>
+
+<p>COVID-19 数据集相关信息如下:</p>
+
+<table>
+  <tbody>
+    <tr>
+      <td>数据大小</td>
+      <td>235 MB</td>
+    </tr>
+    <tr>
+      <td>事实表数据行数</td>
+      <td>2,753,688</td>
+    </tr>
+    <tr>
+      <td>数据日期</td>
+      <td>2020-01-21~2022-03-07</td>
+    </tr>
+    <tr>
+      <td>数据集提供方下载地址</td>
+      <td>https://data.world/covid-19-data-resource-hub/covid-19-case-counts/workspace/file?filename=COVID-19+Activity.csv</td>
+    </tr>
+    <tr>
+      <td>数据集 S3 地址</td>
+      <td>s3://public.kyligence.io/kylin/kylin_demo/data/covid19_data/</td>
+    </tr>
+  </tbody>
+</table>
+
+<h4 id="section-4">纽约市出租车订单数据集</h4>
+
+<p>纽约市出租车订单数据集包括一张事实表 <code class="highlighter-rouge">taxi_trip_records_view</code> 和两张维度表 <code class="highlighter-rouge">newyork_zone</code>、<code class="highlighter-rouge">lookup_calendar</code>。</p>
+
+<p>其中,<code class="highlighter-rouge">taxi_trip_records_view</code> 中的一条记录对应一次出租车出行,记录了出发地点 ID、到达地点 ID、出行时长、订单金额、出行距离等;<code class="highlighter-rouge">newyork_zone</code> 记录了地点 ID 所对应的行政区等信息,<code class="highlighter-rouge">taxi_trip_records_view</code> 分别通过 <code class="highlighter-rouge">PULocationID</code> 和 <code class="highlighter-rouge">DOLocationID</code> 两个列与 <code class="highlighter-rouge">newyork_zone</code> 建立关联关系,统计出发街区和到达街区信息;<code class="highlighter-rouge">lookup_calendar</code> 与 <code class="highlighter-rouge">COVID-19</code> 数据集中的维度表为同一张表,<code class="highlighter-rouge">taxi_trip_records_view</code> 与 <code class="highlighter-rouge">lookup_calendar</code> 通过日期进行关联。</p>
+
+<p>纽约市出租车订单数据集相关信息如下:</p>
+
+<table>
+  <tbody>
+    <tr>
+      <td>数据大小</td>
+      <td>19 G</td>
+    </tr>
+    <tr>
+      <td>事实表数据行数</td>
+      <td>226,849,274</td>
+    </tr>
+    <tr>
+      <td>数据日期</td>
+      <td>2018-01-01~2021-07-31</td>
+    </tr>
+    <tr>
+      <td>数据集提供方下载地址</td>
+      <td>https://www1.nyc.gov/site/tlc/about/tlc-trip-record-data.page</td>
+    </tr>
+    <tr>
+      <td>数据集 S3 地址</td>
+      <td>s3://public.kyligence.io/kylin/kylin_demo/data/trip_data_2018-2021/</td>
+    </tr>
+  </tbody>
+</table>
+
+<h4 id="er-">ER 关系图</h4>
+
+<p>新冠疫情数据集和纽约市出租车订单数据集的 ER 关系图如下图所示:</p>
+
+<p><img src="/images/blog/kylin4_on_cloud/1_table_ER.png" alt="" /></p>
+
+<h3 id="section-5">指标设计</h3>
+
+<p>针对需要分析的业务场景和业务问题,我们设计了以下原子指标和业务指标:</p>
+
+<h6 id="section-6">1.原子指标</h6>
+
+<p>原子指标指的是在 Kylin Cube 中创建的各种度量,它们通常是在单一列上面进行聚合计算,相对比较简单。</p>
+
+<ul>
+  <li>Covid19 病例数 sum(covid_19_activity.people_positive_cases_count)</li>
+  <li>Covid19 病死数 sum(covid_19_activity.people_death_count)</li>
+  <li>新增 Covid19 病例数 sum(covid_19_activity.people_positive_new_cases_count)</li>
+  <li>新增 Covid19 病死数 sum(covid_19_activity.people_death_new_count)</li>
+  <li>出租车出行里程 sum(taxi_trip_records_view.trip_distance)</li>
+  <li>出租车订单交易额 sum(taxi_trip_records_view.total_amount)</li>
+  <li>出租车出行数量 count()</li>
+  <li>出租车出行时长 sum(taxi_trip_records_view.trip_time_hour)</li>
+</ul>
+
+<h6 id="section-7">2.业务指标</h6>
+
+<p>业务指标是指基于原子指标定义的各种复合运算,具有具体的业务含义。</p>
+
+<ul>
+  <li>各原子指标的月累计MTD、年累计YTD</li>
+  <li>各原子指标的月增速MOM、年增速YOY</li>
+  <li>Covid19 病死率:死亡人数/确诊人数</li>
+  <li>出租车平均出行速度:出租车出行里程/出租车出行时间</li>
+  <li>出租车出行平均里程:出租车出行里程/出租车出行数量</li>
+</ul>
+
+<h2 id="section-8">操作步骤概览</h2>
+
+<p>搭建基于 Apache Kylin 的云上数据分析平台并进行数据分析的主要操作步骤如下图:</p>
+
+<p><img src="/images/blog/kylin4_on_cloud/2_step_overview.jpg" alt="" /></p>
+
+<h2 id="section-9">集群架构</h2>
+
+<p>使用云上部署工具部署出的 Kylin 集群架构如图所示:</p>
+
+<p><img src="/images/blog/kylin4_on_cloud/3_kylin_cluster.jpg" alt="" /></p>
+
+<h2 id="kylin-on-cloud-">Kylin on Cloud 部署</h2>
+
+<h3 id="section-10">环境要求</h3>
+
+<ul>
+  <li>需要本地机器已安装 git,用于下载部署工具代码;</li>
+  <li>需要本地机器已安装 Python 3.6.6 及以上版本,用于运行部署工具。</li>
+</ul>
+
+<h3 id="aws-">AWS 权限检查与初始化</h3>
+
+<p>登录 AWS 账号,根据 <a href="https://github.com/apache/kylin/blob/kylin4_on_cloud/readme/prerequisites.md">准备文档</a> 来检查用户权限、创建部署工具需要的 Access Key、IAM Role、Key Pair 和 S3 工作目录。后续的 AWS 操作都会以这个帐号的身份执行。</p>
+
+<h3 id="section-11">配置部署工具</h3>
+
+<p>1.执行下面的命令获得 Kylin on AWS 部署工具的代码</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code>git clone -b kylin4_on_cloud --single-branch https://github.com/apache/kylin.git <span class="o">&amp;&amp;</span> <span class="nb">cd </span>kylin
+</code></pre>
+</div>
+
+<p>2.在本地机器初始化 python 虚拟环境</p>
+
+<p>检查 python 环境,需要 Python 3.6.6 以上:</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code>python --version
+</code></pre>
+</div>
+
+<p>初始化 python 虚拟环境,安装依赖:</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code>bin/init.sh
+<span class="nb">source </span>venv/bin/activate
+</code></pre>
+</div>
+
+<p>3.修改配置文件 <code class="highlighter-rouge">kylin_configs.yaml</code></p>
+
+<p>打开部署工具代码中的 kylin_configs.yaml,将文件中的配置项替换为实际值:</p>
+
+<ul>
+  <li><code class="highlighter-rouge">AWS_REGION</code>: EC2 节点位置 Region,默认为 cn-northwest-1</li>
+  <li><code class="highlighter-rouge">${IAM_ROLE_NAME}</code>: 提前创建的 IAM Role 名称,比如 kylin_deploy_role</li>
+  <li><code class="highlighter-rouge">${S3_URI}</code>: 用于部署 kylin 的 S3 工作目录,比如 s3://kylindemo/kylin_demo_dir/</li>
+  <li><code class="highlighter-rouge">${KEY_PAIR}</code>: 提前创建的 Key pairs 名字,比如 kylin_deploy_key</li>
+  <li><code class="highlighter-rouge">${Cidr Ip}</code>: 允许访问 EC2 实例的 IP 地址范围,比如 10.1.0.0/32,通常设为您的外网 IP 地址,确保创建的 EC2 实例只有您能访问</li>
+</ul>
+
+<p>出于读写分离隔离构建和查询资源的考虑,在以下的步骤中会先启动一个构建集群用于连接 Glue 建表、加载数据源、提交构建任务进行预计算,然后销毁构建集群,保留元数据,启动带有 MDX for Kylin 的查询集群,用于创建业务指标、连接 BI 工具执行查询,进行数据分析。Kylin on AWS 集群使用 RDS 存储元数据,使用 S3 存储构建后的数据,并且支持从 AWS Glue 中加载数据源,除了 EC2 节点之外使用的资源都是持久化的,不会随着节点的删除而消失,所以在没有查询或者构建任务时,用户可以随时销毁构建或查询集群,只要保留元数据、S3 工作目录即可。</p>
+
+<h3 id="kylin-">Kylin 构建集群</h3>
+
+<h4 id="kylin--1">启动 Kylin 构建集群</h4>
+
+<p>1.通过如下命令启动构建集群。根据网络情况不同,部署启动可能需要 15-30 分钟。</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code>python deploy.py --type deploy --mode job
+</code></pre>
+</div>
+
+<p>2.构建集群部署成功后,命令窗口可以看到如下输出:</p>
+
+<p><img src="/images/blog/kylin4_on_cloud/4_deploy_cluster_successfully.png" alt="" /></p>
+
+<h4 id="aws--1">检查 AWS 服务</h4>
+
+<p>1.进入 AWS 控制台的  CloudFormation 界面,可以看到 Kylin 部署工具一共起了 7 个 stack:</p>
+
+<p><img src="/images/blog/kylin4_on_cloud/5_check_aws_stacks.png" alt="" /></p>
+
+<p>2.用户可以通过 AWS 控制台查看 EC2 节点的详细信息,也可以在命令行界面使用如下命令列出所有 EC2 节点的名字、私有 IP 和公有 IP:</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code>python deploy.py --type list
+</code></pre>
+</div>
+
+<p><img src="/images/blog/kylin4_on_cloud/6_list_cluster_node.png" alt="" /></p>
+
+<h4 id="spark-sql-">体验 spark-sql 原生查询速度</h4>
+
+<p>为了直观的感受到预计算给查询性能带来的提升,在构建 cube 之前,我们先在 spark-sql 中体验原生的查询速度:</p>
+
+<p>1.首先,我们通过 kylin 节点的公有 IP 登录到该 kylin 所在的 EC2 机器,并切换到 root 用户,执行 ~/.bash_profile 使提前设置的环境变量生效:</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code>ssh -i <span class="s2">"</span><span class="k">${</span><span class="nv">KEY_PAIR</span><span class="k">}</span><span class="s2">"</span> ec2-user@<span class="k">${</span><span class="nv">kylin_node_public_ip</span><span class="k">}</span>
+sudo su
+<span class="nb">source</span> ~/.bash_profile
+</code></pre>
+</div>
+
+<p>2.然后进入 <code class="highlighter-rouge">$SPARK_HOME</code> 并修改配置文件 <code class="highlighter-rouge">conf/spark-defaults.conf</code>,将 <code class="highlighter-rouge">spark_master_node_private_ip</code> 修改为 spark master 节点的私有 IP:</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code><span class="nb">cd</span> <span class="nv">$SPARK_HOME</span>
+vim conf/spark-defaults.conf
+
+<span class="c"># 将 spark_master_node_private_ip 替换为真实 spark master 节点的私有ip</span>
+spark.master spark://spark_master_node_private_ip:7077
+</code></pre>
+</div>
+
+<p><code class="highlighter-rouge">spark-defaults.conf</code> 中关于 driver 和 executor 的资源配置与 kylin 查询集群的资源配置是一致的。</p>
+
+<p>3.在 spark-sql 中建表</p>
+
+<p>测试所用数据集的所有数据存放在位于 <code class="highlighter-rouge">cn-north-1</code> 和 <code class="highlighter-rouge">us-east-1</code> 地区的 S3 bucket 中,如果你的 S3 bucket 位于 <code class="highlighter-rouge">cn-north-1</code> 或者 <code class="highlighter-rouge">us-east-1</code>,那么你可以直接执行建表 sql;否则需要执行以下脚本复制数据到 <code class="highlighter-rouge">kylin_configs.yaml</code> 中设置的 S3 工作目录下,并修改建表 sql:</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code><span class="c">## AWS CN 用户</span>
+aws s3 sync s3://public.kyligence.io/kylin/kylin_demo/data/ <span class="k">${</span><span class="nv">S3_DATA_DIR</span><span class="k">}</span> --region cn-north-1
+
+<span class="c">## AWS Global 用户</span>
+aws s3 sync s3://public.kyligence.io/kylin/kylin_demo/data/ <span class="k">${</span><span class="nv">S3_DATA_DIR</span><span class="k">}</span> --region us-east-1
+
+<span class="c"># 修改建表 sql</span>
+sed -i <span class="s2">"s#s3://public.kyligence.io/kylin/kylin_demo/data/#</span><span class="k">${</span><span class="nv">S3_DATA_DIR</span><span class="k">}</span><span class="s2">#g"</span> /home/ec2-user/kylin_demo/create_kylin_demo_table.sql
+</code></pre>
+</div>
+
+<p>执行建表 sql:</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code>bin/spark-sql -f /home/ec2-user/kylin_demo/create_kylin_demo_table.sql
+</code></pre>
+</div>
+
+<p>4.在 spark-sql 中执行查询</p>
+
+<p>进入 spark-sql:</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code>bin/spark-sql
+</code></pre>
+</div>
+
+<p>在 spark-sql 中执行查询:</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code><span class="n">use</span> <span class="n">kylin_demo</span><span class="p">;</span>
+<span class="k">select</span> <span class="n">TAXI_TRIP_RECORDS_VIEW</span><span class="p">.</span><span class="n">PICKUP_DATE</span><span class="p">,</span> <span class="n">NEWYORK_ZONE</span><span class="p">.</span><span class="n">BOROUGH</span><span class="p">,</span> <span class="k">count</span><span class="p">(</span><span class="o">*</span><span class="p">),</span> <span class="k">sum</span><span class="p">(</span><span class="n">TAXI_TRIP_RECORDS_VIEW</span><span class="p">.</span><span class="n">TRIP_TIME_HOUR</span><span class="p">),</span> <span class="k">sum</span><span class="p">(</span><span class="n">TAXI_TRIP_RECORDS_VIEW</span><span class="p">.</span><span class="n">TOTAL_AMOUNT</span><span class="p">)</span>
+<span class="k">from</span> <span class="n">TAXI_TRIP_RECORDS_VIEW</span>
+<span class="k">left</span> <span class="k">join</span> <span class="n">NEWYORK_ZONE</span>
+<span class="k">on</span> <span class="n">TAXI_TRIP_RECORDS_VIEW</span><span class="p">.</span><span class="n">PULOCATIONID</span> <span class="o">=</span> <span class="n">NEWYORK_ZONE</span><span class="p">.</span><span class="n">LOCATIONID</span>
+<span class="k">group</span> <span class="k">by</span> <span class="n">TAXI_TRIP_RECORDS_VIEW</span><span class="p">.</span><span class="n">PICKUP_DATE</span><span class="p">,</span> <span class="n">NEWYORK_ZONE</span><span class="p">.</span><span class="n">BOROUGH</span><span class="p">;</span>
+</code></pre>
+</div>
+
+<p>然后可以看到,在资源与 kylin 查询集群配置相同的情况下,使用 spark-sql 直接查询耗时超过100s:</p>
+
+<p><img src="/images/blog/kylin4_on_cloud/7_query_in_spark_sql.png" alt="" /></p>
+
+<p>5.查询执行成功后必须退出 spark-sql 再进行下面的步骤,防止占用资源。</p>
+
+<h4 id="kylin--2">导入 Kylin 元数据</h4>
+
+<p>1.进入 <code class="highlighter-rouge">$KYLIN_HOME</code></p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code><span class="nb">cd</span> <span class="nv">$KYLIN_HOME</span>
+</code></pre>
+</div>
+
+<p>2.导入元数据</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code>bin/metastore.sh restore /home/ec2-user/meta_backups/
+</code></pre>
+</div>
+
+<p>3.重载元数据</p>
+
+<p>根据 EC2 节点的公有 IP,在浏览器输入 <code class="highlighter-rouge">http://${kylin_node_public_ip}:7070/kylin</code> 进入 kylin web 页面,并使用 ADMIN/KYLIN 的默认用户名密码登录:</p>
+
+<p><img src="/images/blog/kylin4_on_cloud/8_kylin_web_ui.png" alt="" /></p>
+
+<p>通过 System -&gt; Configuration -&gt; Reload Metadata 重载 Kylin 元数据:</p>
+
+<p><img src="/images/blog/kylin4_on_cloud/9_reload_kylin_metadata.png" alt="" /></p>
+
+<p>如果用户想要了解如何手动创建 Kylin 元数据中所包含的 Model 和 Cube,可以参考:<a href="https://cwiki.apache.org/confluence/display/KYLIN/Create+Model+and+Cube+in+Kylin">Create model and cube in kylin</a>。</p>
+
+<h4 id="section-12">执行构建</h4>
+
+<p>提交 cube 构建任务,由于在 model 中未设置分区列,所以这里直接对两个 cube 进行全量构建:</p>
+
+<p><img src="/images/blog/kylin4_on_cloud/10_full_build_cube.png" alt="" /></p>
+
+<p><img src="/images/blog/kylin4_on_cloud/11_kylin_job_complete.png" alt="" /></p>
+
+<h4 id="section-13">销毁构建集群</h4>
+
+<p>构建完成之后,执行集群销毁命令销毁构建集群,默认情况下会保留 RDS stack、monitor stack 和 vpc stack:</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code>python deploy.py --type destroy
+</code></pre>
+</div>
+
+<p>集群销毁成功:</p>
+
+<p><img src="/images/blog/kylin4_on_cloud/12_destroy_job_cluster.png" alt="" /></p>
+
+<h4 id="aws--2">检查 AWS 资源</h4>
+
+<p>集群销毁成功后,可以到 AWS 控制台的 <code class="highlighter-rouge">CloudFormation</code> 服务确认是否存在资源残留,由于默认会保留元数据 RDS、监控节点和 VPC 节点,所以集群销毁后 CloudFormation 页面还会存在以下三个 Stack:</p>
+
+<p><img src="/images/blog/kylin4_on_cloud/13_check_aws_stacks.png" alt="" /></p>
+
+<p>下面启动查询集群时仍然会使用这三个 Stack 中的资源,这样我们可以保证查询集群和构建集群使用同一套元数据。</p>
+
+<p>以上部分为 <code class="highlighter-rouge">Kylin on Cloud —— 两小时快速搭建云上数据分析平台</code> 的上篇,下篇请查看:<a href="../kylin4-on-cloud-part2/">Kylin on Cloud —— 两小时快速搭建云上数据分析平台(下)</a></p>
+
+
+  </article>
+
+</div>
+
+
+
+
+
+				</article>
+			</div>
+		</div>		
+		<!--
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+-->
+
+<footer id="underfooter">
+    <div>
+        <div class="row">
+            <div class="col-md-12 widget">
+                <div class="widget-body">
+                    <div class="footer-img">
+                        <a href="http://www.apache.org">
+                            <img id="asf-logo" height="78px" alt="Apache Software Foundation" src="/assets/images/apache_footer.png">
+                        </a>
+                    </div>
+                    <p style="padding-top: 11px;">
+                        The contents of this website are © 2015 Apache Software Foundation under the terms of the 
+                        <a href="http://www.apache.org/licenses/LICENSE-2.0"> Apache License v2 </a>. 
+                    </p>
+                    <p style="margin-bottom: 11px;">    
+                        Apache Kylin and its logo are trademarks of the Apache Software Foundation.
+                    </p>
+
+                </div>
+            </div>
+        </div>
+        <!-- /row of widgets -->
+
+    </div>
+    <div></div>
+
+</footer>
+
+	<script src="/assets/js/jquery-1.9.1.min.js"></script> 
+	<script src="/assets/js/bootstrap.min.js"></script> 
+	<script src="/assets/js/main.js"></script>
+	</body>
+</html>
+
+
+
+