You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@griffin.apache.org by gu...@apache.org on 2018/09/13 14:35:30 UTC
incubator-griffin-site git commit: Updated asf-site site from master
(4d98ade756427a2df8b9b1695c9bae31c0974780)
Repository: incubator-griffin-site
Updated Branches:
refs/heads/asf-site 7af1690fb -> 47c77c9b6
Updated asf-site site from master (4d98ade756427a2df8b9b1695c9bae31c0974780)
Project: http://git-wip-us.apache.org/repos/asf/incubator-griffin-site/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-griffin-site/commit/47c77c9b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-griffin-site/tree/47c77c9b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-griffin-site/diff/47c77c9b
Branch: refs/heads/asf-site
Commit: 47c77c9b676490dbb0345b5f9f9e644e016ca3f9
Parents: 7af1690
Author: William Guo <gu...@apache.org>
Authored: Thu Sep 13 22:35:25 2018 +0800
Committer: William Guo <gu...@apache.org>
Committed: Thu Sep 13 22:35:25 2018 +0800
----------------------------------------------------------------------
data/create-table.hql | 27 -------------------
data/gen_delta_src.sh | 12 ---------
data/gen_demo_data.sh | 14 ----------
data/gen_hive_data.sh | 54 --------------------------------------
data/insert-data.hql.template | 2 --
docs/quickstart.html | 10 ++++---
6 files changed, 7 insertions(+), 112 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-griffin-site/blob/47c77c9b/data/create-table.hql
----------------------------------------------------------------------
diff --git a/data/create-table.hql b/data/create-table.hql
deleted file mode 100644
index e117cd6..0000000
--- a/data/create-table.hql
+++ /dev/null
@@ -1,27 +0,0 @@
---replace data location with your own path
-
-CREATE EXTERNAL TABLE `demo_src`(
- `id` bigint,
- `age` int,
- `desc` string)
-PARTITIONED BY (
- `dt` string,
- `hour` string)
-ROW FORMAT DELIMITED
- FIELDS TERMINATED BY '|'
-LOCATION
- 'hdfs:///griffin/data/batch/demo_src';
-
---replace data location with your own path
-
-CREATE EXTERNAL TABLE `demo_tgt`(
- `id` bigint,
- `age` int,
- `desc` string)
-PARTITIONED BY (
- `dt` string,
- `hour` string)
-ROW FORMAT DELIMITED
- FIELDS TERMINATED BY '|'
-LOCATION
- 'hdfs:///griffin/data/batch/demo_tgt';
http://git-wip-us.apache.org/repos/asf/incubator-griffin-site/blob/47c77c9b/data/gen_delta_src.sh
----------------------------------------------------------------------
diff --git a/data/gen_delta_src.sh b/data/gen_delta_src.sh
deleted file mode 100644
index 29fc96b..0000000
--- a/data/gen_delta_src.sh
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/usr/bin/env bash
-
-file=delta_src
-id=124
-
-rm ${file}
-
-for i in {1..1000}
-do
- idx=`shuf -i1-2000 -n1`
- echo "${id}|${idx}|${idx}" >> ${file}
-done
http://git-wip-us.apache.org/repos/asf/incubator-griffin-site/blob/47c77c9b/data/gen_demo_data.sh
----------------------------------------------------------------------
diff --git a/data/gen_demo_data.sh b/data/gen_demo_data.sh
deleted file mode 100644
index 55a975c..0000000
--- a/data/gen_demo_data.sh
+++ /dev/null
@@ -1,14 +0,0 @@
-#!/usr/bin/env bash
-
-./gen_delta_src.sh
-
-src=demo_src
-tgt=demo_tgt
-
-rm ${src}
-cat demo_basic >> ${src}
-cat delta_src >> ${src}
-
-rm ${tgt}
-cat demo_basic >> ${tgt}
-cat delta_tgt >> ${tgt}
http://git-wip-us.apache.org/repos/asf/incubator-griffin-site/blob/47c77c9b/data/gen_hive_data.sh
----------------------------------------------------------------------
diff --git a/data/gen_hive_data.sh b/data/gen_hive_data.sh
deleted file mode 100644
index 5d7816d..0000000
--- a/data/gen_hive_data.sh
+++ /dev/null
@@ -1,54 +0,0 @@
-#!/usr/bin/env bash
-
-#create table
-hive -f create-table.hql
-echo "create table done"
-
-#current hour
-./gen_demo_data.sh
-cur_date=`date +%Y%m%d%H`
-dt=${cur_date:0:8}
-hour=${cur_date:8:2}
-partition_date="dt='$dt',hour='$hour'"
-sed s/PARTITION_DATE/$partition_date/ ./insert-data.hql.template > insert-data.hql
-hive -f insert-data.hql
-src_done_path=/griffin/data/batch/demo_src/dt=${dt}/hour=${hour}/_DONE
-tgt_done_path=/griffin/data/batch/demo_tgt/dt=${dt}/hour=${hour}/_DONE
-hadoop fs -touchz ${src_done_path}
-hadoop fs -touchz ${tgt_done_path}
-echo "insert data [$partition_date] done"
-
-#last hour
-./gen_demo_data.sh
-cur_date=`date -d '1 hour ago' +%Y%m%d%H`
-dt=${cur_date:0:8}
-hour=${cur_date:8:2}
-partition_date="dt='$dt',hour='$hour'"
-sed s/PARTITION_DATE/$partition_date/ ./insert-data.hql.template > insert-data.hql
-hive -f insert-data.hql
-src_done_path=/griffin/data/batch/demo_src/dt=${dt}/hour=${hour}/_DONE
-tgt_done_path=/griffin/data/batch/demo_tgt/dt=${dt}/hour=${hour}/_DONE
-hadoop fs -touchz ${src_done_path}
-hadoop fs -touchz ${tgt_done_path}
-echo "insert data [$partition_date] done"
-
-#next hours
-set +e
-while true
-do
- ./gen_demo_data.sh
- cur_date=`date +%Y%m%d%H`
- next_date=`date -d "+1hour" '+%Y%m%d%H'`
- dt=${next_date:0:8}
- hour=${next_date:8:2}
- partition_date="dt='$dt',hour='$hour'"
- sed s/PARTITION_DATE/$partition_date/ ./insert-data.hql.template > insert-data.hql
- hive -f insert-data.hql
- src_done_path=/griffin/data/batch/demo_src/dt=${dt}/hour=${hour}/_DONE
- tgt_done_path=/griffin/data/batch/demo_tgt/dt=${dt}/hour=${hour}/_DONE
- hadoop fs -touchz ${src_done_path}
- hadoop fs -touchz ${tgt_done_path}
- echo "insert data [$partition_date] done"
- sleep 3600
-done
-set -e
http://git-wip-us.apache.org/repos/asf/incubator-griffin-site/blob/47c77c9b/data/insert-data.hql.template
----------------------------------------------------------------------
diff --git a/data/insert-data.hql.template b/data/insert-data.hql.template
deleted file mode 100644
index 4e4039a..0000000
--- a/data/insert-data.hql.template
+++ /dev/null
@@ -1,2 +0,0 @@
-LOAD DATA LOCAL INPATH 'demo_src' INTO TABLE demo_src PARTITION (PARTITION_DATE);
-LOAD DATA LOCAL INPATH 'demo_tgt' INTO TABLE demo_tgt PARTITION (PARTITION_DATE);
http://git-wip-us.apache.org/repos/asf/incubator-griffin-site/blob/47c77c9b/docs/quickstart.html
----------------------------------------------------------------------
diff --git a/docs/quickstart.html b/docs/quickstart.html
index 1fdc75b..9bfd59f 100644
--- a/docs/quickstart.html
+++ b/docs/quickstart.html
@@ -129,14 +129,18 @@ under the License.
<h2 id="user-story">User Story</h2>
<p>Say we have two hive tables(demo_src, demo_tgt), we need to know what is the data quality for target table, based on source table.</p>
-<p>For simplicity, suppose both two table have the same schema as this:</p>
+<p>For simplicity, suppose both two tables have the same schema as this:</p>
<div class="highlighter-rouge"><div class="highlight"><pre class="highlight"><code>id bigint
age int
desc string
dt string
hour string
</code></pre></div></div>
-<p>dt and hour are partitions, as every date we have one big partition dt(like 20180912), for every date we have 24 hour partitions(like 01,02, …).</p>
+<p>dt and hour are partitions,</p>
+
+<p>as every date we have one partition dt(like 20180912),</p>
+
+<p>for every date we have 24 hour partitions(like 01,02, …).</p>
<h2 id="environment-preparation">Environment Preparation</h2>
<p>You need to prepare the environment for Apache Griffin measure module, including the following software:</p>
@@ -168,7 +172,7 @@ cd griffin-0.3.0-incubating-source-release
<h2 id="data-preparation">Data Preparation</h2>
-<p>For our quick start, We will generate two Hive tables demo_src and demo_tgt.</p>
+<p>For our quick start, We will generate two hive tables demo_src and demo_tgt.</p>
<div class="highlighter-rouge"><div class="highlight"><pre class="highlight"><code>--create hive tables here. hql script
--Note: replace hdfs location with your own path
CREATE EXTERNAL TABLE `demo_src`(