You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by za...@apache.org on 2023/05/15 09:30:26 UTC

[hive-site] branch main updated: HIVE-27339: Add links and instructions for running Hive from Docker Hub (Simhadri Govindappa reviewed by Zhihua Deng, Stamatis Zampetakis)

This is an automated email from the ASF dual-hosted git repository.

zabetak pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/hive-site.git


The following commit(s) were added to refs/heads/main by this push:
     new d1e69b8  HIVE-27339: Add links and instructions for running Hive from Docker Hub (Simhadri Govindappa reviewed by Zhihua Deng, Stamatis Zampetakis)
d1e69b8 is described below

commit d1e69b822ebeac8fb063ad9bd1311395888d8e85
Author: SimhadriG <si...@cloudera.com>
AuthorDate: Fri May 12 17:58:46 2023 +0530

    HIVE-27339: Add links and instructions for running Hive from Docker Hub (Simhadri Govindappa reviewed by Zhihua Deng, Stamatis Zampetakis)
    
    Closes #5
---
 config.toml                              |   6 ++
 content/Developement/gettingStarted.md   |   4 +
 content/Developement/quickStart.md       | 178 +++++++++++++++++++++++++++++++
 themes/hive/layouts/partials/banner.html |  13 ++-
 themes/hive/layouts/partials/head.html   |   2 +-
 themes/hive/layouts/partials/menu.html   |   1 +
 themes/hive/static/css/hive-theme.css    |   5 +-
 7 files changed, 201 insertions(+), 8 deletions(-)

diff --git a/config.toml b/config.toml
index d705501..02b8c52 100644
--- a/config.toml
+++ b/config.toml
@@ -8,6 +8,11 @@ theme = 'hive'
   apacheURL = 'https://www.apache.org'
   javaDocUrl = 'https://svn.apache.org/repos/infra/websites/production/hive/content/javadocs'
 
+[params.banner]
+    github = "https://github.com/apache/hive"
+    docker = "https://hub.docker.com/r/apache/hive"
+    jira = "https://issues.apache.org/jira/projects/HIVE/issues"
+
 [params.apache]
     apacheUrl = "https://www.apache.org/"
     license = "https://www.apache.org/licenses/"
@@ -42,6 +47,7 @@ theme = 'hive'
     bylaws = "https://cwiki.apache.org/confluence/display/Hive/Bylaws"
     howToRelease = "https://cwiki.apache.org/confluence/display/Hive/HowToRelease"
     gettingStarted = "/developement/gettingstarted/"
+    docker = "/developement/quickstart/"
 
 [params.features]
     acidTxn = "https://cwiki.apache.org/confluence/display/hive/hive+transactions"
diff --git a/content/Developement/gettingStarted.md b/content/Developement/gettingStarted.md
index ba59a6d..e2b3fd8 100644
--- a/content/Developement/gettingStarted.md
+++ b/content/Developement/gettingStarted.md
@@ -37,6 +37,9 @@ Hive.
 * Read the [Getting Started Guide][GETTING_STARTED] to learn how to install Hive
 * The [User and Hive SQL documentation][HIVE_QL] shows how to program Hive
 
+## Quick start with Docker
+---
+Check out the [Quickstart with Docker][DOCKER_QUICKSTART] guide.
 
 # Getting Involved With The Apache Hive Community
 ---
@@ -62,5 +65,6 @@ project and contribute your expertise.
 [COMMUNITY]: /community/people/
 [CONTRIBUTOR]: https://cwiki.apache.org/confluence/display/Hive/Home#Home-ResourcesforContributors
 [HIVE_TWITTER]: https://twitter.com/apachehive
+[DOCKER_QUICKSTART]: /developement/quickstart/
 
 
diff --git a/content/Developement/quickStart.md b/content/Developement/quickStart.md
new file mode 100644
index 0000000..6a21e9a
--- /dev/null
+++ b/content/Developement/quickStart.md
@@ -0,0 +1,178 @@
+---
+title: "Quick Start"
+date: 2023-05-12T17:51:06+05:30
+draft: false
+---
+
+### Introduction
+
+---
+Run Apache Hive inside a Docker container in pseudo-distributed mode to get a quick start, a debugging setup, or a test environment for Hive.
+
+
+### Quickstart
+
+---
+
+##### **STEP 1: Pull the image** 
+
+- Pull the image from DockerHub: https://hub.docker.com/r/apache/hive/tags. Currently, there are 3 images released:  
+  - 4.0.0-alpha-2
+  - 4.0.0-alpha-1
+  - 3.1.3
+```shell
+docker pull apache/hive:4.0.0-alpha-2
+```
+` `
+##### **STEP 2: Export the Hive version**
+```shell
+export HIVE_VERSION=4.0.0-alpha-2
+```
+` `
+##### **STEP 3:  Launch the HiveServer2 with an embedded Metastore.**
+This is lightweight and meant for a quick setup; it uses Derby as the Metastore database.
+```shell
+docker run -d -p 10000:10000 -p 10002:10002 --env SERVICE_NAME=hiveserver2 --name hive4 apache/hive:${HIVE_VERSION}
+```
+` `
+##### **STEP 4: Connect to beeline**
+
+```shell
+docker exec -it hive4 beeline -u 'jdbc:hive2://localhost:10000/'
+```
+` `
+##### Note: To launch a standalone Metastore with Derby,
+
+```shell
+docker run -d -p 9083:9083 --env SERVICE_NAME=metastore --name metastore-standalone apache/hive:${HIVE_VERSION}
+```
+` `
+## Detailed Setup
+
+---
+##### - Build image
+
+Apache Hive relies on Hadoop, Tez and some others to facilitate reading, writing, and managing large datasets.
+The [/packaging/src/docker/build.sh] script provides ways to build the image against specified versions of the dependencies, as well as to build from source.
+
+##### - Build from source
+```shell
+mvn clean package -pl packaging -DskipTests -Pdocker
+```
+` `
+##### - Build with specified version
+
+There are some arguments to specify the component version:
+```shell
+-hadoop <hadoop version>
+-tez <tez version>
+-hive <hive version> 
+```
+If the version is not provided, it will read the version from current `pom.xml`:
+`project.version`, `hadoop.version` and `tez.version` for Hive, Hadoop and Tez respectively.
+For example, the following command uses Hive 4.0.0-alpha-2, Hadoop `hadoop.version` and Tez `tez.version` to build the image,
+```shell
+./build.sh -hive 4.0.0-alpha-2
+```
+If the command does not specify the Hive version, it will use the local `apache-hive-${project.version}-bin.tar.gz` (triggering a build if it doesn't exist),
+together with Hadoop 3.1.0 and Tez 0.10.1 to build the image,
+```shell
+./build.sh -hadoop 3.1.0 -tez 0.10.1
+```
+After a successful build, we get a Docker image named `apache/hive` by default; the image is tagged with the provided Hive version.
+
+### Run services
+---
+Before going further, we should define the environment variable `HIVE_VERSION` first.
+For example, if `-hive 4.0.0-alpha-2` is specified to build the image,
+```shell
+export HIVE_VERSION=4.0.0-alpha-2
+```
+or assuming that you're relying on current `project.version` from pom.xml,
+```shell
+export HIVE_VERSION=$(mvn -f pom.xml -q help:evaluate -Dexpression=project.version -DforceStdout)
+```
+` `
+##### **- Metastore**
+
+For a quick start, launch the Metastore with Derby,
+  ```shell
+  docker run -d -p 9083:9083 --env SERVICE_NAME=metastore --name metastore-standalone apache/hive:${HIVE_VERSION}
+  ```
+Everything is lost when the service goes down. To keep the Hive tables' schema and data, start the container with an external Postgres database and a volume,
+
+  ```shell
+  docker run -d -p 9083:9083 --env SERVICE_NAME=metastore \
+       --env DB_DRIVER=postgres \
+       --env SERVICE_OPTS="-Djavax.jdo.option.ConnectionDriverName=org.postgresql.Driver -Djavax.jdo.option.ConnectionURL=jdbc:postgresql://postgres:5432/metastore_db -Djavax.jdo.option.ConnectionUserName=hive -Djavax.jdo.option.ConnectionPassword=password" \
+       --mount source=warehouse,target=/opt/hive/data/warehouse \
+       --name metastore-standalone apache/hive:${HIVE_VERSION}
+  ```
+
+If you want to use your own `hdfs-site.xml` or `yarn-site.xml` for the service, you can provide the environment variable `HIVE_CUSTOM_CONF_DIR` for the command. For instance, put the custom configuration file under the directory `/opt/hive/conf`, then run,
+
+  ```shell
+   docker run -d -p 9083:9083 --env SERVICE_NAME=metastore \
+        --env DB_DRIVER=postgres -v /opt/hive/conf:/hive_custom_conf --env HIVE_CUSTOM_CONF_DIR=/hive_custom_conf \
+        --name metastore apache/hive:${HIVE_VERSION}
+  ```
+` `
+#####  **- HiveServer2**
+
+Launch the HiveServer2 with an embedded Metastore,
+   ```shell
+    docker run -d -p 10000:10000 -p 10002:10002 --env SERVICE_NAME=hiveserver2 --name hiveserver2-standalone apache/hive:${HIVE_VERSION}
+   ```
+or specify a remote Metastore if it's available,
+   ```shell
+    docker run -d -p 10000:10000 -p 10002:10002 --env SERVICE_NAME=hiveserver2 \
+         --env SERVICE_OPTS="-Dhive.metastore.uris=thrift://metastore:9083" \
+         --env IS_RESUME="true" \
+         --name hiveserver2-standalone apache/hive:${HIVE_VERSION}
+   ```
+To save the data between container restarts, you can start the HiveServer2 with a Volume,
+   ```shell
+   docker run -d -p 10000:10000 -p 10002:10002 --env SERVICE_NAME=hiveserver2 \
+      --env SERVICE_OPTS="-Dhive.metastore.uris=thrift://metastore:9083" \
+      --mount source=warehouse,target=/opt/hive/data/warehouse \
+      --env IS_RESUME="true" \
+      --name hiveserver2 apache/hive:${HIVE_VERSION}
+   ```
+` `
+##### **- HiveServer2, Metastore**
+
+To get a quick overview of both HiveServer2 and Metastore, you can run:
+
+```shell
+    cd packaging/src/docker
+    docker compose up -d
+  ```
+Volumes are used to persist data generated by Hive inside Postgres and HiveServer2 containers:
+- hive_db 
+  - The volume persists the metadata of Hive tables inside Postgres container.
+- warehouse 
+  - The volume stores tables' files inside HiveServer2 container.
+
+### Usage
+
+---
+
+- HiveServer2 web
+    - Accessed on browser at http://localhost:10002/
+- Beeline:
+  ```shell
+   docker exec -it hiveserver2 beeline -u 'jdbc:hive2://hiveserver2:10000/'
+   # If beeline is installed on host machine, HiveServer2 can be simply reached via:
+   beeline -u 'jdbc:hive2://localhost:10000/'
+  ```
+- Run some queries
+  ```sql
+    show tables;
+    create table hive_example(a string, b int) partitioned by(c int);
+    alter table hive_example add partition(c=1);
+    insert into hive_example partition(c=1) values('a', 1), ('a', 2),('b',3);
+    select count(distinct a) from hive_example;
+    select sum(b) from hive_example;
+  ```
+  
+[/packaging/src/docker/build.sh]: https://github.com/apache/hive/blob/master/packaging/src/docker/build.sh
\ No newline at end of file
diff --git a/themes/hive/layouts/partials/banner.html b/themes/hive/layouts/partials/banner.html
index b93b71c..7f9b7c7 100644
--- a/themes/hive/layouts/partials/banner.html
+++ b/themes/hive/layouts/partials/banner.html
@@ -24,14 +24,17 @@
             <h1 class="text-outline black-text">Apache  Hive </h1>
             <h4 class="thin"> The Apache Hive ™ is a distributed, fault-tolerant data warehouse system that enables analytics at a massive scale and
                 facilitates reading, writing, and managing petabytes of data residing in distributed storage using SQL.</h4>
-            <a href="https://github.com/apache/hive" class="icon-block">
-                <button class="custom-button banner-button-style" role="button">Github <i class="fa fa-github" style="font-size:35px"></i></button>
+            <a href="{{ .Site.Params.banner.github }}" class="icon-block">
+                <button class="custom-button banner-button-style" role="button">Github <i class="fab fa-github" style="font-size:35px"></i></button>
             </a>
             <a href="{{.Site.BaseURL}}{{ .Site.Params.navbar.mailinglists }}" class="icon-block">
-                <button class="custom-button banner-button-style" role="button">Mail <i class="fa fa-envelope" style="font-size:35px"></i></button>
+                <button class="custom-button banner-button-style" role="button">Mail <i class="fa-solid fa-envelope" style="font-size:35px"></i></button>
             </a>
-            <a href="https://lists.apache.org/list.html?dev@hive.apache.org" class="icon-block">
-                <button class="custom-button banner-button-style" role="button">Community <i class='fa fa-comments' style='font-size:35px'></i></button>
+            <a href="{{ .Site.Params.banner.docker }}" class="icon-block">
+                <button class="custom-button banner-button-style" role="button">Docker <i class="fab fa-docker" style='font-size:35px'></i></button>
+            </a>
+            <a href="{{ .Site.Params.banner.jira }}" class="icon-block">
+                <button class="custom-button banner-button-style" role="button">Community <i class='fa-solid fa-comments' style='font-size:35px'></i></button>
             </a>
         </div>
     </div>
diff --git a/themes/hive/layouts/partials/head.html b/themes/hive/layouts/partials/head.html
index ce14d54..de9a48e 100644
--- a/themes/hive/layouts/partials/head.html
+++ b/themes/hive/layouts/partials/head.html
@@ -27,7 +27,7 @@
         <title>{{ .Title }}</title>
         <link rel="icon" href="/images/hive.svg" sizes="any" type="image/svg+xml">
         <link rel="stylesheet" href="{{ .Site.BaseURL }}/css/hive-theme.css" />
-        <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/font-awesome/4.4.0/css/font-awesome.min.css">
+        <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css">
         <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.0.2/dist/css/bootstrap.min.css" rel="stylesheet" integrity="sha384-EVSTQN3/azprG1Anm3QDgpJLIm9Nao0Yz1ztcQTwFspd3yD65VohhpuuCOmLASjC" crossorigin="anonymous">
         <link rel="stylesheet" href="{{ .Site.BaseURL }}/css/termynal.css" />
         <link rel="apple-touch-icon" sizes="180x180" href="{{ .Site.BaseURL }}/images/apple-touch-icon.png">
diff --git a/themes/hive/layouts/partials/menu.html b/themes/hive/layouts/partials/menu.html
index 5040168..b33f113 100644
--- a/themes/hive/layouts/partials/menu.html
+++ b/themes/hive/layouts/partials/menu.html
@@ -67,6 +67,7 @@
                         </a>
                         <ul class="dropdown-menu" aria-labelledby="navbarDropdown">
                             <li><a class="dropdown-item" href="{{.Site.BaseURL}}{{ .Site.Params.navbar.gettingStarted }}">Getting Started</a></li>
+                            <li><a class="dropdown-item" href="{{.Site.BaseURL}}{{ .Site.Params.navbar.docker }}">Quickstart with Docker</a></li>
                             <li><a class="dropdown-item" href="{{ .Site.Params.navbar.designDocs }}">Design Docs</a></li>
                             <li><a class="dropdown-item" href="{{ .Site.Params.navbar.hiveJira }}">Hive JIRA</a></li>
                             <li><a class="dropdown-item" href="{{ .Site.Params.navbar.faq }}">Hive Developer FAQ</a></li>
diff --git a/themes/hive/static/css/hive-theme.css b/themes/hive/static/css/hive-theme.css
index 88b2d93..32fdd54 100644
--- a/themes/hive/static/css/hive-theme.css
+++ b/themes/hive/static/css/hive-theme.css
@@ -262,9 +262,9 @@
 p,
 .splendor-p {
   font-size: 1.2rem;
-  margin-bottom: 1.3rem;
+  margin-bottom: 1.0rem;
   padding-top: 1rem;
-  padding-bottom:1rem;
+  padding-bottom:0rem;
 }
 
 /* https://github.com/mrmrs/fluidity */
@@ -303,6 +303,7 @@ pre {
   font-size: 1rem;
   overflow-x: scroll;
   padding: 1.125em;
+  display: inline-flex;
 }
 
 h1{