You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by jb...@apache.org on 2017/04/12 18:25:45 UTC

[41/51] [partial] incubator-impala git commit: IMPALA-4181 [DOCS] Publish rendered Impala documentation to ASF site

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/75c46918/docs/build/html/topics/impala_config_options.html
----------------------------------------------------------------------
diff --git a/docs/build/html/topics/impala_config_options.html b/docs/build/html/topics/impala_config_options.html
new file mode 100644
index 0000000..5bf3ff2
--- /dev/null
+++ b/docs/build/html/topics/impala_config_options.html
@@ -0,0 +1,361 @@
+<!DOCTYPE html
+  SYSTEM "about:legacy-compat">
+<html lang="en"><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8"><meta charset="UTF-8"><meta name="copyright" content="(C) Copyright 2017"><meta name="DC.rights.owner" content="(C) Copyright 2017"><meta name="DC.Type" content="concept"><meta name="DC.Relation" scheme="URI" content="../topics/impala_processes.html"><meta name="prodname" content="Impala"><meta name="prodname" content="Impala"><meta name="version" content="Impala 2.8.x"><meta name="version" content="Impala 2.8.x"><meta name="DC.Format" content="XHTML"><meta name="DC.Identifier" content="config_options"><link rel="stylesheet" type="text/css" href="../commonltr.css"><title>Modifying Impala Startup Options</title></head><body id="config_options"><main role="main"><article role="article" aria-labelledby="ariaid-title1">
+
+  <h1 class="title topictitle1" id="ariaid-title1">Modifying Impala Startup Options</h1>
+
+  
+
+  <div class="body conbody">
+
+    <p class="p">
+      
+
+      
+
+      
+
+      
+
+      
+
+      
+
+      
+
+      
+
+      
+
+      
+
+      
+
+      
+      The configuration options for the Impala-related daemons let you choose which hosts and
+      ports to use for the services that run on a single host, specify directories for logging,
+      control resource usage and security, and specify other aspects of the Impala software.
+    </p>
+
+    <p class="p toc inpage"></p>
+
+  </div>
+
+  <nav role="navigation" class="related-links"><div class="familylinks"><div class="parentlink"><strong>Parent topic:</strong> <a class="link" href="../topics/impala_processes.html">Starting Impala</a></div></div></nav><article class="topic concept nested1" aria-labelledby="ariaid-title2" id="config_options__config_options_noncm">
+
+    <h2 class="title topictitle2" id="ariaid-title2">Configuring Impala Startup Options through the Command Line</h2>
+
+    <div class="body conbody">
+
+      <p class="p"> The Impala server, statestore, and catalog services start up using values provided in a
+        defaults file, <span class="ph filepath">/etc/default/impala</span>. </p>
+
+      <p class="p">
+        This file includes information about many resources used by Impala. Most of the defaults
+        included in this file should be effective in most cases. For example, typically you
+        would not change the definition of the <code class="ph codeph">CLASSPATH</code> variable, but you
+        would always set the address used by the statestore server. Some of the content you
+        might modify includes:
+      </p>
+
+
+
+<pre class="pre codeblock"><code>IMPALA_STATE_STORE_HOST=127.0.0.1
+IMPALA_STATE_STORE_PORT=24000
+IMPALA_BACKEND_PORT=22000
+IMPALA_LOG_DIR=/var/log/impala
+IMPALA_CATALOG_SERVICE_HOST=...
+IMPALA_STATE_STORE_HOST=...
+
+export IMPALA_STATE_STORE_ARGS=${IMPALA_STATE_STORE_ARGS:- \
+    -log_dir=${IMPALA_LOG_DIR} -state_store_port=${IMPALA_STATE_STORE_PORT}}
+IMPALA_SERVER_ARGS=" \
+-log_dir=${IMPALA_LOG_DIR} \
+-catalog_service_host=${IMPALA_CATALOG_SERVICE_HOST} \
+-state_store_port=${IMPALA_STATE_STORE_PORT} \
+-use_statestore \
+-state_store_host=${IMPALA_STATE_STORE_HOST} \
+-be_port=${IMPALA_BACKEND_PORT}"
+export ENABLE_CORE_DUMPS=${ENABLE_COREDUMPS:-false}</code></pre>
+
+      <p class="p">
+        To use alternate values, edit the defaults file, then restart all the Impala-related
+        services so that the changes take effect. Restart the Impala server using the following
+        commands:
+      </p>
+
+<pre class="pre codeblock"><code>$ sudo service impala-server restart
+Stopping Impala Server:                                    [  OK  ]
+Starting Impala Server:                                    [  OK  ]</code></pre>
+
+      <p class="p">
+        Restart the Impala statestore using the following commands:
+      </p>
+
+<pre class="pre codeblock"><code>$ sudo service impala-state-store restart
+Stopping Impala State Store Server:                        [  OK  ]
+Starting Impala State Store Server:                        [  OK  ]</code></pre>
+
+      <p class="p">
+        Restart the Impala catalog service using the following commands:
+      </p>
+
+<pre class="pre codeblock"><code>$ sudo service impala-catalog restart
+Stopping Impala Catalog Server:                            [  OK  ]
+Starting Impala Catalog Server:                            [  OK  ]</code></pre>
+
+      <p class="p">
+        Some common settings to change include:
+      </p>
+
+      <ul class="ul">
+        <li class="li">
+          <p class="p">
+            Statestore address. Where practical, put the statestore on a separate host not
+            running the <span class="keyword cmdname">impalad</span> daemon. In that recommended configuration,
+            the <span class="keyword cmdname">impalad</span> daemon cannot refer to the statestore server using
+            the loopback address. If the statestore is hosted on a machine with an IP address of
+            192.168.0.27, change:
+          </p>
+<pre class="pre codeblock"><code>IMPALA_STATE_STORE_HOST=127.0.0.1</code></pre>
+          <p class="p">
+            to:
+          </p>
+<pre class="pre codeblock"><code>IMPALA_STATE_STORE_HOST=192.168.0.27</code></pre>
+        </li>
+
+        <li class="li">
+          <p class="p">
+            Catalog server address (including both the hostname and the port number). Update the
+            value of the <code class="ph codeph">IMPALA_CATALOG_SERVICE_HOST</code> variable. Where
+            practical, run the catalog server on the same host as the statestore. In that
+            recommended configuration, the <span class="keyword cmdname">impalad</span> daemon cannot refer to the
+            catalog server using the loopback address. If the catalog service is hosted on a
+            machine with an IP address of 192.168.0.27, add the following line:
+          </p>
+<pre class="pre codeblock"><code>IMPALA_CATALOG_SERVICE_HOST=192.168.0.27:26000</code></pre>
+          <p class="p">
+            The <span class="ph filepath">/etc/default/impala</span> defaults file currently does not define
+            an <code class="ph codeph">IMPALA_CATALOG_ARGS</code> environment variable, but if you add one it
+            will be recognized by the service startup/shutdown script. Add a definition for this
+            variable to <span class="ph filepath">/etc/default/impala</span> and add the option
+            <code class="ph codeph">-catalog_service_host=<var class="keyword varname">hostname</var></code>. If the port is
+            different than the default 26000, also add the option
+            <code class="ph codeph">-catalog_service_port=<var class="keyword varname">port</var></code>.
+          </p>
+        </li>
+
+        <li class="li" id="config_options_noncm__mem_limit">
+          <p class="p">
+            Memory limits. You can limit the amount of memory available to Impala. For example,
+            to allow Impala to use no more than 70% of system memory, change:
+          </p>
+
+<pre class="pre codeblock"><code>export IMPALA_SERVER_ARGS=${IMPALA_SERVER_ARGS:- \
+    -log_dir=${IMPALA_LOG_DIR} \
+    -state_store_port=${IMPALA_STATE_STORE_PORT} \
+    -use_statestore -state_store_host=${IMPALA_STATE_STORE_HOST} \
+    -be_port=${IMPALA_BACKEND_PORT}}</code></pre>
+          <p class="p">
+            to:
+          </p>
+<pre class="pre codeblock"><code>export IMPALA_SERVER_ARGS=${IMPALA_SERVER_ARGS:- \
+    -log_dir=${IMPALA_LOG_DIR} -state_store_port=${IMPALA_STATE_STORE_PORT} \
+    -use_statestore -state_store_host=${IMPALA_STATE_STORE_HOST} \
+    -be_port=${IMPALA_BACKEND_PORT} -mem_limit=70%}</code></pre>
+          <p class="p">
+            You can specify the memory limit using absolute notation such as
+            <code class="ph codeph">500m</code> or <code class="ph codeph">2G</code>, or as a percentage of physical memory
+            such as <code class="ph codeph">60%</code>.
+          </p>
+
+          <div class="note note note_note"><span class="note__title notetitle">Note:</span> 
+            Queries that exceed the specified memory limit are aborted. Percentage limits are
+            based on the physical memory of the machine and do not consider cgroups.
+          </div>
+        </li>
+
+        <li class="li">
+          <p class="p"> Core dump enablement. To enable core dumps, change: </p>
+<pre class="pre codeblock"><code>export ENABLE_CORE_DUMPS=${ENABLE_COREDUMPS:-false}</code></pre>
+          <p class="p">
+            to:
+          </p>
+<pre class="pre codeblock"><code>export ENABLE_CORE_DUMPS=${ENABLE_COREDUMPS:-true}</code></pre>
+
+          <div class="note note note_note"><span class="note__title notetitle">Note:</span> 
+      <ul class="ul">
+        <li class="li">
+          <p class="p">
+            The location of core dump files may vary according to your operating system configuration.
+          </p>
+        </li>
+        <li class="li">
+          <p class="p">
+            Other security settings may prevent Impala from writing core dumps even when this option is enabled.
+          </p>
+        </li>
+      </ul>
+      </div>
+        </li>
+
+        <li class="li">
+          <p class="p">
+            Authorization using the open source Sentry plugin. Specify the
+            <code class="ph codeph">-server_name</code> and <code class="ph codeph">-authorization_policy_file</code>
+            options as part of the <code class="ph codeph">IMPALA_SERVER_ARGS</code> and
+            <code class="ph codeph">IMPALA_STATE_STORE_ARGS</code> settings to enable the core Impala support
+            for authentication. See <a class="xref" href="impala_authorization.html#secure_startup">Starting the impalad Daemon with Sentry Authorization Enabled</a> for
+            details.
+          </p>
+        </li>
+
+        <li class="li">
+          <p class="p">
+            Auditing for successful or blocked Impala queries, another aspect of security.
+            Specify the <code class="ph codeph">-audit_event_log_dir=<var class="keyword varname">directory_path</var></code>
+            option and optionally the
+            <code class="ph codeph">-max_audit_event_log_file_size=<var class="keyword varname">number_of_queries</var></code>
+            and <code class="ph codeph">-abort_on_failed_audit_event</code> options as part of the
+            <code class="ph codeph">IMPALA_SERVER_ARGS</code> settings, for each Impala node, to enable and
+            customize auditing. See <a class="xref" href="impala_auditing.html#auditing">Auditing Impala Operations</a> for details.
+          </p>
+        </li>
+
+        <li class="li">
+          <p class="p">
+            Password protection for the Impala web UI, which listens on port 25000 by default.
+            This feature involves adding some or all of the
+            <code class="ph codeph">--webserver_password_file</code>,
+            <code class="ph codeph">--webserver_authentication_domain</code>, and
+            <code class="ph codeph">--webserver_certificate_file</code> options to the
+            <code class="ph codeph">IMPALA_SERVER_ARGS</code> and <code class="ph codeph">IMPALA_STATE_STORE_ARGS</code>
+            settings. See <a class="xref" href="impala_security_guidelines.html#security_guidelines">Security Guidelines for Impala</a> for
+            details.
+          </p>
+        </li>
+
+        <li class="li" id="config_options_noncm__default_query_options">
+          <div class="p">
+            Another setting you might add to <code class="ph codeph">IMPALA_SERVER_ARGS</code> is a
+            comma-separated list of query options and values:
+<pre class="pre codeblock"><code>-default_query_options='<var class="keyword varname">option</var>=<var class="keyword varname">value</var>,<var class="keyword varname">option</var>=<var class="keyword varname">value</var>,...'
+</code></pre>
+            These options control the behavior of queries performed by this
+            <span class="keyword cmdname">impalad</span> instance. The option values you specify here override the
+            default values for <a class="xref" href="impala_query_options.html#query_options">Impala query
+            options</a>, as shown by the <code class="ph codeph">SET</code> statement in
+            <span class="keyword cmdname">impala-shell</span>.
+          </div>
+        </li>
+
+
+
+        <li class="li">
+          <p class="p">
+            During troubleshooting, <span class="keyword">the appropriate support channel</span> might direct you to change other values,
+            particularly for <code class="ph codeph">IMPALA_SERVER_ARGS</code>, to work around issues or
+            gather debugging information.
+          </p>
+        </li>
+      </ul>
+
+
+
+      <div class="note note note_note"><span class="note__title notetitle">Note:</span> 
+        <p class="p">
+          These startup options for the <span class="keyword cmdname">impalad</span> daemon are different from the
+          command-line options for the <span class="keyword cmdname">impala-shell</span> command. For the
+          <span class="keyword cmdname">impala-shell</span> options, see
+          <a class="xref" href="impala_shell_options.html#shell_options">impala-shell Configuration Options</a>.
+        </p>
+      </div>
+
+      
+
+    </div>
+
+    
+
+    
+
+    
+
+  </article>
+
+  <article class="topic concept nested1" aria-labelledby="ariaid-title3" id="config_options__config_options_checking">
+
+    <h2 class="title topictitle2" id="ariaid-title3">Checking the Values of Impala Configuration Options</h2>
+
+    <div class="body conbody">
+
+      <p class="p">
+        You can check the current runtime value of all these settings through the Impala web
+        interface, available by default at
+        <code class="ph codeph">http://<var class="keyword varname">impala_hostname</var>:25000/varz</code> for the
+        <span class="keyword cmdname">impalad</span> daemon,
+        <code class="ph codeph">http://<var class="keyword varname">impala_hostname</var>:25010/varz</code> for the
+        <span class="keyword cmdname">statestored</span> daemon, or
+        <code class="ph codeph">http://<var class="keyword varname">impala_hostname</var>:25020/varz</code> for the
+        <span class="keyword cmdname">catalogd</span> daemon.
+      </p>
+
+    </div>
+
+  </article>
+
+  <article class="topic concept nested1" aria-labelledby="ariaid-title4" id="config_options__config_options_impalad">
+
+    <h2 class="title topictitle2" id="ariaid-title4">Startup Options for impalad Daemon</h2>
+
+    <div class="body conbody">
+
+      <p class="p">
+        The <code class="ph codeph">impalad</code> daemon implements the main Impala service, which performs
+        query processing and reads and writes the data files.
+      </p>
+
+    </div>
+
+  </article>
+
+  <article class="topic concept nested1" aria-labelledby="ariaid-title5" id="config_options__config_options_statestored">
+
+    <h2 class="title topictitle2" id="ariaid-title5">Startup Options for statestored Daemon</h2>
+
+    <div class="body conbody">
+
+      <p class="p">
+        The <span class="keyword cmdname">statestored</span> daemon implements the Impala statestore service,
+        which monitors the availability of Impala services across the cluster, and handles
+        situations such as nodes becoming unavailable or becoming available again.
+      </p>
+
+    </div>
+
+  </article>
+
+  <article class="topic concept nested1" aria-labelledby="ariaid-title6" id="config_options__config_options_catalogd">
+
+    <h2 class="title topictitle2" id="ariaid-title6">Startup Options for catalogd Daemon</h2>
+
+    <div class="body conbody">
+
+      <p class="p">
+        The <span class="keyword cmdname">catalogd</span> daemon implements the Impala catalog service, which
+        broadcasts metadata changes to all the Impala nodes when Impala creates a table, inserts
+        data, or performs other kinds of DDL and DML operations.
+      </p>
+
+      <p class="p">
+        By default, the metadata loading and caching on startup happens asynchronously, so Impala can begin
+        accepting requests promptly. To enable the original behavior, where Impala waited until all metadata was
+        loaded before accepting any requests, set the <span class="keyword cmdname">catalogd</span> configuration option
+        <code class="ph codeph">--load_catalog_in_background=false</code>.
+      </p>
+
+    </div>
+
+  </article>
+
+</article></main></body></html>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/75c46918/docs/build/html/topics/impala_config_performance.html
----------------------------------------------------------------------
diff --git a/docs/build/html/topics/impala_config_performance.html b/docs/build/html/topics/impala_config_performance.html
new file mode 100644
index 0000000..61de174
--- /dev/null
+++ b/docs/build/html/topics/impala_config_performance.html
@@ -0,0 +1,149 @@
+<!DOCTYPE html
+  SYSTEM "about:legacy-compat">
+<html lang="en"><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8"><meta charset="UTF-8"><meta name="copyright" content="(C) Copyright 2017"><meta name="DC.rights.owner" content="(C) Copyright 2017"><meta name="DC.Type" content="concept"><meta name="DC.Relation" scheme="URI" content="../topics/impala_config.html"><meta name="prodname" content="Impala"><meta name="prodname" content="Impala"><meta name="version" content="Impala 2.8.x"><meta name="version" content="Impala 2.8.x"><meta name="DC.Format" content="XHTML"><meta name="DC.Identifier" content="config_performance"><link rel="stylesheet" type="text/css" href="../commonltr.css"><title>Post-Installation Configuration for Impala</title></head><body id="config_performance"><main role="main"><article role="article" aria-labelledby="ariaid-title1">
+
+  <h1 class="title topictitle1" id="ariaid-title1">Post-Installation Configuration for Impala</h1>
+  
+
+  <div class="body conbody">
+
+    <p class="p" id="config_performance__p_24">
+      This section describes the mandatory and recommended configuration settings for Impala. If Impala is
+      installed using cluster management software, some of these configurations might be completed automatically; you must still
+      configure short-circuit reads manually. If you want to customize your environment, consider making the changes described in this topic.
+    </p>
+
+    <ul class="ul">
+      <li class="li">
+        You must enable short-circuit reads, whether or not Impala was installed with cluster
+        management software. This setting goes in the Impala configuration settings, not the Hadoop-wide settings.
+      </li>
+
+      <li class="li">
+        You must enable block location tracking, and you can optionally enable native checksumming for optimal performance.
+      </li>
+    </ul>
+
+    <section class="section" id="config_performance__section_fhq_wyv_ls"><h2 class="title sectiontitle">Mandatory: Short-Circuit Reads</h2>
+      
+      <p class="p"> Enabling short-circuit reads allows Impala to read local data directly
+        from the file system. This removes the need to communicate through the
+        DataNodes, improving performance. This setting also minimizes the number
+        of additional copies of data. Short-circuit reads requires
+          <code class="ph codeph">libhadoop.so</code>
+        (the Hadoop Native Library) to be accessible to both the server and the
+        client. <code class="ph codeph">libhadoop.so</code> is not available if you have
+        installed from a tarball. You must install from an
+        <code class="ph codeph">.rpm</code>, <code class="ph codeph">.deb</code>, or parcel to use
+        short-circuit local reads.
+      </p>
+      <p class="p">
+        <strong class="ph b">To configure DataNodes for short-circuit reads:</strong>
+      </p>
+      <ol class="ol" id="config_performance__ol_qlq_wyv_ls">
+        <li class="li" id="config_performance__copy_config_files"> Copy the client
+            <code class="ph codeph">core-site.xml</code> and <code class="ph codeph">hdfs-site.xml</code>
+          configuration files from the Hadoop configuration directory to the
+          Impala configuration directory. The default Impala configuration
+          location is <code class="ph codeph">/etc/impala/conf</code>. </li>
+        <li class="li">
+          
+          
+          
+          On all Impala nodes, configure the following properties in 
+          
+          Impala's copy of <code class="ph codeph">hdfs-site.xml</code> as shown: <pre class="pre codeblock"><code>&lt;property&gt;
+    &lt;name&gt;dfs.client.read.shortcircuit&lt;/name&gt;
+    &lt;value&gt;true&lt;/value&gt;
+&lt;/property&gt;
+
+&lt;property&gt;
+    &lt;name&gt;dfs.domain.socket.path&lt;/name&gt;
+    &lt;value&gt;/var/run/hdfs-sockets/dn&lt;/value&gt;
+&lt;/property&gt;
+
+&lt;property&gt;
+    &lt;name&gt;dfs.client.file-block-storage-locations.timeout.millis&lt;/name&gt;
+    &lt;value&gt;10000&lt;/value&gt;
+&lt;/property&gt;</code></pre>
+          
+          
+        </li>
+        <li class="li">
+          <p class="p"> If <code class="ph codeph">/var/run/hadoop-hdfs/</code> is group-writable, make
+            sure its group is <code class="ph codeph">root</code>. </p>
+          <div class="note note note_note"><span class="note__title notetitle">Note:</span>  If you are also going to enable block location tracking, you
+            can skip copying configuration files and restarting DataNodes and go
+            straight to <a class="xref" href="#config_performance__block_location_tracking">Optional: Block Location Tracking</a>.
+            Configuring short-circuit reads and block location tracking require
+            the same process of copying files and restarting services, so you
+            can complete that process once when you have completed all
+            configuration changes. Whether you copy files and restart services
+            now or during configuring block location tracking, short-circuit
+            reads are not enabled until you complete those final steps. </div>
+        </li>
+        <li class="li" id="config_performance__restart_all_datanodes"> After applying these changes, restart
+          all DataNodes. </li>
+      </ol>
+    </section>
+
+    <section class="section" id="config_performance__block_location_tracking"><h2 class="title sectiontitle">Mandatory: Block Location Tracking</h2>
+
+      
+
+      <p class="p">
+        Enabling block location metadata allows Impala to know which disk data blocks are located on, allowing
+        better utilization of the underlying disks. Impala will not start unless this setting is enabled.
+      </p>
+
+      <p class="p">
+        <strong class="ph b">To enable block location tracking:</strong>
+      </p>
+
+      <ol class="ol">
+        <li class="li">
+          For each DataNode, adding the following to the&nbsp;<code class="ph codeph">hdfs-site.xml</code> file:
+<pre class="pre codeblock"><code>&lt;property&gt;
+  &lt;name&gt;dfs.datanode.hdfs-blocks-metadata.enabled&lt;/name&gt;
+  &lt;value&gt;true&lt;/value&gt;
+&lt;/property&gt; </code></pre>
+        </li>
+
+        <li class="li"> Copy the client
+            <code class="ph codeph">core-site.xml</code> and <code class="ph codeph">hdfs-site.xml</code>
+          configuration files from the Hadoop configuration directory to the
+          Impala configuration directory. The default Impala configuration
+          location is <code class="ph codeph">/etc/impala/conf</code>. </li>
+
+        <li class="li"> After applying these changes, restart
+          all DataNodes. </li>
+      </ol>
+    </section>
+
+    <section class="section" id="config_performance__native_checksumming"><h2 class="title sectiontitle">Optional: Native Checksumming</h2>
+
+      
+
+      <p class="p">
+        Enabling native checksumming causes Impala to use an optimized native library for computing checksums, if
+        that library is available.
+      </p>
+
+      <p class="p" id="config_performance__p_29">
+        <strong class="ph b">To enable native checksumming:</strong>
+      </p>
+
+      <p class="p">
+        If you installed <span class="keyword"></span> from packages, the native checksumming library is installed and setup correctly. In
+        such a case, no additional steps are required. Conversely, if you installed by other means, such as with
+        tarballs, native checksumming may not be available due to missing shared objects. Finding the message
+        "<code class="ph codeph">Unable to load native-hadoop library for your platform... using builtin-java classes where
+        applicable</code>" in the Impala logs indicates native checksumming may be unavailable. To enable native
+        checksumming, you must build and install <code class="ph codeph">libhadoop.so</code> (the
+        
+        
+        Hadoop Native Library).
+      </p>
+    </section>
+  </div>
+<nav role="navigation" class="related-links"><div class="familylinks"><div class="parentlink"><strong>Parent topic:</strong> <a class="link" href="../topics/impala_config.html">Managing Impala</a></div></div></nav></article></main></body></html>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/75c46918/docs/build/html/topics/impala_connecting.html
----------------------------------------------------------------------
diff --git a/docs/build/html/topics/impala_connecting.html b/docs/build/html/topics/impala_connecting.html
new file mode 100644
index 0000000..e48d850
--- /dev/null
+++ b/docs/build/html/topics/impala_connecting.html
@@ -0,0 +1,190 @@
+<!DOCTYPE html
+  SYSTEM "about:legacy-compat">
+<html lang="en"><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8"><meta charset="UTF-8"><meta name="copyright" content="(C) Copyright 2017"><meta name="DC.rights.owner" content="(C) Copyright 2017"><meta name="DC.Type" content="concept"><meta name="DC.Relation" scheme="URI" content="../topics/impala_impala_shell.html"><meta name="prodname" content="Impala"><meta name="prodname" content="Impala"><meta name="version" content="Impala 2.8.x"><meta name="version" content="Impala 2.8.x"><meta name="DC.Format" content="XHTML"><meta name="DC.Identifier" content="connecting"><link rel="stylesheet" type="text/css" href="../commonltr.css"><title>Connecting to impalad through impala-shell</title></head><body id="connecting"><main role="main"><article role="article" aria-labelledby="ariaid-title1">
+
+  <h1 class="title topictitle1" id="ariaid-title1">Connecting to impalad through impala-shell</h1>
+  
+  
+
+  <div class="body conbody">
+
+
+
+    <div class="p">
+      Within an <span class="keyword cmdname">impala-shell</span> session, you can only issue queries while connected to an instance
+      of the <span class="keyword cmdname">impalad</span> daemon. You can specify the connection information:
+      <ul class="ul">
+        <li class="li">
+          Through command-line options when you run the <span class="keyword cmdname">impala-shell</span> command.
+        </li>
+        <li class="li">
+          Through a configuration file that is read when you run the <span class="keyword cmdname">impala-shell</span> command.
+        </li>
+        <li class="li">
+          During an <span class="keyword cmdname">impala-shell</span> session, by issuing a <code class="ph codeph">CONNECT</code> command.
+        </li>
+      </ul>
+      See <a class="xref" href="impala_shell_options.html">impala-shell Configuration Options</a> for the command-line and configuration file options you can use.
+    </div>
+
+    <p class="p">
+      You can connect to any DataNode where an instance of <span class="keyword cmdname">impalad</span> is running,
+      and that host coordinates the execution of all queries sent to it.
+    </p>
+
+    <p class="p">
+      For simplicity during development, you might always connect to the same host, perhaps running <span class="keyword cmdname">impala-shell</span> on
+      the same host as <span class="keyword cmdname">impalad</span> and specifying the hostname as <code class="ph codeph">localhost</code>.
+    </p>
+
+    <p class="p">
+      In a production environment, you might enable load balancing, in which you connect to specific host/port combination
+      but queries are forwarded to arbitrary hosts. This technique spreads the overhead of acting as the coordinator
+      node among all the DataNodes in the cluster. See <a class="xref" href="impala_proxy.html">Using Impala through a Proxy for High Availability</a> for details.
+    </p>
+
+    <p class="p">
+      <strong class="ph b">To connect the Impala shell during shell startup:</strong>
+    </p>
+
+    <ol class="ol">
+      <li class="li">
+        Locate the hostname of a DataNode within the cluster that is running an instance of the
+        <span class="keyword cmdname">impalad</span> daemon. If that DataNode uses a non-default port (something
+        other than port 21000) for <span class="keyword cmdname">impala-shell</span> connections, find out the
+        port number also.
+      </li>
+
+      <li class="li">
+        Use the <code class="ph codeph">-i</code> option to the
+        <span class="keyword cmdname">impala-shell</span> interpreter to specify the connection information for
+        that instance of <span class="keyword cmdname">impalad</span>:
+<pre class="pre codeblock"><code>
+# When you are logged into the same machine running impalad.
+# The prompt will reflect the current hostname.
+$ impala-shell
+
+# When you are logged into the same machine running impalad.
+# The host will reflect the hostname 'localhost'.
+$ impala-shell -i localhost
+
+# When you are logged onto a different host, perhaps a client machine
+# outside the Hadoop cluster.
+$ impala-shell -i <var class="keyword varname">some.other.hostname</var>
+
+# When you are logged onto a different host, and impalad is listening
+# on a non-default port. Perhaps a load balancer is forwarding requests
+# to a different host/port combination behind the scenes.
+$ impala-shell -i <var class="keyword varname">some.other.hostname</var>:<var class="keyword varname">port_number</var>
+</code></pre>
+      </li>
+    </ol>
+
+    <p class="p">
+      <strong class="ph b">To connect the Impala shell after shell startup:</strong>
+    </p>
+
+    <ol class="ol">
+      <li class="li">
+        Start the Impala shell with no connection:
+<pre class="pre codeblock"><code>$ impala-shell</code></pre>
+        <p class="p">
+          You should see a prompt like the following:
+        </p>
+<pre class="pre codeblock"><code>Welcome to the Impala shell. Press TAB twice to see a list of available commands.
+...
+<span class="ph">(Shell
+      build version: Impala Shell v2.8.x (<var class="keyword varname">hash</var>) built on
+      <var class="keyword varname">date</var>)</span>
+[Not connected] &gt; </code></pre>
+      </li>
+
+      <li class="li">
+        Locate the hostname of a DataNode within the cluster that is running an instance of the
+        <span class="keyword cmdname">impalad</span> daemon. If that DataNode uses a non-default port (something
+        other than port 21000) for <span class="keyword cmdname">impala-shell</span> connections, find out the
+        port number also.
+      </li>
+
+      <li class="li">
+        Use the <code class="ph codeph">connect</code> command to connect to an Impala instance. Enter a command of the form:
+<pre class="pre codeblock"><code>[Not connected] &gt; connect <var class="keyword varname">impalad-host</var>
+[<var class="keyword varname">impalad-host</var>:21000] &gt;</code></pre>
+        <div class="note note note_note"><span class="note__title notetitle">Note:</span> 
+          Replace <var class="keyword varname">impalad-host</var> with the hostname you have configured for any DataNode running
+          Impala in your environment. The changed prompt indicates a successful connection.
+        </div>
+      </li>
+    </ol>
+
+    <p class="p">
+      <strong class="ph b">To start <span class="keyword cmdname">impala-shell</span> in a specific database:</strong>
+    </p>
+
+    <p class="p">
+      You can use all the same connection options as in previous examples.
+      For simplicity, these examples assume that you are logged into one of
+      the DataNodes that is running the <span class="keyword cmdname">impalad</span> daemon.
+    </p>
+
+    <ol class="ol">
+      <li class="li">
+        Find the name of the database containing the relevant tables, views, and so
+        on that you want to operate on.
+      </li>
+
+      <li class="li">
+        Use the <code class="ph codeph">-d</code> option to the
+        <span class="keyword cmdname">impala-shell</span> interpreter to connect and immediately
+        switch to the specified database, without the need for a <code class="ph codeph">USE</code>
+        statement or fully qualified names:
+<pre class="pre codeblock"><code>
+# Subsequent queries with unqualified names operate on
+# tables, views, and so on inside the database named 'staging'.
+$ impala-shell -i localhost -d staging
+
+# It is common during development, ETL, benchmarking, and so on
+# to have different databases containing the same table names
+# but with different contents or layouts.
+$ impala-shell -i localhost -d parquet_snappy_compression
+$ impala-shell -i localhost -d parquet_gzip_compression
+</code></pre>
+      </li>
+    </ol>
+
+    <p class="p">
+      <strong class="ph b">To run one or several statements in non-interactive mode:</strong>
+    </p>
+
+    <p class="p">
+      You can use all the same connection options as in previous examples.
+      For simplicity, these examples assume that you are logged into one of
+      the DataNodes that is running the <span class="keyword cmdname">impalad</span> daemon.
+    </p>
+
+    <ol class="ol">
+      <li class="li">
+        Construct a statement, or a file containing a sequence of statements,
+        that you want to run in an automated way, without typing or copying
+        and pasting each time.
+      </li>
+
+      <li class="li">
+        Invoke <span class="keyword cmdname">impala-shell</span> with the <code class="ph codeph">-q</code> option to run a single statement, or
+        the <code class="ph codeph">-f</code> option to run a sequence of statements from a file.
+        The <span class="keyword cmdname">impala-shell</span> command returns immediately, without going into
+        the interactive interpreter.
+<pre class="pre codeblock"><code>
+# A utility command that you might run while developing shell scripts
+# to manipulate HDFS files.
+$ impala-shell -i localhost -d database_of_interest -q 'show tables'
+
+# A sequence of CREATE TABLE, CREATE VIEW, and similar DDL statements
+# can go into a file to make the setup process repeatable.
+$ impala-shell -i localhost -d database_of_interest -f recreate_tables.sql
+</code></pre>
+      </li>
+    </ol>
+
+  </div>
+<nav role="navigation" class="related-links"><div class="familylinks"><div class="parentlink"><strong>Parent topic:</strong> <a class="link" href="../topics/impala_impala_shell.html">Using the Impala Shell (impala-shell Command)</a></div></div></nav></article></main></body></html>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/75c46918/docs/build/html/topics/impala_conversion_functions.html
----------------------------------------------------------------------
diff --git a/docs/build/html/topics/impala_conversion_functions.html b/docs/build/html/topics/impala_conversion_functions.html
new file mode 100644
index 0000000..d49cef8
--- /dev/null
+++ b/docs/build/html/topics/impala_conversion_functions.html
@@ -0,0 +1,288 @@
+<!DOCTYPE html
+  SYSTEM "about:legacy-compat">
+<html lang="en"><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8"><meta charset="UTF-8"><meta name="copyright" content="(C) Copyright 2017"><meta name="DC.rights.owner" content="(C) Copyright 2017"><meta name="DC.Type" content="concept"><meta name="DC.Relation" scheme="URI" content="../topics/impala_functions.html"><meta name="prodname" content="Impala"><meta name="prodname" content="Impala"><meta name="version" content="Impala 2.8.x"><meta name="version" content="Impala 2.8.x"><meta name="DC.Format" content="XHTML"><meta name="DC.Identifier" content="conversion_functions"><link rel="stylesheet" type="text/css" href="../commonltr.css"><title>Impala Type Conversion Functions</title></head><body id="conversion_functions"><main role="main"><article role="article" aria-labelledby="ariaid-title1">
+
+  <h1 class="title topictitle1" id="ariaid-title1">Impala Type Conversion Functions</h1>
+  
+  
+
+  <div class="body conbody">
+
+    <p class="p">
+      Conversion functions are usually used in combination with other functions, to explicitly pass the expected
+      data types. Impala has strict rules regarding data types for function parameters. For example, Impala does
+      not automatically convert a <code class="ph codeph">DOUBLE</code> value to <code class="ph codeph">FLOAT</code>, a
+      <code class="ph codeph">BIGINT</code> value to <code class="ph codeph">INT</code>, or other conversion where precision could be lost or
+      overflow could occur. Also, for reporting or dealing with loosely defined schemas in big data contexts,
+      you might frequently need to convert values to or from the <code class="ph codeph">STRING</code> type.
+    </p>
+
+    <div class="note note note_note"><span class="note__title notetitle">Note:</span> 
+      Although in <span class="keyword">Impala 2.3</span>, the <code class="ph codeph">SHOW FUNCTIONS</code> output for
+      database <code class="ph codeph">_IMPALA_BUILTINS</code> contains some function signatures
+      matching the pattern <code class="ph codeph">castto*</code>, these functions are not intended
+      for public use and are expected to be hidden in future.
+    </div>
+
+    <p class="p">
+      <strong class="ph b">Function reference:</strong>
+    </p>
+
+    <p class="p">
+      Impala supports the following type conversion functions:
+    </p>
+
+<dl class="dl">
+
+
+<dt class="dt dlterm" id="conversion_functions__cast">
+<code class="ph codeph">cast(<var class="keyword varname">expr</var> AS <var class="keyword varname">type</var>)</code>
+</dt>
+
+<dd class="dd">
+
+<strong class="ph b">Purpose:</strong> Converts the value of an expression to any other type.
+If the expression value is of a type that cannot be converted to the target type, the result is <code class="ph codeph">NULL</code>.
+<p class="p"><strong class="ph b">Usage notes:</strong>
+Use <code class="ph codeph">CAST</code> when passing a column value or literal to a function that
+expects a parameter with a different type.
+Frequently used in SQL operations such as <code class="ph codeph">CREATE TABLE AS SELECT</code>
+and <code class="ph codeph">INSERT ... VALUES</code> to ensure that values from various sources
+are of the appropriate type for the destination columns.
+Where practical, do a one-time <code class="ph codeph">CAST()</code> operation during the ingestion process
+to make each column into the appropriate type, rather than using many <code class="ph codeph">CAST()</code>
+operations in each query; doing type conversions for each row during each query can be expensive
+for tables with millions or billions of rows.
+</p>
+    <p class="p">
+        The way this function deals with time zones when converting to or from <code class="ph codeph">TIMESTAMP</code>
+        values is affected by the <code class="ph codeph">-use_local_tz_for_unix_timestamp_conversions</code> startup flag for the
+        <span class="keyword cmdname">impalad</span> daemon. See <a class="xref" href="../shared/../topics/impala_timestamp.html#timestamp">TIMESTAMP Data Type</a> for details about
+        how Impala handles time zone considerations for the <code class="ph codeph">TIMESTAMP</code> data type.
+      </p>
+
+<p class="p">
+        <strong class="ph b">Examples:</strong>
+      </p>
+<pre class="pre codeblock"><code>select concat('Here are the first ',10,' results.'); -- Fails
+select concat('Here are the first ',cast(10 as string),' results.'); -- Succeeds
+</code></pre>
+<p class="p">
+The following example starts with a text table where every column has a type of <code class="ph codeph">STRING</code>,
+which might be how you ingest data of unknown schema until you can verify the cleanliness of the underly values.
+Then it uses <code class="ph codeph">CAST()</code> to create a new Parquet table with the same data, but using specific
+numeric data types for the columns with numeric data. Using numeric types of appropriate sizes can result in
+substantial space savings on disk and in memory, and performance improvements in queries,
+over using strings or larger-than-necessary numeric types.
+</p>
+<pre class="pre codeblock"><code>create table t1 (name string, x string, y string, z string);
+
+create table t2 stored as parquet
+as select
+  name,
+  cast(x as bigint) x,
+  cast(y as timestamp) y,
+  cast(z as smallint) z
+from t1;
+
+describe t2;
++------+----------+---------+
+| name | type     | comment |
++------+----------+---------+
+| name | string   |         |
+| x    | bigint   |         |
+| y    | smallint |         |
+| z    | tinyint  |         |
++------+----------+---------+
+</code></pre>
+<p class="p">
+        <strong class="ph b">Related information:</strong>
+      </p>
+<p class="p">
+
+  For details of casts from each kind of data type, see the description of
+  the appropriate type:
+  <a class="xref" href="impala_tinyint.html#tinyint">TINYINT Data Type</a>,
+  <a class="xref" href="impala_smallint.html#smallint">SMALLINT Data Type</a>,
+  <a class="xref" href="impala_int.html#int">INT Data Type</a>,
+  <a class="xref" href="impala_bigint.html#bigint">BIGINT Data Type</a>,
+  <a class="xref" href="impala_float.html#float">FLOAT Data Type</a>,
+  <a class="xref" href="impala_double.html#double">DOUBLE Data Type</a>,
+  <a class="xref" href="impala_decimal.html#decimal">DECIMAL Data Type (Impala 1.4 or higher only)</a>,
+  <a class="xref" href="impala_string.html#string">STRING Data Type</a>,
+  <a class="xref" href="impala_char.html#char">CHAR Data Type (Impala 2.0 or higher only)</a>,
+  <a class="xref" href="impala_varchar.html#varchar">VARCHAR Data Type (Impala 2.0 or higher only)</a>,
+  <a class="xref" href="impala_timestamp.html#timestamp">TIMESTAMP Data Type</a>,
+  <a class="xref" href="impala_boolean.html#boolean">BOOLEAN Data Type</a>
+</p>
+</dd>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+<dt class="dt dlterm" id="conversion_functions__typeof">
+<code class="ph codeph">typeof(type value)</code>
+</dt>
+<dd class="dd">
+
+<strong class="ph b">Purpose:</strong> Returns the name of the data type corresponding to an expression. For types with
+extra attributes, such as length for <code class="ph codeph">CHAR</code> and <code class="ph codeph">VARCHAR</code>,
+or precision and scale for <code class="ph codeph">DECIMAL</code>, includes the full specification of the type.
+
+<p class="p"><strong class="ph b">Return type:</strong> <code class="ph codeph">string</code></p>
+<p class="p"><strong class="ph b">Usage notes:</strong> Typically used in interactive exploration of a schema, or in application code that programmatically generates schema definitions such as <code class="ph codeph">CREATE TABLE</code> statements.
+For example, previously, to understand the type of an expression such as
+<code class="ph codeph">col1 / col2</code> or <code class="ph codeph">concat(col1, col2, col3)</code>,
+you might have created a dummy table with a single row, using syntax such as <code class="ph codeph">CREATE TABLE foo AS SELECT 5 / 3.0</code>,
+and then doing a <code class="ph codeph">DESCRIBE</code> to see the type of the row.
+Or you might have done a <code class="ph codeph">CREATE TABLE AS SELECT</code> operation to create a table and
+copy data into it, only learning the types of the columns by doing a <code class="ph codeph">DESCRIBE</code> afterward.
+This technique is especially useful for arithmetic expressions involving <code class="ph codeph">DECIMAL</code> types,
+because the precision and scale of the result is typically different than that of the operands.
+</p>
+<p class="p">
+        <strong class="ph b">Added in:</strong> <span class="keyword">Impala 2.3.0</span>
+      </p>
+<p class="p">
+        <strong class="ph b">Examples:</strong>
+      </p>
+<p class="p">
+These examples show how to check the type of a simple literal or function value.
+Notice how adding even tiny integers together changes the data type of the result to
+avoid overflow, and how the results of arithmetic operations on <code class="ph codeph">DECIMAL</code> values
+have specific precision and scale attributes.
+</p>
+<pre class="pre codeblock"><code>select typeof(2)
++-----------+
+| typeof(2) |
++-----------+
+| TINYINT   |
++-----------+
+
+select typeof(2+2)
++---------------+
+| typeof(2 + 2) |
++---------------+
+| SMALLINT      |
++---------------+
+
+select typeof('xyz')
++---------------+
+| typeof('xyz') |
++---------------+
+| STRING        |
++---------------+
+
+select typeof(now())
++---------------+
+| typeof(now()) |
++---------------+
+| TIMESTAMP     |
++---------------+
+
+select typeof(5.3 / 2.1)
++-------------------+
+| typeof(5.3 / 2.1) |
++-------------------+
+| DECIMAL(6,4)      |
++-------------------+
+
+select typeof(5.30001 / 2342.1);
++--------------------------+
+| typeof(5.30001 / 2342.1) |
++--------------------------+
+| DECIMAL(13,11)           |
++--------------------------+
+
+select typeof(typeof(2+2))
++-----------------------+
+| typeof(typeof(2 + 2)) |
++-----------------------+
+| STRING                |
++-----------------------+
+</code></pre>
+
+<p class="p">
+This example shows how even if you do not have a record of the type of a column,
+for example because the type was changed by <code class="ph codeph">ALTER TABLE</code> after the
+original <code class="ph codeph">CREATE TABLE</code>, you can still find out the type in a
+more compact form than examining the full <code class="ph codeph">DESCRIBE</code> output.
+Remember to use <code class="ph codeph">LIMIT 1</code> in such cases, to avoid an identical
+result value for every row in the table.
+</p>
+<pre class="pre codeblock"><code>create table typeof_example (a int, b tinyint, c smallint, d bigint);
+
+/* Empty result set if there is no data in the table. */
+select typeof(a) from typeof_example;
+
+/* OK, now we have some data but the type of column A is being changed. */
+insert into typeof_example values (1, 2, 3, 4);
+alter table typeof_example change a a bigint;
+
+/* We can always find out the current type of that column without doing a full DESCRIBE. */
+select typeof(a) from typeof_example limit 1;
++-----------+
+| typeof(a) |
++-----------+
+| BIGINT    |
++-----------+
+</code></pre>
+<p class="p">
+This example shows how you might programmatically generate a <code class="ph codeph">CREATE TABLE</code> statement
+with the appropriate column definitions to hold the result values of arbitrary expressions.
+The <code class="ph codeph">typeof()</code> function lets you construct a detailed <code class="ph codeph">CREATE TABLE</code> statement
+without actually creating the table, as opposed to <code class="ph codeph">CREATE TABLE AS SELECT</code> operations
+where you create the destination table but only learn the column data types afterward through <code class="ph codeph">DESCRIBE</code>.
+</p>
+<pre class="pre codeblock"><code>describe typeof_example;
++------+----------+---------+
+| name | type     | comment |
++------+----------+---------+
+| a    | bigint   |         |
+| b    | tinyint  |         |
+| c    | smallint |         |
+| d    | bigint   |         |
++------+----------+---------+
+
+/* An ETL or business intelligence tool might create variations on a table with different file formats,
+   different sets of columns, and so on. TYPEOF() lets an application introspect the types of the original columns. */
+select concat('create table derived_table (a ', typeof(a), ', b ', typeof(b), ', c ',
+    typeof(c), ', d ', typeof(d), ') stored as parquet;')
+  as 'create table statement'
+from typeof_example limit 1;
++-------------------------------------------------------------------------------------------+
+| create table statement                                                                    |
++-------------------------------------------------------------------------------------------+
+| create table derived_table (a BIGINT, b TINYINT, c SMALLINT, d BIGINT) stored as parquet; |
++-------------------------------------------------------------------------------------------+
+</code></pre>
+</dd>
+
+
+</dl>
+
+  </div>
+<nav role="navigation" class="related-links"><div class="familylinks"><div class="parentlink"><strong>Parent topic:</strong> <a class="link" href="../topics/impala_functions.html">Impala Built-In Functions</a></div></div></nav></article></main></body></html>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/75c46918/docs/build/html/topics/impala_count.html
----------------------------------------------------------------------
diff --git a/docs/build/html/topics/impala_count.html b/docs/build/html/topics/impala_count.html
new file mode 100644
index 0000000..c1b961a
--- /dev/null
+++ b/docs/build/html/topics/impala_count.html
@@ -0,0 +1,353 @@
+<!DOCTYPE html
+  SYSTEM "about:legacy-compat">
+<html lang="en"><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8"><meta charset="UTF-8"><meta name="copyright" content="(C) Copyright 2017"><meta name="DC.rights.owner" content="(C) Copyright 2017"><meta name="DC.Type" content="concept"><meta name="DC.Relation" scheme="URI" content="../topics/impala_aggregate_functions.html"><meta name="prodname" content="Impala"><meta name="prodname" content="Impala"><meta name="version" content="Impala 2.8.x"><meta name="version" content="Impala 2.8.x"><meta name="DC.Format" content="XHTML"><meta name="DC.Identifier" content="count"><link rel="stylesheet" type="text/css" href="../commonltr.css"><title>COUNT Function</title></head><body id="count"><main role="main"><article role="article" aria-labelledby="ariaid-title1">
+
+  <h1 class="title topictitle1" id="ariaid-title1">COUNT Function</h1>
+  
+  
+
+  <div class="body conbody">
+
+    <p class="p">
+      
+      An aggregate function that returns the number of rows, or the number of non-<code class="ph codeph">NULL</code> rows.
+    </p>
+
+    <p class="p">
+        <strong class="ph b">Syntax:</strong>
+      </p>
+
+<pre class="pre codeblock"><code>COUNT([DISTINCT | ALL] <var class="keyword varname">expression</var>) [OVER (<var class="keyword varname">analytic_clause</var>)]</code></pre>
+
+    <p class="p">
+      Depending on the argument, <code class="ph codeph">COUNT()</code> considers rows that meet certain conditions:
+    </p>
+
+    <ul class="ul">
+      <li class="li">
+        The notation <code class="ph codeph">COUNT(*)</code> includes <code class="ph codeph">NULL</code> values in the total.
+      </li>
+
+      <li class="li">
+        The notation <code class="ph codeph">COUNT(<var class="keyword varname">column_name</var>)</code> only considers rows where the column
+        contains a non-<code class="ph codeph">NULL</code> value.
+      </li>
+
+      <li class="li">
+        You can also combine <code class="ph codeph">COUNT</code> with the <code class="ph codeph">DISTINCT</code> operator to eliminate
+        duplicates before counting, and to count the combinations of values across multiple columns.
+      </li>
+    </ul>
+
+    <p class="p">
+      When the query contains a <code class="ph codeph">GROUP BY</code> clause, returns one value for each combination of
+      grouping values.
+    </p>
+
+    <p class="p">
+      <strong class="ph b">Return type:</strong> <code class="ph codeph">BIGINT</code>
+    </p>
+
+    <p class="p">
+        <strong class="ph b">Usage notes:</strong>
+      </p>
+
+    <p class="p">
+        If you frequently run aggregate functions such as <code class="ph codeph">MIN()</code>, <code class="ph codeph">MAX()</code>, and
+        <code class="ph codeph">COUNT(DISTINCT)</code> on partition key columns, consider enabling the <code class="ph codeph">OPTIMIZE_PARTITION_KEY_SCANS</code>
+        query option, which optimizes such queries. This feature is available in <span class="keyword">Impala 2.5</span> and higher.
+        See <a class="xref" href="../shared/../topics/impala_optimize_partition_key_scans.html">OPTIMIZE_PARTITION_KEY_SCANS Query Option (Impala 2.5 or higher only)</a>
+        for the kinds of queries that this option applies to, and slight differences in how partitions are
+        evaluated when this query option is enabled.
+      </p>
+
+    <p class="p">
+        <strong class="ph b">Complex type considerations:</strong>
+      </p>
+
+    <p class="p">
+        To access a column with a complex type (<code class="ph codeph">ARRAY</code>, <code class="ph codeph">STRUCT</code>, or <code class="ph codeph">MAP</code>)
+        in an aggregation function, you unpack the individual elements using join notation in the query,
+        and then apply the function to the final scalar item, field, key, or value at the bottom of any nested type hierarchy in the column.
+        See <a class="xref" href="../shared/../topics/impala_complex_types.html#complex_types">Complex Types (Impala 2.3 or higher only)</a> for details about using complex types in Impala.
+      </p>
+
+    <div class="p">
+The following example demonstrates calls to several aggregation functions
+using values from a column containing nested complex types
+(an <code class="ph codeph">ARRAY</code> of <code class="ph codeph">STRUCT</code> items).
+The array is unpacked inside the query using join notation.
+The array elements are referenced using the <code class="ph codeph">ITEM</code>
+pseudocolumn, and the structure fields inside the array elements
+are referenced using dot notation.
+Numeric values such as <code class="ph codeph">SUM()</code> and <code class="ph codeph">AVG()</code>
+are computed using the numeric <code class="ph codeph">R_NATIONKEY</code> field, and
+the general-purpose <code class="ph codeph">MAX()</code> and <code class="ph codeph">MIN()</code>
+values are computed from the string <code class="ph codeph">N_NAME</code> field.
+<pre class="pre codeblock"><code>describe region;
++-------------+-------------------------+---------+
+| name        | type                    | comment |
++-------------+-------------------------+---------+
+| r_regionkey | smallint                |         |
+| r_name      | string                  |         |
+| r_comment   | string                  |         |
+| r_nations   | array&lt;struct&lt;           |         |
+|             |   n_nationkey:smallint, |         |
+|             |   n_name:string,        |         |
+|             |   n_comment:string      |         |
+|             | &gt;&gt;                      |         |
++-------------+-------------------------+---------+
+
+select r_name, r_nations.item.n_nationkey
+  from region, region.r_nations as r_nations
+order by r_name, r_nations.item.n_nationkey;
++-------------+------------------+
+| r_name      | item.n_nationkey |
++-------------+------------------+
+| AFRICA      | 0                |
+| AFRICA      | 5                |
+| AFRICA      | 14               |
+| AFRICA      | 15               |
+| AFRICA      | 16               |
+| AMERICA     | 1                |
+| AMERICA     | 2                |
+| AMERICA     | 3                |
+| AMERICA     | 17               |
+| AMERICA     | 24               |
+| ASIA        | 8                |
+| ASIA        | 9                |
+| ASIA        | 12               |
+| ASIA        | 18               |
+| ASIA        | 21               |
+| EUROPE      | 6                |
+| EUROPE      | 7                |
+| EUROPE      | 19               |
+| EUROPE      | 22               |
+| EUROPE      | 23               |
+| MIDDLE EAST | 4                |
+| MIDDLE EAST | 10               |
+| MIDDLE EAST | 11               |
+| MIDDLE EAST | 13               |
+| MIDDLE EAST | 20               |
++-------------+------------------+
+
+select
+  r_name,
+  count(r_nations.item.n_nationkey) as count,
+  sum(r_nations.item.n_nationkey) as sum,
+  avg(r_nations.item.n_nationkey) as avg,
+  min(r_nations.item.n_name) as minimum,
+  max(r_nations.item.n_name) as maximum,
+  ndv(r_nations.item.n_nationkey) as distinct_vals
+from
+  region, region.r_nations as r_nations
+group by r_name
+order by r_name;
++-------------+-------+-----+------+-----------+----------------+---------------+
+| r_name      | count | sum | avg  | minimum   | maximum        | distinct_vals |
++-------------+-------+-----+------+-----------+----------------+---------------+
+| AFRICA      | 5     | 50  | 10   | ALGERIA   | MOZAMBIQUE     | 5             |
+| AMERICA     | 5     | 47  | 9.4  | ARGENTINA | UNITED STATES  | 5             |
+| ASIA        | 5     | 68  | 13.6 | CHINA     | VIETNAM        | 5             |
+| EUROPE      | 5     | 77  | 15.4 | FRANCE    | UNITED KINGDOM | 5             |
+| MIDDLE EAST | 5     | 58  | 11.6 | EGYPT     | SAUDI ARABIA   | 5             |
++-------------+-------+-----+------+-----------+----------------+---------------+
+</code></pre>
+</div>
+
+    <p class="p">
+        <strong class="ph b">Examples:</strong>
+      </p>
+
+<pre class="pre codeblock"><code>-- How many rows total are in the table, regardless of NULL values?
+select count(*) from t1;
+-- How many rows are in the table with non-NULL values for a column?
+select count(c1) from t1;
+-- Count the rows that meet certain conditions.
+-- Again, * includes NULLs, so COUNT(*) might be greater than COUNT(col).
+select count(*) from t1 where x &gt; 10;
+select count(c1) from t1 where x &gt; 10;
+-- Can also be used in combination with DISTINCT and/or GROUP BY.
+-- Combine COUNT and DISTINCT to find the number of unique values.
+-- Must use column names rather than * with COUNT(DISTINCT ...) syntax.
+-- Rows with NULL values are not counted.
+select count(distinct c1) from t1;
+-- Rows with a NULL value in _either_ column are not counted.
+select count(distinct c1, c2) from t1;
+-- Return more than one result.
+select month, year, count(distinct visitor_id) from web_stats group by month, year;
+</code></pre>
+
+    <div class="p">
+      The following examples show how to use <code class="ph codeph">COUNT()</code> in an analytic context. They use a table
+      containing integers from 1 to 10. Notice how the <code class="ph codeph">COUNT()</code> is reported for each input value, as
+      opposed to the <code class="ph codeph">GROUP BY</code> clause which condenses the result set.
+<pre class="pre codeblock"><code>select x, property, count(x) over (partition by property) as count from int_t where property in ('odd','even');
++----+----------+-------+
+| x  | property | count |
++----+----------+-------+
+| 2  | even     | 5     |
+| 4  | even     | 5     |
+| 6  | even     | 5     |
+| 8  | even     | 5     |
+| 10 | even     | 5     |
+| 1  | odd      | 5     |
+| 3  | odd      | 5     |
+| 5  | odd      | 5     |
+| 7  | odd      | 5     |
+| 9  | odd      | 5     |
++----+----------+-------+
+</code></pre>
+
+Adding an <code class="ph codeph">ORDER BY</code> clause lets you experiment with results that are cumulative or apply to a moving
+set of rows (the <span class="q">"window"</span>). The following examples use <code class="ph codeph">COUNT()</code> in an analytic context
+(that is, with an <code class="ph codeph">OVER()</code> clause) to produce a running count of all the even values,
+then a running count of all the odd values. The basic <code class="ph codeph">ORDER BY x</code> clause implicitly
+activates a window clause of <code class="ph codeph">RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW</code>,
+which is effectively the same as <code class="ph codeph">ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW</code>,
+therefore all of these examples produce the same results:
+<pre class="pre codeblock"><code>select x, property,
+  count(x) over (partition by property <strong class="ph b">order by x</strong>) as 'cumulative count'
+  from int_t where property in ('odd','even');
++----+----------+------------------+
+| x  | property | cumulative count |
++----+----------+------------------+
+| 2  | even     | 1                |
+| 4  | even     | 2                |
+| 6  | even     | 3                |
+| 8  | even     | 4                |
+| 10 | even     | 5                |
+| 1  | odd      | 1                |
+| 3  | odd      | 2                |
+| 5  | odd      | 3                |
+| 7  | odd      | 4                |
+| 9  | odd      | 5                |
++----+----------+------------------+
+
+select x, property,
+  count(x) over
+  (
+    partition by property
+    <strong class="ph b">order by x</strong>
+    <strong class="ph b">range between unbounded preceding and current row</strong>
+  ) as 'cumulative total'
+from int_t where property in ('odd','even');
++----+----------+------------------+
+| x  | property | cumulative count |
++----+----------+------------------+
+| 2  | even     | 1                |
+| 4  | even     | 2                |
+| 6  | even     | 3                |
+| 8  | even     | 4                |
+| 10 | even     | 5                |
+| 1  | odd      | 1                |
+| 3  | odd      | 2                |
+| 5  | odd      | 3                |
+| 7  | odd      | 4                |
+| 9  | odd      | 5                |
++----+----------+------------------+
+
+select x, property,
+  count(x) over
+  (
+    partition by property
+    <strong class="ph b">order by x</strong>
+    <strong class="ph b">rows between unbounded preceding and current row</strong>
+  ) as 'cumulative total'
+  from int_t where property in ('odd','even');
++----+----------+------------------+
+| x  | property | cumulative count |
++----+----------+------------------+
+| 2  | even     | 1                |
+| 4  | even     | 2                |
+| 6  | even     | 3                |
+| 8  | even     | 4                |
+| 10 | even     | 5                |
+| 1  | odd      | 1                |
+| 3  | odd      | 2                |
+| 5  | odd      | 3                |
+| 7  | odd      | 4                |
+| 9  | odd      | 5                |
++----+----------+------------------+
+</code></pre>
+
+The following examples show how to construct a moving window, with a running count taking into account 1 row before
+and 1 row after the current row, within the same partition (all the even values or all the odd values).
+Therefore, the count is consistently 3 for rows in the middle of the window, and 2 for
+rows near the ends of the window, where there is no preceding or no following row in the partition.
+Because of a restriction in the Impala <code class="ph codeph">RANGE</code> syntax, this type of
+moving window is possible with the <code class="ph codeph">ROWS BETWEEN</code> clause but not the <code class="ph codeph">RANGE BETWEEN</code>
+clause:
+<pre class="pre codeblock"><code>select x, property,
+  count(x) over
+  (
+    partition by property
+    <strong class="ph b">order by x</strong>
+    <strong class="ph b">rows between 1 preceding and 1 following</strong>
+  ) as 'moving total'
+  from int_t where property in ('odd','even');
++----+----------+--------------+
+| x  | property | moving total |
++----+----------+--------------+
+| 2  | even     | 2            |
+| 4  | even     | 3            |
+| 6  | even     | 3            |
+| 8  | even     | 3            |
+| 10 | even     | 2            |
+| 1  | odd      | 2            |
+| 3  | odd      | 3            |
+| 5  | odd      | 3            |
+| 7  | odd      | 3            |
+| 9  | odd      | 2            |
++----+----------+--------------+
+
+-- Doesn't work because of syntax restriction on RANGE clause.
+select x, property,
+  count(x) over
+  (
+    partition by property
+    <strong class="ph b">order by x</strong>
+    <strong class="ph b">range between 1 preceding and 1 following</strong>
+  ) as 'moving total'
+from int_t where property in ('odd','even');
+ERROR: AnalysisException: RANGE is only supported with both the lower and upper bounds UNBOUNDED or one UNBOUNDED and the other CURRENT ROW.
+</code></pre>
+    </div>
+
+    <div class="note note note_note"><span class="note__title notetitle">Note:</span> 
+        <p class="p">
+          By default, Impala only allows a single <code class="ph codeph">COUNT(DISTINCT <var class="keyword varname">columns</var>)</code>
+          expression in each query.
+        </p>
+        <p class="p">
+          If you do not need precise accuracy, you can produce an estimate of the distinct values for a column by
+          specifying <code class="ph codeph">NDV(<var class="keyword varname">column</var>)</code>; a query can contain multiple instances of
+          <code class="ph codeph">NDV(<var class="keyword varname">column</var>)</code>. To make Impala automatically rewrite
+          <code class="ph codeph">COUNT(DISTINCT)</code> expressions to <code class="ph codeph">NDV()</code>, enable the
+          <code class="ph codeph">APPX_COUNT_DISTINCT</code> query option.
+        </p>
+        <p class="p">
+          To produce the same result as multiple <code class="ph codeph">COUNT(DISTINCT)</code> expressions, you can use the
+          following technique for queries involving a single table:
+        </p>
+<pre class="pre codeblock"><code>select v1.c1 result1, v2.c1 result2 from
+  (select count(distinct col1) as c1 from t1) v1
+    cross join
+  (select count(distinct col2) as c1 from t1) v2;
+</code></pre>
+        <p class="p">
+          Because <code class="ph codeph">CROSS JOIN</code> is an expensive operation, prefer to use the <code class="ph codeph">NDV()</code>
+          technique wherever practical.
+        </p>
+      </div>
+
+    <p class="p">
+        <strong class="ph b">Related information:</strong>
+      </p>
+
+    <p class="p">
+      <a class="xref" href="impala_analytic_functions.html#analytic_functions">Impala Analytic Functions</a>
+    </p>
+
+  </div>
+<nav role="navigation" class="related-links"><div class="familylinks"><div class="parentlink"><strong>Parent topic:</strong> <a class="link" href="../topics/impala_aggregate_functions.html">Impala Aggregate Functions</a></div></div></nav></article></main></body></html>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/75c46918/docs/build/html/topics/impala_create_database.html
----------------------------------------------------------------------
diff --git a/docs/build/html/topics/impala_create_database.html b/docs/build/html/topics/impala_create_database.html
new file mode 100644
index 0000000..bfee172
--- /dev/null
+++ b/docs/build/html/topics/impala_create_database.html
@@ -0,0 +1,209 @@
+<!DOCTYPE html
+  SYSTEM "about:legacy-compat">
+<html lang="en"><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8"><meta charset="UTF-8"><meta name="copyright" content="(C) Copyright 2017"><meta name="DC.rights.owner" content="(C) Copyright 2017"><meta name="DC.Type" content="concept"><meta name="DC.Relation" scheme="URI" content="../topics/impala_langref_sql.html"><meta name="prodname" content="Impala"><meta name="prodname" content="Impala"><meta name="version" content="Impala 2.8.x"><meta name="version" content="Impala 2.8.x"><meta name="DC.Format" content="XHTML"><meta name="DC.Identifier" content="create_database"><link rel="stylesheet" type="text/css" href="../commonltr.css"><title>CREATE DATABASE Statement</title></head><body id="create_database"><main role="main"><article role="article" aria-labelledby="ariaid-title1">
+
+  <h1 class="title topictitle1" id="ariaid-title1">CREATE DATABASE Statement</h1>
+  
+  
+
+  <div class="body conbody">
+
+    <p class="p">
+      
+      Creates a new database.
+    </p>
+
+    <p class="p">
+      In Impala, a database is both:
+    </p>
+
+    <ul class="ul">
+      <li class="li">
+        A logical construct for grouping together related tables, views, and functions within their own namespace.
+        You might use a separate database for each application, set of related tables, or round of experimentation.
+      </li>
+
+      <li class="li">
+        A physical construct represented by a directory tree in HDFS. Tables (internal tables), partitions, and
+        data files are all located under this directory. You can perform HDFS-level operations such as backing it up and measuring space usage,
+        or remove it with a <code class="ph codeph">DROP DATABASE</code> statement.
+      </li>
+    </ul>
+
+    <p class="p">
+        <strong class="ph b">Syntax:</strong>
+      </p>
+
+<pre class="pre codeblock"><code>CREATE (DATABASE|SCHEMA) [IF NOT EXISTS] <var class="keyword varname">database_name</var>[COMMENT '<var class="keyword varname">database_comment</var>']
+  [LOCATION <var class="keyword varname">hdfs_path</var>];</code></pre>
+
+    <p class="p">
+        <strong class="ph b">Statement type:</strong> DDL
+      </p>
+
+    <p class="p">
+        <strong class="ph b">Usage notes:</strong>
+      </p>
+
+    <p class="p">
+      A database is physically represented as a directory in HDFS, with a filename extension <code class="ph codeph">.db</code>,
+      under the main Impala data directory. If the associated HDFS directory does not exist, it is created for you.
+      All databases and their associated directories are top-level objects, with no physical or logical nesting.
+    </p>
+
+    <p class="p">
+      After creating a database, to make it the current database within an <span class="keyword cmdname">impala-shell</span> session,
+      use the <code class="ph codeph">USE</code> statement. You can refer to tables in the current database without prepending
+      any qualifier to their names.
+    </p>
+
+    <p class="p">
+      When you first connect to Impala through <span class="keyword cmdname">impala-shell</span>, the database you start in (before
+      issuing any <code class="ph codeph">CREATE DATABASE</code> or <code class="ph codeph">USE</code> statements) is named
+      <code class="ph codeph">default</code>.
+    </p>
+
+    <div class="p">
+        Impala includes another predefined database, <code class="ph codeph">_impala_builtins</code>, that serves as the location
+        for the <a class="xref" href="../shared/../topics/impala_functions.html#builtins">built-in functions</a>. To see the built-in
+        functions, use a statement like the following:
+<pre class="pre codeblock"><code>show functions in _impala_builtins;
+show functions in _impala_builtins like '*<var class="keyword varname">substring</var>*';
+</code></pre>
+      </div>
+
+    <p class="p">
+      After creating a database, your <span class="keyword cmdname">impala-shell</span> session or another
+      <span class="keyword cmdname">impala-shell</span> connected to the same node can immediately access that database. To access
+      the database through the Impala daemon on a different node, issue the <code class="ph codeph">INVALIDATE METADATA</code>
+      statement first while connected to that other node.
+    </p>
+
+    <p class="p">
+      Setting the <code class="ph codeph">LOCATION</code> attribute for a new database is a way to work with sets of files in an
+      HDFS directory structure outside the default Impala data directory, as opposed to setting the
+      <code class="ph codeph">LOCATION</code> attribute for each individual table.
+    </p>
+
+    <p class="p">
+        If you connect to different Impala nodes within an <span class="keyword cmdname">impala-shell</span> session for
+        load-balancing purposes, you can enable the <code class="ph codeph">SYNC_DDL</code> query option to make each DDL
+        statement wait before returning, until the new or changed metadata has been received by all the Impala
+        nodes. See <a class="xref" href="../shared/../topics/impala_sync_ddl.html#sync_ddl">SYNC_DDL Query Option</a> for details.
+      </p>
+
+    <p class="p">
+        <strong class="ph b">Hive considerations:</strong>
+      </p>
+
+    <p class="p">
+      When you create a database in Impala, the database can also be used by Hive.
+      When you create a database in Hive, issue an <code class="ph codeph">INVALIDATE METADATA</code>
+      statement in Impala to make Impala permanently aware of the new database.
+    </p>
+
+    <p class="p">
+      The <code class="ph codeph">SHOW DATABASES</code> statement lists all databases, or the databases whose name
+      matches a wildcard pattern. <span class="ph">In <span class="keyword">Impala 2.5</span> and higher, the
+      <code class="ph codeph">SHOW DATABASES</code> output includes a second column that displays the associated
+      comment, if any, for each database.</span>
+    </p>
+
+    <p class="p">
+        <strong class="ph b">Amazon S3 considerations:</strong>
+      </p>
+
+    <p class="p">
+      To specify that any tables created within a database reside on the Amazon S3 system,
+      you can include an <code class="ph codeph">s3a://</code> prefix on the <code class="ph codeph">LOCATION</code>
+      attribute. In <span class="keyword">Impala 2.6</span> and higher, Impala automatically creates any
+      required folders as the databases, tables, and partitions are created, and removes
+      them when they are dropped.
+    </p>
+
+    <p class="p">
+        In <span class="keyword">Impala 2.6</span> and higher, Impala DDL statements such as
+        <code class="ph codeph">CREATE DATABASE</code>, <code class="ph codeph">CREATE TABLE</code>, <code class="ph codeph">DROP DATABASE CASCADE</code>,
+        <code class="ph codeph">DROP TABLE</code>, and <code class="ph codeph">ALTER TABLE [ADD|DROP] PARTITION</code> can create or remove folders
+        as needed in the Amazon S3 system. Prior to <span class="keyword">Impala 2.6</span>, you had to create folders yourself and point
+        Impala database, tables, or partitions at them, and manually remove folders when no longer needed.
+        See <a class="xref" href="../shared/../topics/impala_s3.html#s3">Using Impala with the Amazon S3 Filesystem</a> for details about reading and writing S3 data with Impala.
+      </p>
+
+    <p class="p">
+        <strong class="ph b">Cancellation:</strong> Cannot be cancelled.
+      </p>
+
+    <p class="p">
+        <strong class="ph b">HDFS permissions:</strong>
+      </p>
+    <p class="p">
+      The user ID that the <span class="keyword cmdname">impalad</span> daemon runs under,
+      typically the <code class="ph codeph">impala</code> user, must have write
+      permission for the parent HDFS directory under which the database
+      is located.
+    </p>
+
+    <p class="p">
+        <strong class="ph b">Examples:</strong>
+      </p>
+
+    <pre class="pre codeblock"><code>create database first_db;
+use first_db;
+create table t1 (x int);
+
+create database second_db;
+use second_db;
+-- Each database has its own namespace for tables.
+-- You can reuse the same table names in each database.
+create table t1 (s string);
+
+create database temp;
+
+-- You can either USE a database after creating it,
+-- or qualify all references to the table name with the name of the database.
+-- Here, tables T2 and T3 are both created in the TEMP database.
+
+create table temp.t2 (x int, y int);
+use database temp;
+create table t3 (s string);
+
+-- You cannot drop a database while it is selected by the USE statement.
+drop database temp;
+<em class="ph i">ERROR: AnalysisException: Cannot drop current default database: temp</em>
+
+-- The always-available database 'default' is a convenient one to USE
+-- before dropping a database you created.
+use default;
+
+-- Before dropping a database, first drop all the tables inside it,
+<span class="ph">-- or in <span class="keyword">Impala 2.3</span> and higher use the CASCADE clause.</span>
+drop database temp;
+ERROR: ImpalaRuntimeException: Error making 'dropDatabase' RPC to Hive Metastore:
+CAUSED BY: InvalidOperationException: Database temp is not empty
+show tables in temp;
++------+
+| name |
++------+
+| t3   |
++------+
+
+<span class="ph">-- <span class="keyword">Impala 2.3</span> and higher:</span>
+<span class="ph">drop database temp cascade;</span>
+
+-- Earlier releases:
+drop table temp.t3;
+drop database temp;
+</code></pre>
+
+    <p class="p">
+        <strong class="ph b">Related information:</strong>
+      </p>
+
+    <p class="p">
+      <a class="xref" href="impala_databases.html#databases">Overview of Impala Databases</a>, <a class="xref" href="impala_drop_database.html#drop_database">DROP DATABASE Statement</a>,
+      <a class="xref" href="impala_use.html#use">USE Statement</a>, <a class="xref" href="impala_show.html#show_databases">SHOW DATABASES</a>,
+      <a class="xref" href="impala_tables.html#tables">Overview of Impala Tables</a>
+    </p>
+  </div>
+<nav role="navigation" class="related-links"><div class="familylinks"><div class="parentlink"><strong>Parent topic:</strong> <a class="link" href="../topics/impala_langref_sql.html">Impala SQL Statements</a></div></div></nav></article></main></body></html>
\ No newline at end of file