Posted to commits@impala.apache.org by jr...@apache.org on 2016/10/29 00:33:49 UTC

[1/7] incubator-impala git commit: New files needed to make PDF build happy.

Repository: incubator-impala
Updated Branches:
  refs/heads/doc_prototype 8039fbb3b -> 1fcc8ceec


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1fcc8cee/docs/topics/rg_impala_vd.xml
----------------------------------------------------------------------
diff --git a/docs/topics/rg_impala_vd.xml b/docs/topics/rg_impala_vd.xml
new file mode 100644
index 0000000..820db3c
--- /dev/null
+++ b/docs/topics/rg_impala_vd.xml
@@ -0,0 +1,1165 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept audience="PDF" id="impala_versions">
+  <title>Impala Version and Download Information</title>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Installing"/>
+      <data name="Category" value="Upgrading"/>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="Packages"/>
+      <data name="Category" value="Downloading"/>
+      <data name="Category" value="Versions"/>
+    </metadata>
+  </prolog>
+  <conbody>
+
+    <p>
+      The information in this section applies to CDH 4 clusters, where Impala is
+      downloaded and installed separately from CDH itself. If you are running a
+      CDH 5 cluster, you can disregard this section; it is included with the
+      CDH 5 documentation for users who manage CDH 4 clusters through
+      Cloudera Manager 5.
+    </p>
+
+    <p>You can download the following Impala releases:</p>
+    <ul>
+      <li><xref href="#impala_downloads_270" format="dita"/> (latest 2.7.x, for CDH 5 only)</li>
+      <li><xref href="#impala_downloads_260" format="dita"/> (latest 2.6.x, for CDH 5 only)</li>
+      <li><xref href="#impala_downloads_250" format="dita"/> (latest 2.5.x, for CDH 5 only)</li>
+      <li><xref href="#impala_downloads_240" format="dita"/> (latest 2.4.x, for CDH 5 only)</li>
+      <li><xref href="#impala_downloads_230" format="dita"/> (latest 2.3.x, for CDH 5 only)</li>
+      <li><xref href="#impala_downloads_220" format="dita"/> (latest 2.2.x, for CDH 5 only)</li>
+      <li><xref href="#impala_downloads_210" format="dita"/> (latest 2.1.x, for CDH 4 or
+        CDH 5)</li>
+      <li><xref href="#impala_downloads_201" format="dita"/> (latest 2.0.x, for CDH 4 or
+        CDH 5)</li>
+      <li><xref href="#impala_downloads_200" format="dita"/></li>
+      <li><xref href="#impala_downloads_143" format="dita"/> (latest 1.4.x, for CDH 4 or
+        CDH 5)</li>
+      <li><xref href="#impala_downloads_140" format="dita"/></li>
+      <li><xref href="#impala_downloads_131" format="dita"/> (latest 1.3.x, for CDH 4 or
+        CDH 5)</li>
+      <li><xref href="#impala_downloads_124" format="dita"/> (latest 1.2.x, for CDH 4)</li>
+      <li><xref href="#impala_downloads_123" format="dita"/> (for CDH 4; also bundled with
+        CDH 5 beta 2)</li>
+      <li><xref href="#impala_downloads_122" format="dita"/></li>
+      <li><xref href="#impala_downloads_121" format="dita"/></li>
+      <li><xref href="#impala_downloads_120" format="dita"/> (for use with CDH 5 beta 1)</li>
+      <li><xref href="#impala_downloads_111" format="dita"/> (latest 1.1.x)</li>
+      <li audience="Cloudera"><xref href="#impala_downloads_110" format="dita"/></li>
+      <li><xref href="#impala_downloads_101" format="dita"/> (latest 1.0.x)</li>
+      <li audience="Cloudera"><xref href="#impala_downloads_100" format="dita"/></li>
+    </ul>
+    <p>For installation instructions, see <xref href="impala_install.xml#install"/>.</p>
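+    <p>In broad terms, installing any of the releases listed below means adding the
+      repository definition for your platform to the system package manager and then
+      installing the Impala packages; see the installation instructions linked above
+      for the authoritative steps. The following is only an illustrative sketch for an
+      Ubuntu 12.04 (Precise) host, and the exact set of packages you install depends
+      on the roles each host plays in the cluster:</p>
+<codeblock>$ # Download the cloudera.list file for your chosen release into the APT sources directory.
+$ sudo wget -O /etc/apt/sources.list.d/cloudera-impala.list \
+    https://archive.cloudera.com/impala/ubuntu/precise/amd64/impala/cloudera.list
+$ sudo apt-get update
+$ # Install the Impala daemons and the interactive shell.
+$ sudo apt-get install impala impala-server impala-state-store impala-shell</codeblock>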
+<!-- JDR: hoisted this link out of the first subtopic, conditionalized for CDH 4 / CDH 5,
+         and commented out later occurrences of the same CDH 4-based link. -->
+    <p audience="integrated">For information about new features in the latest release, and issues that are
+      fixed or still outstanding, see the <xref href="releases.xml"/>.</p>
+    <p audience="standalone">For information about new features in the latest release, and issues that are
+      fixed or still outstanding, see the
+      <xref href="impala_relnotes.xml#relnotes"/>.</p>
+  </conbody>
+  <concept id="impala_downloads">
+    <title>Impala Download Information</title>
+    <conbody>
+      <note>All packages are 64-bit.</note>
+    </conbody>
+    <concept rev="2.7.0" id="impala_downloads_270">
+      <title>Impala 2.7.0</title>
+      <conbody>
+        <p><b>Release Date:</b> October 2016 <b>Status:</b> Production</p>
+        <p>Impala 2.7.0 and higher are only available for CDH 5, not for CDH 4. For the Impala bundled with CDH 5, see
+          <xref href="http://www.cloudera.com/documentation/enterprise/latest/topics/rg_cdh_vd.html" scope="external" format="html">CDH
+          Version and Packaging Information</xref>.</p>
+      </conbody>
+    </concept>
+    <concept rev="2.6.0" id="impala_downloads_260">
+      <title>Impala 2.6.0</title>
+      <conbody>
+        <p><b>Release Date:</b> July 2016 <b>Status:</b> Production</p>
+        <p>Impala 2.6.0 and higher are only available for CDH 5, not for CDH 4. For the Impala bundled with CDH 5, see
+          <xref href="http://www.cloudera.com/documentation/enterprise/latest/topics/rg_cdh_vd.html" scope="external" format="html">CDH
+          Version and Packaging Information</xref>.</p>
+      </conbody>
+    </concept>
+    <concept rev="2.5.0" id="impala_downloads_250">
+      <title>Impala 2.5.0</title>
+      <conbody>
+        <p><b>Release Date:</b> April 2016 <b>Status:</b> Production</p>
+        <p>Impala 2.5.0 and higher are only available for CDH 5, not for CDH 4. For the Impala bundled with CDH 5, see
+          <xref href="http://www.cloudera.com/documentation/enterprise/latest/topics/rg_cdh_vd.html" scope="external" format="html">CDH
+          Version and Packaging Information</xref>.</p>
+      </conbody>
+    </concept>
+    <concept rev="2.4.0" id="impala_downloads_240">
+      <title>Impala 2.4.0</title>
+      <conbody>
+        <p><b>Release Date:</b> March 2016 <b>Status:</b> Production</p>
+        <p>Impala 2.4.0 and higher are only available for CDH 5, not for CDH 4. For the Impala bundled with CDH 5, see
+          <xref href="http://www.cloudera.com/documentation/enterprise/latest/topics/rg_cdh_vd.html" scope="external" format="html">CDH
+          Version and Packaging Information</xref>.</p>
+      </conbody>
+    </concept>
+    <concept rev="2.3.0" id="impala_downloads_230">
+      <title>Impala 2.3.0</title>
+      <conbody>
+        <p><b>Release Date:</b> November 2015 <b>Status:</b> Production</p>
+        <p>Impala 2.3.0 and higher are only available for CDH 5, not for CDH 4. For the Impala bundled with CDH 5, see
+          <xref href="http://www.cloudera.com/documentation/enterprise/latest/topics/rg_cdh_vd.html" scope="external" format="html">CDH
+          Version and Packaging Information</xref>.</p>
+      </conbody>
+    </concept>
+    <concept rev="2.2.0" id="impala_downloads_220">
+      <title>Impala 2.2.0</title>
+      <conbody>
+        <p><b>Release Date:</b> April 2015 <b>Status:</b> Production</p>
+        <p>Impala 2.2.0 and higher are only available for CDH 5, not for CDH 4. For the Impala bundled with CDH 5, see
+          <xref href="http://www.cloudera.com/documentation/enterprise/latest/topics/rg_cdh_vd.html" scope="external" format="html">CDH
+          Version and Packaging Information</xref>.</p>
+      </conbody>
+    </concept>
+    <concept rev="2.1.0" id="impala_downloads_210">
+      <title>Impala 2.1.0</title>
+      <conbody>
+        <p><b>Release Date:</b> December 2014 <b>Status:</b> Production</p>
+        <p>These Impala packages work with CDH 4. For the Impala bundled with CDH 5, see
+          <xref href="http://www.cloudera.com/documentation/enterprise/latest/topics/rg_cdh_vd.html" scope="external" format="html">CDH
+          Version and Packaging Information</xref>.</p>
+        <table>
+          <tgroup cols="3">
+            <colspec colname="1" colwidth="3120*"/>
+            <colspec colname="2" colwidth="3120*"/>
+            <colspec colname="3" colwidth="3120*"/>
+            <thead>
+              <row>
+                <entry><p><b>Repository Type</b></p></entry>
+                <entry><p><b>Location</b></p></entry>
+                <entry><p><b>Repo or List file</b></p></entry>
+              </row>
+            </thead>
+            <tbody>
+              <row>
+                <entry><p>Yum RHEL 6/CentOS 6 (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/redhat/6/x86_64/impala/2/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/redhat/6/x86_64/impala/cloudera-impala.repo" scope="external" format="repo"/></p></entry>
+              </row>
+              <row>
+                <entry><p>Yum RHEL 5/CentOS 5 (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/redhat/5/x86_64/impala/2/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/redhat/5/x86_64/impala/cloudera-impala.repo" scope="external" format="repo"/></p></entry>
+              </row>
+              <row>
+                <entry><p>Zypper SLES 11 (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/sles/11/x86_64/impala/2/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/sles/11/x86_64/impala/cloudera-impala.repo" scope="external" format="repo"/></p></entry>
+              </row>
+              <row>
+                <entry><p>Apt-Get Ubuntu 10.04 (Lucid) (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/ubuntu/lucid/amd64/impala/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/ubuntu/lucid/amd64/impala/cloudera.list" scope="external" format="list"/></p></entry>
+              </row>
+              <row>
+                <entry><p>Apt-Get Ubuntu 12.04 (Precise) (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/ubuntu/precise/amd64/impala/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/ubuntu/precise/amd64/impala/cloudera.list" scope="external" format="list"/></p></entry>
+              </row>
+              <row>
+                <entry><p>Apt-Get Debian (Squeeze) (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/debian/squeeze/amd64/impala/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/debian/squeeze/amd64/impala/cloudera.list" scope="external" format="list"/></p></entry>
+              </row>
+            </tbody>
+          </tgroup>
+        </table>
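+        <p>For example, on a RHEL 6 or CentOS 6 host you might save the
+          <codeph>cloudera-impala.repo</codeph> file from the table above into
+          <filepath>/etc/yum.repos.d/</filepath> and then install through
+          <codeph>yum</codeph>. Treat this as a sketch only, and adjust the
+          package list to match the roles of each host:</p>
+<codeblock>$ # Add the Impala repository definition for RHEL 6 / CentOS 6.
+$ sudo wget -O /etc/yum.repos.d/cloudera-impala.repo \
+    https://archive.cloudera.com/impala/redhat/6/x86_64/impala/cloudera-impala.repo
+$ # Install the Impala daemons, the catalog and state store services, and the shell.
+$ sudo yum install impala impala-server impala-state-store impala-catalog impala-shell</codeblock>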
+      </conbody>
+    </concept>
+    <concept rev="2.0.1" id="impala_downloads_201">
+      <title>Impala 2.0.1</title>
+      <conbody>
+        <p><b>Release Date:</b> December 2014 <b>Status:</b> Production</p>
+        <p>These Impala packages work with CDH 4. For the Impala bundled with CDH 5, see
+        <!-- Original URL: http://www.cloudera.com/content/cloudera-content/cloudera-docs/CDH5/latest/CDH-Version-and-Packaging-Information/CDH-Version-and-Packaging-Information.html -->
+        <xref href="http://www.cloudera.com/documentation/enterprise/latest/topics/rg_cdh_vd.html" scope="external" format="html">CDH
+          Version and Packaging Information</xref>.</p>
+<!--
+        <p>
+          For information about new features in the latest release, and issues
+          that are fixed or still outstanding, see the
+          <xref href="/Content/releases_xi41194.xml"/>.
+        </p>
+-->
+        <table>
+          <tgroup cols="3">
+            <colspec colname="1" colwidth="3120*"/>
+            <colspec colname="2" colwidth="3120*"/>
+            <colspec colname="3" colwidth="3120*"/>
+            <thead>
+              <row>
+                <entry><p><b>Repository Type</b></p></entry>
+                <entry><p><b>Location</b></p></entry>
+                <entry><p><b>Repo or List file</b></p></entry>
+              </row>
+            </thead>
+            <tbody>
+              <row>
+                <entry><p>Yum RHEL 6/CentOS 6 (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/redhat/6/x86_64/impala/2.0.1/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/redhat/6/x86_64/impala/cloudera-impala.repo" scope="external" format="repo"/></p></entry>
+              </row>
+              <row>
+                <entry><p>Yum RHEL 5/CentOS 5 (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/redhat/5/x86_64/impala/2.0.1/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/redhat/5/x86_64/impala/cloudera-impala.repo" scope="external" format="repo"/></p></entry>
+              </row>
+              <row>
+                <entry><p>Zypper SLES 11 (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/sles/11/x86_64/impala/2.0.1/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/sles/11/x86_64/impala/cloudera-impala.repo" scope="external" format="repo"/></p></entry>
+              </row>
+              <row>
+                <entry><p>Apt-Get Ubuntu 10.04 (Lucid) (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/ubuntu/lucid/amd64/impala/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/ubuntu/lucid/amd64/impala/cloudera.list" scope="external" format="list"/></p></entry>
+              </row>
+              <row>
+                <entry><p>Apt-Get Ubuntu 12.04 (Precise) (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/ubuntu/precise/amd64/impala/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/ubuntu/precise/amd64/impala/cloudera.list" scope="external" format="list"/></p></entry>
+              </row>
+              <row>
+                <entry><p>Apt-Get Debian (Squeeze) (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/debian/squeeze/amd64/impala/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/debian/squeeze/amd64/impala/cloudera.list" scope="external" format="list"/></p></entry>
+              </row>
+            </tbody>
+          </tgroup>
+        </table>
+      </conbody>
+    </concept>
+    <concept rev="2.0.0" id="impala_downloads_200">
+      <title>Impala 2.0.0</title>
+      <conbody>
+        <p><b>Release Date:</b> October 2014 <b>Status:</b> Production</p>
+        <p>These Impala packages work with CDH 4. For the Impala bundled with CDH 5, see
+        <!-- Original URL: http://www.cloudera.com/content/cloudera-content/cloudera-docs/CDH5/latest/CDH-Version-and-Packaging-Information/CDH-Version-and-Packaging-Information.html -->
+        <xref href="http://www.cloudera.com/documentation/enterprise/latest/topics/rg_cdh_vd.html" scope="external" format="html">CDH
+          Version and Packaging Information</xref>.</p>
+<!--
+        <p>
+          For information about new features in the latest release, and issues
+          that are fixed or still outstanding, see the
+          <xref href="/Content/releases_xi41194.xml"/>.
+        </p>
+-->
+        <table>
+          <tgroup cols="3">
+            <colspec colname="1" colwidth="3120*"/>
+            <colspec colname="2" colwidth="3120*"/>
+            <colspec colname="3" colwidth="3120*"/>
+            <thead>
+              <row>
+                <entry><p><b>Repository Type</b></p></entry>
+                <entry><p><b>Location</b></p></entry>
+                <entry><p><b>Repo or List file</b></p></entry>
+              </row>
+            </thead>
+            <tbody>
+              <row>
+                <entry><p>Yum RHEL 6/CentOS 6 (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/redhat/6/x86_64/impala/2/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/redhat/6/x86_64/impala/cloudera-impala.repo" scope="external" format="repo"/></p></entry>
+              </row>
+              <row>
+                <entry><p>Yum RHEL 5/CentOS 5 (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/redhat/5/x86_64/impala/2/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/redhat/5/x86_64/impala/cloudera-impala.repo" scope="external" format="repo"/></p></entry>
+              </row>
+              <row>
+                <entry><p>Zypper SLES 11 (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/sles/11/x86_64/impala/2/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/sles/11/x86_64/impala/cloudera-impala.repo" scope="external" format="repo"/></p></entry>
+              </row>
+              <row>
+                <entry><p>Apt-Get Ubuntu 10.04 (Lucid) (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/ubuntu/lucid/amd64/impala/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/ubuntu/lucid/amd64/impala/cloudera.list" scope="external" format="list"/></p></entry>
+              </row>
+              <row>
+                <entry><p>Apt-Get Ubuntu 12.04 (Precise) (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/ubuntu/precise/amd64/impala/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/ubuntu/precise/amd64/impala/cloudera.list" scope="external" format="list"/></p></entry>
+              </row>
+              <row>
+                <entry><p>Apt-Get Debian (Squeeze) (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/debian/squeeze/amd64/impala/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/debian/squeeze/amd64/impala/cloudera.list" scope="external" format="list"/></p></entry>
+              </row>
+            </tbody>
+          </tgroup>
+        </table>
+      </conbody>
+    </concept>
+    <concept rev="1.4.3" id="impala_downloads_143">
+      <title>Impala 1.4.3</title>
+      <conbody>
+        <p><b>Release Date:</b> December 2014 <b>Status:</b> Production</p>
+        <p>These Impala packages work with CDH 4. For the Impala bundled with CDH 5, see
+        <!-- Original URL: http://www.cloudera.com/content/cloudera-content/cloudera-docs/CDH5/latest/CDH-Version-and-Packaging-Information/CDH-Version-and-Packaging-Information.html -->
+        <xref href="http://www.cloudera.com/documentation/enterprise/latest/topics/rg_cdh_vd.html" scope="external" format="html">CDH
+          Version and Packaging Information</xref>.</p>
+<!--
+        <p>
+          For information about new features in the latest release, and issues
+          that are fixed or still outstanding, see the
+          <xref href="/Content/impala_relnotes_xi44305.xml#relnotes"/>.
+        </p>
+-->
+        <table>
+          <tgroup cols="3">
+            <colspec colname="1" colwidth="3120*"/>
+            <colspec colname="2" colwidth="3120*"/>
+            <colspec colname="3" colwidth="3120*"/>
+            <thead>
+              <row>
+                <entry><p><b>Repository Type</b></p></entry>
+                <entry><p><b>Location</b></p></entry>
+                <entry><p><b>Repo or List file</b></p></entry>
+              </row>
+            </thead>
+            <tbody>
+              <row>
+                <entry><p>Yum RHEL 6/CentOS 6 (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/redhat/6/x86_64/impala/1.4.3/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/redhat/6/x86_64/impala/cloudera-impala.repo" scope="external" format="repo"/></p></entry>
+              </row>
+              <row>
+                <entry><p>Yum RHEL 5/CentOS 5 (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/redhat/5/x86_64/impala/1.4.3/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/redhat/5/x86_64/impala/cloudera-impala.repo" scope="external" format="repo"/></p></entry>
+              </row>
+              <row>
+                <entry><p>Zypper SLES 11 (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/sles/11/x86_64/impala/1.4.3/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/sles/11/x86_64/impala/cloudera-impala.repo" scope="external" format="repo"/></p></entry>
+              </row>
+              <row>
+                <entry><p>Apt-Get Ubuntu 10.04 (Lucid) (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/ubuntu/lucid/amd64/impala/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/ubuntu/lucid/amd64/impala/cloudera.list" scope="external" format="list"/></p></entry>
+              </row>
+              <row>
+                <entry><p>Apt-Get Ubuntu 12.04 (Precise) (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/ubuntu/precise/amd64/impala/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/ubuntu/precise/amd64/impala/cloudera.list" scope="external" format="list"/></p></entry>
+              </row>
+              <row>
+                <entry><p>Apt-Get Debian (Squeeze) (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/debian/squeeze/amd64/impala/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/debian/squeeze/amd64/impala/cloudera.list" scope="external" format="list"/></p></entry>
+              </row>
+            </tbody>
+          </tgroup>
+        </table>
+      </conbody>
+    </concept>
+    <concept rev="1.4.0" id="impala_downloads_140">
+      <title>Impala 1.4.0</title>
+      <conbody>
+        <p><b>Release Date:</b> July 2014 <b>Status:</b> Production</p>
+        <p>These Impala packages work with CDH 4. For the Impala bundled with CDH 5, see
+        <!-- Original URL: http://www.cloudera.com/content/cloudera-content/cloudera-docs/CDH5/latest/CDH-Version-and-Packaging-Information/CDH-Version-and-Packaging-Information.html -->
+          <xref href="http://www.cloudera.com/documentation/enterprise/latest/topics/rg_cdh_vd.html" scope="external" format="html">CDH
+          Version and Packaging Information</xref>.</p>
+<!--
+        <p>
+          For information about new features in the latest release, and issues
+          that are fixed or still outstanding, see the
+          <xref href="/Content/impala_relnotes_xi44305.xml#relnotes"/>.
+        </p>
+-->
+        <table>
+          <tgroup cols="3">
+            <colspec colname="1" colwidth="3120*"/>
+            <colspec colname="2" colwidth="3120*"/>
+            <colspec colname="3" colwidth="3120*"/>
+            <thead>
+              <row>
+                <entry><p><b>Repository Type</b></p></entry>
+                <entry><p><b>Location</b></p></entry>
+                <entry><p><b>Repo or List file</b></p></entry>
+              </row>
+            </thead>
+            <tbody>
+              <row>
+                <entry><p>Yum RHEL 6/CentOS 6 (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/redhat/6/x86_64/impala/1.4.0/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/redhat/6/x86_64/impala/cloudera-impala.repo" scope="external" format="repo"/></p></entry>
+              </row>
+              <row>
+                <entry><p>Yum RHEL 5/CentOS 5 (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/redhat/5/x86_64/impala/1.4.0/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/redhat/5/x86_64/impala/cloudera-impala.repo" scope="external" format="repo"/></p></entry>
+              </row>
+              <row>
+                <entry><p>Zypper SLES 11 (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/sles/11/x86_64/impala/1.4.0/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/sles/11/x86_64/impala/cloudera-impala.repo" scope="external" format="repo"/></p></entry>
+              </row>
+              <row>
+                <entry><p>Apt-Get Ubuntu 10.04 (Lucid) (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/ubuntu/lucid/amd64/impala/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/ubuntu/lucid/amd64/impala/cloudera.list" scope="external" format="list"/></p></entry>
+              </row>
+              <row>
+                <entry><p>Apt-Get Ubuntu 12.04 (Precise) (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/ubuntu/precise/amd64/impala/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/ubuntu/precise/amd64/impala/cloudera.list" scope="external" format="list"/></p></entry>
+              </row>
+              <row>
+                <entry><p>Apt-Get Debian (Squeeze) (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/debian/squeeze/amd64/impala/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/debian/squeeze/amd64/impala/cloudera.list" scope="external" format="list"/></p></entry>
+              </row>
+            </tbody>
+          </tgroup>
+        </table>
+      </conbody>
+    </concept>
+<!-- It was decided not to have a CDH 4 release for Impala 1.3.3. Therefore hiding the following topic. -->
+    <concept audience="Cloudera" id="impala_downloads_133">
+      <title>Impala 1.3.3</title>
+      <conbody>
+        <p><b>Release Date:</b> December 2014 <b>Status:</b> Production</p>
+        <p>These Impala packages work with CDH 4. For the Impala bundled with CDH 5, see
+        <!-- Original URL: http://www.cloudera.com/content/cloudera-content/cloudera-docs/CDH5/latest/CDH-Version-and-Packaging-Information/CDH-Version-and-Packaging-Information.html -->
+        <xref href="http://www.cloudera.com/documentation/enterprise/latest/topics/rg_cdh_vd.html" scope="external" format="html">CDH
+          Version and Packaging Information</xref>.</p>
+<!--
+        <p>
+          For information about new features in the latest release, and issues
+          that are fixed or still outstanding, see the
+          <xref href="/Content/impala_relnotes_xi44305.xml#relnotes"/>.
+        </p>
+-->
+        <table>
+          <tgroup cols="3">
+            <colspec colname="1" colwidth="3120*"/>
+            <colspec colname="2" colwidth="3120*"/>
+            <colspec colname="3" colwidth="3120*"/>
+            <thead>
+              <row>
+                <entry><p><b>Repository Type</b></p></entry>
+                <entry><p><b>Location</b></p></entry>
+                <entry><p><b>Repo or List file</b></p></entry>
+              </row>
+            </thead>
+            <tbody>
+              <row>
+                <entry><p>Yum RHEL 6/CentOS 6 (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/redhat/6/x86_64/impala/1.3.3/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/redhat/6/x86_64/impala/cloudera-impala.repo" scope="external" format="repo"/></p></entry>
+              </row>
+              <row>
+                <entry><p>Yum RHEL 5/CentOS 5 (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/redhat/5/x86_64/impala/1.3.3/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/redhat/5/x86_64/impala/cloudera-impala.repo" scope="external" format="repo"/></p></entry>
+              </row>
+              <row>
+                <entry><p>Zypper SLES 11 (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/sles/11/x86_64/impala/1.3.3/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/sles/11/x86_64/impala/cloudera-impala.repo" scope="external" format="repo"/></p></entry>
+              </row>
+              <row>
+                <entry><p>Apt-Get Ubuntu 10.04 (Lucid) (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/ubuntu/lucid/amd64/impala/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/ubuntu/lucid/amd64/impala/cloudera.list" scope="external" format="list"/></p></entry>
+              </row>
+              <row>
+                <entry><p>Apt-Get Ubuntu 12.04 (Precise) (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/ubuntu/precise/amd64/impala/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/ubuntu/precise/amd64/impala/cloudera.list" scope="external" format="list"/></p></entry>
+              </row>
+              <row>
+                <entry><p>Apt-Get Debian (Squeeze) (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/debian/squeeze/amd64/impala/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/debian/squeeze/amd64/impala/cloudera.list" scope="external" format="list"/></p></entry>
+              </row>
+            </tbody>
+          </tgroup>
+        </table>
+      </conbody>
+    </concept>
+    <concept id="impala_downloads_131">
+      <title>Impala 1.3.1</title>
+      <conbody>
+        <p><b>Release Date:</b> May 2014 <b>Status:</b> Production</p>
+        <p>These Impala packages work with CDH 4. For the Impala bundled with CDH 5, see
+        <!-- Original URL: http://www.cloudera.com/content/cloudera-content/cloudera-docs/CDH5/latest/CDH-Version-and-Packaging-Information/CDH-Version-and-Packaging-Information.html -->
+        <xref href="http://www.cloudera.com/documentation/enterprise/latest/topics/rg_cdh_vd.html" scope="external" format="html">CDH
+          Version and Packaging Information</xref>.</p>
+<!--
+        <p>
+          For information about new features in the latest release, and issues
+          that are fixed or still outstanding, see the
+          <xref href="/Content/impala_relnotes_xi44305.xml#relnotes"/>.
+        </p>
+-->
+        <table>
+          <tgroup cols="3">
+            <colspec colname="1" colwidth="3120*"/>
+            <colspec colname="2" colwidth="3120*"/>
+            <colspec colname="3" colwidth="3120*"/>
+            <thead>
+              <row>
+                <entry><p><b>Repository Type</b></p></entry>
+                <entry><p><b>Location</b></p></entry>
+                <entry><p><b>Repo or List file</b></p></entry>
+              </row>
+            </thead>
+            <tbody>
+              <row>
+                <entry><p>Yum RHEL 6/CentOS 6 (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/redhat/6/x86_64/impala/1.3.1/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/redhat/6/x86_64/impala/cloudera-impala.repo" scope="external" format="repo"/></p></entry>
+              </row>
+              <row>
+                <entry><p>Yum RHEL 5/CentOS 5 (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/redhat/5/x86_64/impala/1.3.1/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/redhat/5/x86_64/impala/cloudera-impala.repo" scope="external" format="repo"/></p></entry>
+              </row>
+              <row>
+                <entry><p>Zypper SLES 11 (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/sles/11/x86_64/impala/1.3.1/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/sles/11/x86_64/impala/cloudera-impala.repo" scope="external" format="repo"/></p></entry>
+              </row>
+              <row>
+                <entry><p>Apt-Get Ubuntu 10.04 (Lucid) (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/ubuntu/lucid/amd64/impala/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/ubuntu/lucid/amd64/impala/cloudera.list" scope="external" format="list"/></p></entry>
+              </row>
+              <row>
+                <entry><p>Apt-Get Ubuntu 12.04 (Precise) (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/ubuntu/precise/amd64/impala/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/ubuntu/precise/amd64/impala/cloudera.list" scope="external" format="list"/></p></entry>
+              </row>
+              <row>
+                <entry><p>Apt-Get Debian (Squeeze) (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/debian/squeeze/amd64/impala/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/debian/squeeze/amd64/impala/cloudera.list" scope="external" format="list"/></p></entry>
+              </row>
+            </tbody>
+          </tgroup>
+        </table>
+      </conbody>
+    </concept>
+    <concept id="impala_downloads_124">
+      <title>Impala 1.2.4</title>
+      <conbody>
+        <p><b>Release Date:</b> February 2014 <b>Status:</b> Production</p>
+        <p>This Impala version works with CDH 4.</p>
+<!--
+        <p> For information about new features in the latest release, and issues
+          that are fixed or still outstanding, see the <xref href="/Content/impala_relnotes_xi44305.xml#relnotes"/>.</p>
+-->
+        <table>
+          <tgroup cols="3">
+            <colspec colname="1" colwidth="3120*"/>
+            <colspec colname="2" colwidth="3120*"/>
+            <colspec colname="3" colwidth="3120*"/>
+            <thead>
+              <row>
+                <entry><p><b>Repository Type</b></p></entry>
+                <entry><p><b>Location</b></p></entry>
+                <entry><p><b>Repo or List file</b></p></entry>
+              </row>
+            </thead>
+            <tbody>
+              <row>
+                <entry><p>Yum RHEL 6/CentOS 6 (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/redhat/6/x86_64/impala/1.2.4/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/redhat/6/x86_64/impala/cloudera-impala.repo" scope="external" format="repo"/></p></entry>
+              </row>
+              <row>
+                <entry><p>Yum RHEL 5/CentOS 5 (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/redhat/5/x86_64/impala/1.2.4/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/redhat/5/x86_64/impala/cloudera-impala.repo" scope="external" format="repo"/></p></entry>
+              </row>
+              <row>
+                <entry><p>Zypper SLES 11 (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/sles/11/x86_64/impala/1.2.4/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/sles/11/x86_64/impala/cloudera-impala.repo" scope="external" format="repo"/></p></entry>
+              </row>
+              <row>
+                <entry><p>Apt-Get Ubuntu 10.04 (Lucid) (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/ubuntu/lucid/amd64/impala/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/ubuntu/lucid/amd64/impala/cloudera.list" scope="external" format="list"/></p></entry>
+              </row>
+              <row>
+                <entry><p>Apt-Get Ubuntu 12.04 (Precise) (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/ubuntu/precise/amd64/impala/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/ubuntu/precise/amd64/impala/cloudera.list" scope="external" format="list"/></p></entry>
+              </row>
+              <row>
+                <entry><p>Apt-Get Debian (Squeeze) (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/debian/squeeze/amd64/impala/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/debian/squeeze/amd64/impala/cloudera.list" scope="external" format="list"/></p></entry>
+              </row>
+            </tbody>
+          </tgroup>
+        </table>
+      </conbody>
+    </concept>
+    <concept id="impala_downloads_123">
+      <title>Impala 1.2.3</title>
+      <conbody>
+        <p><b>Release Date:</b> December 2013 (for CDH 4), February 2014 (for CDH 5 beta 2)
+          <b>Status:</b> Production</p>
+        <p>This Impala version works with CDH 4, and also comes bundled with CDH 5 beta 2.</p>
+<!--
+        <p> For information about new features in the latest release, and issues
+          that are fixed or still outstanding, see the <xref href="/Content/impala_relnotes_xi44305.xml#relnotes"/>.</p>
+-->
+        <table>
+          <tgroup cols="3">
+            <colspec colname="1" colwidth="3120*"/>
+            <colspec colname="2" colwidth="3120*"/>
+            <colspec colname="3" colwidth="3120*"/>
+            <thead>
+              <row>
+                <entry><p><b>Repository Type</b></p></entry>
+                <entry><p><b>Location</b></p></entry>
+                <entry><p><b>Repo or List file</b></p></entry>
+              </row>
+            </thead>
+            <tbody>
+              <row>
+                <entry><p>Yum RHEL 6/CentOS 6 (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/redhat/6/x86_64/impala/1.2.3/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/redhat/6/x86_64/impala/cloudera-impala.repo" scope="external" format="repo"/></p></entry>
+              </row>
+              <row>
+                <entry><p>Yum RHEL 5/CentOS 5 (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/redhat/5/x86_64/impala/1.2.3/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/redhat/5/x86_64/impala/cloudera-impala.repo" scope="external" format="repo"/></p></entry>
+              </row>
+              <row>
+                <entry><p>Zypper SLES 11 (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/sles/11/x86_64/impala/1.2.3/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/sles/11/x86_64/impala/cloudera-impala.repo" scope="external" format="repo"/></p></entry>
+              </row>
+              <row>
+                <entry><p>Apt-Get Ubuntu 10.04 (Lucid) (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/ubuntu/lucid/amd64/impala/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/ubuntu/lucid/amd64/impala/cloudera.list" scope="external" format="list"/></p></entry>
+              </row>
+              <row>
+                <entry><p>Apt-Get Ubuntu 12.04 (Precise) (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/ubuntu/precise/amd64/impala/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/ubuntu/precise/amd64/impala/cloudera.list" scope="external" format="list"/></p></entry>
+              </row>
+              <row>
+                <entry><p>Apt-Get Debian (Squeeze) (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/debian/squeeze/amd64/impala/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/debian/squeeze/amd64/impala/cloudera.list" scope="external" format="list"/></p></entry>
+              </row>
+            </tbody>
+          </tgroup>
+        </table>
+      </conbody>
+    </concept>
+    <concept id="impala_downloads_122">
+      <title>Impala 1.2.2</title>
+      <conbody>
+        <p><b>Release Date:</b> December 2013 <b>Status:</b> Superseded by Impala 1.2.3 due
+          to an important bug fix</p>
+<!--
+        <p> For information about new features in the latest release, and issues
+          that are fixed or still outstanding, see the <xref href="/Content/impala_relnotes_xi44305.xml#relnotes"/>.</p>
+-->
+        <table>
+          <tgroup cols="3">
+            <colspec colname="1" colwidth="3120*"/>
+            <colspec colname="2" colwidth="3120*"/>
+            <colspec colname="3" colwidth="3120*"/>
+            <thead>
+              <row>
+                <entry><p><b>Repository Type</b></p></entry>
+                <entry><p><b>Location</b></p></entry>
+                <entry><p><b>Repo or List file</b></p></entry>
+              </row>
+            </thead>
+            <tbody>
+              <row>
+                <entry><p>Yum RHEL 6/CentOS 6 (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/redhat/6/x86_64/impala/1.2.2/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/redhat/6/x86_64/impala/cloudera-impala.repo" scope="external" format="repo"/></p></entry>
+              </row>
+              <row>
+                <entry><p>Yum RHEL 5/CentOS 5 (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/redhat/5/x86_64/impala/1.2.2/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/redhat/5/x86_64/impala/cloudera-impala.repo" scope="external" format="repo"/></p></entry>
+              </row>
+              <row>
+                <entry><p>Zypper SLES 11 (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/sles/11/x86_64/impala/1.2.2/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/sles/11/x86_64/impala/cloudera-impala.repo" scope="external" format="repo"/></p></entry>
+              </row>
+              <row>
+                <entry><p>Apt-Get Ubuntu 10.04 (Lucid) (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/ubuntu/lucid/amd64/impala/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/ubuntu/lucid/amd64/impala/cloudera.list" scope="external" format="list"/></p></entry>
+              </row>
+              <row>
+                <entry><p>Apt-Get Ubuntu 12.04 (Precise) (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/ubuntu/precise/amd64/impala/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/ubuntu/precise/amd64/impala/cloudera.list" scope="external" format="list"/></p></entry>
+              </row>
+              <row>
+                <entry><p>Apt-Get Debian (Squeeze) (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/debian/squeeze/amd64/impala/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/debian/squeeze/amd64/impala/cloudera.list" scope="external" format="list"/></p></entry>
+              </row>
+            </tbody>
+          </tgroup>
+        </table>
+      </conbody>
+    </concept>
+    <concept id="impala_downloads_121">
+      <title>Impala 1.2.1</title>
+      <conbody>
+        <p><b>Release Date:</b> November 2013 <b>Status:</b> Production</p>
+<!--
+        <p> For information about new features in the latest release, and issues
+          that are fixed or still outstanding, see the <xref href="/Content/impala_relnotes_xi44305.xml#relnotes"/>.</p>
+-->
+        <table>
+          <tgroup cols="3">
+            <colspec colname="1" colwidth="3120*"/>
+            <colspec colname="2" colwidth="3120*"/>
+            <colspec colname="3" colwidth="3120*"/>
+            <thead>
+              <row>
+                <entry><p><b>Repository Type</b></p></entry>
+                <entry><p><b>Location</b></p></entry>
+                <entry><p><b>Repo or List file</b></p></entry>
+              </row>
+            </thead>
+            <tbody>
+              <row>
+                <entry><p>Yum RHEL 6/CentOS 6 (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/redhat/6/x86_64/impala/1.2.1/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/redhat/6/x86_64/impala/cloudera-impala.repo" scope="external" format="repo"/></p></entry>
+              </row>
+              <row>
+                <entry><p>Yum RHEL 5/CentOS 5 (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/redhat/5/x86_64/impala/1.2.1/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/redhat/5/x86_64/impala/cloudera-impala.repo" scope="external" format="repo"/></p></entry>
+              </row>
+              <row>
+                <entry><p>Zypper SLES 11 (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/sles/11/x86_64/impala/1.2.1/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/sles/11/x86_64/impala/cloudera-impala.repo" scope="external" format="repo"/></p></entry>
+              </row>
+              <row>
+                <entry><p>Apt-Get Ubuntu 10.04 (Lucid) (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/ubuntu/lucid/amd64/impala/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/ubuntu/lucid/amd64/impala/cloudera.list" scope="external" format="list"/></p></entry>
+              </row>
+              <row>
+                <entry><p>Apt-Get Ubuntu 12.04 (Precise) (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/ubuntu/precise/amd64/impala/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/ubuntu/precise/amd64/impala/cloudera.list" scope="external" format="list"/></p></entry>
+              </row>
+              <row>
+                <entry><p>Apt-Get Debian (Squeeze) (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/debian/squeeze/amd64/impala/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/debian/squeeze/amd64/impala/cloudera.list" scope="external" format="list"/></p></entry>
+              </row>
+            </tbody>
+          </tgroup>
+        </table>
+      </conbody>
+    </concept>
+    <concept id="impala_downloads_120">
+      <title>Impala 1.2.0 (Beta)</title>
+      <conbody>
+        <p><b>Release Date:</b> October 2013 <b>Status:</b> Beta, for use with CDH 5 Beta 1</p>
+<!--
+        <p> For information about new features in the latest release, and issues
+          that are fixed or still outstanding, see the <xref href="/Content/impala_relnotes_xi44305.xml#relnotes"/>.</p>
+-->
+        <table>
+          <tgroup cols="5">
+<!--
+            <colspec colname="1" colwidth="3120*"/>
+            <colspec colname="2" colwidth="3120*"/>
+            <colspec colname="3" colwidth="3120*"/>
+          -->
+<!-- Adapted from cdhvd_topic_3.xml. Probably best to conref in the end. -->
+            <colspec colname="1" colwidth="1*"/>
+            <colspec colname="2" colwidth="1.3*"/>
+            <colspec colname="3" colwidth="2.1*"/>
+            <colspec colname="4" colwidth="1.1*"/>
+            <colspec colname="5" colwidth="1.07*"/>
+            <thead>
+              <row>
+                <entry><p><b>CDH 5 Project</b></p></entry>
+                <entry><p><b>Package Version</b></p></entry>
+                <entry><p><b>Tarball Version</b></p></entry>
+                <entry><p><b>Release Notes</b></p></entry>
+                <entry><p><b>Changes File</b></p></entry>
+              </row>
+            </thead>
+<!--
+            <thead>
+              <row>
+                <entry>
+                  <p>
+                    <b>Repository Type</b>
+                  </p>
+                </entry>
+                <entry>
+                  <p>
+                    <b>Location</b>
+                  </p>
+                </entry>
+                <entry>
+                  <p>
+                    <b>Repo or List file</b>
+                  </p>
+                </entry>
+              </row>
+            </thead>
+            -->
+            <tbody>
+              <row>
+                <entry><p>Impala</p></entry>
+                <entry><p>impala-1.2.0+0</p></entry>
+                <entry><p>(none)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/cdh5/cdh/5/impala-1.2.0-cdh5.0.0-beta-1.releasenotes.html" scope="external" format="html">here</xref></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/cdh5/cdh/5/impala-1.2.0-cdh5.0.0-beta-1.CHANGES.txt" scope="external" format="txt">here</xref></p></entry>
+              </row>
+<!--
+              <row>
+                <entry>
+                  <p> Yum RHEL 6/CentOS 6 (64-bit) </p>
+                </entry>
+                <entry>
+                  <p>
+                    <xref href="https://archive.cloudera.com/impala/redhat/6/x86_64/impala/1.2.0/" scope="external" format="html"/>
+                  </p>
+                </entry>
+                <entry>
+                  <p>
+                    <xref href="https://archive.cloudera.com/impala/redhat/6/x86_64/impala/cloudera-impala.repo" scope="external" format="repo"/>
+                  </p>
+                </entry>
+              </row>
+              <row>
+                <entry>
+                  <p> Yum RHEL 5/CentOS 5 (64-bit) </p>
+                </entry>
+                <entry>
+                  <p>
+                    <xref href="https://archive.cloudera.com/impala/redhat/5/x86_64/impala/1.2.0/" scope="external" format="html"/>
+                  </p>
+                </entry>
+                <entry>
+                  <p>
+                    <xref href="https://archive.cloudera.com/impala/redhat/5/x86_64/impala/cloudera-impala.repo" scope="external" format="repo"/>
+                  </p>
+                </entry>
+              </row>
+              <row>
+                <entry>
+                  <p> Zypper SLES 11 (64-bit) </p>
+                </entry>
+                <entry>
+                  <p>
+                    <xref href="https://archive.cloudera.com/impala/sles/11/x86_64/impala/1.2.0/" scope="external" format="html"/>
+                  </p>
+                </entry>
+                <entry>
+                  <p>
+                    <xref href="https://archive.cloudera.com/impala/sles/11/x86_64/impala/cloudera-impala.repo" scope="external" format="repo"/>
+                  </p>
+                </entry>
+              </row>
+              <row>
+                <entry>
+                  <p> Apt-Get Ubuntu 10.04 (Lucid) (64-bit) </p>
+                </entry>
+                <entry>
+                  <p>
+                    <xref href="https://archive.cloudera.com/impala/ubuntu/lucid/amd64/impala/" scope="external" format="html"/>
+                  </p>
+                </entry>
+                <entry>
+                  <p>
+                    <xref href="https://archive.cloudera.com/impala/ubuntu/lucid/amd64/impala/cloudera.list" scope="external" format="list"/>
+                  </p>
+                </entry>
+              </row>
+              <row>
+                <entry>
+                  <p> Apt-Get Ubuntu 12.04 (Precise) (64-bit) </p>
+                </entry>
+                <entry>
+                  <p>
+                    <xref href="https://archive.cloudera.com/impala/ubuntu/precise/amd64/impala/" scope="external" format="html"/>
+                  </p>
+                </entry>
+                <entry>
+                  <p>
+                    <xref href="https://archive.cloudera.com/impala/ubuntu/precise/amd64/impala/cloudera.list" scope="external" format="list"/>
+                  </p>
+                </entry>
+              </row>
+              <row>
+                <entry>
+                  <p> Apt-Get Debian (Squeeze) (64-bit) </p>
+                </entry>
+                <entry>
+                  <p>
+                    <xref href="https://archive.cloudera.com/impala/debian/squeeze/amd64/impala/" scope="external" format="html"/>
+                  </p>
+                </entry>
+                <entry>
+                  <p>
+                    <xref href="https://archive.cloudera.com/impala/debian/squeeze/amd64/impala/cloudera.list" scope="external" format="list"/>
+                  </p>
+                </entry>
+              </row>
+              -->
+            </tbody>
+          </tgroup>
+        </table>
+      </conbody>
+    </concept>
+    <concept id="impala_downloads_111">
+      <title>Impala 1.1.1</title>
+      <conbody>
+        <p><b>Release Date:</b> July 2013 <b>Status:</b> Production</p>
+<!--
+        <p> For information about new features in the latest release, and issues
+          that are fixed or still outstanding, see the <xref href="/Content/impala_relnotes_xi44305.xml#relnotes"/>.</p>
+-->
+        <table>
+          <tgroup cols="3">
+            <colspec colname="1" colwidth="3120*"/>
+            <colspec colname="2" colwidth="3120*"/>
+            <colspec colname="3" colwidth="3120*"/>
+            <thead>
+              <row>
+                <entry><p><b>Repository Type</b></p></entry>
+                <entry><p><b>Location</b></p></entry>
+                <entry><p><b>Repo or List file</b></p></entry>
+              </row>
+            </thead>
+            <tbody>
+              <row>
+                <entry><p>Yum RHEL 6/CentOS 6 (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/redhat/6/x86_64/impala/1.1.1/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/redhat/6/x86_64/impala/cloudera-impala.repo" scope="external" format="repo"/></p></entry>
+              </row>
+              <row>
+                <entry><p>Yum RHEL 5/CentOS 5 (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/redhat/5/x86_64/impala/1.1.1/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/redhat/5/x86_64/impala/cloudera-impala.repo" scope="external" format="repo"/></p></entry>
+              </row>
+              <row>
+                <entry><p>Zypper SLES 11 (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/sles/11/x86_64/impala/1.1.1/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/sles/11/x86_64/impala/cloudera-impala.repo" scope="external" format="repo"/></p></entry>
+              </row>
+              <row>
+                <entry><p>Apt-Get Ubuntu 10.04 (Lucid) (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/ubuntu/lucid/amd64/impala/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/ubuntu/lucid/amd64/impala/cloudera.list" scope="external" format="list"/></p></entry>
+              </row>
+              <row>
+                <entry><p>Apt-Get Ubuntu 12.04 (Precise) (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/ubuntu/precise/amd64/impala/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/ubuntu/precise/amd64/impala/cloudera.list" scope="external" format="list"/></p></entry>
+              </row>
+              <row>
+                <entry><p>Apt-Get Debian (Squeeze) (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/debian/squeeze/amd64/impala/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/debian/squeeze/amd64/impala/cloudera.list" scope="external" format="list"/></p></entry>
+              </row>
+            </tbody>
+          </tgroup>
+        </table>
+      </conbody>
+    </concept>
+    <concept audience="Cloudera" id="impala_downloads_110">
+      <title>Impala 1.1</title>
+      <conbody>
+        <p><b>Release Date:</b> July 2013 <b>Status:</b> Production</p>
+<!--
+        <p> For information about new features in the latest release, and issues
+          that are fixed or still outstanding, see the <xref href="/Content/impala_relnotes_xi44305.xml#relnotes"/>.</p>
+-->
+        <table>
+          <tgroup cols="3">
+            <colspec colname="1" colwidth="3120*"/>
+            <colspec colname="2" colwidth="3120*"/>
+            <colspec colname="3" colwidth="3120*"/>
+            <thead>
+              <row>
+                <entry><p><b>Repository Type</b></p></entry>
+                <entry><p><b>Location</b></p></entry>
+                <entry><p><b>Repo or List file</b></p></entry>
+              </row>
+            </thead>
+            <tbody>
+              <row>
+                <entry><p>Yum RHEL 6/CentOS 6 (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/redhat/6/x86_64/impala/1.1/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/redhat/6/x86_64/impala/cloudera-impala.repo" scope="external" format="repo"/></p></entry>
+              </row>
+              <row>
+                <entry><p>Yum RHEL 5/CentOS 5 (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/redhat/5/x86_64/impala/1.1/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/redhat/5/x86_64/impala/cloudera-impala.repo" scope="external" format="repo"/></p></entry>
+              </row>
+              <row>
+                <entry><p>Zypper SLES 11 (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/sles/11/x86_64/impala/1.1/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/sles/11/x86_64/impala/cloudera-impala.repo" scope="external" format="repo"/></p></entry>
+              </row>
+              <row>
+                <entry><p>Apt-Get Ubuntu 10.04 (Lucid) (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/ubuntu/lucid/amd64/impala/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/ubuntu/lucid/amd64/impala/cloudera.list" scope="external" format="list"/></p></entry>
+              </row>
+              <row>
+                <entry><p>Apt-Get Ubuntu 12.04 (Precise) (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/ubuntu/precise/amd64/impala/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/ubuntu/precise/amd64/impala/cloudera.list" scope="external" format="list"/></p></entry>
+              </row>
+              <row>
+                <entry><p>Apt-Get Debian (Squeeze) (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/debian/squeeze/amd64/impala/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/debian/squeeze/amd64/impala/cloudera.list" scope="external" format="list"/></p></entry>
+              </row>
+            </tbody>
+          </tgroup>
+        </table>
+      </conbody>
+    </concept>
+    <concept id="impala_downloads_101">
+      <title>Impala 1.0.1</title>
+      <conbody>
+        <p><b>Release Date:</b> June 2013 <b>Status:</b> Production</p>
+<!--
+        <p> For information about new features in the latest release, and issues
+          that are fixed or still outstanding, see the <xref href="/Content/impala_relnotes_xi44305.xml#relnotes"/>.</p>
+-->
+        <table>
+          <tgroup cols="3">
+            <colspec colname="1" colwidth="3120*"/>
+            <colspec colname="2" colwidth="3120*"/>
+            <colspec colname="3" colwidth="3120*"/>
+            <thead>
+              <row>
+                <entry><p><b>Repository Type</b></p></entry>
+                <entry><p><b>Location</b></p></entry>
+                <entry><p><b>Repo or List file</b></p></entry>
+              </row>
+            </thead>
+            <tbody>
+              <row>
+                <entry><p>Yum RHEL 6/CentOS 6 (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/redhat/6/x86_64/impala/1.0.1/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/redhat/6/x86_64/impala/cloudera-impala.repo" scope="external" format="repo"/></p></entry>
+              </row>
+              <row>
+                <entry><p>Yum RHEL 5/CentOS 5 (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/redhat/5/x86_64/impala/1.0.1/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/redhat/5/x86_64/impala/cloudera-impala.repo" scope="external" format="repo"/></p></entry>
+              </row>
+              <row>
+                <entry><p>Zypper SLES 11 (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/sles/11/x86_64/impala/1.0.1/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/sles/11/x86_64/impala/cloudera-impala.repo" scope="external" format="repo"/></p></entry>
+              </row>
+              <row>
+                <entry><p>Apt-Get Ubuntu 10.04 (Lucid) (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/ubuntu/lucid/amd64/impala/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/ubuntu/lucid/amd64/impala/cloudera.list" scope="external" format="list"/></p></entry>
+              </row>
+              <row>
+                <entry><p>Apt-Get Ubuntu 12.04 (Precise) (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/ubuntu/precise/amd64/impala/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/ubuntu/precise/amd64/impala/cloudera.list" scope="external" format="list"/></p></entry>
+              </row>
+              <row>
+                <entry><p>Apt-Get Debian (Squeeze) (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/debian/squeeze/amd64/impala/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/debian/squeeze/amd64/impala/cloudera.list" scope="external" format="list"/></p></entry>
+              </row>
+            </tbody>
+          </tgroup>
+        </table>
+      </conbody>
+    </concept>
+    <concept audience="Cloudera" id="impala_downloads_100">
+      <title>Impala 1.0.1</title>
+      <conbody>
+        <p><b>Release Date:</b> June 2013 <b>Status:</b> Production</p>
+<!--
+        <p> For information about new features in the latest release, and issues
+          that are fixed or still outstanding, see the <xref href="/Content/impala_relnotes_xi44305.xml#relnotes"/>.</p>
+-->
+        <table>
+          <tgroup cols="3">
+            <colspec colname="1" colwidth="3120*"/>
+            <colspec colname="2" colwidth="3120*"/>
+            <colspec colname="3" colwidth="3120*"/>
+            <thead>
+              <row>
+                <entry><p><b>Repository Type</b></p></entry>
+                <entry><p><b>Location</b></p></entry>
+                <entry><p><b>Repo or List file</b></p></entry>
+              </row>
+            </thead>
+            <tbody>
+              <row>
+                <entry><p>Yum RHEL 6/CentOS 6 (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/redhat/6/x86_64/impala/1.0.1/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/redhat/6/x86_64/impala/cloudera-impala.repo" scope="external" format="repo"/></p></entry>
+              </row>
+              <row>
+                <entry><p>Yum RHEL 5/CentOS 5 (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/redhat/5/x86_64/impala/1.0.1/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/redhat/5/x86_64/impala/cloudera-impala.repo" scope="external" format="repo"/></p></entry>
+              </row>
+              <row>
+                <entry><p>Zypper SLES 11 (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/sles/11/x86_64/impala/1.0.1/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/sles/11/x86_64/impala/cloudera-impala.repo" scope="external" format="repo"/></p></entry>
+              </row>
+              <row>
+                <entry><p>Apt-Get Ubuntu 10.04 (Lucid) (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/ubuntu/lucid/amd64/impala/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/ubuntu/lucid/amd64/impala/cloudera.list" scope="external" format="list"/></p></entry>
+              </row>
+              <row>
+                <entry><p>Apt-Get Ubuntu 12.04 (Precise) (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/ubuntu/precise/amd64/impala/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/ubuntu/precise/amd64/impala/cloudera.list" scope="external" format="list"/></p></entry>
+              </row>
+              <row>
+                <entry><p>Apt-Get Debian (Squeeze) (64-bit)</p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/debian/squeeze/amd64/impala/" scope="external" format="html"/></p></entry>
+                <entry><p><xref href="https://archive.cloudera.com/impala/debian/squeeze/amd64/impala/cloudera.list" scope="external" format="list"/></p></entry>
+              </row>
+            </tbody>
+          </tgroup>
+        </table>
+      </conbody>
+    </concept>
+  </concept>
+</concept>


[6/7] incubator-impala git commit: New files needed to make PDF build happy.

Posted by jr...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1fcc8cee/docs/topics/impala_config_performance.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_config_performance.xml b/docs/topics/impala_config_performance.xml
new file mode 100644
index 0000000..837e63c
--- /dev/null
+++ b/docs/topics/impala_config_performance.xml
@@ -0,0 +1,291 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="config_performance">
+
+  <title>Post-Installation Configuration for Impala</title>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Performance"/>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="Configuring"/>
+      <data name="Category" value="Administrators"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+    <p id="p_24">
+      This section describes the mandatory and recommended configuration settings for Impala. If Impala is
+      installed using Cloudera Manager, some of these configurations are completed automatically; you must still
+      configure short-circuit reads manually. If you installed Impala without Cloudera Manager, or if you want to
+      customize your environment, consider making the changes described in this topic.
+    </p>
+
+    <p>
+<!-- Could conref this paragraph from ciiu_install.xml. -->
+      In some cases, depending on the level of Impala, CDH, and Cloudera Manager, you might need to add particular
+      component configuration details in one of the free-form fields on the Impala configuration pages within
+      Cloudera Manager. <ph conref="../shared/impala_common.xml#common/safety_valve"/>
+    </p>
+
+    <ul>
+      <li>
+        You must enable short-circuit reads, whether or not Impala was installed through Cloudera Manager. This
+        setting goes in the Impala configuration settings, not the Hadoop-wide settings.
+      </li>
+
+      <li>
+        If you installed Impala in an environment that is not managed by Cloudera Manager, you must enable block
+        location tracking, and you can optionally enable native checksumming for optimal performance.
+      </li>
+
+      <li>
+        If you deployed Impala using Cloudera Manager, see
+        <xref href="impala_perf_testing.xml#performance_testing"/> to confirm proper configuration.
+      </li>
+    </ul>
+
+    <section id="section_fhq_wyv_ls">
+      <title>Mandatory: Short-Circuit Reads</title>
+      <p> Enabling short-circuit reads allows Impala to read local data directly
+        from the file system. This removes the need to communicate through the
+        DataNodes, improving performance. This setting also minimizes the number
+        of additional copies of data. Short-circuit reads require
+          <codeph>libhadoop.so</codeph>
+        <!-- This link went stale. Not obvious how to keep it in sync with whatever Hadoop CDH is using behind the scenes. So hide the link for now. -->
+        <!--        (the <xref href="http://hadoop.apache.org/docs/r0.19.1/native_libraries.html" scope="external" format="html">Hadoop Native Library</xref>) -->
+        (the Hadoop Native Library) to be accessible to both the server and the
+        client. <codeph>libhadoop.so</codeph> is not available if you have
+        installed from a tarball. You must install from an
+        <codeph>.rpm</codeph>, <codeph>.deb</codeph>, or parcel to use
+        short-circuit local reads. <note> If you use Cloudera Manager, you can
+          enable short-circuit reads through a checkbox in the user interface
+          and that setting takes effect for Impala as well. </note>
+      </p>
+      <p> Cloudera strongly recommends using Impala with CDH 4.2 or higher,
+        ideally the latest 4.x release. Impala does support short-circuit reads
+        with CDH 4.1, but for best performance, upgrade to CDH 4.3 or higher.
+        The process of configuring short-circuit reads varies according to which
+        version of CDH you are using. Choose the procedure that is appropriate
+        for your environment. </p>
+      <p>
+        <b>To configure DataNodes for short-circuit reads with CDH 4.2 or
+          higher:</b>
+      </p>
+      <ol id="ol_qlq_wyv_ls">
+        <li id="copy_config_files"> Copy the client
+            <codeph>core-site.xml</codeph> and <codeph>hdfs-site.xml</codeph>
+          configuration files from the Hadoop configuration directory to the
+          Impala configuration directory. The default Impala configuration
+          location is <codeph>/etc/impala/conf</codeph>. </li>
+        <li>
+          <indexterm audience="Cloudera"
+            >dfs.client.read.shortcircuit</indexterm>
+          <indexterm audience="Cloudera">dfs.domain.socket.path</indexterm>
+          <indexterm audience="Cloudera"
+            >dfs.client.file-block-storage-locations.timeout.millis</indexterm>
+          On all Impala nodes, configure the following properties in <!-- Exact timing is unclear, since we say farther down to copy /etc/hadoop/conf/hdfs-site.xml to /etc/impala/conf.
+     Which wouldn't work if we already modified the Impala version of the file here. Not to mention that this
+     doesn't take the CM interface into account, where these /etc files might not exist in those locations. -->
+          <!--          <codeph>/etc/impala/conf/hdfs-site.xml</codeph> as shown: -->
+          Impala's copy of <codeph>hdfs-site.xml</codeph> as shown: <codeblock>&lt;property&gt;
+    &lt;name&gt;dfs.client.read.shortcircuit&lt;/name&gt;
+    &lt;value&gt;true&lt;/value&gt;
+&lt;/property&gt;
+
+&lt;property&gt;
+    &lt;name&gt;dfs.domain.socket.path&lt;/name&gt;
+    &lt;value&gt;/var/run/hdfs-sockets/dn&lt;/value&gt;
+&lt;/property&gt;
+
+&lt;property&gt;
+    &lt;name&gt;dfs.client.file-block-storage-locations.timeout.millis&lt;/name&gt;
+    &lt;value&gt;10000&lt;/value&gt;
+&lt;/property&gt;</codeblock>
+          <!-- Former socket.path value:    &lt;value&gt;/var/run/hadoop-hdfs/dn._PORT&lt;/value&gt; -->
+          <!--
+          <note>
+            The text <codeph>_PORT</codeph> appears just as shown; you do not need to
+            substitute a number.
+          </note>
+-->
+        </li>
+        <li>
+          <p> If <codeph>/var/run/hadoop-hdfs/</codeph> is group-writable, make
+            sure its group is <codeph>root</codeph>. </p>
+          <note> If you are also going to enable block location tracking, you
+            can skip copying configuration files and restarting DataNodes and go
+            straight to <xref href="#config_performance/block_location_tracking"
+             >Optional: Block Location Tracking</xref>.
+            Configuring short-circuit reads and block location tracking require
+            the same process of copying files and restarting services, so you
+            can complete that process once when you have completed all
+            configuration changes. Whether you copy files and restart services
+            now or during configuring block location tracking, short-circuit
+            reads are not enabled until you complete those final steps. </note>
+        </li>
+        <li id="restart_all_datanodes"> After applying these changes, restart
+          all DataNodes. </li>
+      </ol>
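+      <p>
+        As a rough sketch of steps 1 and 4 above, the commands might look like the following on a
+        package-based installation. The file locations and the <codeph>hadoop-hdfs-datanode</codeph>
+        service name are assumptions based on default package layouts, so adjust them for your
+        environment.
+      </p>
+<codeblock># Sketch only; assumes the default /etc/hadoop/conf and /etc/impala/conf locations.
+# Copy the client configuration files into the Impala configuration directory.
+$ sudo cp /etc/hadoop/conf/core-site.xml /etc/hadoop/conf/hdfs-site.xml /etc/impala/conf/
+
+# Create the parent directory for the domain socket path shown above, if it does not already exist.
+$ sudo mkdir -p /var/run/hdfs-sockets
+
+# Restart the DataNode service; repeat on every DataNode host.
+$ sudo service hadoop-hdfs-datanode restart</codeblock>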
+      <p>
+        <b>To configure DataNodes for short-circuit reads with CDH 4.1:</b>
+      </p>
+      <!-- Repeated twice, turn into a conref. -->
+      <note> Cloudera strongly recommends using Impala with CDH 4.2 or higher,
+        ideally the latest 4.x release. Impala does support short-circuit reads
+        with CDH 4.1, but for best performance, upgrade to CDH 4.3 or higher.
+        The process of configuring short-circuit reads varies according to which
+        version of CDH you are using. Choose the procedure that is appropriate
+        for your environment. </note>
+      <ol id="ol_cqq_wyv_ls">
+        <li> Enable short-circuit reads by adding settings to the Impala
+            <codeph>core-site.xml</codeph> file. <ul id="ul_a5q_wyv_ls">
+            <li> If you installed Impala using Cloudera Manager, short-circuit
+              reads should be properly configured, but you can review the
+              configuration by checking the contents of
+                the <codeph>core-site.xml</codeph> file, which is installed at
+                <codeph>/etc/impala/conf</codeph> by default. </li>
+            <li> If you installed using packages, instead of using Cloudera
+              Manager, create the <codeph>core-site.xml</codeph> file. This can
+              be easily done by copying
+              the <codeph>core-site.xml</codeph> client configuration file from
+              another machine that is running Hadoop services. This file must be
+              copied to the Impala configuration directory. The Impala
+              configuration directory is set by
+                the <codeph>IMPALA_CONF_DIR</codeph> environment variable and is
+              by default <codeph>/etc/impala/conf</codeph>. To confirm the
+              Impala configuration directory, check
+                the <codeph>IMPALA_CONF_DIR</codeph> environment variable value.
+                <note> If the Impala configuration directory does not exist,
+                create it and then add the <codeph>core-site.xml</codeph> file.
+              </note>
+            </li>
+          </ul> Add the following to the <codeph>core-site.xml</codeph> file: <codeblock>&lt;property&gt;
+   &lt;name&gt;dfs.client.read.shortcircuit&lt;/name&gt;
+   &lt;value&gt;true&lt;/value&gt;
+&lt;/property&gt;</codeblock>
+          <note> For an installation managed by Cloudera Manager, specify these
+            settings in the Impala dialogs, in the options field for HDFS. <ph
+              conref="../shared/impala_common.xml#common/safety_valve" />
+          </note>
+        </li>
+        <li> For each DataNode, enable access by adding the following to
+            the <codeph>hdfs-site.xml</codeph> file: <codeblock rev="1.3.0">&lt;property&gt;
+   &lt;name&gt;dfs.client.use.legacy.blockreader.local&lt;/name&gt;
+   &lt;value&gt;true&lt;/value&gt;
+&lt;/property&gt;
+
+&lt;property&gt;
+   &lt;name&gt;dfs.datanode.data.dir.perm&lt;/name&gt;
+   &lt;value&gt;750&lt;/value&gt;
+&lt;/property&gt;
+
+&lt;property&gt;
+   &lt;name&gt;dfs.block.local-path-access.user&lt;/name&gt;
+   &lt;value&gt;impala&lt;/value&gt;
+&lt;/property&gt;
+
+&lt;property&gt;
+    &lt;name&gt;dfs.client.file-block-storage-locations.timeout.millis&lt;/name&gt;
+    &lt;value&gt;10000&lt;/value&gt;
+&lt;/property&gt;</codeblock>
+          <note> In the preceding example,
+              the <codeph>dfs.block.local-path-access.user</codeph> is the user
+            running the <codeph>impalad</codeph> process. By default, that
+            account is <codeph>impala</codeph>. </note>
+        </li>
+        <li> Use <codeph>usermod</codeph> to add users requiring local block
+          access to the appropriate HDFS group. For example, if you
+            assigned <codeph>impala</codeph> to the
+            <codeph>dfs.block.local-path-access.user</codeph> property, you
+          would add <codeph>impala</codeph> to the hadoop HDFS group: <codeblock>$ usermod -a -G hadoop impala</codeblock>
+          <note> The default HDFS group is <codeph>hadoop</codeph>, but it is
+            possible to have an environment configured to use an alternate
+            group. To find the configured HDFS group name using the Cloudera
+            Manager Admin Console: <ol id="ol_km4_4bc_nr">
+              <li>Go to the HDFS service.</li>
+              <li
+                conref="../shared/cm_common_elements.xml#cm/config_edit" />
+              <li>Click <menucascade>
+                  <uicontrol>Scope</uicontrol>
+                  <uicontrol><varname>HDFS service name</varname>
+                    (Service-Wide)</uicontrol>
+                </menucascade>.</li>
+              <li>Click <menucascade>
+                  <uicontrol>Category</uicontrol>
+                  <uicontrol>Advanced</uicontrol>
+                </menucascade>.</li>
+              <li>The <uicontrol>Shared Hadoop Group Name</uicontrol> property
+                contains the group name.</li>
+            </ol></note>
+          <note> If you are going to enable block location tracking, you can
+            skip copying configuration files and restarting DataNodes and go
+            straight to <xref href="#config_performance/block_location_tracking"/>.
+            Configuring short-circuit reads and block
+            location tracking require the same process of copying files and
+            restarting services, so you can complete that process once when you
+            have completed all configuration changes. Whether you copy files and
+            restart services now or during configuring block location tracking,
+            short-circuit reads are not enabled until you complete those final
+            steps. </note>
+        </li>
+        <li conref="#config_performance/copy_config_files" />
+        <li conref="#config_performance/restart_all_datanodes" />
+      </ol>
+    </section>
+
+    <section id="block_location_tracking">
+
+      <title>Mandatory: Block Location Tracking</title>
+
+      <p>
+        Enabling block location metadata allows Impala to know which disks data blocks are located on, allowing
+        better utilization of the underlying disks. Impala will not start unless this setting is enabled.
+      </p>
+
+      <p>
+        <b>To enable block location tracking:</b>
+      </p>
+
+      <ol>
+        <li>
+          For each DataNode, add the following to the <codeph>hdfs-site.xml</codeph> file:
+<codeblock>&lt;property&gt;
+  &lt;name&gt;dfs.datanode.hdfs-blocks-metadata.enabled&lt;/name&gt;
+  &lt;value&gt;true&lt;/value&gt;
+&lt;/property&gt; </codeblock>
+        </li>
+
+        <li conref="#config_performance/copy_config_files"/>
+
+        <li conref="#config_performance/restart_all_datanodes"/>
+      </ol>
+    </section>
+
+    <section id="native_checksumming">
+
+      <title>Optional: Native Checksumming</title>
+
+      <p>
+        Enabling native checksumming causes Impala to use an optimized native library for computing checksums, if
+        that library is available.
+      </p>
+
+      <p id="p_29">
+        <b>To enable native checksumming:</b>
+      </p>
+
+      <p>
+        If you installed CDH from packages, the native checksumming library is installed and set up correctly, and
+        no additional steps are required. If you installed by other means, such as from tarballs, native
+        checksumming might not be available due to missing shared objects. Finding the message
+        "<codeph>Unable to load native-hadoop library for your platform... using builtin-java classes where
+        applicable</codeph>" in the Impala logs indicates native checksumming may be unavailable. To enable native
+        checksumming, you must build and install <codeph>libhadoop.so</codeph> (the
+        <!-- Another instance of stale link. -->
+        <!-- <xref href="http://hadoop.apache.org/docs/r0.19.1/native_libraries.html" scope="external" format="html">Hadoop Native Library</xref>). -->
+        Hadoop Native Library).
+      </p>
+    </section>
+  </conbody>
+</concept>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1fcc8cee/docs/topics/impala_connecting.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_connecting.xml b/docs/topics/impala_connecting.xml
new file mode 100644
index 0000000..354e698
--- /dev/null
+++ b/docs/topics/impala_connecting.xml
@@ -0,0 +1,202 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="connecting">
+
+  <title>Connecting to impalad through impala-shell</title>
+  <titlealts audience="PDF"><navtitle>Connecting to impalad</navtitle></titlealts>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="impala-shell"/>
+      <data name="Category" value="Network"/>
+      <data name="Category" value="DataNode"/>
+      <data name="Category" value="Developers"/>
+      <data name="Category" value="Data Analysts"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+<!--
+TK: This would be a good theme for a tutorial topic.
+Lots of nuances to illustrate through sample code.
+-->
+
+    <p>
+      Within an <cmdname>impala-shell</cmdname> session, you can only issue queries while connected to an instance
+      of the <cmdname>impalad</cmdname> daemon. You can specify the connection information:
+      <ul>
+        <li>
+          Through command-line options when you run the <cmdname>impala-shell</cmdname> command.
+        </li>
+        <li>
+          Through a configuration file that is read when you run the <cmdname>impala-shell</cmdname> command
+          (a sample file is sketched below).
+        </li>
+        <li>
+          During an <cmdname>impala-shell</cmdname> session, by issuing a <codeph>CONNECT</codeph> command.
+        </li>
+      </ul>
+      See <xref href="impala_shell_options.xml"/> for the command-line and configuration file options you can use.
+    </p>
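+    <p>
+      As a sketch of the configuration-file option mentioned above, <cmdname>impala-shell</cmdname>
+      reads an <codeph>[impala]</codeph> section from a file such as <codeph>$HOME/.impalarc</codeph>,
+      where the option names generally match the long forms of the command-line flags. The exact file
+      name and supported options depend on your <cmdname>impala-shell</cmdname> version, so treat this
+      as an illustration only.
+    </p>
+<codeblock># Hypothetical contents of $HOME/.impalarc.
+[impala]
+# Equivalent to: impala-shell -i impalad-host.example.com:21000
+impalad=impalad-host.example.com:21000
+# Equivalent to: impala-shell -d staging
+default_db=staging</codeblock>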
+
+    <p>
+      You can connect to any DataNode where an instance of <cmdname>impalad</cmdname> is running,
+      and that host coordinates the execution of all queries sent to it.
+    </p>
+
+    <p>
+      For simplicity during development, you might always connect to the same host, perhaps running <cmdname>impala-shell</cmdname> on
+      the same host as <cmdname>impalad</cmdname> and specifying the hostname as <codeph>localhost</codeph>.
+    </p>
+
+    <p>
+      In a production environment, you might enable load balancing, in which you connect to a specific host/port combination
+      but queries are forwarded to arbitrary hosts. This technique spreads the overhead of acting as the coordinator
+      node among all the DataNodes in the cluster. See <xref href="impala_proxy.xml"/> for details.
+    </p>
+
+    <p>
+      <b>To connect the Impala shell during shell startup:</b>
+    </p>
+
+    <ol>
+      <li>
+        Locate the hostname of a DataNode within the cluster that is running an instance of the
+        <cmdname>impalad</cmdname> daemon. If that DataNode uses a non-default port (something
+        other than port 21000) for <cmdname>impala-shell</cmdname> connections, find out the
+        port number also.
+      </li>
+
+      <li>
+        Use the <codeph>-i</codeph> option to the
+        <cmdname>impala-shell</cmdname> interpreter to specify the connection information for
+        that instance of <cmdname>impalad</cmdname>:
+<codeblock>
+# When you are logged into the same machine running impalad.
+# The prompt will reflect the current hostname.
+$ impala-shell
+
+# When you are logged into the same machine running impalad.
+# The host will reflect the hostname 'localhost'.
+$ impala-shell -i localhost
+
+# When you are logged onto a different host, perhaps a client machine
+# outside the Hadoop cluster.
+$ impala-shell -i <varname>some.other.hostname</varname>
+
+# When you are logged onto a different host, and impalad is listening
+# on a non-default port. Perhaps a load balancer is forwarding requests
+# to a different host/port combination behind the scenes.
+$ impala-shell -i <varname>some.other.hostname</varname>:<varname>port_number</varname>
+</codeblock>
+      </li>
+    </ol>
+
+    <p>
+      <b>To connect the Impala shell after shell startup:</b>
+    </p>
+
+    <ol>
+      <li>
+        Start the Impala shell with no connection:
+<codeblock>$ impala-shell</codeblock>
+        <p>
+          You should see a prompt like the following:
+        </p>
+<codeblock>Welcome to the Impala shell. Press TAB twice to see a list of available commands.
+
+Copyright (c) <varname>year</varname> Cloudera, Inc. All rights reserved.
+
+<ph conref="../shared/ImpalaVariables.xml#impala_vars/ShellBanner"/>
+[Not connected] &gt; </codeblock>
+      </li>
+
+      <li>
+        Locate the hostname of a DataNode within the cluster that is running an instance of the
+        <cmdname>impalad</cmdname> daemon. If that DataNode uses a non-default port (something
+        other than port 21000) for <cmdname>impala-shell</cmdname> connections, find out the
+        port number also.
+      </li>
+
+      <li>
+        Use the <codeph>connect</codeph> command to connect to an Impala instance. Enter a command of the form:
+<codeblock>[Not connected] &gt; connect <varname>impalad-host</varname>
+[<varname>impalad-host</varname>:21000] &gt;</codeblock>
+        <note>
+          Replace <varname>impalad-host</varname> with the hostname you have configured for any DataNode running
+          Impala in your environment. The changed prompt indicates a successful connection.
+        </note>
+      </li>
+    </ol>
+
+    <p>
+      <b>To start <cmdname>impala-shell</cmdname> in a specific database:</b>
+    </p>
+
+    <p>
+      You can use all the same connection options as in previous examples.
+      For simplicity, these examples assume that you are logged into one of
+      the DataNodes that is running the <cmdname>impalad</cmdname> daemon.
+    </p>
+
+    <ol>
+      <li>
+        Find the name of the database containing the relevant tables, views, and so
+        on that you want to operate on.
+      </li>
+
+      <li>
+        Use the <codeph>-d</codeph> option to the
+        <cmdname>impala-shell</cmdname> interpreter to connect and immediately
+        switch to the specified database, without the need for a <codeph>USE</codeph>
+        statement or fully qualified names:
+<codeblock>
+# Subsequent queries with unqualified names operate on
+# tables, views, and so on inside the database named 'staging'.
+$ impala-shell -i localhost -d staging
+
+# It is common during development, ETL, benchmarking, and so on
+# to have different databases containing the same table names
+# but with different contents or layouts.
+$ impala-shell -i localhost -d parquet_snappy_compression
+$ impala-shell -i localhost -d parquet_gzip_compression
+</codeblock>
+      </li>
+    </ol>
+
+    <p>
+      <b>To run one or several statements in non-interactive mode:</b>
+    </p>
+
+    <p>
+      You can use all the same connection options as in previous examples.
+      For simplicity, these examples assume that you are logged into one of
+      the DataNodes that is running the <cmdname>impalad</cmdname> daemon.
+    </p>
+
+    <ol>
+      <li>
+        Construct a statement, or a file containing a sequence of statements,
+        that you want to run in an automated way, without typing or copying
+        and pasting each time.
+      </li>
+
+      <li>
+        Invoke <cmdname>impala-shell</cmdname> with the <codeph>-q</codeph> option to run a single statement, or
+        the <codeph>-f</codeph> option to run a sequence of statements from a file.
+        The <cmdname>impala-shell</cmdname> command returns immediately, without going into
+        the interactive interpreter.
+<codeblock>
+# A utility command that you might run while developing shell scripts
+# to manipulate HDFS files.
+$ impala-shell -i localhost -d database_of_interest -q 'show tables'
+
+# A sequence of CREATE TABLE, CREATE VIEW, and similar DDL statements
+# can go into a file to make the setup process repeatable.
+$ impala-shell -i localhost -d database_of_interest -f recreate_tables.sql
+</codeblock>
+      </li>
+    </ol>
+
+  </conbody>
+</concept>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1fcc8cee/docs/topics/impala_delegation.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_delegation.xml b/docs/topics/impala_delegation.xml
new file mode 100644
index 0000000..0d59761
--- /dev/null
+++ b/docs/topics/impala_delegation.xml
@@ -0,0 +1,88 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept rev="1.2" id="delegation">
+
+  <title>Configuring Impala Delegation for Hue and BI Tools</title>
+
+  <prolog>
+    <metadata>
+      <data name="Category" value="Security"/>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="Authentication"/>
+      <data name="Category" value="Delegation"/>
+      <data name="Category" value="Hue"/>
+      <data name="Category" value="Administrators"/>
+      <data name="Category" value="Developers"/>
+      <data name="Category" value="Data Analysts"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+    <p>
+<!--
+      When users connect to Impala directly through the <cmdname>impala-shell</cmdname> interpreter, the Sentry
+      authorization framework determines what actions they can take and what data they can see.
+-->
+      When users submit Impala queries through a separate application, such as Hue or a business intelligence tool,
+      typically all requests are treated as coming from the same user. In Impala 1.2 and higher, authentication is
+      extended by a new feature that allows applications to pass along credentials for the users that connect to
+      them (known as <q>delegation</q>), and issue Impala queries with the privileges for those users. Currently,
+      the delegation feature is available only for Impala queries submitted through application interfaces such as
+      Hue and BI tools; for example, Impala cannot issue queries using the privileges of the HDFS user.
+    </p>
+
+    <p>
+      The delegation feature is enabled by a startup option for <cmdname>impalad</cmdname>:
+      <codeph>--authorized_proxy_user_config</codeph>. When you specify this option, users whose names you specify
+      (such as <codeph>hue</codeph>) can delegate the execution of a query to another user. The query runs with the
+      privileges of the delegated user, not the original user such as <codeph>hue</codeph>. The name of the
+      delegated user is passed using the HiveServer2 configuration property <codeph>impala.doas.user</codeph>.
+    </p>
+
+    <p>
+      You can specify a list of users that the application user can delegate to, or <codeph>*</codeph> to allow a
+      superuser to delegate to any other user. For example:
+    </p>
+
+<codeblock>impalad --authorized_proxy_user_config 'hue=user1,user2;admin=*' ...</codeblock>
+
+    <note>
+      Make sure to use single quotes or escape characters to ensure that any <codeph>*</codeph> characters do not
+      undergo wildcard expansion when specified in command-line arguments.
+    </note>
+
+    <p>
+      See <xref href="impala_config_options.xml#config_options"/> for details about adding or changing
+      <cmdname>impalad</cmdname> startup options. See
+      <xref href="http://blog.cloudera.com/blog/2013/07/how-hiveserver2-brings-security-and-concurrency-to-apache-hive/" scope="external" format="html">this
+      Cloudera blog post</xref> for background information about the delegation capability in HiveServer2.
+    </p>
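+    <p>
+      As a minimal sketch, on a package-based installation that is not managed by Cloudera Manager,
+      you typically append the option to the <codeph>IMPALA_SERVER_ARGS</codeph> setting in
+      <codeph>/etc/default/impala</codeph> and then restart the daemon. The file location and service
+      name are assumptions based on default package layouts.
+    </p>
+<codeblock># In /etc/default/impala, append the flag to the existing IMPALA_SERVER_ARGS value, for example:
+#   IMPALA_SERVER_ARGS=" ... --authorized_proxy_user_config='hue=user1,user2;admin=*' "
+
+# Then restart the Impala daemon on each host where it runs.
+$ sudo service impala-server restart</codeblock>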
+
+    <p>
+      To set up authentication for the delegated users:
+    </p>
+
+    <ul>
+      <li>
+        <p>
+          On the server side, configure either user/password authentication through LDAP, or Kerberos
+          authentication, for all the delegated users. See <xref href="impala_ldap.xml#ldap"/> or
+          <xref href="impala_kerberos.xml#kerberos"/> for details.
+        </p>
+      </li>
+
+      <li>
+        <p>
+          On the client side, follow the instructions in the <q>Using User Name and Password</q> section in the
+          <xref href="http://www.cloudera.com/content/cloudera-content/cloudera-docs/Connectors/PDF/Cloudera-ODBC-Driver-for-Impala-Install-Guide.pdf" scope="external" format="pdf">ODBC
+          driver installation guide</xref>. Then search for <q>delegation</q> in that same installation guide to
+          learn about the <uicontrol>Delegation UID</uicontrol> field and <codeph>DelegationUID</codeph> configuration keyword to enable the delegation feature for
+          ODBC-based BI tools.
+        </p>
+      </li>
+    </ul>
+
+  </conbody>
+
+</concept>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1fcc8cee/docs/topics/impala_development.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_development.xml b/docs/topics/impala_development.xml
new file mode 100644
index 0000000..a2eef16
--- /dev/null
+++ b/docs/topics/impala_development.xml
@@ -0,0 +1,229 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="intro_dev">
+
+  <title>Developing Impala Applications</title>
+  <titlealts audience="PDF"><navtitle>Developing Applications</navtitle></titlealts>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="SQL"/>
+      <data name="Category" value="Developers"/>
+      <data name="Category" value="Data Analysts"/>
+      <data name="Category" value="Concepts"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+    <p>
+      The core development language with Impala is SQL. You can also use Java or other languages to interact with
+      Impala through the standard JDBC and ODBC interfaces used by many business intelligence tools. For
+      specialized kinds of analysis, you can supplement the SQL built-in functions by writing
+      <xref href="impala_udf.xml#udfs">user-defined functions (UDFs)</xref> in C++ or Java.
+    </p>
+
+    <p outputclass="toc inpage"/>
+  </conbody>
+
+  <concept id="intro_sql">
+
+    <title>Overview of the Impala SQL Dialect</title>
+  <prolog>
+    <metadata>
+      <data name="Category" value="SQL"/>
+      <data name="Category" value="Concepts"/>
+    </metadata>
+  </prolog>
+
+    <conbody>
+
+      <p>
+        The Impala SQL dialect is highly compatible with the SQL syntax used in the Apache Hive component (HiveQL). As
+        such, it should feel familiar to users who already run SQL queries on the Hadoop
+        infrastructure. Currently, Impala SQL supports a subset of HiveQL statements, data types, and built-in
+        functions. Impala also includes additional built-in functions for common industry features, to simplify
+        porting SQL from non-Hadoop systems.
+      </p>
+
+      <p>
+        For users coming to Impala from traditional database or data warehousing backgrounds, the following aspects of the SQL dialect
+        might seem familiar:
+      </p>
+
+      <ul>
+        <li>
+          <p>
+            The <codeph>SELECT</codeph> statement includes familiar clauses such as <codeph>WHERE</codeph>,
+            <codeph>GROUP BY</codeph>, <codeph>ORDER BY</codeph>, and <codeph>WITH</codeph>.
+            You will find familiar notions such as
+            <xref href="impala_joins.xml#joins">joins</xref>, <xref href="impala_functions.xml#builtins">built-in
+            functions</xref> for processing strings, numbers, and dates,
+            <xref href="impala_aggregate_functions.xml#aggregate_functions">aggregate functions</xref>,
+            <xref href="impala_subqueries.xml#subqueries">subqueries</xref>, and
+            <xref href="impala_operators.xml#comparison_operators">comparison operators</xref>
+            such as <codeph>IN()</codeph> and <codeph>BETWEEN</codeph>.
+            The <codeph>SELECT</codeph> statement is the place where SQL standards compliance is most important.
+          </p>
+        </li>
+
+        <li>
+          <p>
+          From the data warehousing world, you will recognize the notion of
+          <xref href="impala_partitioning.xml#partitioning">partitioned tables</xref>.
+          One or more columns serve as partition keys, and the data is physically arranged so that
+          queries that refer to the partition key columns in the <codeph>WHERE</codeph> clause
+          can skip partitions that do not match the filter conditions. For example, if you have 10
+          years' worth of data and use a clause such as <codeph>WHERE year = 2015</codeph>,
+          <codeph>WHERE year &gt; 2010</codeph>, or <codeph>WHERE year IN (2014, 2015)</codeph>,
+          Impala skips all the data for non-matching years, greatly reducing the amount of I/O
+          for the query. A brief example follows this list.
+          </p>
+        </li>
+
+        <li rev="1.2">
+          <p>
+          In Impala 1.2 and higher, <xref href="impala_udf.xml#udfs">UDFs</xref> let you perform custom comparisons
+          and transformation logic during <codeph>SELECT</codeph> and <codeph>INSERT...SELECT</codeph> statements.
+          </p>
+        </li>
+      </ul>
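+      <p>
+        As an illustration of partition pruning, the following sketch creates a partitioned table and
+        runs a query that reads only the matching partitions. The table and column names are
+        hypothetical, and the statements are shown through <cmdname>impala-shell</cmdname>.
+      </p>
+<codeblock># Hypothetical table partitioned by year.
+$ impala-shell -i localhost -q "CREATE TABLE sales (id BIGINT, amount DOUBLE) PARTITIONED BY (year INT)"
+
+# Only the partitions for 2014 and 2015 are scanned; data for other years is skipped.
+$ impala-shell -i localhost -q "SELECT COUNT(*) FROM sales WHERE year IN (2014, 2015)"</codeblock>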
+
+      <p>
+        For users coming to Impala from traditional database or data warehousing backgrounds, the following aspects of the SQL dialect
+        might require some learning and practice for you to become proficient in the Hadoop environment:
+      </p>
+
+      <ul>
+        <li>
+          <p>
+          Impala SQL is focused on queries and includes relatively little DML. There is no <codeph>UPDATE</codeph>
+          or <codeph>DELETE</codeph> statement. Stale data is typically discarded (by <codeph>DROP TABLE</codeph>
+          or <codeph>ALTER TABLE ... DROP PARTITION</codeph> statements) or replaced (by <codeph>INSERT
+          OVERWRITE</codeph> statements).
+          </p>
+        </li>
+
+        <li>
+          <p>
+          All data creation is done by <codeph>INSERT</codeph> statements, which typically insert data in bulk by
+          querying from other tables. There are two variations, <codeph>INSERT INTO</codeph> which appends to the
+          existing data, and <codeph>INSERT OVERWRITE</codeph> which replaces the entire contents of a table or
+          partition (similar to <codeph>TRUNCATE TABLE</codeph> followed by a new <codeph>INSERT</codeph>). There
+          is no <codeph>INSERT ... VALUES</codeph> syntax to insert a single row.
+          </p>
+        </li>
+
+        <li>
+          <p>
+          You often construct Impala table definitions and data files in some other environment, and then attach
+          Impala so that it can run real-time queries. The same data files and table metadata are shared with other
+          components of the Hadoop ecosystem. In particular, Impala can access tables created by Hive or data
+          inserted by Hive, and Hive can access tables and data produced by Impala. Many other Hadoop components
+          can write files in formats such as Parquet and Avro that can then be queried by Impala.
+          </p>
+        </li>
+
+        <li>
+          <p>
+          Because Hadoop and Impala are focused on data warehouse-style operations on large data sets, Impala SQL
+          includes some idioms that you might find in the import utilities for traditional database systems. For
+          example, you can create a table that reads comma-separated or tab-separated text files, specifying the
+          separator in the <codeph>CREATE TABLE</codeph> statement. You can create <b>external tables</b> that read
+          existing data files but do not move or transform them; a brief example follows this list.
+          </p>
+        </li>
+
+        <li>
+          <p>
+          Because Impala reads large quantities of data that might not be perfectly tidy and predictable, it does
+          not impose length constraints on string data types. For example, you can define a database column as
+          <codeph>STRING</codeph> with unlimited length, rather than <codeph>CHAR(1)</codeph> or
+          <codeph>VARCHAR(64)</codeph>. <ph rev="2.0.0">(Although in Impala 2.0 and later, you can also use
+          length-constrained <codeph>CHAR</codeph> and <codeph>VARCHAR</codeph> types.)</ph>
+          </p>
+        </li>
+
+      </ul>
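+      <p>
+        To make the preceding points concrete, the following sketch defines an external table over
+        existing comma-separated data files and then replaces the contents of another table in bulk.
+        The paths, table names, and columns are hypothetical.
+      </p>
+<codeblock># External table over existing CSV files; the files are not moved or transformed.
+$ impala-shell -i localhost -q "CREATE EXTERNAL TABLE raw_events (event_id BIGINT, payload STRING)
+  ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' LOCATION '/user/etl/raw_events'"
+
+# Bulk-replace the contents of an existing table from a query, rather than inserting single rows.
+$ impala-shell -i localhost -q "INSERT OVERWRITE TABLE events SELECT event_id, payload FROM raw_events"</codeblock>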
+
+      <p>
+        <b>Related information:</b> <xref href="impala_langref.xml#langref"/>, especially
+        <xref href="impala_langref_sql.xml#langref_sql"/> and <xref href="impala_functions.xml#builtins"/>
+      </p>
+    </conbody>
+  </concept>
+
+<!-- Bunch of potential concept topics for future consideration. Major areas of Impala modelled on areas of discussion for Oracle Database, and distributed databases in general. -->
+
+  <concept id="intro_datatypes" audience="Cloudera">
+
+    <title>Overview of Impala SQL Data Types</title>
+
+    <conbody/>
+  </concept>
+
+  <concept id="intro_network" audience="Cloudera">
+
+    <title>Overview of Impala Network Topology</title>
+
+    <conbody/>
+  </concept>
+
+  <concept id="intro_cluster" audience="Cloudera">
+
+    <title>Overview of Impala Cluster Topology</title>
+
+    <conbody/>
+  </concept>
+
+  <concept id="intro_apis">
+
+    <title>Overview of Impala Programming Interfaces</title>
+  <prolog>
+    <metadata>
+      <data name="Category" value="JDBC"/>
+      <data name="Category" value="ODBC"/>
+      <data name="Category" value="Hue"/>
+    </metadata>
+  </prolog>
+
+    <conbody>
+
+      <p>
+        You can connect and submit requests to the Impala daemons through:
+      </p>
+
+      <ul>
+        <li>
+          The <codeph><xref href="impala_impala_shell.xml#impala_shell">impala-shell</xref></codeph> interactive
+          command interpreter.
+        </li>
+
+        <li>
+          The <xref href="http://gethue.com/" scope="external" format="html">Hue</xref> web-based user interface.
+        </li>
+
+        <li>
+          <xref href="impala_jdbc.xml#impala_jdbc">JDBC</xref>.
+        </li>
+
+        <li>
+          <xref href="impala_odbc.xml#impala_odbc">ODBC</xref>.
+        </li>
+      </ul>
+
+      <p>
+        With these options, you can use Impala in heterogeneous environments, with JDBC or ODBC applications
+        running on non-Linux platforms. You can also use Impala in combination with various Business Intelligence
+        tools that use the JDBC and ODBC interfaces.
+      </p>
+
+      <p>
+        Each <codeph>impalad</codeph> daemon process, running on separate nodes in a cluster, listens on
+        <xref href="impala_ports.xml#ports">several ports</xref> for incoming requests. Requests from
+        <codeph>impala-shell</codeph> and Hue are routed to the <codeph>impalad</codeph> daemons through the same
+        port. The <codeph>impalad</codeph> daemons listen on separate ports for JDBC and ODBC requests.
+      </p>
+    </conbody>
+  </concept>
+</concept>



[7/7] incubator-impala git commit: New files needed to make PDF build happy.

Posted by jr...@apache.org.
New files needed to make PDF build happy.


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/1fcc8cee
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/1fcc8cee
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/1fcc8cee

Branch: refs/heads/doc_prototype
Commit: 1fcc8ceecf31c8602594b626dfb25f67324537f6
Parents: 8039fbb
Author: John Russell <jr...@cloudera.com>
Authored: Fri Oct 28 17:33:41 2016 -0700
Committer: John Russell <jr...@cloudera.com>
Committed: Fri Oct 28 17:33:41 2016 -0700

----------------------------------------------------------------------
 docs/Cloudera-Impala-Release-Notes.ditamap    |   10 +
 docs/topics/impala_admin.xml                  |   60 +
 docs/topics/impala_auditing.xml               |  260 +++
 docs/topics/impala_authentication.xml         |   39 +
 docs/topics/impala_cluster_sizing.xml         |  353 ++++
 docs/topics/impala_cm_installation.xml        |   56 +
 docs/topics/impala_concepts.xml               |  295 +++
 docs/topics/impala_config.xml                 |   57 +
 docs/topics/impala_config_options.xml         |  593 ++++++
 docs/topics/impala_config_performance.xml     |  291 +++
 docs/topics/impala_connecting.xml             |  202 +++
 docs/topics/impala_delegation.xml             |   88 +
 docs/topics/impala_development.xml            |  229 +++
 docs/topics/impala_faq.xml                    | 1880 ++++++++++++++++++++
 docs/topics/impala_intro.xml                  |   81 +
 docs/topics/impala_kerberos.xml               |  370 ++++
 docs/topics/impala_ldap.xml                   |  354 ++++
 docs/topics/impala_lineage.xml                |  113 ++
 docs/topics/impala_mixed_security.xml         |   46 +
 docs/topics/impala_noncm_installation.xml     |  175 ++
 docs/topics/impala_perf_benchmarking.xml      |   36 +
 docs/topics/impala_perf_cookbook.xml          |  269 +++
 docs/topics/impala_perf_resources.xml         |   60 +
 docs/topics/impala_perf_skew.xml              |  150 ++
 docs/topics/impala_perf_testing.xml           |  175 ++
 docs/topics/impala_planning.xml               |   30 +
 docs/topics/impala_ports.xml                  |  440 +++++
 docs/topics/impala_proxy.xml                  |  635 +++++++
 docs/topics/impala_rcfile.xml                 |  244 +++
 docs/topics/impala_release_notes.xml          |   17 +
 docs/topics/impala_schema_design.xml          |  222 +++
 docs/topics/impala_security_files.xml         |   67 +
 docs/topics/impala_security_guidelines.xml    |  108 ++
 docs/topics/impala_security_install.xml       |   24 +
 docs/topics/impala_security_metastore.xml     |   40 +
 docs/topics/impala_security_webui.xml         |   66 +
 docs/topics/impala_seqfile.xml                |  239 +++
 docs/topics/impala_shell_commands.xml         |  399 +++++
 docs/topics/impala_shell_running_commands.xml |  265 +++
 docs/topics/impala_ssl.xml                    |  256 +++
 docs/topics/impala_troubleshooting.xml        |  447 +++++
 docs/topics/impala_webui.xml                  |  650 +++++++
 docs/topics/rg_impala_vd.xml                  | 1165 ++++++++++++
 43 files changed, 11556 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1fcc8cee/docs/Cloudera-Impala-Release-Notes.ditamap
----------------------------------------------------------------------
diff --git a/docs/Cloudera-Impala-Release-Notes.ditamap b/docs/Cloudera-Impala-Release-Notes.ditamap
new file mode 100644
index 0000000..7545b2e
--- /dev/null
+++ b/docs/Cloudera-Impala-Release-Notes.ditamap
@@ -0,0 +1,10 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE map PUBLIC "-//OASIS//DTD DITA Map//EN" "map.dtd">
+<map audience="standalone">
+  <title>Cloudera Impala Release Notes</title>
+  <topicref href="topics/impala_relnotes.xml" audience="HTML standalone"/>
+  <topicref href="topics/impala_new_features.xml"/>
+  <topicref href="topics/impala_incompatible_changes.xml"/>
+  <topicref href="topics/impala_known_issues.xml"/>
+  <topicref href="topics/impala_fixed_issues.xml"/>
+</map>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1fcc8cee/docs/topics/impala_admin.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_admin.xml b/docs/topics/impala_admin.xml
new file mode 100644
index 0000000..3da7d5f
--- /dev/null
+++ b/docs/topics/impala_admin.xml
@@ -0,0 +1,60 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="admin">
+
+  <title>Impala Administration</title>
+  <titlealts audience="PDF"><navtitle>Administration</navtitle></titlealts>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="Administrators"/>
+      <!-- Although there is a reasonable amount of info on the page, it could be better to use wiki-style embedding instead of linking hither and thither. -->
+      <data name="Category" value="Stub Pages"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+    <p>
+      As an administrator, you monitor Impala's use of resources and take action when necessary to keep Impala
+      running smoothly and avoid conflicts with other Hadoop components running on the same cluster. When you
+      detect that an issue has happened or could happen in the future, you reconfigure Impala or other components
+      such as HDFS or even the hardware of the cluster itself to resolve or avoid problems.
+    </p>
+
+    <p outputclass="toc"/>
+
+    <p>
+      <b>Related tasks:</b>
+    </p>
+
+    <p>
+      As an administrator, you can expect to perform installation, upgrade, and configuration tasks for Impala on
+      all machines in a cluster. See <xref href="impala_install.xml#install"/>,
+      <xref href="impala_upgrading.xml#upgrading"/>, and <xref href="impala_config.xml#config"/> for details.
+    </p>
+
+    <p>
+      For security tasks typically performed by administrators, see <xref href="impala_security.xml#security"/>.
+    </p>
+
+    <p>
+      Administrators also decide how to allocate cluster resources so that all Hadoop components can run smoothly
+      together. For Impala, this task primarily involves:
+      <ul>
+        <li>
+          Deciding how many Impala queries can run concurrently and with how much memory, through the admission
+          control feature. See <xref href="impala_admission.xml#admission_control"/> for details.
+        </li>
+
+        <li>
+          Dividing cluster resources such as memory between Impala and other components, using YARN for overall
+          resource management, and Llama to mediate resource requests from Impala to YARN. See
+          <xref href="impala_resource_management.xml#resource_management"/> for details.
+        </li>
+      </ul>
+    </p>
+
+<!-- <p conref="../shared/impala_common.xml#common/impala_mr"/> -->
+  </conbody>
+</concept>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1fcc8cee/docs/topics/impala_auditing.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_auditing.xml b/docs/topics/impala_auditing.xml
new file mode 100644
index 0000000..6332957
--- /dev/null
+++ b/docs/topics/impala_auditing.xml
@@ -0,0 +1,260 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="auditing">
+
+  <title>Auditing Impala Operations</title>
+  <titlealts audience="PDF"><navtitle>Auditing</navtitle></titlealts>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="Auditing"/>
+      <data name="Category" value="Governance"/>
+      <data name="Category" value="Navigator"/>
+      <data name="Category" value="Security"/>
+      <data name="Category" value="Administrators"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+    <p>
+      To monitor how Impala data is being used within your organization, ensure that your Impala authorization and
+      authentication policies are effective, and detect attempts at intrusion or unauthorized access to Impala
+      data, you can use the auditing feature in Impala 1.2.1 and higher:
+    </p>
+
+    <ul>
+      <li>
+        Enable auditing by including the option <codeph>-audit_event_log_dir=<varname>directory_path</varname></codeph>
+        in your <cmdname>impalad</cmdname> startup options for a cluster not managed by Cloudera Manager, or
+        <xref audience="integrated" href="cn_iu_audit_log.xml#xd_583c10bfdbd326ba--6eed2fb8-14349d04bee--7d6f/section_v25_lmy_bn">configuring Impala Daemon logging in Cloudera Manager</xref><xref audience="standalone" href="http://www.cloudera.com/documentation/enterprise/latest/topics/cn_iu_service_audit.html" scope="external" format="html">configuring Impala Daemon logging in Cloudera Manager</xref>.
+        The log directory must be a local directory on the
+        server, not an HDFS directory.
+      </li>
+
+      <li>
+        Decide how many queries will be represented in each audit log file. By default, Impala starts a new log file
+        every 5000 queries. To specify a different number, <ph
+          audience="standalone"
+          >include
+        the option <codeph>-max_audit_event_log_file_size=<varname>number_of_queries</varname></codeph> in the
+        <cmdname>impalad</cmdname> startup
+        options, as shown in the example after this list</ph><xref
+          href="cn_iu_audit_log.xml#xd_583c10bfdbd326ba--6eed2fb8-14349d04bee--7d6f/section_v25_lmy_bn"
+            audience="integrated"
+            >configure
+        Impala Daemon logging in Cloudera Manager</xref>.
+      </li>
+
+      <li> Configure Cloudera Navigator to collect and consolidate the audit
+        logs from all the hosts in the cluster. </li>
+
+      <li>
+        Use Cloudera Navigator or Cloudera Manager to filter, visualize, and produce reports based on the audit
+        data. (The Impala auditing feature works with Cloudera Manager 4.7 to 5.1 and Cloudera Navigator 2.1 and
+        higher.) Check the audit data to ensure that all activity is authorized and detect attempts at
+        unauthorized access.
+      </li>
+    </ul>
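+
+    <p>
+      As an illustration only: on a cluster not managed by Cloudera Manager, the audit options might be appended to
+      the <codeph>IMPALA_SERVER_ARGS</codeph> setting in <filepath>/etc/default/impala</filepath>. The audit
+      directory shown here is a placeholder; substitute any suitable local (non-HDFS) directory.
+    </p>
+
+<codeblock># The audit directory below is only an example path.
+export IMPALA_SERVER_ARGS=${IMPALA_SERVER_ARGS:- \
+    -log_dir=${IMPALA_LOG_DIR} \
+    -state_store_port=${IMPALA_STATE_STORE_PORT} \
+    -use_statestore -state_store_host=${IMPALA_STATE_STORE_HOST} \
+    -be_port=${IMPALA_BACKEND_PORT} \
+    -audit_event_log_dir=/var/log/impala/audit \
+    -max_audit_event_log_file_size=5000}</codeblock>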
+
+    <p outputclass="toc inpage"/>
+  </conbody>
+
+  <concept id="auditing_performance">
+
+    <title>Durability and Performance Considerations for Impala Auditing</title>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Performance"/>
+    </metadata>
+  </prolog>
+
+    <conbody>
+
+      <p>
+        The auditing feature only imposes performance overhead while auditing is enabled.
+      </p>
+
+      <p>
+        Because any Impala host can process a query, enable auditing on all hosts where the
+        <ph audience="standalone"><cmdname>impalad</cmdname> daemon</ph>
+        <ph audience="integrated">Impala Daemon role</ph> runs. Each host stores its own log
+        files, in a directory in the local filesystem. The log data is periodically flushed to disk (through an
+        <codeph>fsync()</codeph> system call) to avoid loss of audit data in case of a crash.
+      </p>
+
+      <p> The runtime overhead of auditing applies to whichever host serves as the coordinator for the query, that is, the host you connect to when you issue the query. This might be the same host for all queries, or different applications or users might connect to and issue queries through different hosts. </p>
+
+      <p> To avoid excessive I/O overhead on busy coordinator hosts, Impala syncs the audit log data (using the <codeph>fsync()</codeph> system call) periodically rather than after every query. Currently, the <codeph>fsync()</codeph> calls are issued at a fixed interval, every 5 seconds. </p>
+
+      <p>
+        By default, Impala avoids losing any audit log data in the case of an error during a logging operation
+        (such as a disk full error), by immediately shutting down
+        <cmdname audience="standalone">impalad</cmdname><ph audience="integrated">the Impala
+        Daemon role</ph> on the host where the auditing problem occurred.
+        <ph audience="standalone">You can override this setting by specifying the option
+        <codeph>-abort_on_failed_audit_event=false</codeph> in the <cmdname>impalad</cmdname> startup options.</ph>
+      </p>
+    </conbody>
+  </concept>
+
+  <concept id="auditing_format">
+
+    <title>Format of the Audit Log Files</title>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Logs"/>
+    </metadata>
+  </prolog>
+
+    <conbody>
+
+      <p> The audit log files represent the query information in JSON format, one query per line. Typically, rather than looking at the log files themselves, you use the Cloudera Navigator product to consolidate the log data from all Impala hosts and filter and visualize the results in useful ways. (If you do examine the raw log data, you might run the files through a JSON pretty-printer first.) </p>
+
+      <p>
+        All the information about schema objects accessed by the query is encoded in a single nested record on the
+        same line. For example, the audit log for an <codeph>INSERT ... SELECT</codeph> statement records that a
+        select operation occurs on the source table and an insert operation occurs on the destination table. The
+        audit log for a query against a view records the base table accessed by the view, or multiple base tables
+        in the case of a view that includes a join query. Every Impala operation that corresponds to a SQL
+        statement is recorded in the audit logs, whether the operation succeeds or fails. Impala records more
+        information for a successful operation than for a failed one, because an unauthorized query is stopped
+        immediately, before all the query planning is completed.
+      </p>
+
+<!-- Opportunity to conref at the phrase level here... the content of this paragraph is the same as part
+     of a list bullet earlier on. -->
+
+      <p>
+        The information logged for each query includes:
+      </p>
+
+      <ul>
+        <li>
+          Client session state:
+          <ul>
+            <li>
+              Session ID
+            </li>
+
+            <li>
+              User name
+            </li>
+
+            <li>
+              Network address of the client connection
+            </li>
+          </ul>
+        </li>
+
+        <li>
+          SQL statement details:
+          <ul>
+            <li>
+              Query ID
+            </li>
+
+            <li>
+              Statement Type - DML, DDL, and so on
+            </li>
+
+            <li>
+              SQL statement text
+            </li>
+
+            <li>
+              Execution start time, in local time
+            </li>
+
+            <li>
+              Execution Status - Details on any errors that were encountered
+            </li>
+
+            <li>
+              Target Catalog Objects:
+              <ul>
+                <li>
+                  Object Type - Table, View, or Database
+                </li>
+
+                <li>
+                  Fully qualified object name
+                </li>
+
+                <li>
+                  Privilege - How the object is being used (<codeph>SELECT</codeph>, <codeph>INSERT</codeph>,
+                  <codeph>CREATE</codeph>, and so on)
+                </li>
+              </ul>
+            </li>
+          </ul>
+        </li>
+      </ul>
+
+<!-- Delegating actual examples to the Cloudera Navigator doc for the moment.
+<p>
+Here is an excerpt from a sample audit log file:
+</p>
+<codeblock></codeblock>
+-->
+    </conbody>
+  </concept>
+
+  <concept id="auditing_exceptions">
+
+    <title>Which Operations Are Audited</title>
+
+    <conbody>
+
+      <p>
+        The kinds of SQL queries represented in the audit log are:
+      </p>
+
+      <ul>
+        <li>
+          Queries that are prevented due to lack of authorization.
+        </li>
+
+        <li>
+          Queries that Impala can parse and analyze to determine that they are authorized. The audit data is
+          recorded immediately after Impala finishes its analysis, before the query is actually executed.
+        </li>
+      </ul>
+
+      <p>
+        The audit log does not contain entries for queries that could not be parsed and analyzed. For example, a
+        query that fails due to a syntax error is not recorded in the audit log. The audit log also does not
+        contain queries that fail due to a reference to a table that does not exist, if you would be authorized to
+        access the table if it did exist.
+      </p>
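+
+      <p>
+        As a sketch of the distinction (the table name here is only a placeholder):
+      </p>
+
+<codeblock>-- Recorded in the audit log: the statement parses and is analyzed,
+-- so an entry is written whether or not it is authorized.
+SELECT count(*) FROM sales.transactions;
+
+-- Not recorded: the statement fails to parse because of the syntax error.
+SELECT count(* FROM sales.transactions;</codeblock>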
+
+      <p>
+        Certain statements in the <cmdname>impala-shell</cmdname> interpreter, such as <codeph>CONNECT</codeph>,
+        <codeph rev="1.4.0">SUMMARY</codeph>, <codeph>PROFILE</codeph>, <codeph>SET</codeph>, and
+        <codeph>QUIT</codeph>, do not correspond to actual SQL queries, and these statements are not reflected in
+        the audit log.
+      </p>
+    </conbody>
+  </concept>
+
+  <concept id="auditing_reviewing">
+
+    <title>Reviewing the Audit Logs</title>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Logs"/>
+    </metadata>
+  </prolog>
+
+    <conbody>
+
+      <p>
+        You typically do not review the audit logs in raw form. The Cloudera Manager Agent periodically transfers
+        the log information into a back-end database where it can be examined in consolidated form. See
+        <ph audience="standalone">the <xref href="http://www.cloudera.com/content/cloudera-content/cloudera-docs/Navigator/latest/Cloudera-Navigator-Installation-and-User-Guide/Cloudera-Navigator-Installation-and-User-Guide.html"
+            scope="external" format="html">Cloudera Navigator documentation</xref> for details</ph>
+            <xref href="cn_iu_audits.xml#cn_topic_7" audience="integrated" />.
+      </p>
+    </conbody>
+  </concept>
+</concept>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1fcc8cee/docs/topics/impala_authentication.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_authentication.xml b/docs/topics/impala_authentication.xml
new file mode 100644
index 0000000..7200e5f
--- /dev/null
+++ b/docs/topics/impala_authentication.xml
@@ -0,0 +1,39 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="authentication">
+
+  <title>Impala Authentication</title>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Security"/>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="Authentication"/>
+      <data name="Category" value="Administrators"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+    <p>
+      Authentication is the mechanism to ensure that only specified hosts and users can connect to Impala. It also
+      verifies that when clients connect to Impala, they are connected to a legitimate server. This feature
+      prevents spoofing such as <term>impersonation</term> (setting up a phony client system with the same account
+      and group names as a legitimate user) and <term>man-in-the-middle attacks</term> (intercepting application
+      requests before they reach Impala and eavesdropping on sensitive information in the requests or the results).
+    </p>
+
+    <p>
+      Impala supports authentication using either Kerberos or LDAP.
+    </p>
+
+    <note conref="../shared/impala_common.xml#common/authentication_vs_authorization"/>
+
+    <p outputclass="toc"/>
+
+    <p>
+      Once you are finished setting up authentication, move on to authorization, which involves specifying what
+      databases, tables, HDFS directories, and so on can be accessed by particular users when they connect through
+      Impala. See <xref href="impala_authorization.xml#authorization"/> for details.
+    </p>
+  </conbody>
+</concept>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1fcc8cee/docs/topics/impala_cluster_sizing.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_cluster_sizing.xml b/docs/topics/impala_cluster_sizing.xml
new file mode 100644
index 0000000..382f68c
--- /dev/null
+++ b/docs/topics/impala_cluster_sizing.xml
@@ -0,0 +1,353 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="cluster_sizing">
+
+  <title>Cluster Sizing Guidelines for Impala</title>
+  <titlealts audience="PDF"><navtitle>Cluster Sizing</navtitle></titlealts>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="Clusters"/>
+      <data name="Category" value="Planning"/>
+      <data name="Category" value="Sizing"/>
+      <data name="Category" value="Deploying"/>
+      <!-- Hoist by my own petard. Memory is an important theme of this topic but that's in a <section> title. -->
+      <data name="Category" value="Sectionated Pages"/>
+      <data name="Category" value="Memory"/>
+      <data name="Category" value="Scalability"/>
+      <data name="Category" value="Proof of Concept"/>
+      <data name="Category" value="Requirements"/>
+      <data name="Category" value="Guidelines"/>
+      <data name="Category" value="Best Practices"/>
+      <data name="Category" value="Administrators"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+    <p>
+      <indexterm audience="Cloudera">cluster sizing</indexterm>
+      This document provides a very rough guideline to estimate the size of a cluster needed for a specific
+      customer application. You can use this information when planning how much and what type of hardware to
+      acquire for a new cluster, or when adding Impala workloads to an existing cluster.
+    </p>
+
+    <note>
+      Before making purchase or deployment decisions, consult your Cloudera representative to verify the
+      conclusions about hardware requirements based on your data volume and workload.
+    </note>
+
+<!--    <p outputclass="toc inpage"/> -->
+
+    <p>
+      Always use hosts with identical specifications and capacities for all the nodes in the cluster. Currently,
+      Impala divides the work evenly between cluster nodes, regardless of their exact hardware configuration.
+      Because work can be distributed in different ways for different queries, if some hosts are overloaded
+      compared to others in terms of CPU, memory, I/O, or network, you might experience inconsistent performance
+      and overall slowness
+    </p>
+
+    <p>
+      For analytic workloads with star/snowflake schemas, and using consistent hardware for all nodes (64 GB RAM,
+      12 x 2 TB hard drives, 2 x E5-2630L CPUs with 12 cores total, 10 Gb network), the following table estimates the number of
+      DataNodes needed in the cluster based on data size and the number of concurrent queries, for workloads
+      similar to TPC-DS benchmark queries:
+    </p>
+
+    <table>
+      <title>Cluster size estimation based on the number of concurrent queries and data size with a 20 second average query response time</title>
+      <tgroup cols="6">
+        <colspec colnum="1" colname="col1"/>
+        <colspec colnum="2" colname="col2"/>
+        <colspec colnum="3" colname="col3"/>
+        <colspec colnum="4" colname="col4"/>
+        <colspec colnum="5" colname="col5"/>
+        <colspec colnum="6" colname="col6"/>
+        <thead>
+          <row>
+            <entry>
+              Data Size
+            </entry>
+            <entry>
+              1 query
+            </entry>
+            <entry>
+              10 queries
+            </entry>
+            <entry>
+              100 queries
+            </entry>
+            <entry>
+              1000 queries
+            </entry>
+            <entry>
+              2000 queries
+            </entry>
+          </row>
+        </thead>
+        <tbody>
+          <row>
+            <entry>
+              <b>250 GB</b>
+            </entry>
+            <entry>
+              2
+            </entry>
+            <entry>
+              2
+            </entry>
+            <entry>
+              5
+            </entry>
+            <entry>
+              35
+            </entry>
+            <entry>
+              70
+            </entry>
+          </row>
+          <row>
+            <entry>
+              <b>500 GB</b>
+            </entry>
+            <entry>
+              2
+            </entry>
+            <entry>
+              2
+            </entry>
+            <entry>
+              10
+            </entry>
+            <entry>
+              70
+            </entry>
+            <entry>
+              135
+            </entry>
+          </row>
+          <row>
+            <entry>
+              <b>1 TB</b>
+            </entry>
+            <entry>
+              2
+            </entry>
+            <entry>
+              2
+            </entry>
+            <entry>
+              15
+            </entry>
+            <entry>
+              135
+            </entry>
+            <entry>
+              270
+            </entry>
+          </row>
+          <row>
+            <entry>
+              <b>15 TB</b>
+            </entry>
+            <entry>
+              2
+            </entry>
+            <entry>
+              20
+            </entry>
+            <entry>
+              200
+            </entry>
+            <entry>
+              N/A
+            </entry>
+            <entry>
+              N/A
+            </entry>
+          </row>
+          <row>
+            <entry>
+              <b>30 TB</b>
+            </entry>
+            <entry>
+              4
+            </entry>
+            <entry>
+              40
+            </entry>
+            <entry>
+              400
+            </entry>
+            <entry>
+              N/A
+            </entry>
+            <entry>
+              N/A
+            </entry>
+          </row>
+          <row>
+            <entry>
+              <b>60 TB</b>
+            </entry>
+            <entry>
+              8
+            </entry>
+            <entry>
+              80
+            </entry>
+            <entry>
+              800
+            </entry>
+            <entry>
+              N/A
+            </entry>
+            <entry>
+              N/A
+            </entry>
+          </row>
+        </tbody>
+      </tgroup>
+    </table>
+
+    <section id="sizing_factors">
+
+      <title>Factors Affecting Scalability</title>
+
+      <p>
+        A typical analytic workload (TPC-DS style queries) using recommended hardware is usually CPU-bound. Each
+        node can process roughly 1.6 GB/sec. Both CPU-bound and disk-bound workloads can scale almost linearly with
+        cluster size. However, for some workloads, the scalability might be bounded by the network, or even by
+        memory.
+      </p>
+
+      <p>
+        If the workload is already network-bound (on a 10 Gb network), increasing the cluster size will not reduce
+        the network load; in fact, a larger cluster could increase network traffic because some queries involve
+        <q>broadcast</q> operations to all DataNodes. Therefore, boosting the cluster size does not improve query
+        throughput in a network-constrained environment.
+      </p>
+
+      <p>
+        Let's look at a memory-bound workload. A workload is memory-bound if Impala cannot run any additional
+        concurrent queries because all memory allocated has already been consumed, but neither CPU, disk, nor
+        network is saturated yet. This can happen because currently Impala uses only a single core per node to
+        process join and aggregation queries. For a node with 128 GB of RAM, if a join node takes 50 GB, the system
+        cannot run more than 2 such queries at the same time.
+      </p>
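+
+      <p>
+        Spelled out for the example above:
+      </p>
+
+<codeblock>Concurrent join queries per node = Node memory / Memory per join
+                                 = 128 GB / 50 GB
+                                 = 2 (rounded down)
+</codeblock>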
+
+      <p>
+        Therefore, at most 2 cores are used. Throughput can still scale almost linearly even for a memory-bound
+        workload. It's just that the CPU will not be saturated. Per-node throughput will be lower than 1.6
+        GB/sec. Consider increasing the memory per node.
+      </p>
+
+      <p>
+        As long as the workload is not network- or memory-bound, we can use the 1.6 GB/second per node as the
+        throughput estimate.
+      </p>
+    </section>
+
+    <section id="sizing_details">
+
+      <title>A More Precise Approach</title>
+
+      <p>
+        A more precise sizing estimate would require not only queries per minute (QPM), but also an average data
+        size scanned per query (D). With the proper partitioning strategy, D is usually a fraction of the total
+        data size. The 100 GB divisor in the following equation corresponds roughly to the per-node processing rate
+        discussed above (1.6 GB/second, or about 100 GB per node per minute). The equation can be used as a rough
+        guide to estimate the number of nodes (N) needed:
+      </p>
+
+<codeblock>Eq 1: N &gt; QPM * D / 100 GB
+</codeblock>
+
+      <p>
+        Here is an example. Suppose, on average, a query scans 50 GB of data and the average response time is
+        required to be 15 seconds or less when there are 100 concurrent queries. The QPM is 100/15*60 = 400. We can
+        estimate the number of nodes using the equation above.
+      </p>
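+
+      <p>
+        The QPM figure comes from the concurrency and response-time requirements:
+      </p>
+
+<codeblock>QPM = concurrent queries * (60 seconds / average response time)
+    = 100 * (60 / 15)
+    = 400
+</codeblock>
+
+      <p>
+        Substituting these values into Eq 1:
+      </p>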
+
+<codeblock>N &gt; QPM * D / 100GB
+N &gt; 400 * 50GB / 100GB
+N &gt; 200
+</codeblock>
+
+      <p>
+        Because this figure is a rough estimate, the corresponding number of nodes could be between 100 and 500.
+      </p>
+
+      <p>
+        Depending on the complexity of the query, the processing rate might change. If the query has more
+        joins, aggregation functions, or CPU-intensive functions such as string processing or complex UDFs, the
+        processing rate will be lower than 1.6 GB/second per node. On the other hand, if the query only scans and
+        filters numeric columns, the processing rate can be higher.
+      </p>
+    </section>
+
+    <section id="sizing_mem_estimate">
+
+      <title>Estimating Memory Requirements</title>
+      <!--
+  <prolog>
+    <metadata>
+      <data name="Category" value="Memory"/>
+    </metadata>
+  </prolog>
+      -->
+
+      <p>
+        Impala can handle joins between multiple large tables. Make sure that statistics are collected for all the
+        joined tables, using the <codeph><xref href="impala_compute_stats.xml#compute_stats">COMPUTE
+        STATS</xref></codeph> statement. However, joining big tables does consume more memory. Follow the steps
+        below to calculate the minimum memory requirement.
+      </p>
+
+      <p>
+        Suppose you are running the following join:
+      </p>
+
+<codeblock>select a.*, b.col_1, b.col_2, ... b.col_n
+from a, b
+where a.key = b.key
+and b.col_1 in (1,2,4...)
+and b.col_4 in (....);
+</codeblock>
+
+      <p>
+        And suppose table <codeph>B</codeph> is smaller than table <codeph>A</codeph> (but still a large table).
+      </p>
+
+      <p>
+        The memory requirement for the query is that the right-hand table (<codeph>B</codeph>), after decompression,
+        filtering (<codeph>b.col_n in ...</codeph>), and projection (keeping only the referenced columns), must be
+        less than the total memory of the entire cluster.
+      </p>
+
+<codeblock>Cluster Total Memory Requirement  = Size of the smaller table *
+  selectivity factor from the predicate *
+  projection factor * compression ratio
+</codeblock>
+
+      <p>
+        In this case, assume that table <codeph>B</codeph> is 100 TB in Parquet format with 200 columns. The
+        predicate on <codeph>B</codeph> (<codeph>b.col_1 in ... and b.col_4 in ...</codeph>) selects only 10% of
+        the rows from <codeph>B</codeph>, and the projection keeps only 5 of the 200 columns. Snappy compression
+        typically gives about 3x compression, so the data expands by roughly 3x when decompressed in memory.
+      </p>
+
+<codeblock>Cluster Total Memory Requirement  = Size of the smaller table *
+  selectivity factor from the predicate *
+  projection factor * compression ratio
+  = 100TB * 10% * 5/200 * 3
+  = 0.75TB
+  = 750GB
+</codeblock>
+
+      <p>
+        So, if you have a 10-node cluster, each node has 128 GB of RAM, and you give 80% of that memory to Impala,
+        you have about 1 TB of usable memory for Impala, which is more than the 750 GB requirement. Therefore, your
+        cluster can handle join queries of this magnitude.
+      </p>
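+
+      <p>
+        Spelling out that check:
+      </p>
+
+<codeblock>Usable memory for Impala = 10 nodes * 128 GB per node * 80%
+                         = 1024 GB (about 1 TB), which is greater than the 750 GB requirement
+</codeblock>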
+    </section>
+  </conbody>
+</concept>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1fcc8cee/docs/topics/impala_cm_installation.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_cm_installation.xml b/docs/topics/impala_cm_installation.xml
new file mode 100644
index 0000000..ff8325d
--- /dev/null
+++ b/docs/topics/impala_cm_installation.xml
@@ -0,0 +1,56 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="cm_installation">
+
+  <title>Installing Impala with Cloudera Manager</title>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="Installing"/>
+      <data name="Category" value="Cloudera Manager"/>
+      <data name="Category" value="Administrators"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+    <p>
+      Before installing Impala through the Cloudera Manager interface, make sure all applicable nodes have the
+      appropriate hardware configuration and levels of operating system and CDH. See
+      <xref href="impala_prereqs.xml#prereqs"/> for details.
+    </p>
+
+    <note rev="1.2.0">
+      <p rev="1.2.0">
+        To install the latest Impala under CDH 4, upgrade Cloudera Manager to 4.8 or higher. Cloudera Manager 4.8 is
+        the first release that can manage the Impala catalog service introduced in Impala 1.2. Cloudera Manager 4.8
+        requires this service to be present, so if you upgrade to Cloudera Manager 4.8, also upgrade Impala to the
+        most recent version at the same time.
+<!-- Not so relevant now for 1.1.1, but maybe someday we'll capture all this history in a compatibility grid.
+        Upgrade to Cloudera Manager 4.6.2 or higher to enable Cloudera Manager to
+        handle access control for the Impala web UI, available by default through
+        port 25000 on each Impala host.
+        -->
+      </p>
+    </note>
+
+    <p>
+      For information on installing Impala in a Cloudera Manager-managed environment, see
+      <xref audience="integrated" href="cm_ig_install_impala.xml"/><xref audience="standalone" href="http://www.cloudera.com/documentation/enterprise/latest/topics/cm_ig_install_impala.html" scope="external" format="html"/>.
+    </p>
+
+    <p>
+      Managing your Impala installation through Cloudera Manager has a number of advantages. For example, when you
+      make configuration changes to CDH components using Cloudera Manager, it automatically applies changes to the
+      copies of configuration files, such as <codeph>hive-site.xml</codeph>, that Impala keeps under
+      <filepath>/etc/impala/conf</filepath>. It also sets up the Hive Metastore service that is required for
+      Impala running under CDH 4.1.
+    </p>
+
+    <p>
+      In some cases, depending on the level of Impala, CDH, and Cloudera Manager, you might need to add particular
+      component configuration details in some of the free-form option fields on the Impala configuration pages
+      within Cloudera Manager. <ph conref="../shared/impala_common.xml#common/safety_valve"/>
+    </p>
+  </conbody>
+</concept>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1fcc8cee/docs/topics/impala_concepts.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_concepts.xml b/docs/topics/impala_concepts.xml
new file mode 100644
index 0000000..48b3637
--- /dev/null
+++ b/docs/topics/impala_concepts.xml
@@ -0,0 +1,295 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="concepts">
+
+  <title>Impala Concepts and Architecture</title>
+  <titlealts audience="PDF"><navtitle>Concepts and Architecture</navtitle></titlealts>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="Concepts"/>
+      <data name="Category" value="Data Analysts"/>
+      <data name="Category" value="Developers"/>
+      <data name="Category" value="Stub Pages"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+    <draft-comment author="-dita-use-conref-target"
+      conref="../shared/cdh_cm_common.xml#id_dgz_rhr_kv/draft-comment-test"/>
+    <p>
+      The following sections provide background information to help you become productive using Impala and
+      its features. Where appropriate, the explanations include context to help understand how aspects of Impala
+      relate to other technologies you might already be familiar with, such as relational database management
+      systems and data warehouses, or other Hadoop components such as Hive, HDFS, and HBase.
+    </p>
+
+    <p outputclass="toc"/>
+  </conbody>
+
+<!-- These other topics are waiting to be filled in. Could become subtopics or top-level topics depending on the depth of coverage in each case. -->
+
+  <concept id="intro_data_lifecycle" audience="Cloudera">
+
+    <title>Overview of the Data Lifecycle for Impala</title>
+
+    <conbody/>
+  </concept>
+
+  <concept id="intro_etl" audience="Cloudera">
+
+    <title>Overview of the Extract, Transform, Load (ETL) Process for Impala</title>
+  <prolog>
+    <metadata>
+      <data name="Category" value="ETL"/>
+      <data name="Category" value="Ingest"/>
+      <data name="Category" value="Concepts"/>
+    </metadata>
+  </prolog>
+
+    <conbody/>
+  </concept>
+
+  <concept id="intro_hadoop_data" audience="Cloudera">
+
+    <title>How Impala Works with Hadoop Data Files</title>
+
+    <conbody/>
+  </concept>
+
+  <concept id="intro_web_ui" audience="Cloudera">
+
+    <title>Overview of the Impala Web Interface</title>
+
+    <conbody/>
+  </concept>
+
+  <concept id="intro_bi" audience="Cloudera">
+
+    <title>Using Impala with Business Intelligence Tools</title>
+
+    <conbody/>
+  </concept>
+
+  <concept id="intro_ha" audience="Cloudera">
+
+    <title>Overview of Impala Availability and Fault Tolerance</title>
+
+    <conbody/>
+  </concept>
+
+<!-- This is pretty much ready to go. Decide if it should go under "Concepts" or "Performance",
+     and if it should be split out into a separate file, and then take out the audience= attribute
+     to make it visible.
+-->
+
+  <concept id="intro_llvm" audience="Cloudera">
+
+    <title>Overview of Impala Runtime Code Generation</title>
+
+    <conbody>
+
+<!-- Adapted from the CIDR15 paper written by the Impala team. -->
+
+      <p>
+        Impala uses <term>LLVM</term> (a compiler library and collection of related tools) to perform just-in-time
+        (JIT) compilation within the running <cmdname>impalad</cmdname> process. This runtime code generation
+        technique improves query execution times by generating native code optimized for the architecture of each
+        host in your particular cluster. Performance gains of 5 times or more are typical for representative
+        workloads.
+      </p>
+
+      <p>
+        Impala uses runtime code generation to produce query-specific versions of functions that are critical to
+        performance. In particular, code generation is applied to <term>inner loop</term> functions, that is, those
+        that are executed many times (for every tuple) in a given query, and thus constitute a large portion of the
+        total time the query takes to execute. For example, when Impala scans a data file, it calls a function to
+        parse each record into Impala's in-memory tuple format. For queries scanning large tables, billions of
+        records could result in billions of function calls. This function must therefore be extremely efficient for
+        good query performance, and removing even a few instructions from each function call can result in large
+        query speedups.
+      </p>
+
+      <p>
+        Overall, JIT compilation has an effect similar to writing custom code to process a query. For example, it
+        eliminates branches, unrolls loops, propagates constants, offsets and pointers, and inlines functions.
+        Inlining is especially valuable for functions used internally to evaluate expressions, where the function
+        call itself is more expensive than the function body (for example, a function that adds two numbers).
+        Inlining functions also increases instruction-level parallelism, and allows the compiler to make further
+        optimizations such as subexpression elimination across expressions.
+      </p>
+
+      <p>
+        Impala generates runtime query code automatically, so you do not need to do anything special to get this
+        performance benefit. This technique is most effective for complex and long-running queries that process
+        large numbers of rows. If you need to issue a series of short, small queries, you might turn off this
+        feature to avoid the overhead of compilation time for each query. In this case, issue the statement
+        <codeph>SET DISABLE_CODEGEN=true</codeph> to turn off runtime code generation for the duration of the
+        current session.
+      </p>
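+
+      <p>
+        For example, in an <cmdname>impala-shell</cmdname> session (the table and filter here are only
+        placeholders to illustrate the session-level setting):
+      </p>
+
+<codeblock>-- Turn off runtime code generation for a series of short queries in this session.
+SET DISABLE_CODEGEN=true;
+SELECT count(*) FROM web_logs WHERE user_id = 42;</codeblock>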
+
+<!--
+      <p>
+        Without code generation,
+        functions tend to be suboptimal
+        to handle situations that cannot be predicted in advance.
+        For example,
+        a record-parsing function that
+        only handles integer types will be faster at parsing an integer-only file
+        than a function that handles other data types
+        such as strings and floating-point numbers.
+        However, the schemas of the files to
+        be scanned are unknown at compile time,
+        and so a general-purpose function must be used, even if at runtime
+        it is known that more limited functionality is sufficient.
+      </p>
+
+      <p>
+        A source of large runtime overheads are virtual functions. Virtual function calls incur a large performance
+        penalty, particularly when the called function is very simple, as the calls cannot be inlined.
+        If the type of the object instance is known at runtime, we can use code generation to replace the virtual
+        function call with a call directly to the correct function, which can then be inlined. This is especially
+        valuable when evaluating expression trees. In Impala (as in many systems), expressions are composed of a
+        tree of individual operators and functions.
+      </p>
+
+      <p>
+        Each type of expression that can appear in a query is implemented internally by overriding a virtual function.
+        Many of these expression functions are quite simple, for example, adding two numbers.
+        The virtual function call can be more expensive than the function body itself. By resolving the virtual
+        function calls with code generation and then inlining the resulting function calls, Impala can evaluate expressions
+        directly with no function call overhead. Inlining functions also increases
+        instruction-level parallelism, and allows the compiler to make further optimizations such as subexpression
+        elimination across expressions.
+      </p>
+-->
+    </conbody>
+  </concept>
+
+<!-- Same as the previous section: adapted from CIDR paper, ready to externalize after deciding where to go. -->
+
+  <concept audience="Cloudera" id="intro_io">
+
+    <title>Overview of Impala I/O</title>
+
+    <conbody>
+
+      <p>
+        Efficiently retrieving data from HDFS is a challenge for all SQL-on-Hadoop systems. To perform
+        data scans from both disk and memory at or near hardware speed, Impala uses an HDFS feature called
+        <term>short-circuit local reads</term> to bypass the DataNode protocol when reading from local disk. Impala
+        can read at almost disk bandwidth (approximately 100 MB/s per disk) and is typically able to saturate all
+        available disks. For example, with 12 disks, Impala is typically capable of sustaining I/O at 1.2 GB/sec.
+        Furthermore, <term>HDFS caching</term> allows Impala to access memory-resident data at memory bus speed,
+        and saves CPU cycles as there is no need to copy or checksum data blocks within memory.
+      </p>
+
+      <p>
+        The I/O manager component interfaces with storage devices to read and write data. The I/O manager assigns a
+        fixed number of worker threads per physical disk (currently one thread per rotational disk and eight per
+        SSD), providing an asynchronous interface to clients (<term>scanner threads</term>).
+      </p>
+    </conbody>
+  </concept>
+
+<!-- Same as the previous section: adapted from CIDR paper, ready to externalize after deciding where to go. -->
+
+<!-- Although good idea to get some answers from Henry first. -->
+
+  <concept audience="Cloudera" id="intro_state_distribution">
+
+    <title>State distribution</title>
+
+    <conbody>
+
+      <p>
+        As a massively parallel database that can run on hundreds of nodes, Impala must coordinate and synchronize
+        its metadata across the entire cluster. Impala's symmetric-node architecture means that any node can accept
+        and execute queries, and thus each node needs up-to-date versions of the system catalog and a knowledge of
+        which hosts the <cmdname>impalad</cmdname> daemons run on. To avoid the overhead of TCP connections and
+        remote procedure calls to retrieve metadata during query planning, Impala implements a simple
+        publish-subscribe service called the <term>statestore</term> to push metadata changes to a set of
+        subscribers (the <cmdname>impalad</cmdname> daemons running on all the DataNodes).
+      </p>
+
+      <p>
+        The statestore maintains a set of topics, which are arrays of <codeph>(<varname>key</varname>,
+        <varname>value</varname>, <varname>version</varname>)</codeph> triplets called <term>entries</term> where
+        <varname>key</varname> and <varname>value</varname> are byte arrays, and <varname>version</varname> is a
+        64-bit integer. A topic is defined by an application, and so the statestore has no understanding of the
+        contents of any topic entry. Topics are persistent through the lifetime of the statestore, but are not
+        persisted across service restarts. Processes that receive updates to any topic are called
+        <term>subscribers</term>, and express their interest by registering with the statestore at startup and
+        providing a list of topics. The statestore responds to registration by sending the subscriber an initial
+        topic update for each registered topic, which consists of all the entries currently in that topic.
+      </p>
+
+<!-- Henry: OK, but in practice, what is in these topic messages for Impala? -->
+
+      <p>
+        After registration, the statestore periodically sends two kinds of messages to each subscriber. The first
+        kind of message is a topic update, and consists of all changes to a topic (new entries, modified entries
+        and deletions) since the last update was successfully sent to the subscriber. Each subscriber maintains a
+        per-topic most-recent-version identifier which allows the statestore to only send the delta between
+        updates. In response to a topic update, each subscriber sends a list of changes it intends to make to its
+        subscribed topics. Those changes are guaranteed to have been applied by the time the next update is
+        received.
+      </p>
+
+      <p>
+        The second kind of statestore message is a <term>heartbeat</term>, formerly sometimes called
+        <term>keepalive</term>. The statestore uses heartbeat messages to maintain the connection to each
+        subscriber, which would otherwise time out its subscription and attempt to re-register.
+      </p>
+
+      <p>
+        Prior to Impala 2.0, both kinds of communication were combined in a single kind of message. Because these
+        messages could be very large in instances with thousands of tables, partitions, data files, and so on,
+        Impala 2.0 and higher divides the types of messages so that the small heartbeat pings can be transmitted
+        and acknowledged quickly, increasing the reliability of the statestore mechanism that detects when Impala
+        nodes become unavailable.
+      </p>
+
+      <p>
+        If the statestore detects a failed subscriber (for example, by repeated failed heartbeat deliveries), it
+        stops sending updates to that node.
+<!-- Henry: what are examples of these transient topic entries? -->
+        Some topic entries are marked as transient, meaning that if their owning subscriber fails, they are
+        removed.
+      </p>
+
+      <p>
+        Although the asynchronous nature of this mechanism means that metadata updates might take some time to
+        propagate across the entire cluster, that does not affect the consistency of query planning or results.
+        Each query is planned and coordinated by a particular node, so as long as the coordinator node is aware of
+        the existence of the relevant tables, data files, and so on, it can distribute the query work to other
+        nodes even if those other nodes have not received the latest metadata updates.
+<!-- Henry: need another example here of what's in a topic, e.g. is it the list of available tables? -->
+<!--
+        For example, query planning is performed on a single node based on the
+        catalog metadata topic, and once a full plan has been computed, all information required to execute that
+        plan is distributed directly to the executing nodes.
+        There is no requirement that an executing node should
+        know about the same version of the catalog metadata topic.
+-->
+      </p>
+
+      <p>
+        We have found that the statestore process with default settings scales well to medium-sized clusters, and
+        can serve our largest deployments with some configuration changes.
+<!-- Henry: elaborate on the configuration changes. -->
+      </p>
+
+      <p>
+<!-- Henry: other examples like load information? How is load information used? -->
+        The statestore does not persist any metadata to disk: all current metadata is pushed to the statestore by
+        its subscribers (for example, load information). Therefore, should a statestore restart, its state can be
+        recovered during the initial subscriber registration phase. Or if the machine that the statestore is
+        running on fails, a new statestore process can be started elsewhere, and subscribers can fail over to it.
+        There is no built-in failover mechanism in Impala, instead deployments commonly use a retargetable DNS
+        entry to force subscribers to automatically move to the new process instance.
+<!-- Henry: translate that last sentence into instructions / guidelines. -->
+      </p>
+    </conbody>
+  </concept>
+</concept>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1fcc8cee/docs/topics/impala_config.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_config.xml b/docs/topics/impala_config.xml
new file mode 100644
index 0000000..7ea82e5
--- /dev/null
+++ b/docs/topics/impala_config.xml
@@ -0,0 +1,57 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="config">
+
+  <title>Managing Impala</title>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="Administrators"/>
+      <data name="Category" value="Configuring"/>
+      <data name="Category" value="JDBC"/>
+      <data name="Category" value="ODBC"/>
+      <data name="Category" value="Stub Pages"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+    <p>
+      This section explains how to configure Impala to accept connections from applications that use popular
+      programming APIs:
+    </p>
+
+    <ul>
+      <li>
+        <xref href="impala_config_performance.xml#config_performance"/>
+      </li>
+
+      <li>
+        <xref href="impala_odbc.xml#impala_odbc"/>
+      </li>
+
+      <li>
+        <xref href="impala_jdbc.xml#impala_jdbc"/>
+      </li>
+    </ul>
+
+    <p>
+      This type of configuration is especially useful when using Impala in combination with Business Intelligence
+      tools, which use these standard interfaces to query different kinds of database and Big Data systems.
+    </p>
+
+    <p>
+      You can also configure these other aspects of Impala:
+    </p>
+
+    <ul>
+      <li>
+        <xref href="impala_security.xml#security"/>
+      </li>
+
+      <li>
+        <xref href="impala_config_options.xml#config_options"/>
+      </li>
+    </ul>
+  </conbody>
+</concept>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1fcc8cee/docs/topics/impala_config_options.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_config_options.xml b/docs/topics/impala_config_options.xml
new file mode 100644
index 0000000..03f07d2
--- /dev/null
+++ b/docs/topics/impala_config_options.xml
@@ -0,0 +1,593 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="config_options">
+
+  <title>Modifying Impala Startup Options</title>
+
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="Configuring"/>
+      <data name="Category" value="Administrators"/>
+      <data name="Category" value="Developers"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+    <p>
+      <indexterm audience="Cloudera">defaults file</indexterm>
+
+      <indexterm audience="Cloudera">configuration file</indexterm>
+
+      <indexterm audience="Cloudera">options</indexterm>
+
+      <indexterm audience="Cloudera">IMPALA_STATE_STORE_PORT</indexterm>
+
+      <indexterm audience="Cloudera">IMPALA_BACKEND_PORT</indexterm>
+
+      <indexterm audience="Cloudera">IMPALA_LOG_DIR</indexterm>
+
+      <indexterm audience="Cloudera">IMPALA_STATE_STORE_ARGS</indexterm>
+
+      <indexterm audience="Cloudera">IMPALA_SERVER_ARGS</indexterm>
+
+      <indexterm audience="Cloudera">ENABLE_CORE_DUMPS</indexterm>
+
+      <indexterm audience="Cloudera">core dumps</indexterm>
+
+      <indexterm audience="Cloudera">restarting services</indexterm>
+
+      <indexterm audience="Cloudera">services</indexterm>
+      The configuration options for the Impala-related daemons let you choose which hosts and
+      ports to use for the services that run on a single host, specify directories for logging,
+      control resource usage and security, and specify other aspects of the Impala software.
+    </p>
+
+    <p outputclass="toc inpage"/>
+
+  </conbody>
+
+  <concept id="config_options_cm">
+
+    <title>Configuring Impala Startup Options through Cloudera Manager</title>
+
+    <conbody>
+
+      <p>
+        If you manage your cluster through Cloudera Manager, configure the settings for all the
+        Impala-related daemons by navigating to this page:
+        <menucascade><uicontrol>Clusters</uicontrol><uicontrol>Impala</uicontrol><uicontrol>Configuration</uicontrol><uicontrol>View
+        and Edit</uicontrol></menucascade>. See the Cloudera Manager documentation for
+        <xref href="http://www.cloudera.com/documentation/enterprise/latest/topics/cm_mc_impala_service.html" scope="external" format="html">instructions
+        about how to configure Impala through Cloudera Manager</xref>.
+      </p>
+
+      <p>
+        If the Cloudera Manager interface does not yet have a form field for a newly added
+        option, or if you need to use special options for debugging and troubleshooting, the
+        <uicontrol>Advanced</uicontrol> option page for each daemon includes one or more fields
+        where you can enter option names directly.
+        <ph conref="../shared/impala_common.xml#common/safety_valve"/> There is also a free-form
+        field for query options, on the top-level <uicontrol>Impala Daemon</uicontrol> options
+        page.
+      </p>
+
+    </conbody>
+
+  </concept>
+
+  <concept id="config_options_noncm">
+
+    <title>Configuring Impala Startup Options through the Command Line</title>
+
+    <conbody>
+
+      <p>
+        When you run Impala in a non-Cloudera Manager environment, the Impala server,
+        statestore, and catalog services start up using values provided in a defaults file,
+        <filepath>/etc/default/impala</filepath>.
+      </p>
+
+      <p>
+        This file includes information about many resources used by Impala. Most of the defaults
+        included in this file are appropriate for typical deployments. For example, typically you
+        would not change the definition of the <codeph>CLASSPATH</codeph> variable, but you
+        would always set the address used by the statestore server. Some of the content you
+        might modify includes:
+      </p>
+
+<!-- Note: Update the following example for each release with the associated lines from /etc/default/impala
+           from a non-CM-managed system. -->
+
+<codeblock rev="ver">IMPALA_STATE_STORE_HOST=127.0.0.1
+IMPALA_STATE_STORE_PORT=24000
+IMPALA_BACKEND_PORT=22000
+IMPALA_LOG_DIR=/var/log/impala
+IMPALA_CATALOG_SERVICE_HOST=...
+IMPALA_STATE_STORE_HOST=...
+
+export IMPALA_STATE_STORE_ARGS=${IMPALA_STATE_STORE_ARGS:- \
+    -log_dir=${IMPALA_LOG_DIR} -state_store_port=${IMPALA_STATE_STORE_PORT}}
+IMPALA_SERVER_ARGS=" \
+-log_dir=${IMPALA_LOG_DIR} \
+-catalog_service_host=${IMPALA_CATALOG_SERVICE_HOST} \
+-state_store_port=${IMPALA_STATE_STORE_PORT} \
+-use_statestore \
+-state_store_host=${IMPALA_STATE_STORE_HOST} \
+-be_port=${IMPALA_BACKEND_PORT}"
+export ENABLE_CORE_DUMPS=${ENABLE_COREDUMPS:-false}</codeblock>
+
+      <p>
+        To use alternate values, edit the defaults file, then restart all the Impala-related
+        services so that the changes take effect. Restart the Impala server using the following
+        commands:
+      </p>
+
+<codeblock>$ sudo service impala-server restart
+Stopping Impala Server:                                    [  OK  ]
+Starting Impala Server:                                    [  OK  ]</codeblock>
+
+      <p>
+        Restart the Impala statestore using the following commands:
+      </p>
+
+<codeblock>$ sudo service impala-state-store restart
+Stopping Impala State Store Server:                        [  OK  ]
+Starting Impala State Store Server:                        [  OK  ]</codeblock>
+
+      <p>
+        Restart the Impala catalog service using the following commands:
+      </p>
+
+<codeblock>$ sudo service impala-catalog restart
+Stopping Impala Catalog Server:                            [  OK  ]
+Starting Impala Catalog Server:                            [  OK  ]</codeblock>
+
+      <p>
+        Some common settings to change include:
+      </p>
+
+      <ul>
+        <li>
+          <p>
+            Statestore address. Cloudera recommends the statestore be on a separate host not
+            running the <cmdname>impalad</cmdname> daemon. In that recommended configuration,
+            the <cmdname>impalad</cmdname> daemon cannot refer to the statestore server using
+            the loopback address. If the statestore is hosted on a machine with an IP address of
+            192.168.0.27, change:
+          </p>
+<codeblock>IMPALA_STATE_STORE_HOST=127.0.0.1</codeblock>
+          <p>
+            to:
+          </p>
+<codeblock>IMPALA_STATE_STORE_HOST=192.168.0.27</codeblock>
+        </li>
+
+        <li rev="1.2">
+          <p>
+            Catalog server address (including both the hostname and the port number). Update the
+            value of the <codeph>IMPALA_CATALOG_SERVICE_HOST</codeph> variable. Cloudera
+            recommends the catalog server be on the same host as the statestore. In that
+            recommended configuration, the <cmdname>impalad</cmdname> daemon cannot refer to the
+            catalog server using the loopback address. If the catalog service is hosted on a
+            machine with an IP address of 192.168.0.27, add the following line:
+          </p>
+<codeblock>IMPALA_CATALOG_SERVICE_HOST=192.168.0.27:26000</codeblock>
+          <p>
+            The <filepath>/etc/default/impala</filepath> defaults file currently does not define
+            an <codeph>IMPALA_CATALOG_ARGS</codeph> environment variable, but if you add one it
+            will be recognized by the service startup/shutdown script. Add a definition for this
+            variable to <filepath>/etc/default/impala</filepath> and add the option
+            <codeph>-catalog_service_host=<varname>hostname</varname></codeph>. If the port is
+            different than the default 26000, also add the option
+            <codeph>-catalog_service_port=<varname>port</varname></codeph>.
+          </p>
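+          <p>
+            For example, a minimal sketch of such a definition, reusing the catalog server address
+            from the example above; the <codeph>-log_dir</codeph> entry simply mirrors the other
+            <codeph>*_ARGS</codeph> variables, and <codeph>-catalog_service_port</codeph> would be
+            added only for a non-default port:
+          </p>
+<codeblock>export IMPALA_CATALOG_ARGS=" -log_dir=${IMPALA_LOG_DIR} \
+    -catalog_service_host=192.168.0.27"</codeblock>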
+        </li>
+
+        <li id="mem_limit">
+          <p>
+            Memory limits. You can limit the amount of memory available to Impala. For example,
+            to allow Impala to use no more than 70% of system memory, change:
+          </p>
+<!-- Note: also needs to be updated for each release to reflect latest /etc/default/impala. -->
+<codeblock>export IMPALA_SERVER_ARGS=${IMPALA_SERVER_ARGS:- \
+    -log_dir=${IMPALA_LOG_DIR} \
+    -state_store_port=${IMPALA_STATE_STORE_PORT} \
+    -use_statestore -state_store_host=${IMPALA_STATE_STORE_HOST} \
+    -be_port=${IMPALA_BACKEND_PORT}}</codeblock>
+          <p>
+            to:
+          </p>
+<codeblock>export IMPALA_SERVER_ARGS=${IMPALA_SERVER_ARGS:- \
+    -log_dir=${IMPALA_LOG_DIR} -state_store_port=${IMPALA_STATE_STORE_PORT} \
+    -use_statestore -state_store_host=${IMPALA_STATE_STORE_HOST} \
+    -be_port=${IMPALA_BACKEND_PORT} -mem_limit=70%}</codeblock>
+          <p>
+            You can specify the memory limit using absolute notation such as
+            <codeph>500m</codeph> or <codeph>2G</codeph>, or as a percentage of physical memory
+            such as <codeph>60%</codeph>.
+          </p>
+
+          <note>
+            Queries that exceed the specified memory limit are aborted. Percentage limits are
+            based on the physical memory of the machine and do not consider cgroups.
+          </note>
+        </li>
+
+        <li>
+          <p>
+            Core dump enablement. To enable core dumps on systems not managed by Cloudera
+            Manager, change:
+          </p>
+<codeblock>export ENABLE_CORE_DUMPS=${ENABLE_COREDUMPS:-false}</codeblock>
+          <p>
+            to:
+          </p>
+<codeblock>export ENABLE_CORE_DUMPS=${ENABLE_COREDUMPS:-true}</codeblock>
+          <p>
+            On systems managed by Cloudera Manager, enable the <uicontrol>Enable Core
+            Dump</uicontrol> setting for the Impala service.
+          </p>
+
+          <note conref="../shared/impala_common.xml#common/core_dump_considerations"/>
+        </li>
+
+        <li>
+          <p>
+            Authorization using the open source Sentry plugin. Specify the
+            <codeph>-server_name</codeph> and <codeph>-authorization_policy_file</codeph>
+            options as part of the <codeph>IMPALA_SERVER_ARGS</codeph> and
+            <codeph>IMPALA_STATE_STORE_ARGS</codeph> settings to enable the core Impala support
+            for authorization. See <xref href="impala_authorization.xml#secure_startup"/> for
+            details.
+          </p>
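+          <p>
+            For example, a sketch of the extra options; the server name and policy file path shown
+            here are placeholders for values from your own Sentry setup:
+          </p>
+<codeblock>-server_name=server1 \
+-authorization_policy_file=/user/hive/warehouse/auth-policy.ini</codeblock>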
+        </li>
+
+        <li>
+          <p>
+            Auditing for successful or blocked Impala queries, another aspect of security.
+            Specify the <codeph>-audit_event_log_dir=<varname>directory_path</varname></codeph>
+            option and optionally the
+            <codeph>-max_audit_event_log_file_size=<varname>number_of_queries</varname></codeph>
+            and <codeph>-abort_on_failed_audit_event</codeph> options as part of the
+            <codeph>IMPALA_SERVER_ARGS</codeph> settings, for each Impala node, to enable and
+            customize auditing. See <xref href="impala_auditing.xml#auditing"/> for details.
+          </p>
+        </li>
+
+        <li>
+          <p>
+            Password protection for the Impala web UI, which listens on port 25000 by default.
+            This feature involves adding some or all of the
+            <codeph>--webserver_password_file</codeph>,
+            <codeph>--webserver_authentication_domain</codeph>, and
+            <codeph>--webserver_certificate_file</codeph> options to the
+            <codeph>IMPALA_SERVER_ARGS</codeph> and <codeph>IMPALA_STATE_STORE_ARGS</codeph>
+            settings. See <xref href="impala_security_guidelines.xml#security_guidelines"/> for
+            details.
+          </p>
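+          <p>
+            For example, a sketch with placeholder paths and domain; see the referenced topic for
+            the exact meaning and format of each option:
+          </p>
+<codeblock>--webserver_password_file=/etc/impala/conf/webserver-htpasswd \
+--webserver_authentication_domain=example.com \
+--webserver_certificate_file=/etc/impala/conf/webserver.pem</codeblock>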
+        </li>
+
+        <li id="default_query_options">
+          <p rev="DOCS-677">
+            Another setting you might add to <codeph>IMPALA_SERVER_ARGS</codeph> is a
+            comma-separated list of query options and values:
+<codeblock>-default_query_options='<varname>option</varname>=<varname>value</varname>,<varname>option</varname>=<varname>value</varname>,...'
+</codeblock>
+            These options control the behavior of queries performed by this
+            <cmdname>impalad</cmdname> instance. The option values you specify here override the
+            default values for <xref href="impala_query_options.xml#query_options">Impala query
+            options</xref>, as shown by the <codeph>SET</codeph> statement in
+            <cmdname>impala-shell</cmdname>.
+          </p>
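+          <p>
+            For example, a sketch that overrides two query options; the options shown are only
+            examples, and any option accepted by the <codeph>SET</codeph> statement can appear here:
+          </p>
+<codeblock>-default_query_options='mem_limit=2g,explain_level=2'</codeblock>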
+        </li>
+
+<!-- Removing this reference now that the options are de-emphasized / desupported in CDH 5.5 / Impala 2.3 and up.
+        <li rev="1.2">
+          <p>
+          Options for resource management, in conjunction with the YARN component. These options include
+          <codeph>-enable_rm</codeph> and <codeph>-cgroup_hierarchy_path</codeph>.
+          <ph rev="1.4.0">Additional options to help fine-tune the resource estimates are
+          <codeph>-\u2014rm_always_use_defaults</codeph>,
+          <codeph>-\u2014rm_default_memory=<varname>size</varname></codeph>, and
+          <codeph>-\u2014rm_default_cpu_cores</codeph>.</ph> For details about these options, see
+          <xref href="impala_resource_management.xml#rm_options"/>. See
+          <xref href="impala_resource_management.xml#resource_management"/> for information about resource
+          management in general.
+          </p>
+        </li>
+-->
+
+        <li>
+          <p>
+            During troubleshooting, Cloudera Support might direct you to change other values,
+            particularly for <codeph>IMPALA_SERVER_ARGS</codeph>, to work around issues or
+            gather debugging information.
+          </p>
+        </li>
+      </ul>
+
+<!-- Removing this reference now that the options are de-emphasized / desupported in CDH 5.5 / Impala 2.3 and up.
+      <p conref="impala_resource_management.xml#rm_options/resource_management_impalad_options"/>
+-->
+
+      <note>
+        <p>
+          These startup options for the <cmdname>impalad</cmdname> daemon are different from the
+          command-line options for the <cmdname>impala-shell</cmdname> command. For the
+          <cmdname>impala-shell</cmdname> options, see
+          <xref href="impala_shell_options.xml#shell_options"/>.
+        </p>
+      </note>
+
+      <p audience="Cloudera" outputclass="toc inpage"/>
+
+    </conbody>
+
+    <concept audience="Cloudera" id="config_options_impalad_details">
+
+      <title>Configuration Options for impalad Daemon</title>
+
+      <conbody>
+
+        <p>
+          Some common settings to change include:
+        </p>
+
+        <ul>
+          <li>
+            <p>
+              Statestore address. Cloudera recommends the statestore be on a separate host not
+              running the <cmdname>impalad</cmdname> daemon. In that recommended configuration,
+              the <cmdname>impalad</cmdname> daemon cannot refer to the statestore server using
+              the loopback address. If the statestore is hosted on a machine with an IP address
+              of 192.168.0.27, change:
+            </p>
+<codeblock>IMPALA_STATE_STORE_HOST=127.0.0.1</codeblock>
+            <p>
+              to:
+            </p>
+<codeblock>IMPALA_STATE_STORE_HOST=192.168.0.27</codeblock>
+          </li>
+
+          <li rev="1.2">
+            <p>
+              Catalog server address. Update the <codeph>IMPALA_CATALOG_SERVICE_HOST</codeph>
+              variable, including both the hostname and the port number in the value. Cloudera
+              recommends the catalog server be on the same host as the statestore. In that
+              recommended configuration, the <cmdname>impalad</cmdname> daemon cannot refer to
+              the catalog server using the loopback address. If the catalog service is hosted on
+              a machine with an IP address of 192.168.0.27, add the following line:
+            </p>
+<codeblock>IMPALA_CATALOG_SERVICE_HOST=192.168.0.27:26000</codeblock>
+            <p>
+              The <filepath>/etc/default/impala</filepath> defaults file currently does not
+              define an <codeph>IMPALA_CATALOG_ARGS</codeph> environment variable, but if you
+              add one it will be recognized by the service startup/shutdown script. Add a
+              definition for this variable to <filepath>/etc/default/impala</filepath> and add
+              the option <codeph>-catalog_service_host=<varname>hostname</varname></codeph>. If
+              the port is different than the default 26000, also add the option
+              <codeph>-catalog_service_port=<varname>port</varname></codeph>.
+            </p>
+          </li>
+
+          <li id="mem_limit">
+            Memory limits. You can limit the amount of memory available to Impala. For example,
+            to allow Impala to use no more than 70% of system memory, change:
+<!-- Note: also needs to be updated for each release to reflect latest /etc/default/impala. -->
+<codeblock>export IMPALA_SERVER_ARGS=${IMPALA_SERVER_ARGS:- \
+    -log_dir=${IMPALA_LOG_DIR} \
+    -state_store_port=${IMPALA_STATE_STORE_PORT} \
+    -use_statestore -state_store_host=${IMPALA_STATE_STORE_HOST} \
+    -be_port=${IMPALA_BACKEND_PORT}}</codeblock>
+            <p>
+              to:
+            </p>
+<codeblock>export IMPALA_SERVER_ARGS=${IMPALA_SERVER_ARGS:- \
+    -log_dir=${IMPALA_LOG_DIR} -state_store_port=${IMPALA_STATE_STORE_PORT} \
+    -use_statestore -state_store_host=${IMPALA_STATE_STORE_HOST} \
+    -be_port=${IMPALA_BACKEND_PORT} -mem_limit=70%}</codeblock>
+            <p>
+              You can specify the memory limit using absolute notation such as
+              <codeph>500m</codeph> or <codeph>2G</codeph>, or as a percentage of physical
+              memory such as <codeph>60%</codeph>.
+            </p>
+
+            <note>
+              Queries that exceed the specified memory limit are aborted. Percentage limits are
+              based on the physical memory of the machine and do not consider cgroups.
+            </note>
+          </li>
+
+          <li>
+            Core dump enablement. To enable core dumps, change:
+<codeblock>export ENABLE_CORE_DUMPS=${ENABLE_COREDUMPS:-false}</codeblock>
+            <p>
+              to:
+            </p>
+<codeblock>export ENABLE_CORE_DUMPS=${ENABLE_COREDUMPS:-true}</codeblock>
+            <note>
+              The location of core dump files may vary according to your operating system
+              configuration. Other security settings may prevent Impala from writing core dumps
+              even when this option is enabled.
+            </note>
+          </li>
+
+          <li>
+            Authorization using the open source Sentry plugin. Specify the
+            <codeph>-server_name</codeph> and <codeph>-authorization_policy_file</codeph>
+            options as part of the <codeph>IMPALA_SERVER_ARGS</codeph> and
+            <codeph>IMPALA_STATE_STORE_ARGS</codeph> settings to enable the core Impala support
+            for authorization. See <xref href="impala_authorization.xml#secure_startup"/> for
+            details.
+          </li>
+
+          <li>
+            Auditing for successful or blocked Impala queries, another aspect of security.
+            Specify the <codeph>-audit_event_log_dir=<varname>directory_path</varname></codeph>
+            option and optionally the
+            <codeph>-max_audit_event_log_file_size=<varname>number_of_queries</varname></codeph>
+            and <codeph>-abort_on_failed_audit_event</codeph> options as part of the
+            <codeph>IMPALA_SERVER_ARGS</codeph> settings, for each Impala node, to enable and
+            customize auditing. See <xref href="impala_auditing.xml#auditing"/> for details.
+          </li>
+
+          <li>
+            Password protection for the Impala web UI, which listens on port 25000 by default.
+            This feature involves adding some or all of the
+            <codeph>--webserver_password_file</codeph>,
+            <codeph>--webserver_authentication_domain</codeph>, and
+            <codeph>--webserver_certificate_file</codeph> options to the
+            <codeph>IMPALA_SERVER_ARGS</codeph> and <codeph>IMPALA_STATE_STORE_ARGS</codeph>
+            settings. See <xref href="impala_security_webui.xml"/> for details.
+          </li>
+
+          <li id="default_query_options">
+            Another setting you might add to <codeph>IMPALA_SERVER_ARGS</codeph> is:
+<codeblock>-default_query_options='<varname>option</varname>=<varname>value</varname>,<varname>option</varname>=<varname>value</varname>,...'
+</codeblock>
+            These options control the behavior of queries performed by this
+            <cmdname>impalad</cmdname> instance. The option values you specify here override the
+            default values for <xref href="impala_query_options.xml#query_options">Impala query
+            options</xref>, as shown by the <codeph>SET</codeph> statement in
+            <cmdname>impala-shell</cmdname>.
+          </li>
+
+<!-- Removing this reference now that the options are de-emphasized / desupported in CDH 5.5 / Impala 2.3 and up.
+          <li rev="1.2">
+            Options for resource management, in conjunction with the YARN component. These options
+            include <codeph>-enable_rm</codeph> and <codeph>-cgroup_hierarchy_path</codeph>.
+            <ph rev="1.4.0">Additional options to help fine-tune the resource estimates are
+            <codeph>-\u2014rm_always_use_defaults</codeph>,
+            <codeph>-\u2014rm_default_memory=<varname>size</varname></codeph>, and
+            <codeph>-\u2014rm_default_cpu_cores</codeph>.</ph> For details about these options, see
+            <xref href="impala_resource_management.xml#rm_options"/>. See
+            <xref href="impala_resource_management.xml#resource_management"/> for information about resource
+            management in general.
+          </li>
+-->
+
+          <li>
+            During troubleshooting, Cloudera Support might direct you to change other values,
+            particularly for <codeph>IMPALA_SERVER_ARGS</codeph>, to work around issues or
+            gather debugging information.
+          </li>
+        </ul>
+
+<!-- Removing this reference now that the options are de-emphasized / desupported in CDH 5.5 / Impala 2.3 and up.
+        <p conref="impala_resource_management.xml#rm_options/resource_management_impalad_options"/>
+-->
+
+        <note>
+          <p>
+            These startup options for the <cmdname>impalad</cmdname> daemon are different from
+            the command-line options for the <cmdname>impala-shell</cmdname> command. For the
+            <cmdname>impala-shell</cmdname> options, see
+            <xref href="impala_shell_options.xml#shell_options"/>.
+          </p>
+        </note>
+
+      </conbody>
+
+    </concept>
+
+    <concept audience="Cloudera" id="config_options_statestored_details">
+
+      <title>Configuration Options for statestored Daemon</title>
+
+      <conbody>
+
+        <p></p>
+
+      </conbody>
+
+    </concept>
+
+    <concept audience="Cloudera" id="config_options_catalogd_details">
+
+      <title>Configuration Options for catalogd Daemon</title>
+
+      <conbody>
+
+        <p></p>
+
+      </conbody>
+
+    </concept>
+
+  </concept>
+
+  <concept id="config_options_checking">
+
+    <title>Checking the Values of Impala Configuration Options</title>
+
+    <conbody>
+
+      <p>
+        You can check the current runtime value of all these settings through the Impala web
+        interface, available by default at
+        <codeph>http://<varname>impala_hostname</varname>:25000/varz</codeph> for the
+        <cmdname>impalad</cmdname> daemon,
+        <codeph>http://<varname>impala_hostname</varname>:25010/varz</codeph> for the
+        <cmdname>statestored</cmdname> daemon, or
+        <codeph>http://<varname>impala_hostname</varname>:25020/varz</codeph> for the
+        <cmdname>catalogd</cmdname> daemon. In the Cloudera Manager interface, you can see the
+        link to the appropriate <uicontrol><varname>service_name</varname> Web UI</uicontrol>
+        page when you look at the status page for a specific daemon on a specific host.
+      </p>
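+      <p>
+        For example, you can spot-check a single setting from the command line. This is a sketch;
+        substitute your own host name and the setting you are interested in:
+      </p>
+<codeblock>$ curl -s http://impala_hostname:25000/varz | grep mem_limit</codeblock>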
+
+    </conbody>
+
+  </concept>
+
+  <concept id="config_options_impalad">
+
+    <title>Startup Options for impalad Daemon</title>
+
+    <conbody>
+
+      <p>
+        The <cmdname>impalad</cmdname> daemon implements the main Impala service, which performs
+        query processing and reads and writes the data files.
+      </p>
+
+    </conbody>
+
+  </concept>
+
+  <concept id="config_options_statestored">
+
+    <title>Startup Options for statestored Daemon</title>
+
+    <conbody>
+
+      <p>
+        The <cmdname>statestored</cmdname> daemon implements the Impala statestore service,
+        which monitors the availability of Impala services across the cluster, and handles
+        situations such as nodes becoming unavailable or becoming available again.
+      </p>
+
+    </conbody>
+
+  </concept>
+
+  <concept rev="1.2" id="config_options_catalogd">
+
+    <title>Startup Options for catalogd Daemon</title>
+
+    <conbody>
+
+      <p>
+        The <cmdname>catalogd</cmdname> daemon implements the Impala catalog service, which
+        broadcasts metadata changes to all the Impala nodes when Impala creates a table, inserts
+        data, or performs other kinds of DDL and DML operations.
+      </p>
+
+      <p conref="../shared/impala_common.xml#common/load_catalog_in_background"/>
+
+    </conbody>
+
+  </concept>
+
+</concept>


[4/7] incubator-impala git commit: New files needed to make PDF build happy.

Posted by jr...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1fcc8cee/docs/topics/impala_kerberos.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_kerberos.xml b/docs/topics/impala_kerberos.xml
new file mode 100644
index 0000000..7c59185
--- /dev/null
+++ b/docs/topics/impala_kerberos.xml
@@ -0,0 +1,370 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="kerberos">
+
+  <title>Enabling Kerberos Authentication for Impala</title>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Security"/>
+      <data name="Category" value="Kerberos"/>
+      <data name="Category" value="Authentication"/>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="Configuring"/>
+      <data name="Category" value="Starting and Stopping"/>
+      <data name="Category" value="Administrators"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+    <p>
+      Impala supports Kerberos authentication. For more information on enabling Kerberos authentication, see the
+      topic on Configuring Hadoop Security in the
+      <xref href="http://www.cloudera.com/content/cloudera-content/cloudera-docs/CDH4/latest/CDH4-Security-Guide/cdh4sg_topic_3.html" scope="external" format="html">CDH4
+      Security Guide</xref> or the
+<!-- Original URL: http://www.cloudera.com/content/cloudera-content/cloudera-docs/CDH5/latest/CDH5-Security-Guide/cdh_sg_cdh5_hadoop_security.html -->
+      <xref href="http://www.cloudera.com/documentation/enterprise/latest/topics/cdh_sg_cdh5_hadoop_security.html" scope="external" format="html">CDH
+      5 Security Guide</xref>.
+    </p>
+
+    <p>
+      When using Impala in a managed environment, Cloudera Manager automatically completes Kerberos configuration.
+      In an unmanaged environment, create a Kerberos principal for each host running <cmdname>impalad</cmdname> or
+      <cmdname>statestored</cmdname>. Cloudera recommends using a consistent format, such as
+      <codeph>impala/_HOST@Your-Realm</codeph>, but you can use any three-part Kerberos server principal.
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/user_kerberized"/>
+
+    <note conref="../shared/impala_common.xml#common/authentication_vs_authorization"/>
+
+    <p outputclass="toc inpage"/>
+
+    <p>
+      An alternative form of authentication you can use is LDAP, described in <xref href="impala_ldap.xml#ldap"/>.
+    </p>
+  </conbody>
+
+  <concept id="kerberos_prereqs">
+
+    <title>Requirements for Using Impala with Kerberos</title>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Requirements"/>
+      <data name="Category" value="Planning"/>
+    </metadata>
+  </prolog>
+
+    <conbody>
+
+      <p conref="../shared/impala_common.xml#common/rhel5_kerberos"/>
+
+<!-- This note adapted from the one at http://www.cloudera.com/content/cloudera-content/cloudera-docs/CDH4/latest/CDH4-Security-Guide/cdh4sg_topic_3_4.html.
+   Ideally should be conref'ed in both places. -->
+
+      <note type="important">
+        <p>
+          If you plan to use Impala in your cluster, you must configure your KDC to allow tickets to be renewed,
+          and you must configure <filepath>krb5.conf</filepath> to request renewable tickets. Typically, you can do
+          this by adding the <codeph>max_renewable_life</codeph> setting to your realm in
+          <filepath>kdc.conf</filepath>, and by adding the <filepath>renew_lifetime</filepath> parameter to the
+          <filepath>libdefaults</filepath> section of <filepath>krb5.conf</filepath>. For more information about
+          renewable tickets, see the
+          <xref href="http://web.mit.edu/Kerberos/krb5-1.8/" scope="external" format="html"> Kerberos
+          documentation</xref>.
+        </p>
+        <p rev="1.2">
+          Currently, you cannot use the resource management feature in CDH 5 on a cluster that has Kerberos
+          authentication enabled.
+        </p>
+      </note>
+
+      <p>
+        Start all <cmdname>impalad</cmdname> and <cmdname>statestored</cmdname> daemons with the
+        <codeph>--principal</codeph> and <codeph>--keytab_file</codeph> flags set to the principal and full path
+        name of the <codeph>keytab</codeph> file containing the credentials for the principal.
+      </p>
+
+      <p>
+        Impala supports the Cloudera ODBC driver and the Kerberos interface it provides. To use Kerberos through the
+        ODBC driver, the host type must be set depending on the version of the ODBC driver:
+      </p>
+
+      <ul>
+        <li>
+          <codeph>SecImpala</codeph> for the ODBC 1.0 driver.
+        </li>
+
+        <li>
+          <codeph>SecBeeswax</codeph> for the ODBC 1.2 driver.
+        </li>
+
+        <li>
+          Blank for the ODBC 2.0 driver or higher, when connecting to a secure cluster.
+        </li>
+
+        <li>
+          <codeph>HS2NoSasl</codeph> for the ODBC 2.0 driver or higher, when connecting to a non-secure cluster.
+        </li>
+      </ul>
+
+      <p>
+        To enable Kerberos in the Impala shell, start the <cmdname>impala-shell</cmdname> command using the
+        <codeph>-k</codeph> flag.
+      </p>
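+      <p>
+        For example, a sketch of starting a Kerberos-authenticated session; the host name is a placeholder, and
+        <codeph>-i</codeph> simply names the <cmdname>impalad</cmdname> host to connect to:
+      </p>
+<codeblock>$ impala-shell -k -i impala_host.example.com</codeblock>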
+
+      <p>
+        To enable Impala to work with Kerberos security on your Hadoop cluster, make sure you perform the
+        installation and configuration steps in
+<!-- Original URL: http://www.cloudera.com/content/cloudera-content/cloudera-docs/CDH5/latest/CDH5-Security-Guide/CDH5-Security-Guide.html -->
+        <xref href="http://www.cloudera.com/documentation/enterprise/latest/topics/sg_authentication.html" scope="external" format="html">Authentication in the CDH 5 Security Guide</xref>
+        or
+        the topic on Configuring Hadoop Security in the <xref href="http://www.cloudera.com/content/cloudera-content/cloudera-docs/CDH4/latest/CDH4-Security-Guide/CDH4-Security-Guide.html" scope="external" format="html">CDH4 Security Guide</xref>.
+        Also note that when Kerberos security is enabled in Impala, a web browser that
+        supports Kerberos HTTP SPNEGO is required to access the Impala web console (for example, Firefox, Internet
+        Explorer, or Chrome).
+      </p>
+
+      <p>
+        If the NameNode, Secondary NameNode, DataNode, JobTracker, TaskTrackers, ResourceManager, NodeManagers,
+        HttpFS, Oozie, Impala, or Impala statestore services are configured to use Kerberos HTTP SPNEGO
+        authentication, and two or more of these services are running on the same host, then all of the running
+        services must use the same HTTP principal and keytab file used for their HTTP endpoints.
+      </p>
+    </conbody>
+  </concept>
+
+  <concept id="kerberos_config">
+
+    <title>Configuring Impala to Support Kerberos Security</title>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Configuring"/>
+    </metadata>
+  </prolog>
+
+    <conbody>
+
+      <p>
+        Enabling Kerberos authentication for Impala involves steps that can be summarized as follows:
+      </p>
+
+      <ul>
+        <li>
+          Creating service principals for Impala and the HTTP service. Principal names take the form:
+          <codeph><varname>serviceName</varname>/<varname>fully.qualified.domain.name</varname>@<varname>KERBEROS.REALM</varname></codeph>
+        </li>
+
+        <li>
+          Creating, merging, and distributing keytab files for these principals.
+        </li>
+
+        <li>
+          Editing <filepath>/etc/default/impala</filepath> (in a cluster not managed by Cloudera Manager), or editing the
+          <uicontrol>Security</uicontrol> settings in the Cloudera Manager interface, to accommodate Kerberos
+          authentication.
+        </li>
+      </ul>
+    </conbody>
+
+    <concept id="kerberos_setup">
+
+      <title>Enabling Kerberos for Impala</title>
+
+      <conbody>
+
+<!--
+      <p>
+        <b>To enable Kerberos for Impala:</b>
+      </p>
+-->
+
+        <ol>
+          <li>
+            Create an Impala service principal, specifying the name of the OS user that the Impala daemons run
+            under, the fully qualified domain name of each node running <cmdname>impalad</cmdname>, and the realm
+            name. For example:
+<codeblock>$ kadmin
+kadmin: addprinc -requires_preauth -randkey impala/impala_host.example.com@TEST.EXAMPLE.COM</codeblock>
+          </li>
+
+          <li>
+            Create an HTTP service principal. For example:
+<codeblock>kadmin: addprinc -randkey HTTP/impala_host.example.com@TEST.EXAMPLE.COM</codeblock>
+            <note>
+              The <codeph>HTTP</codeph> component of the service principal must be uppercase as shown in the
+              preceding example.
+            </note>
+          </li>
+
+          <li>
+            Create <codeph>keytab</codeph> files with both principals. For example:
+<codeblock>kadmin: xst -k impala.keytab impala/impala_host.example.com
+kadmin: xst -k http.keytab HTTP/impala_host.example.com
+kadmin: quit</codeblock>
+          </li>
+
+          <li>
+            Use <codeph>ktutil</codeph> to read the contents of the two keytab files and then write those contents
+            to a new file. For example:
+<codeblock>$ ktutil
+ktutil: rkt impala.keytab
+ktutil: rkt http.keytab
+ktutil: wkt impala-http.keytab
+ktutil: quit</codeblock>
+          </li>
+
+          <li>
+            (Optional) Test that credentials in the merged keytab file are valid, and that the <q>renew until</q>
+            date is in the future. For example:
+<codeblock>$ klist -e -k -t impala-http.keytab</codeblock>
+          </li>
+
+          <li>
+            Copy the <filepath>impala-http.keytab</filepath> file to the Impala configuration directory. Change the
+            permissions to be only read for the file owner and change the file owner to the <codeph>impala</codeph>
+            user. By default, the Impala user and group are both named <codeph>impala</codeph>. For example:
+<codeblock>$ cp impala-http.keytab /etc/impala/conf
+$ cd /etc/impala/conf
+$ chmod 400 impala-http.keytab
+$ chown impala:impala impala-http.keytab</codeblock>
+          </li>
+
+          <li>
+            Add Kerberos options to the Impala defaults file, <filepath>/etc/default/impala</filepath>. Add the
+            options for both the <cmdname>impalad</cmdname> and <cmdname>statestored</cmdname> daemons, using the
+            <codeph>IMPALA_SERVER_ARGS</codeph> and <codeph>IMPALA_STATE_STORE_ARGS</codeph> variables. For
+            example, you might add:
+<!-- Found these in a discussion post somewhere but not applicable as Impala startup options.
+-kerberos_ticket_life=36000
+-maxrenewlife 7days
+-->
+<codeblock>-kerberos_reinit_interval=60
+-principal=impala/impala_host.example.com@TEST.EXAMPLE.COM
+-keytab_file=/etc/impala/conf/impala-http.keytab</codeblock>
+            <p>
+              For more information on changing the Impala defaults specified in
+              <filepath>/etc/default/impala</filepath>, see
+              <xref href="impala_config_options.xml#config_options">Modifying Impala Startup
+              Options</xref>.
+            </p>
+          </li>
+        </ol>
+
+        <note>
+          Restart <cmdname>impalad</cmdname> and <cmdname>statestored</cmdname> for these configuration changes to
+          take effect.
+        </note>
+      </conbody>
+    </concept>
+  </concept>
+
+  <concept id="kerberos_proxy">
+
+    <title>Enabling Kerberos for Impala with a Proxy Server</title>
+
+    <conbody>
+
+      <p>
+        A common configuration for Impala with High Availability is to use a proxy server to submit requests to the
+        actual <cmdname>impalad</cmdname> daemons on different hosts in the cluster. This configuration avoids
+        connection problems in case of machine failure, because the proxy server can route new requests through one
+        of the remaining hosts in the cluster. This configuration also helps with load balancing, because the
+        additional overhead of being the <q>coordinator node</q> for each query is spread across multiple hosts.
+      </p>
+
+      <p>
+        Although you can set up a proxy server with or without Kerberos authentication, typically users set up a
+        secure Kerberized configuration. For information about setting up a proxy server for Impala, including
+        Kerberos-specific steps, see <xref href="impala_proxy.xml#proxy"/>.
+      </p>
+    </conbody>
+  </concept>
+
+  <concept id="spnego">
+
+    <title>Using a Web Browser to Access a URL Protected by Kerberos HTTP SPNEGO</title>
+
+    <conbody>
+
+      <p>
+        Your web browser must support Kerberos HTTP SPNEGO; examples include Chrome, Firefox, and Internet Explorer.
+      </p>
+
+      <p>
+        <b>To configure Firefox to access a URL protected by Kerberos HTTP SPNEGO:</b>
+      </p>
+
+      <ol>
+        <li>
+          Open the advanced Firefox configuration page by loading the <codeph>about:config</codeph> page.
+        </li>
+
+        <li>
+          Use the <b>Filter</b> text box to find <codeph>network.negotiate-auth.trusted-uris</codeph>.
+        </li>
+
+        <li>
+          Double-click the <codeph>network.negotiate-auth.trusted-uris</codeph> preference and enter the hostname
+          or the domain of the web server that is protected by Kerberos HTTP SPNEGO. Separate multiple domains and
+          hostnames with a comma.
+        </li>
+
+        <li>
+          Click <b>OK</b>.
+        </li>
+      </ol>
+    </conbody>
+  </concept>
+
+  <concept id="kerberos_delegation">
+    <title>Enabling Impala Delegation for Kerberos Users</title>
+    <conbody>
+      <p>
+        See <xref href="impala_delegation.xml#delegation"/> for details about the delegation feature
+        that lets certain users submit queries using the credentials of other users.
+      </p>
+    </conbody>
+  </concept>
+
+  <concept id="ssl_jdbc_odbc">
+    <title>Using TLS/SSL with Business Intelligence Tools</title>
+    <conbody>
+      <p>
+        You can use Kerberos authentication, TLS/SSL encryption, or both to secure
+        connections from JDBC and ODBC applications to Impala.
+        See <xref href="impala_jdbc.xml#impala_jdbc"/> and <xref href="impala_odbc.xml#impala_odbc"/>
+        for details.
+      </p>
+
+      <p conref="../shared/impala_common.xml#common/hive_jdbc_ssl_kerberos_caveat"/>
+    </conbody>
+  </concept>
+
+  <concept id="whitelisting_internal_apis">
+  <title>Enabling Access to Internal Impala APIs for Kerberos Users</title>
+    <conbody>
+    <!-- Reusing (most of) the text from the New Features bullet here. Turn into a conref in both places. -->
+      <p rev="IMPALA-3095">
+        For applications that need direct access
+        to Impala APIs, without going through the HiveServer2 or Beeswax interfaces, you can
+        specify a list of Kerberos users who are allowed to call those APIs. By default, the
+        <codeph>impala</codeph> and <codeph>hdfs</codeph> users are the only ones authorized
+        for this kind of access.
+        Any users not explicitly authorized through the <codeph>internal_principals_whitelist</codeph>
+        configuration setting are blocked from accessing the APIs. This setting applies to all the
+        Impala-related daemons, although currently it is primarily used for HDFS to control the
+        behavior of the catalog server.
+      </p>
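+      <p>
+        For example, a sketch of authorizing one additional Kerberos user, the hypothetical
+        <codeph>svc_etl</codeph>, while keeping <codeph>hdfs</codeph> in the list; confirm the current
+        value of the setting (for example, on a daemon's <codeph>/varz</codeph> page) before editing it:
+      </p>
+<codeblock>-internal_principals_whitelist=hdfs,svc_etl</codeblock>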
+    </conbody>
+
+  </concept>
+
+  <concept id="auth_to_local" rev="IMPALA-2660 CDH-40241">
+    <title>Mapping Kerberos Principals to Short Names for Impala</title>
+    <conbody>
+      <p conref="../shared/impala_common.xml#common/auth_to_local_instructions"/>
+    </conbody>
+  </concept>
+
+</concept>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1fcc8cee/docs/topics/impala_ldap.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_ldap.xml b/docs/topics/impala_ldap.xml
new file mode 100644
index 0000000..f2ef523
--- /dev/null
+++ b/docs/topics/impala_ldap.xml
@@ -0,0 +1,354 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="ldap">
+
+  <title>Enabling LDAP Authentication for Impala</title>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Security"/>
+      <data name="Category" value="LDAP"/>
+      <data name="Category" value="Authentication"/>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="Configuring"/>
+      <data name="Category" value="Starting and Stopping"/>
+      <data name="Category" value="Administrators"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+<!-- Similar discussion under 'Authentication' parent topic. Maybe do some conref'ing or linking upward. -->
+
+    <p> Authentication is the process of allowing only specified named users to
+      access the server (in this case, the Impala server). This feature is
+      crucial for any production deployment, to prevent misuse, tampering, or
+      excessive load on the server. Impala uses LDAP for authentication,
+      verifying the credentials of each user who connects through
+        <cmdname>impala-shell</cmdname>, Hue, a Business Intelligence tool, JDBC
+      or ODBC application, and so on. </p>
+
+    <note conref="../shared/impala_common.xml#common/authentication_vs_authorization"/>
+
+    <p outputclass="toc inpage"/>
+
+    <p>
+      An alternative form of authentication you can use is Kerberos, described in
+      <xref href="impala_kerberos.xml#kerberos"/>.
+    </p>
+  </conbody>
+
+  <concept id="ldap_prereqs">
+
+    <title>Requirements for Using Impala with LDAP</title>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Requirements"/>
+      <data name="Category" value="Planning"/>
+    </metadata>
+  </prolog>
+
+    <conbody>
+
+      <p rev="1.4.0">
+        Authentication against LDAP servers is available in Impala 1.2.2 and higher. Impala 1.4.0 adds support for
+        secure LDAP authentication through SSL and TLS.
+      </p>
+
+      <p>
+        The Impala LDAP support lets you use Impala with systems such as Active Directory that use LDAP behind the
+        scenes.
+      </p>
+    </conbody>
+  </concept>
+
+  <concept id="ldap_client_server">
+
+    <title>Client-Server Considerations for LDAP</title>
+
+    <conbody>
+
+      <p>
+        Only client-&gt;Impala connections can be authenticated by LDAP.
+      </p>
+
+      <p> You must use the Kerberos authentication mechanism for connections
+        between internal Impala components, such as between the
+          <cmdname>impalad</cmdname>, <cmdname>statestored</cmdname>, and
+          <cmdname>catalogd</cmdname> daemons. See <xref
+          href="impala_kerberos.xml#kerberos" /> on how to set up Kerberos for
+        Impala. </p>
+    </conbody>
+  </concept>
+
+  <concept id="ldap_config">
+
+    <title>Server-Side LDAP Setup</title>
+
+    <conbody>
+
+      <p>
+        These requirements apply on the server side when configuring and starting Impala:
+      </p>
+
+      <p>
+        To enable LDAP authentication, set the following startup options for <cmdname>impalad</cmdname>,
+        as shown in the combined sketch after this list:
+      </p>
+
+      <ul>
+        <li>
+          <codeph>--enable_ldap_auth</codeph> enables LDAP-based authentication between the client and Impala.
+        </li>
+
+        <li rev="1.4.0">
+          <codeph>--ldap_uri</codeph> sets the URI of the LDAP server to use. Typically, the URI is prefixed with
+          <codeph>ldap://</codeph>. In Impala 1.4.0 and higher, you can specify secure SSL-based LDAP transport by
+          using the prefix <codeph>ldaps://</codeph>. The URI can optionally specify the port, for example:
+          <codeph>ldap://ldap_server.cloudera.com:389</codeph> or
+          <codeph>ldaps://ldap_server.cloudera.com:636</codeph>. (389 and 636 are the default ports for non-SSL and
+          SSL LDAP connections, respectively.)
+        </li>
+
+<!-- Some amount of this bullet could be conref'ed. Similar but not identical bullet occurs later under TLS. -->
+
+        <li rev="1.4.0">
+          For <codeph>ldaps://</codeph> connections secured by SSL,
+          <codeph>--ldap_ca_certificate="<varname>/path/to/certificate/pem</varname>"</codeph> specifies the
+          location of the certificate in standard <codeph>.PEM</codeph> format. Store this certificate on the local
+          filesystem, in a location that only the <codeph>impala</codeph> user and other trusted users can read.
+        </li>
+
+<!-- Per Henry: not for public consumption.
+<li>
+  If you need to provide a custom SASL configuration,
+  set <codeph>- -ldap_manual_config</codeph> to bypass all the automatic configuration.
+</li>
+-->
+      </ul>
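+      <p>
+        As a combined sketch for a cluster not managed by Cloudera Manager, these flags might be appended to
+        <codeph>IMPALA_SERVER_ARGS</codeph> in <filepath>/etc/default/impala</filepath>; the server URI and
+        certificate path are placeholders:
+      </p>
+<codeblock>--enable_ldap_auth \
+--ldap_uri=ldaps://ldap_server.example.com:636 \
+--ldap_ca_certificate=/etc/impala/conf/ldap-ca.pem</codeblock>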
+    </conbody>
+  </concept>
+
+  <concept id="ldap_bind_strings">
+
+    <title>Support for Custom Bind Strings</title>
+
+    <conbody>
+
+      <p>
+        When Impala connects to LDAP it issues a bind call to the LDAP server to authenticate as the connected
+        user. Impala clients, including the Impala shell, provide the short name of the user to Impala. This is
+        necessary so that Impala can use Sentry for role-based access, which uses short names.
+      </p>
+
+      <p>
+        However, LDAP servers often require more complex, structured usernames for authentication. Impala supports
+        three ways of transforming the short name (for example, <codeph>'henry'</codeph>) to a more complicated
+        string. If necessary, specify one of the following configuration options when starting the
+        <cmdname>impalad</cmdname> daemon on each DataNode:
+      </p>
+
+      <ul>
+        <li>
+          <codeph>--ldap_domain</codeph>: Replaces the username with a string
+          <codeph><varname>username</varname>@<varname>ldap_domain</varname></codeph>.
+        </li>
+
+        <li>
+          <codeph>--ldap_baseDN</codeph>: Replaces the username with a <q>distinguished name</q> (DN) of the form:
+          <codeph>uid=<varname>userid</varname>,ldap_baseDN</codeph>. (This is equivalent to a Hive option).
+        </li>
+
+        <li>
+          <codeph>--ldap_bind_pattern</codeph>: This is the most general option, and replaces the username with the
+          string <varname>ldap_bind_pattern</varname> where all instances of the string <codeph>#UID</codeph> are
+          replaced with <varname>userid</varname>. For example, an <codeph>ldap_bind_pattern</codeph> of
+          <codeph>"user=#UID,OU=foo,CN=bar"</codeph> with a username of <codeph>henry</codeph> will construct a
+          bind name of <codeph>"user=henry,OU=foo,CN=bar"</codeph>.
+        </li>
+      </ul>
+
+      <p rev="CDH-26854">
+        For clusters not managed by Cloudera Manager,
+        specify the option on the <cmdname>impalad</cmdname> command line.
+        For clusters managed by Cloudera Manager 5.4.0 and higher,
+        search for the configuration field names <codeph>ldap_domain</codeph>,
+        <codeph>ldap_basedn</codeph>, or <codeph>ldap_bind_pattern</codeph>,
+        fill in and save the appropriate field values, and restart the Impala service.
+        Prior to Cloudera Manager 5.4.0, these values were filled in using the
+        <uicontrol>Impala Daemon Command Line Argument Advanced Configuration Snippet (Safety Valve)</uicontrol>
+        field.
+      </p>
+
+      <p>
+        These options are mutually exclusive; Impala does not start if more than one of these options is specified.
+      </p>
+    </conbody>
+  </concept>
+
+  <concept id="ldap_security">
+
+    <title>Secure LDAP Connections</title>
+
+    <conbody>
+
+      <p>
+        To avoid sending credentials over the wire in cleartext, you must configure a secure connection both
+        between the client and Impala, and between Impala and the LDAP server. The secure connection can use SSL or
+        TLS.
+      </p>
+
+      <p>
+        <b>Secure LDAP connections through SSL:</b>
+      </p>
+
+      <p>
+        For SSL-enabled LDAP connections, specify a prefix of <codeph>ldaps://</codeph> instead of
+        <codeph>ldap://</codeph>. Also, the default port for SSL-enabled LDAP connections is 636 instead of 389.
+      </p>
+
+      <p rev="1.4.0">
+        <b>Secure LDAP connections through TLS:</b>
+      </p>
+
+      <p>
+        <xref href="http://en.wikipedia.org/wiki/Transport_Layer_Security" scope="external" format="html">TLS</xref>,
+        the successor to the SSL protocol, is supported by most modern LDAP servers. Unlike SSL connections, TLS
+        connections can be made on the same server port as non-TLS connections. To secure all connections using
+        TLS, specify the following flags as startup options to the <cmdname>impalad</cmdname> daemon:
+      </p>
+
+      <ul>
+        <li>
+          <codeph>--ldap_tls</codeph> tells Impala to start a TLS connection to the LDAP server, and to fail
+          authentication if it cannot be done.
+        </li>
+
+        <li rev="1.4.0">
+          <codeph>--ldap_ca_certificate="<varname>/path/to/certificate/pem</varname>"</codeph> specifies the
+          location of the certificate in standard <codeph>.PEM</codeph> format. Store this certificate on the local
+          filesystem, in a location that only the <codeph>impala</codeph> user and other trusted users can read.
+        </li>
+      </ul>
+    </conbody>
+  </concept>
+
+  <concept id="ldap_impala_shell">
+
+    <title>LDAP Authentication for impala-shell Interpreter</title>
+
+    <conbody>
+
+      <p>
+        To connect to Impala using LDAP authentication, you specify command-line options to the
+        <cmdname>impala-shell</cmdname> command interpreter and enter the password when prompted
+        (see the sketch after this list):
+      </p>
+
+      <ul>
+        <li>
+          <codeph>-l</codeph> enables LDAP authentication.
+        </li>
+
+        <li>
+          <codeph>-u</codeph> sets the user. Per Active Directory, the user is the short username, not the full
+          LDAP distinguished name. If your LDAP settings include a search base, use the
+          <codeph>--ldap_bind_pattern</codeph> on the <cmdname>impalad</cmdname> daemon to translate the short user
+          name from <cmdname>impala-shell</cmdname> automatically to the fully qualified name.
+<!--
+include that as part of the
+username, for example <codeph>username@example.com</codeph>.
+-->
+        </li>
+
+        <li>
+          <cmdname>impala-shell</cmdname> automatically prompts for the password.
+        </li>
+      </ul>
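+      <p>
+        For example, a sketch of an LDAP-authenticated session; the host and user names are placeholders,
+        and <cmdname>impala-shell</cmdname> prompts for the password:
+      </p>
+<codeblock>$ impala-shell -l -u henry -i impala_host.example.com</codeblock>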
+
+      <p>
+        For the full list of available <cmdname>impala-shell</cmdname> options, see
+        <xref href="impala_shell_options.xml#shell_options"/>.
+      </p>
+
+      <p>
+        <b>LDAP authentication for JDBC applications:</b> See <xref href="impala_jdbc.xml#impala_jdbc"/> for the
+        format to use with the JDBC connection string for servers using LDAP authentication.
+      </p>
+    </conbody>
+  </concept>
+  <concept id="ldap_impala_hue">
+    <title>Enabling LDAP for Impala in Hue</title>
+    <prolog>
+      <metadata>
+        <data name="Category" value="Hue"/>
+      </metadata>
+    </prolog>
+    <conbody>
+      <section id="ldap_impala_hue_cm">
+        <title>Enabling LDAP for Impala in Hue Using Cloudera Manager</title>
+        <p>
+          <ol>
+            <li>Go to the Hue service.</li>
+            <li>Click the Configuration tab.</li>
+            <li>Select <menucascade><uicontrol>Scope</uicontrol><uicontrol>Hue
+                  Server</uicontrol></menucascade>.</li>
+            <li>Select
+              <menucascade><uicontrol>Category</uicontrol><uicontrol>Advanced</uicontrol></menucascade>.</li>
+            <li>Add the following properties to the <b>Hue Server Advanced
+                Configuration Snippet (Safety Valve) for
+                hue_safety_valve_server.ini</b>
+              property.<codeblock>[impala]
+auth_username=&lt;LDAP username of Hue user to be authenticated>
+auth_password=&lt;LDAP password of Hue user to be authenticated></codeblock></li>
+            <li>Click <b>Save Changes</b>.</li>
+          </ol>
+        </p>
+      </section>
+      <section id="ldap_impala_hue_cmdline">
+        <title>Enabling LDAP for Impala in Hue Using the Command Line</title>
+        <p>LDAP authentication for the Impala app in Hue can be enabled by
+          setting the following properties under the <codeph>[impala]</codeph>
+          section in <codeph>hue.ini</codeph>. <table id="ldap_impala_hue_configs">
+            <tgroup cols="2">
+              <colspec colname="1" colwidth="1*" />
+              <colspec colname="2" colwidth="2*" />
+              <tbody>
+                <row>
+                  <entry><codeph>auth_username</codeph></entry>
+                  <entry>LDAP username of Hue user to be authenticated.</entry>
+                </row>
+                <row>
+                  <entry><codeph>auth_password</codeph></entry>
+                  <entry>
+                    <p>LDAP password of Hue user to be authenticated.</p>
+                  </entry>
+                </row>
+              </tbody>
+            </tgroup>
+          </table>These login details are only used by Impala to authenticate to
+          LDAP. The Impala service trusts Hue to have already validated the user
+          being impersonated, rather than simply passing on the credentials.</p>
+      </section>
+    </conbody>
+  </concept>
+
+  <concept id="ldap_delegation">
+    <title>Enabling Impala Delegation for LDAP Users</title>
+    <conbody>
+      <p>
+        See <xref href="impala_delegation.xml#delegation"/> for details about the delegation feature
+        that lets certain users submit queries using the credentials of other users.
+      </p>
+    </conbody>
+  </concept>
+
+  <concept id="ldap_restrictions">
+
+    <title>LDAP Restrictions for Impala</title>
+
+    <conbody>
+
+      <p>
+        The LDAP support is preliminary. It currently has only been tested against Active Directory.
+      </p>
+    </conbody>
+  </concept>
+</concept>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1fcc8cee/docs/topics/impala_lineage.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_lineage.xml b/docs/topics/impala_lineage.xml
new file mode 100644
index 0000000..c05391c
--- /dev/null
+++ b/docs/topics/impala_lineage.xml
@@ -0,0 +1,113 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="lineage" rev="2.2.0">
+
+  <title>Viewing Lineage Information for Impala Data</title>
+  <titlealts audience="PDF"><navtitle>Viewing Lineage Info</navtitle></titlealts>
+  <prolog>
+
+    <metadata>
+
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="Lineage"/>
+      <data name="Category" value="Governance"/>
+      <data name="Category" value="Data Management"/>
+      <data name="Category" value="Navigator"/>
+      <data name="Category" value="Administrators"/>
+
+    </metadata>
+
+  </prolog>
+
+  <conbody>
+
+    <p rev="2.2.0">
+      <indexterm audience="Cloudera">lineage</indexterm>
+      <indexterm audience="Cloudera">column lineage</indexterm>
+      <term>Lineage</term> is a feature in the Cloudera Navigator data
+      management component that helps you track where data originated, and how
+      data propagates through the system through SQL statements such as
+        <codeph>SELECT</codeph>, <codeph>INSERT</codeph>, and <codeph>CREATE
+        TABLE AS SELECT</codeph>. Impala is covered by the Cloudera Navigator
+      lineage features in CDH 5.4.0 and higher. </p>
+
+    <p>
+      This type of tracking is important in high-security configurations, especially in highly regulated industries
+      such as healthcare, pharmaceuticals, financial services, and intelligence. For such sensitive data, it is important to know all
+      the places in the system that contain that data or other data derived from it; to verify who has accessed
+      that data; and to be able to double-check that the data used to make a decision was processed correctly and
+      not tampered with.
+    </p>
+
+    <p>
+      You interact with this feature through <term>lineage diagrams</term> showing relationships between tables and
+      columns. For instructions about interpreting lineage diagrams, see
+      <xref audience="integrated" href="cn_iu_lineage.xml" /><xref audience="standalone" href="http://www.cloudera.com/documentation/enterprise/latest/topics/cn_iu_lineage.html" scope="external" format="html"/>.
+    </p>
+
+    <section id="column_lineage">
+
+      <title>Column Lineage</title>
+
+      <p>
+        <term>Column lineage</term> tracks information in fine detail, at the level of
+        particular columns rather than entire tables.
+      </p>
+
+      <p>
+        For example, if you have a table with information derived from web logs, you might copy that data into
+        other tables as part of the ETL process. The ETL operations might involve transformations through
+        expressions and function calls, and rearranging the columns into more or fewer tables
+        (<term>normalizing</term> or <term>denormalizing</term> the data). Then for reporting, you might issue
+        queries against multiple tables and views. In this example, column lineage helps you determine that data
+        that entered the system as <codeph>RAW_LOGS.FIELD1</codeph> was then turned into
+        <codeph>WEBSITE_REPORTS.IP_ADDRESS</codeph> through an <codeph>INSERT ... SELECT</codeph> statement. Or,
+        conversely, you could start with a reporting query against a view, and trace the origin of the data in a
+        field such as <codeph>TOP_10_VISITORS.USER_ID</codeph> back to the underlying table and even further back
+        to the point where the data was first loaded into Impala.
+      </p>
+
+      <p>
+        When you have tables where you need to track or control access to sensitive information at the column
+        level, see <xref href="impala_authorization.xml#authorization"/> for how to implement column-level
+        security. You set up authorization using the Sentry framework, create views that refer to specific sets of
+        columns, and then assign authorization privileges to those views rather than the underlying tables.
+      </p>
+
+    </section>
+
+    <section id="lineage_data">
+
+      <title>Lineage Data for Impala</title>
+
+      <p>
+        The lineage feature is enabled by default. When lineage logging is enabled, the serialized column lineage
+        graph is computed for each query and stored in a specialized log file in JSON format.
+      </p>
+
+      <p>
+        Impala records queries in the lineage log if they complete successfully, or fail due to authorization
+        errors. For write operations such as <codeph>INSERT</codeph> and <codeph>CREATE TABLE AS SELECT</codeph>,
+        the statement is recorded in the lineage log only if it successfully completes. Therefore, the lineage
+        feature tracks data that was accessed by successful queries, or that unsuccessful queries attempted
+        to access before being blocked by an authorization failure. These kinds of queries represent data
+        that really was accessed, or where the attempted access could represent malicious activity.
+      </p>
+
+      <p>
+        Impala does not record in the lineage log queries that fail due to syntax errors or that fail or are
+        cancelled before they reach the stage of requesting rows from the result set.
+      </p>
+
+      <p>
+        To enable or disable this feature on a system not managed by Cloudera Manager, set or remove the
+        <codeph>-lineage_event_log_dir</codeph> configuration option for the <cmdname>impalad</cmdname> daemon. For
+        information about turning the lineage feature on and off through Cloudera Manager, see
+        <xref audience="integrated" href="datamgmt_impala_lineage_log.xml"/><xref audience="standalone" href="http://www.cloudera.com/documentation/enterprise/latest/topics/datamgmt_impala_lineage_log.html" scope="external" format="html"/>.
+      </p>
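+      <p>
+        For example, a sketch of enabling the feature by hand, with a placeholder log directory, is to add
+        the following flag to the <cmdname>impalad</cmdname> startup options:
+      </p>
+<codeblock>-lineage_event_log_dir=/var/log/impala/lineage</codeblock>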
+
+    </section>
+
+  </conbody>
+
+</concept>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1fcc8cee/docs/topics/impala_mixed_security.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_mixed_security.xml b/docs/topics/impala_mixed_security.xml
new file mode 100644
index 0000000..b9e6933
--- /dev/null
+++ b/docs/topics/impala_mixed_security.xml
@@ -0,0 +1,46 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="mixed_security">
+
+  <title>Using Multiple Authentication Methods with Impala</title>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Security"/>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="Authentication"/>
+      <data name="Category" value="Kerberos"/>
+      <data name="Category" value="LDAP"/>
+      <data name="Category" value="Administrators"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+    <p>
+      Impala 2.0 and later automatically handles both Kerberos and LDAP authentication. Each
+      <cmdname>impalad</cmdname> daemon can accept both Kerberos and LDAP requests through the same port. No
+      special actions need to be taken if some users authenticate through Kerberos and some through LDAP.
+    </p>
+
+    <p>
+      Prior to Impala 2.0, you had to configure each <cmdname>impalad</cmdname> to listen on a specific port
+      depending on the kind of authentication, then configure your network load balancer to forward each kind of
+      request to a DataNode that was set up with the appropriate authentication type. Once the initial request was
+      made using either Kerberos or LDAP authentication, Impala automatically handled the process of coordinating
+      the work across multiple nodes and transmitting intermediate results back to the coordinator node.
+    </p>
+
+<!--
+    <p>
+    This technique is most suitable for larger clusters, where
+    you are already using load balancing software for high availability.
+    You configure Impala to run on a different port on the nodes configured for LDAP.
+    Then you configure the load balancing software to forward Kerberos
+    connection requests to nodes using the default port, and LDAP connection requests
+    to nodes using an alternative port for LDAP.
+    Consult the documentation for your load balancing software for how to
+    configure that type of forwarding.
+    </p>
+-->
+  </conbody>
+</concept>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1fcc8cee/docs/topics/impala_noncm_installation.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_noncm_installation.xml b/docs/topics/impala_noncm_installation.xml
new file mode 100644
index 0000000..b1ee0ef
--- /dev/null
+++ b/docs/topics/impala_noncm_installation.xml
@@ -0,0 +1,175 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="noncm_installation">
+
+  <title>Installing Impala without Cloudera Manager</title>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="Installing"/>
+      <data name="Category" value="Administrators"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+    <p>
+      Before installing Impala manually, make sure all applicable nodes have the appropriate hardware
+      configuration, levels of operating system and CDH, and any other software prerequisites. See
+      <xref href="impala_prereqs.xml#prereqs"/> for details.
+    </p>
+
+    <p>
+      You can install Impala across many hosts or on one host:
+    </p>
+
+    <ul>
+      <li>
+        Installing Impala across multiple machines creates a distributed configuration. For best performance,
+        install Impala on <b>all</b> DataNodes.
+      </li>
+
+      <li>
+        Installing Impala on a single machine produces a pseudo-distributed cluster.
+      </li>
+    </ul>
+
+    <p>
+      <b>To install Impala on a host:</b>
+    </p>
+
+    <ol>
+      <li>
+        Install CDH as described in the Installation section of the
+        <xref href="http://www.cloudera.com/content/cloudera-content/cloudera-docs/CDH4/latest/CDH4-Installation-Guide/CDH4-Installation-Guide.html" scope="external" format="html">CDH
+        4 Installation Guide</xref> or the
+<!-- Original URL: http://www.cloudera.com/content/cloudera-content/cloudera-docs/CDH5/latest/CDH5-Installation-Guide/CDH5-Installation-Guide.html -->
+        <xref href="http://www.cloudera.com/documentation/enterprise/latest/topics/installation.html" scope="external" format="html">CDH
+        5 Installation Guide</xref>.
+      </li>
+
+      <li>
+        <p>
+          Install the Hive metastore somewhere in your cluster, as described in the Hive Installation topic in the
+          <xref href="http://www.cloudera.com/content/cloudera-content/cloudera-docs/CDH4/latest/CDH4-Installation-Guide/cdh4ig_topic_18.html" scope="external" format="html">CDH
+          4 Installation Guide</xref> or the
+<!-- Original URL: http://www.cloudera.com/content/cloudera-content/cloudera-docs/CDH5/latest/CDH5-Installation-Guide/cdh_ig_hive_installation.html -->
+          <xref href="http://www.cloudera.com/documentation/enterprise/latest/topics/cdh_ig_hive_installation.html" scope="external" format="html">CDH
+          5 Installation Guide</xref>. As part of this process, you configure the Hive metastore to use an external
+          database as a metastore. Impala uses this same database for its own table metadata. You can choose either
+          a MySQL or PostgreSQL database as the metastore. The process for configuring each type of database is
+          described in the CDH Installation Guide.
+        </p>
+        <p>
+          Cloudera recommends setting up a Hive metastore service rather than connecting directly to the metastore
+          database; this configuration is required when running Impala under CDH 4.1. Make sure the
+          <filepath>/etc/impala/conf/hive-site.xml</filepath> file contains the following setting, substituting the
+          appropriate hostname for <varname>metastore_server_host</varname>:
+        </p>
+<codeblock>&lt;property&gt;
+&lt;name&gt;hive.metastore.uris&lt;/name&gt;
+&lt;value&gt;thrift://<varname>metastore_server_host</varname>:9083&lt;/value&gt;
+&lt;/property&gt;
+&lt;property&gt;
+&lt;name&gt;hive.metastore.client.socket.timeout&lt;/name&gt;
+&lt;value&gt;3600&lt;/value&gt;
+&lt;description&gt;MetaStore Client socket timeout in seconds&lt;/description&gt;
+&lt;/property&gt;</codeblock>
+      </li>
+
+      <li>
+        (Optional) If you installed the full Hive component on any host, you can verify that the metastore is
+        configured properly by starting the Hive console and querying for the list of available tables. Once you
+        confirm that the console starts, exit the console to continue the installation:
+<codeblock>$ hive
+Hive history file=/tmp/root/hive_job_log_root_201207272011_678722950.txt
+hive&gt; show tables;
+table1
+table2
+hive&gt; quit;
+$</codeblock>
+      </li>
+
+      <li>
+        Confirm that your package management command is aware of the Impala repository settings, as described in
+        <xref href="impala_prereqs.xml#prereqs"/>. (For CDH 4, this is a different repository than for CDH.) You
+        might need to download a repo or list file into a system directory underneath <filepath>/etc</filepath>.
+      </li>
+
+      <li>
+        Use <b>one</b> of the following sets of commands to install the Impala package:
+        <p>
+          <b>For RHEL, Oracle Linux, or CentOS systems:</b>
+        </p>
+<codeblock rev="1.2">$ sudo yum install impala             # Binaries for daemons
+$ sudo yum install impala-server      # Service start/stop script
+$ sudo yum install impala-state-store # Service start/stop script
+$ sudo yum install impala-catalog     # Service start/stop script
+</codeblock>
+        <p>
+          <b>For SUSE systems:</b>
+        </p>
+<codeblock rev="1.2">$ sudo zypper install impala             # Binaries for daemons
+$ sudo zypper install impala-server      # Service start/stop script
+$ sudo zypper install impala-state-store # Service start/stop script
+$ sudo zypper install impala-catalog     # Service start/stop script
+</codeblock>
+        <p>
+          <b>For Debian or Ubuntu systems:</b>
+        </p>
+<codeblock rev="1.2">$ sudo apt-get install impala             # Binaries for daemons
+$ sudo apt-get install impala-server      # Service start/stop script
+$ sudo apt-get install impala-state-store # Service start/stop script
+$ sudo apt-get install impala-catalog     # Service start/stop script
+</codeblock>
+        <note>
+          Cloudera recommends that you not install Impala on any HDFS NameNode. Installing Impala on NameNodes
+          provides no additional data locality, and executing queries with such a configuration might cause memory
+          contention and negatively impact the HDFS NameNode.
+        </note>
+      </li>
+
+      <li>
+        Copy the client <codeph>hive-site.xml</codeph>, <codeph>core-site.xml</codeph>,
+        <codeph>hdfs-site.xml</codeph>, and <codeph>hbase-site.xml</codeph> configuration files to the Impala
+        configuration directory, which defaults to <codeph>/etc/impala/conf</codeph>. Create this directory if it
+        does not already exist.
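+        <p>
+          For example, assuming the default CDH client configuration locations, the copy step might look like the
+          following; adjust the source paths if your configuration files live elsewhere:
+        </p>
+<codeblock>$ sudo mkdir -p /etc/impala/conf
+$ sudo cp /etc/hive/conf/hive-site.xml \
+    /etc/hadoop/conf/core-site.xml \
+    /etc/hadoop/conf/hdfs-site.xml \
+    /etc/hbase/conf/hbase-site.xml \
+    /etc/impala/conf/</codeblock>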
+      </li>
+
+      <li>
+        Use <b>one</b> of the following commands to install <codeph>impala-shell</codeph> on the machines from
+        which you want to issue queries. You can install <codeph>impala-shell</codeph> on any supported machine
+        that can connect to DataNodes that are running <codeph>impalad</codeph>.
+        <p>
+          <b>For RHEL/CentOS systems:</b>
+        </p>
+<codeblock>$ sudo yum install impala-shell</codeblock>
+        <p>
+          <b>For SUSE systems:</b>
+        </p>
+<codeblock>$ sudo zypper install impala-shell</codeblock>
+        <p>
+          <b>For Debian/Ubuntu systems:</b>
+        </p>
+<codeblock>$ sudo apt-get install impala-shell</codeblock>
+      </li>
+
+      <li>
+        Complete any required or recommended configuration, as described in
+        <xref href="impala_config_performance.xml#config_performance"/>. Some of these configuration changes are
+        mandatory. (They are applied automatically when you install using Cloudera Manager.)
+      </li>
+    </ol>
+
+    <p>
+      Once installation and configuration are complete, see <xref href="impala_processes.xml#processes"/> for how
+      to activate the software on the appropriate nodes in your cluster.
+    </p>
+
+    <p>
+      If this is your first time setting up and using Impala in this cluster, run through some of the exercises in
+      <xref href="impala_tutorial.xml#tutorial"/> to verify that you can do basic operations such as creating
+      tables and querying them.
+    </p>
+  </conbody>
+</concept>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1fcc8cee/docs/topics/impala_perf_benchmarking.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_perf_benchmarking.xml b/docs/topics/impala_perf_benchmarking.xml
new file mode 100644
index 0000000..b2e058d
--- /dev/null
+++ b/docs/topics/impala_perf_benchmarking.xml
@@ -0,0 +1,36 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="perf_benchmarks">
+
+  <title>Benchmarking Impala Queries</title>
+  <titlealts audience="PDF"><navtitle>Benchmarking</navtitle></titlealts>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Performance"/>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="Querying"/>
+      <data name="Category" value="Proof of Concept"/>
+      <data name="Category" value="Developers"/>
+      <data name="Category" value="Data Analysts"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+    <p>
+      Because Impala, like other Hadoop components, is designed to handle large data volumes in a distributed
+      environment, conduct any performance tests using realistic data and cluster configurations. Use a multi-node
+      cluster rather than a single node; run queries against tables containing terabytes of data rather than tens
+      of gigabytes. The parallel processing techniques used by Impala are most appropriate for workloads that are
+      beyond the capacity of a single server.
+    </p>
+
+    <p>
+      When you run queries returning large numbers of rows, the CPU time to pretty-print the output can be
+      substantial, giving an inaccurate measurement of the actual query time. Consider using the
+      <codeph>-B</codeph> option on the <codeph>impala-shell</codeph> command to turn off the pretty-printing, and
+      optionally the <codeph>-o</codeph> option to store query results in a file rather than printing to the
+      screen. See <xref href="impala_shell_options.xml#shell_options"/> for details.
+    </p>
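+    <p>
+      For example, a benchmarking run might time a query with pretty-printing turned off and the result set sent to
+      a file instead of the screen; the host name and query here are placeholders:
+    </p>
+<codeblock>$ time impala-shell -i impalad-host -B -o /dev/null \
+    -q 'SELECT COUNT(*) FROM big_benchmark_table'</codeblock>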
+  </conbody>
+</concept>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1fcc8cee/docs/topics/impala_perf_cookbook.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_perf_cookbook.xml b/docs/topics/impala_perf_cookbook.xml
new file mode 100644
index 0000000..a42f7c9
--- /dev/null
+++ b/docs/topics/impala_perf_cookbook.xml
@@ -0,0 +1,269 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="perf_cookbook">
+
+  <title>Impala Performance Guidelines and Best Practices</title>
+  <titlealts audience="PDF"><navtitle>Performance Best Practices</navtitle></titlealts>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Performance"/>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="Planning"/>
+      <data name="Category" value="Proof of Concept"/>
+      <data name="Category" value="Guidelines"/>
+      <data name="Category" value="Best Practices"/>
+      <data name="Category" value="Proof of Concept"/>
+      <data name="Category" value="Developers"/>
+      <data name="Category" value="Data Analysts"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+    <p>
+      Here are performance guidelines and best practices that you can use during planning, experimentation, and
+      performance tuning for an Impala-enabled CDH cluster. All of this information is also available in more
+      detail elsewhere in the Impala documentation; it is gathered together here to serve as a cookbook and
+        emphasize which performance techniques typically provide the highest return on investment.
+    </p>
+
+    <p outputclass="toc inpage"/>
+
+    <section id="perf_cookbook_file_format">
+
+      <title>Choose the appropriate file format for the data.</title>
+
+      <p>
+        Typically, for large volumes of data (multiple gigabytes per table or partition), the Parquet file format
+        performs best because of its combination of columnar storage layout, large I/O request size, and
+        compression and encoding. See <xref href="impala_file_formats.xml#file_formats"/> for comparisons of all
+        file formats supported by Impala, and <xref href="impala_parquet.xml#parquet"/> for details about the
+        Parquet file format.
+      </p>
+
+      <note>
+        For smaller volumes of data, a few gigabytes or less for each table or partition, you might not see
+        significant performance differences between file formats. At small data volumes, reduced I/O from an
+        efficient compressed file format can be counterbalanced by reduced opportunity for parallel execution. When
+        planning for a production deployment or conducting benchmarks, always use realistic data volumes to get a
+        true picture of performance and scalability.
+      </note>
+    </section>
+
+    <section id="perf_cookbook_small_files">
+
+      <title>Avoid data ingestion processes that produce many small files.</title>
+
+      <p>
+        When producing data files outside of Impala, prefer either text format or Avro, where you can build up the
+        files row by row. Once the data is in Impala, you can convert it to the more efficient Parquet format and
+        split into multiple data files using a single <codeph>INSERT ... SELECT</codeph> statement. Or, if you have
+        the infrastructure to produce multi-megabyte Parquet files as part of your data preparation process, do
+        that and skip the conversion step inside Impala.
+      </p>
+
+      <p>
+        Always use <codeph>INSERT ... SELECT</codeph> to copy significant volumes of data from table to table
+        within Impala. Avoid <codeph>INSERT ... VALUES</codeph> for any substantial volume of data or
+        performance-critical tables, because each such statement produces a separate tiny data file. See
+        <xref href="impala_insert.xml#insert"/> for examples of the <codeph>INSERT ... SELECT</codeph> syntax.
+      </p>
+
+      <p>
+        For example, if you have thousands of partitions in a Parquet table, each with less than
+        <ph rev="parquet_block_size">256 MB</ph> of data, consider partitioning in a less granular way, such as by
+        year / month rather than year / month / day. If an inefficient data ingestion process produces thousands of
+        data files in the same table or partition, consider compacting the data by performing an <codeph>INSERT ...
+        SELECT</codeph> to copy all the data to a different table; the data will be reorganized into a smaller
+        number of larger files by this process.
+      </p>
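+      <p>
+        As an illustration only (the table names here are hypothetical), the compaction step might look like the
+        following, copying the data from a table with many small files into a Parquet table that is written as a
+        smaller number of larger files:
+      </p>
+<codeblock>CREATE TABLE sales_compacted LIKE sales_many_small_files STORED AS PARQUET;
+INSERT OVERWRITE TABLE sales_compacted SELECT * FROM sales_many_small_files;</codeblock>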
+    </section>
+
+    <section id="perf_cookbook_partitioning">
+
+      <title>Choose partitioning granularity based on actual data volume.</title>
+
+      <p>
+        Partitioning is a technique that physically divides the data based on values of one or more columns, such
+        as by year, month, day, region, city, section of a web site, and so on. When you issue queries that request
+        a specific value or range of values for the partition key columns, Impala can avoid reading the irrelevant
+        data, potentially yielding a huge savings in disk I/O.
+      </p>
+
+      <p>
+        When deciding which column(s) to use for partitioning, choose the right level of granularity. For example,
+        should you partition by year, month, and day, or only by year and month? Choose a partitioning strategy
+        that puts at least <ph rev="parquet_block_size">256 MB</ph> of data in each partition, to take advantage of
+        HDFS bulk I/O and Impala distributed queries.
+      </p>
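+      <p>
+        For example, a table partitioned by year and month (rather than by year, month, and day) might be declared
+        as follows; the table and column names are purely illustrative:
+      </p>
+<codeblock>CREATE TABLE web_logs (url STRING, referer STRING, bytes_sent BIGINT)
+  PARTITIONED BY (year SMALLINT, month TINYINT)
+  STORED AS PARQUET;</codeblock>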
+
+      <p>
+        Over-partitioning can also cause query planning to take longer than necessary, as Impala prunes the
+        unnecessary partitions. Ideally, keep the number of partitions in the table under 30 thousand.
+      </p>
+
+      <p>
+        When preparing data files to go in a partition directory, create several large files rather than many small
+        ones. If you receive data in the form of many small files and have no control over the input format,
+        consider using the <codeph>INSERT ... SELECT</codeph> syntax to copy data from one table or partition to
+        another, which compacts the files into a relatively small number (based on the number of nodes in the
+        cluster).
+      </p>
+
+      <p>
+        If you need to reduce the overall number of partitions and increase the amount of data in each partition,
+        first look for partition key columns that are rarely referenced or are referenced in non-critical queries
+        (not subject to an SLA). For example, your web site log data might be partitioned by year, month, day, and
+        hour, but if most queries roll up the results by day, perhaps you only need to partition by year, month,
+        and day.
+      </p>
+
+      <p>
+        If you need to reduce the granularity even more, consider creating <q>buckets</q>, computed values
+        corresponding to different sets of partition key values. For example, you can use the
+        <codeph>TRUNC()</codeph> function with a <codeph>TIMESTAMP</codeph> column to group date and time values
+        based on intervals such as week or quarter. See
+        <xref href="impala_datetime_functions.xml#datetime_functions"/> for details.
+      </p>
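+      <p>
+        For example, a query or view might group rows into quarterly buckets with an expression along these lines;
+        the table and column names are hypothetical:
+      </p>
+<codeblock>SELECT TRUNC(event_time, 'Q') AS quarter_start, COUNT(*) AS events
+  FROM events
+  GROUP BY TRUNC(event_time, 'Q');</codeblock>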
+
+      <p>
+        See <xref href="impala_partitioning.xml#partitioning"/> for full details and performance considerations for
+        partitioning.
+      </p>
+    </section>
+
+    <section id="perf_cookbook_partition_keys">
+
+      <title>Use smallest appropriate integer types for partition key columns.</title>
+
+      <p>
+        Although it is tempting to use strings for partition key columns, since those values are turned into HDFS
+        directory names anyway, you can minimize memory usage by using numeric values for common partition key
+        fields such as <codeph>YEAR</codeph>, <codeph>MONTH</codeph>, and <codeph>DAY</codeph>. Use the smallest
+        integer type that holds the appropriate range of values, typically <codeph>TINYINT</codeph> for
+        <codeph>MONTH</codeph> and <codeph>DAY</codeph>, and <codeph>SMALLINT</codeph> for <codeph>YEAR</codeph>.
+        Use the <codeph>EXTRACT()</codeph> function to pull out individual date and time fields from a
+        <codeph>TIMESTAMP</codeph> value, and <codeph>CAST()</codeph> the return value to the appropriate integer
+        type.
+      </p>
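+      <p>
+        A sketch of this technique, using hypothetical table and column names, might look like the following when
+        loading a partitioned table from raw data containing a <codeph>TIMESTAMP</codeph> column:
+      </p>
+<codeblock>INSERT INTO partitioned_logs PARTITION (year, month, day)
+  SELECT message,
+    CAST(EXTRACT(YEAR FROM event_time) AS SMALLINT) AS year,
+    CAST(EXTRACT(MONTH FROM event_time) AS TINYINT) AS month,
+    CAST(EXTRACT(DAY FROM event_time) AS TINYINT) AS day
+  FROM raw_logs;</codeblock>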
+    </section>
+
+    <section id="perf_cookbook_parquet_block_size">
+
+      <title>Choose an appropriate Parquet block size.</title>
+
+      <p rev="parquet_block_size">
+        By default, the Impala <codeph>INSERT ... SELECT</codeph> statement creates Parquet files with a 256 MB
+        block size. (This default was changed in Impala 2.0. Formerly, the limit was 1 GB, but Impala made
+        conservative estimates about compression, resulting in files that were smaller than 1 GB.)
+      </p>
+
+      <p>
+        Each Parquet file written by Impala is a single block, allowing the whole file to be processed as a unit by a single host.
+        As you copy Parquet files into HDFS or between HDFS filesystems, use <codeph>hdfs dfs -pb</codeph> to preserve the original
+        block size.
+      </p>
+
+      <p>
+        If there are only one or a few data blocks in your Parquet table, or in a partition that is the only one
+        accessed by a query, then you might experience a slowdown for a different reason: not enough data to take
+        advantage of Impala's parallel distributed queries. Each data block is processed by a single core on one of
+        the DataNodes. In a 100-node cluster of 16-core machines, you could potentially process thousands of data
+        files simultaneously. You want to find a sweet spot between <q>many tiny files</q> and <q>single giant
+        file</q> that balances bulk I/O and parallel processing. You can set the <codeph>PARQUET_FILE_SIZE</codeph>
+        query option before doing an <codeph>INSERT ... SELECT</codeph> statement to reduce the size of each
+        generated Parquet file. <ph rev="2.0.0">(Specify the file size as an absolute number of bytes, or in Impala
+        2.0 and later, in units ending with <codeph>m</codeph> for megabytes or <codeph>g</codeph> for
+        gigabytes.)</ph> Run benchmarks with different file sizes to find the right balance point for your
+        particular data volume.
+      </p>
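+      <p>
+        For example, you might experiment with smaller Parquet files using a sequence like the following in
+        <cmdname>impala-shell</cmdname>; the 128 MB figure and table names are only a starting point for
+        benchmarking, not a recommendation:
+      </p>
+<codeblock>SET PARQUET_FILE_SIZE=128m;
+INSERT OVERWRITE parquet_table SELECT * FROM text_table;</codeblock>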
+    </section>
+
+    <section id="perf_cookbook_stats">
+
+      <title>Gather statistics for all tables used in performance-critical or high-volume join queries.</title>
+
+      <p>
+        Gather the statistics with the <codeph>COMPUTE STATS</codeph> statement. See
+        <xref href="impala_perf_joins.xml#perf_joins"/> for details.
+      </p>
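+      <p>
+        For example, run <codeph>COMPUTE STATS</codeph> for each table involved in a large join query, and confirm
+        the results afterwards; the table names here are placeholders:
+      </p>
+<codeblock>COMPUTE STATS huge_fact_table;
+COMPUTE STATS dimension_table;
+SHOW TABLE STATS huge_fact_table;
+SHOW COLUMN STATS huge_fact_table;</codeblock>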
+    </section>
+
+    <section id="perf_cookbook_network">
+
+      <title>Minimize the overhead of transmitting results back to the client.</title>
+
+      <p>
+        Use techniques such as:
+      </p>
+
+      <ul>
+        <li>
+          Aggregation. If you need to know how many rows match a condition, the sum of the column values across the
+          matching rows, the lowest or highest matching value, and so on, call aggregate functions such as
+          <codeph>COUNT()</codeph>, <codeph>SUM()</codeph>, and <codeph>MAX()</codeph> in the query rather than
+          sending the result set to an application and doing those computations there. Remember that the size of an
+          unaggregated result set could be huge, requiring substantial time to transmit across the network.
+        </li>
+
+        <li>
+          Filtering. Use all applicable tests in the <codeph>WHERE</codeph> clause of a query to eliminate rows
+          that are not relevant, rather than producing a big result set and filtering it using application logic.
+        </li>
+
+        <li>
+          <codeph>LIMIT</codeph> clause. If you only need to see a few sample values from a result set, or the top
+          or bottom values from a query using <codeph>ORDER BY</codeph>, include the <codeph>LIMIT</codeph> clause
+          to reduce the size of the result set rather than asking for the full result set and then throwing most of
+          the rows away.
+        </li>
+
+        <li>
+          Avoid overhead from pretty-printing the result set and displaying it on the screen. When you retrieve the
+          results through <cmdname>impala-shell</cmdname>, use <cmdname>impala-shell</cmdname> options such as
+          <codeph>-B</codeph> and <codeph>--output_delimiter</codeph> to produce results without special
+          formatting, and redirect output to a file rather than printing to the screen. Consider using
+          <codeph>INSERT ... SELECT</codeph> to write the results directly to new files in HDFS. See
+          <xref href="impala_shell_options.xml#shell_options"/> for details about the
+          <cmdname>impala-shell</cmdname> command-line options.
+        </li>
+      </ul>
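+      <p>
+        For example, the following <cmdname>impala-shell</cmdname> invocation (with a placeholder host name and
+        query) writes unformatted, comma-delimited results to a local file instead of pretty-printing them on the
+        screen:
+      </p>
+<codeblock>$ impala-shell -i impalad-host -B --output_delimiter=',' \
+    -q 'SELECT customer_id, total FROM sales WHERE total &gt; 1000' \
+    -o /tmp/query_results.txt</codeblock>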
+    </section>
+
+    <section id="perf_cookbook_explain">
+
+      <title>Verify that your queries are planned in an efficient logical manner.</title>
+
+      <p>
+        Examine the <codeph>EXPLAIN</codeph> plan for a query before actually running it. See
+        <xref href="impala_explain.xml#explain"/> and <xref href="impala_explain_plan.xml#perf_explain"/> for
+        details.
+      </p>
+    </section>
+
+    <section id="perf_cookbook_profile">
+
+      <title>Verify performance characteristics of queries.</title>
+
+      <p>
+        Verify that the low-level aspects of I/O, memory usage, network bandwidth, CPU utilization, and so on are
+        within expected ranges by examining the query profile for a query after running it. See
+        <xref href="impala_explain_plan.xml#perf_profile"/> for details.
+      </p>
+    </section>
+
+    <section id="perf_cookbook_os">
+
+      <title>Use appropriate operating system settings.</title>
+
+      <p>
+        See <xref href="http://www.cloudera.com/content/www/en-us/documentation/enterprise/latest/topics/cdh_admin_performance.html" scope="external" format="html">Optimizing Performance in CDH</xref>
+        for recommendations about operating system
+        settings that you can change to influence Impala performance. In particular, you might find
+        that changing the <codeph>vm.swappiness</codeph> Linux kernel setting to a non-zero value improves
+        overall performance.
+      </p>
+    </section>
+
+  </conbody>
+</concept>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1fcc8cee/docs/topics/impala_perf_resources.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_perf_resources.xml b/docs/topics/impala_perf_resources.xml
new file mode 100644
index 0000000..e00c6de
--- /dev/null
+++ b/docs/topics/impala_perf_resources.xml
@@ -0,0 +1,60 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="mem_limits">
+
+  <title>Controlling Impala Resource Usage</title>
+  <titlealts audience="PDF"><navtitle>Controlling Resource Usage</navtitle></titlealts>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="Performance"/>
+      <data name="Category" value="Memory"/>
+      <data name="Category" value="Scalability"/>
+      <data name="Category" value="Resource Management"/>
+      <data name="Category" value="Administrators"/>
+      <data name="Category" value="Developers"/>
+      <data name="Category" value="Data Analysts"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+    <p>
+      Sometimes, balancing raw query performance against scalability requires limiting the amount of resources,
+      such as memory or CPU, used by a single query or group of queries. Impala can use several mechanisms that
+      help to smooth out the load during heavy concurrent usage, resulting in faster overall query times and
+      sharing of resources across Impala queries, MapReduce jobs, and other kinds of workloads across a CDH
+      cluster:
+    </p>
+
+    <ul>
+      <li>
+        The Impala admission control feature uses a fast, distributed mechanism to hold back queries that exceed
+        limits on the number of concurrent queries or the amount of memory used. The queries are queued, and
+        executed as other queries finish and resources become available. You can control the concurrency limits,
+        and specify different limits for different groups of users to divide cluster resources according to the
+        priorities of different classes of users. This feature is new in Impala 1.3, and works with both CDH 4 and
+        CDH 5. See <xref href="impala_admission.xml#admission_control"/> for details.
+      </li>
+
+      <li>
+        <p>
+          You can restrict the amount of memory Impala reserves during query execution by specifying the
+          <codeph>-mem_limit</codeph> option for the <codeph>impalad</codeph> daemon. See
+          <xref href="impala_config_options.xml#config_options"/> for details. This limit applies only to the
+          memory that is directly consumed by queries; Impala reserves additional memory at startup, for example to
+          hold cached metadata.
+        </p>
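+        <p>
+          For example, in a package-based installation where the daemon flags are set in
+          <filepath>/etc/default/impala</filepath>, a sketch of such a setting might look like the following; the
+          70% figure is purely illustrative:
+        </p>
+<codeblock>IMPALA_SERVER_ARGS=" \
+    ...existing flags... \
+    -mem_limit=70%"</codeblock>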
+      </li>
+
+      <li>
+        <p>
+          For production deployment, Cloudera recommends that you implement resource isolation using mechanisms
+          such as cgroups, which you can configure using Cloudera Manager. For details, see the
+          <xref href="http://www.cloudera.com/documentation/enterprise/latest/topics/cm_mc_service_pools.html" scope="external" format="html">Static
+          Resource Pools</xref> in the Cloudera Manager documentation.
+        </p>
+      </li>
+    </ul>
+  </conbody>
+</concept>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1fcc8cee/docs/topics/impala_perf_skew.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_perf_skew.xml b/docs/topics/impala_perf_skew.xml
new file mode 100644
index 0000000..b3a7cec
--- /dev/null
+++ b/docs/topics/impala_perf_skew.xml
@@ -0,0 +1,150 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="perf_skew">
+
+  <title>Detecting and Correcting HDFS Block Skew Conditions</title>
+  <titlealts audience="PDF"><navtitle>HDFS Block Skew</navtitle></titlealts>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="Performance"/>
+      <data name="Category" value="HDFS"/>
+      <data name="Category" value="Proof of Concept"/>
+      <data name="Category" value="Administrators"/>
+      <data name="Category" value="Developers"/>
+      <data name="Category" value="Data Analysts"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+    <p>
+      For best performance of Impala parallel queries, the work is divided equally across hosts in the cluster, and
+      all hosts take approximately equal time to finish their work. If one host takes substantially longer than
+      others, the extra time needed for the slow host can become the dominant factor in query performance.
+      Therefore, one of the first steps in performance tuning for Impala is to detect and correct such conditions.
+    </p>
+
+    <p>
+      The main cause of uneven performance that you can correct within Impala is <term>skew</term> in the number of
+      HDFS data blocks processed by each host, where some hosts process substantially more data blocks than others.
+      This condition can occur because of uneven distribution of the data values themselves, for example causing
+      certain data files or partitions to be large while others are very small. (Although it is possible to have
+      unevenly distributed data without any problems with the distribution of HDFS blocks.) Block skew could also
+      be due to the underlying block allocation policies within HDFS, the replication factor of the data files, and
+      the way that Impala chooses the host to process each data block.
+    </p>
+
+    <p>
+      The most convenient way to detect block skew, or slow-host issues in general, is to examine the <q>executive
+      summary</q> information from the query profile after running a query:
+    </p>
+
+    <ul>
+      <li>
+        <p>
+          In <cmdname>impala-shell</cmdname>, issue the <codeph>SUMMARY</codeph> command immediately after the
+          query is complete, to see just the summary information. If you detect issues involving skew, you might
+          switch to issuing the <codeph>PROFILE</codeph> command, which displays the summary information followed
+          by a detailed performance analysis.
+        </p>
+      </li>
+
+      <li>
+        <p>
+          In the Cloudera Manager interface or the Impala debug web UI, click on the <uicontrol>Profile</uicontrol>
+          link associated with the query after it is complete. The executive summary information is displayed early
+          in the profile output.
+        </p>
+      </li>
+    </ul>
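+    <p>
+      For example, in <cmdname>impala-shell</cmdname> the sequence is simply to run the query and then request the
+      summary or, if needed, the full profile; the query shown here is only a placeholder:
+    </p>
+<codeblock>[impalad-host:21000] &gt; SELECT COUNT(*) FROM big_table;
+[impalad-host:21000] &gt; SUMMARY;
+[impalad-host:21000] &gt; PROFILE;</codeblock>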
+
+    <p>
+      For each phase of the query, you see an <uicontrol>Avg Time</uicontrol> and a <uicontrol>Max Time</uicontrol>
+      value, along with <uicontrol>#Hosts</uicontrol> indicating how many hosts are involved in that query phase.
+      For all the phases with <uicontrol>#Hosts</uicontrol> greater than one, look for cases where the maximum time
+      is substantially greater than the average time. Focus on the phases that took the longest, for example, those
+      taking multiple seconds rather than milliseconds or microseconds.
+    </p>
+
+    <p>
+      If you detect that some hosts take longer than others, first rule out non-Impala causes. One reason that some
+      hosts could be slower than others is if those hosts have less capacity than the others, or if they are
+      substantially busier due to unevenly distributed non-Impala workloads:
+    </p>
+
+    <ul>
+      <li>
+        <p>
+          For clusters running Impala, keep the relative capacities of all hosts roughly equal. Any cost savings
+          from including some underpowered hosts in the cluster will likely be outweighed by poor or uneven
+          performance, and the time spent diagnosing performance issues.
+        </p>
+      </li>
+
+      <li>
+        <p>
+          If non-Impala workloads cause slowdowns on some hosts but not others, use the appropriate load-balancing
+          techniques for the non-Impala components to smooth out the load across the cluster.
+        </p>
+      </li>
+    </ul>
+
+    <p>
+      If the hosts on your cluster are evenly powered and evenly loaded, examine the detailed profile output to
+      determine which host is taking longer than others for the query phase in question. Examine how many bytes are
+      processed during that phase on that host, how much memory is used, and how many bytes are transmitted across
+      the network.
+    </p>
+
+    <p>
+      The most common symptom is a higher number of bytes read on one host than others, due to one host being
+      requested to process a higher number of HDFS data blocks. This condition is more likely to occur when the
+      number of blocks accessed by the query is relatively small. For example, if you have a 10-node cluster and
+      the query processes 10 HDFS blocks, each node might not process exactly one block. If one node sits idle
+      while another node processes two blocks, the query could take twice as long as if the data was perfectly
+      distributed.
+    </p>
+
+    <p>
+      Possible solutions in this case include:
+    </p>
+
+    <ul>
+      <li>
+        <p>
+          If the query is artificially small, perhaps for benchmarking purposes, scale it up to process a larger
+          data set. For example, if some nodes read 10 HDFS data blocks while others read 11, the overall effect of
+          the uneven distribution is much lower than when some nodes did twice as much work as others. As a
+          guideline, aim for a <q>sweet spot</q> where each node reads 2 GB or more from HDFS per query. Queries
+          that process lower volumes than that could experience inconsistent performance that smooths out as
+          queries become more data-intensive.
+        </p>
+      </li>
+
+      <li>
+        <p>
+          If the query processes only a few large blocks, so that many nodes sit idle and cannot help to
+          parallelize the query, consider reducing the overall block size. For example, you might adjust the
+          <codeph>PARQUET_FILE_SIZE</codeph> query option before copying or converting data into a Parquet table.
+          Or you might adjust the granularity of data files produced earlier in the ETL pipeline by non-Impala
+          components. In Impala 2.0 and later, the default Parquet block size is 256 MB, reduced from 1 GB, to
+          improve parallelism for common cluster sizes and data volumes.
+        </p>
+      </li>
+
+      <li>
+        <p>
+          Reduce the amount of compression applied to the data. For text data files, the highest degree of
+          compression (gzip) produces unsplittable files that are more difficult for Impala to process in parallel,
+          and require extra memory during processing to hold the compressed and uncompressed data simultaneously.
+          For binary formats such as Parquet and Avro, compression can result in fewer data blocks overall, but
+          remember that when queries process relatively few blocks, there is less opportunity for parallel
+          execution and many nodes in the cluster might sit idle. Note that when Impala writes Parquet data with
+          the query option <codeph>COMPRESSION_CODEC=NONE</codeph> enabled, the data is still typically compact due
+          to the encoding schemes used by Parquet, independent of the final compression step.
+        </p>
+      </li>
+    </ul>
+  </conbody>
+</concept>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1fcc8cee/docs/topics/impala_perf_testing.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_perf_testing.xml b/docs/topics/impala_perf_testing.xml
new file mode 100644
index 0000000..d621556
--- /dev/null
+++ b/docs/topics/impala_perf_testing.xml
@@ -0,0 +1,175 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="performance_testing">
+
+  <title>Testing Impala Performance</title>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="Performance"/>
+      <data name="Category" value="Troubleshooting"/>
+      <data name="Category" value="Proof of Concept"/>
+      <data name="Category" value="Logs"/>
+      <data name="Category" value="Administrators"/>
+      <data name="Category" value="Developers"/>
+      <data name="Category" value="Data Analysts"/>
+      <!-- Should reorg this topic to use nested topics, not sections. Some keywords like 'logs' buried in section titles. -->
+      <data name="Category" value="Sectionated Pages"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+    <p>
+      Test to ensure that Impala is configured for optimal performance. If you have installed Impala without
+      Cloudera Manager, complete the processes described in this topic to help ensure a proper configuration. Even
+      if you installed Impala with Cloudera Manager, which automatically applies appropriate configurations, these
+      procedures can be used to verify that Impala is set up correctly.
+    </p>
+
+    <section id="checking_config_performance">
+
+      <title>Checking Impala Configuration Values</title>
+
+      <p>
+        You can inspect Impala configuration values by connecting to your Impala server using a browser.
+      </p>
+
+      <p>
+        <b>To check Impala configuration values:</b>
+      </p>
+
+      <ol>
+        <li>
+          Use a browser to connect to one of the hosts running <codeph>impalad</codeph> in your environment.
+          Connect using an address of the form
+          <codeph>http://<varname>hostname</varname>:<varname>port</varname>/varz</codeph>.
+          <note>
+            In the preceding example, replace <codeph>hostname</codeph> and <codeph>port</codeph> with the name and
+            port of your Impala server. The default port is 25000.
+          </note>
+        </li>
+
+        <li>
+          Review the configured values.
+          <p>
+            For example, to check that your system is configured to use block locality tracking information, you
+            would check that the value for <codeph>dfs.datanode.hdfs-blocks-metadata.enabled</codeph> is
+            <codeph>true</codeph>.
+          </p>
+        </li>
+      </ol>
+
+      <p id="p_31">
+        <b>To check data locality:</b>
+      </p>
+
+      <ol>
+        <li>
+          Execute a query on a dataset that is available across multiple nodes. For example, for a table named
+          <codeph>MyTable</codeph> that has a reasonable chance of being spread across multiple DataNodes:
+<codeblock>[impalad-host:21000] &gt; SELECT COUNT (*) FROM MyTable</codeblock>
+        </li>
+
+        <li>
+          After the query completes, review the contents of the Impala logs. You should find a recent message
+          similar to the following:
+<codeblock>Total remote scan volume = 0</codeblock>
+        </li>
+      </ol>
+
+      <p>
+        The presence of remote scans may indicate that <codeph>impalad</codeph> is not running on the correct nodes.
+        This can happen because some DataNodes are not running <codeph>impalad</codeph>, or because the
+        <codeph>impalad</codeph> instance that is coordinating the query is unable to contact one or more of the
+        other <codeph>impalad</codeph> instances.
+      </p>
+
+      <p>
+        <b>To understand the causes of this issue:</b>
+      </p>
+
+      <ol>
+        <li>
+          Connect to the debugging web server. By default, this server runs on port 25000. This page lists all
+          <codeph>impalad</codeph> instances running in your cluster. If there are fewer instances than you expect,
+          this often indicates some DataNodes are not running <codeph>impalad</codeph>. Ensure
+          <codeph>impalad</codeph> is started on all DataNodes.
+        </li>
+
+        <li>
+          <!-- To do:
+            There are other references to this tip about the "Impala daemon's hostname" elsewhere. Could reconcile, conref, or link.
+          -->
+          If you are using multi-homed hosts, ensure that the Impala daemon's hostname resolves to the interface on
+          which <codeph>impalad</codeph> is running. The hostname Impala is using is displayed when
+          <codeph>impalad</codeph> starts. To explicitly set the hostname, use the <codeph>--hostname</codeph> flag.
+        </li>
+
+        <li>
+          Check that <codeph>statestored</codeph> is running as expected. Review the contents of the state store
+          log to ensure all instances of <codeph>impalad</codeph> are listed as having connected to the state
+          store.
+        </li>
+      </ol>
+    </section>
+
+    <section id="checking_config_logs">
+
+      <title>Reviewing Impala Logs</title>
+
+      <p>
+        You can review the contents of the Impala logs for signs that short-circuit reads or block location
+        tracking are not functioning. Before checking logs, execute a simple query against a small HDFS dataset.
+        Completing a query task generates log messages using current settings. Information on starting Impala and
+        executing queries can be found in <xref href="impala_processes.xml#processes"/> and
+        <xref href="impala_impala_shell.xml#impala_shell"/>. Information on logging can be found in
+        <xref href="impala_logging.xml#logging"/>. Log messages and their interpretations are as follows:
+      </p>
+
+      <table>
+        <tgroup cols="2">
+          <colspec colname="1" colwidth="30*"/>
+          <colspec colname="2" colwidth="10*"/>
+          <thead>
+            <row>
+              <entry>
+                Log Message
+              </entry>
+              <entry>
+                Interpretation
+              </entry>
+            </row>
+          </thead>
+          <tbody>
+            <row>
+              <entry>
+                <p>
+<pre>Unknown disk id. This will negatively affect performance. Check your hdfs settings to enable block location metadata
+</pre>
+                </p>
+              </entry>
+              <entry>
+                <p>
+                  Tracking block locality is not enabled.
+                </p>
+              </entry>
+            </row>
+            <row>
+              <entry>
+                <p>
+<pre>Unable to load native-hadoop library for your platform... using builtin-java classes where applicable</pre>
+                </p>
+              </entry>
+              <entry>
+                <p>
+                  Native checksumming is not enabled.
+                </p>
+              </entry>
+            </row>
+          </tbody>
+        </tgroup>
+      </table>
+    </section>
+  </conbody>
+</concept>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1fcc8cee/docs/topics/impala_planning.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_planning.xml b/docs/topics/impala_planning.xml
new file mode 100644
index 0000000..f103ab8
--- /dev/null
+++ b/docs/topics/impala_planning.xml
@@ -0,0 +1,30 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="planning">
+
+  <title>Planning for Impala Deployment</title>
+  <titlealts audience="PDF"><navtitle>Deployment Planning</navtitle></titlealts>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="Deploying"/>
+      <data name="Category" value="Planning"/>
+      <data name="Category" value="Proof of Concept"/>
+      <data name="Category" value="Administrators"/>
+      <data name="Category" value="Developers"/>
+      <data name="Category" value="Stub Pages"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+    <p>
+      <indexterm audience="Cloudera">planning</indexterm>
+      Before you set up Impala in production, do some planning to make sure that your hardware setup has sufficient
+      capacity, that your cluster topology is optimal for Impala queries, and that your schema design and ETL
+      processes follow the best practices for Impala.
+    </p>
+
+    <p outputclass="toc"/>
+  </conbody>
+</concept>


[5/7] incubator-impala git commit: New files needed to make PDF build happy.

Posted by jr...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1fcc8cee/docs/topics/impala_faq.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_faq.xml b/docs/topics/impala_faq.xml
new file mode 100644
index 0000000..94b0b33
--- /dev/null
+++ b/docs/topics/impala_faq.xml
@@ -0,0 +1,1880 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="faq">
+
+  <title>Impala Frequently Asked Questions</title>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="FAQs"/>
+      <data name="Category" value="Planning"/>
+      <data name="Category" value="Getting Started"/>
+      <data name="Category" value="Data Analysts"/>
+      <data name="Category" value="Developers"/>
+      <data name="Category" value="Data Analysts"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+    <p>
+      Here are the categories of frequently asked questions for Impala, the interactive SQL engine included with CDH.
+    </p>
+
+    <p outputclass="toc inpage"/>
+  </conbody>
+
+  <concept id="faq_eval">
+
+    <title>Trying Impala</title>
+
+    <conbody>
+
+      <p outputclass="toc inpage" audience="PDF">
+        FAQs in this category:
+      </p>
+
+      <section id="faq_tryout">
+
+        <title>How do I try Impala out?</title>
+
+        <sectiondiv id="faq_try_impala">
+
+          <p>
+            To look at the core features and functionality on Impala, the easiest way to try out Impala is to
+            download the Cloudera QuickStart VM and start the Impala service through Cloudera Manager, then use
+            <cmdname>impala-shell</cmdname> in a terminal window or the Impala Query UI in the Hue web interface.
+          </p>
+
+          <p>
+            To do performance testing and try out the management features for Impala on a cluster, you need to move
+            beyond the QuickStart VM with its virtualized single-node environment. Ideally, download the Cloudera
+            Manager software to set up the cluster, then install the Impala software through Cloudera Manager.
+          </p>
+
+        </sectiondiv>
+      </section>
+
+      <section id="faq_demo_vm">
+
+        <title>Does Cloudera offer a VM for demonstrating Impala?</title>
+
+        <sectiondiv id="faq_demo_vm_sect">
+
+          <p>
+            Cloudera offers a demonstration VM called the QuickStart VM, available in VMWare, VirtualBox, and KVM
+            formats. For more information, see
+<!-- Was:          <xref href="cloudera-content/cloudera-docs/DemoVMs/Cloudera-QuickStart-VM/cloudera_impala.html" scope="external" format="html">Cloudera Impala Demo VM</xref> -->
+<!-- Then was:          <xref href="cloudera-content/cloudera-docs/DemoVMs/Cloudera-QuickStart-VM/cloudera_quickstart_vm.html" scope="external" format="html">the Cloudera QuickStart VM</xref>. -->
+<!-- Finally(?) was:            <xref href="https://ccp.cloudera.com/display/SUPPORT/Cloudera+QuickStart+VM" scope="external" format="html">the Cloudera QuickStart VM</xref>. -->
+            <xref href="http://www.cloudera.com/content/support/en/downloads/quickstart_vms.html" scope="external" format="html">the
+            Cloudera QuickStart VM</xref>. After booting the QuickStart VM, many services are turned off by
+            default; in the Cloudera Manager UI that appears automatically, turn on Impala and any other components
+            that you want to try out.
+          </p>
+
+        </sectiondiv>
+      </section>
+
+      <section id="faq_docs">
+
+        <title>Where can I find Impala documentation?</title>
+
+        <sectiondiv id="faq_doc">
+
+          <p>
+            Starting with Impala 1.3.0, Impala documentation is integrated with the CDH 5 documentation, in
+            addition to the standalone Impala documentation for use with CDH 4. For CDH 5, the core Impala
+            developer and administrator information remains in the associated
+<!-- Original URL: http://www.cloudera.com/content/cloudera-content/cloudera-docs/CDH5/latest/Impala/impala.html -->
+            <xref href="http://www.cloudera.com/documentation/enterprise/latest/topics/impala.html" scope="external" format="html">Impala
+            documentation</xref> portion. Information about Impala release notes, installation, configuration,
+            startup, and security is embedded in the corresponding CDH 5 guides.
+          </p>
+
+<!-- Same list is in impala.xml and Impala FAQs. Conref in both places. -->
+
+          <ul>
+            <li>
+              <xref href="impala_new_features.xml#new_features">New features</xref>
+            </li>
+
+            <li>
+              <xref href="impala_known_issues.xml#known_issues">Known and fixed issues</xref>
+            </li>
+
+            <li>
+              <xref href="impala_incompatible_changes.xml#incompatible_changes">Incompatible changes</xref>
+            </li>
+
+            <li>
+              <xref href="impala_install.xml#install">Installing Impala</xref>
+            </li>
+
+            <li>
+              <xref href="impala_upgrading.xml#upgrading">Upgrading Impala</xref>
+            </li>
+
+            <li>
+              <xref href="impala_config.xml#config">Configuring Impala</xref>
+            </li>
+
+            <li>
+              <xref href="impala_processes.xml#processes">Starting Impala</xref>
+            </li>
+
+            <li>
+              <xref href="impala_security.xml#security">Security for Impala</xref>
+            </li>
+
+            <li>
+<!-- Original URL: http://www.cloudera.com/content/cloudera-content/cloudera-docs/CDH5/latest/CDH-Version-and-Packaging-Information/CDH-Version-and-Packaging-Information.html -->
+              <xref href="http://www.cloudera.com/documentation/enterprise/latest/topics/rg_vd.html" scope="external" format="html">CDH
+              Version and Packaging Information</xref>
+            </li>
+          </ul>
+
+          <p>
+            Information about the latest CDH 4-compatible Impala release remains at the
+<!-- Original URL: updated this from a /v1/ URL. -->
+            <xref href="http://www.cloudera.com/content/cloudera/en/documentation/impala/latest.html" scope="external" format="html">Impala
+            for CDH 4 Documentation</xref> page.
+          </p>
+
+        </sectiondiv>
+      </section>
+
+      <section id="faq_more_info">
+
+        <title>Where can I get more information about Impala?</title>
+
+        <sectiondiv id="faq_more_info_sect">
+
+          <!-- JDR: Not changing these instances of 'Cloudera Impala' because those are the real titles of those books or blog posts. -->
+          <p>
+            More product information is available here:
+          </p>
+
+          <ul>
+            <li>
+              O'Reilly introductory e-book:
+              <xref href="http://radar.oreilly.com/2013/10/cloudera-impala-bringing-the-sql-and-hadoop-worlds-together.html" scope="external" format="html">Cloudera
+              Impala: Bringing the SQL and Hadoop Worlds Together</xref>
+            </li>
+
+            <li>
+              O'Reilly getting started guide for developers:
+              <xref href="http://shop.oreilly.com/product/0636920033936.do" scope="external" format="html">Getting
+              Started with Impala: Interactive SQL for Apache Hadoop</xref>
+            </li>
+
+            <li>
+              Blog:
+              <xref href="http://blog.cloudera.com/blog/2012/10/cloudera-impala-real-time-queries-in-apache-hadoop-for-real" scope="external" format="html">Cloudera
+              Impala: Real-Time Queries in Apache Hadoop, For Real</xref>
+            </li>
+
+            <li>
+              Webinar:
+              <xref href="http://www.cloudera.com/content/cloudera/en/resources/library/recordedwebinar/impala-real-time-queries-in-hadoop-webinar-slides.html" scope="external" format="html">Introduction
+              to Impala</xref>
+            </li>
+
+            <li>
+              Product website page:
+              <xref href="http://www.cloudera.com/content/cloudera/en/products-and-services/cdh/impala.html" scope="external" format="html">Cloudera
+              Enterprise RTQ</xref>
+            </li>
+          </ul>
+
+          <p>
+            To see the latest release announcements for Impala, see the
+            <xref href="http://community.cloudera.com/t5/Release-Announcements/bd-p/RelAnnounce" scope="external" format="html">Cloudera
+            Announcements</xref> forum.
+          </p>
+
+        </sectiondiv>
+      </section>
+
+      <section id="faq_community">
+
+        <title>How can I ask questions and provide feedback about Impala?</title>
+
+        <sectiondiv id="faq_qanda">
+
+          <ul>
+            <li>
+              Join the
+              <xref href="http://community.cloudera.com/t5/Interactive-Short-cycle-SQL/bd-p/Impala" scope="external" format="html">Impala
+              discussion forum</xref> and the
+              <xref href="https://groups.google.com/a/cloudera.org/forum/?fromgroups#!forum/impala-user" scope="external" format="html">Impala
+              mailing list</xref> to ask questions and provide feedback.
+            </li>
+
+            <li>
+              Use the <xref href="https://issues.cloudera.org/browse/IMPALA" scope="external" format="html">Impala
+              Jira project</xref> to log bug reports and requests for features.
+            </li>
+          </ul>
+
+        </sectiondiv>
+      </section>
+
+      <section id="faq_tpcds">
+
+        <title>Where can I get sample data to try?</title>
+
+        <p>
+          You can get scripts that produce data files and set up an environment for TPC-DS style benchmark tests
+          from <xref href="https://github.com/cloudera/impala-tpcds-kit" scope="external" format="html">this GitHub
+          repository</xref>. In addition to being useful for experimenting with performance, the tables are suited
+          to experimenting with many aspects of SQL on Impala: they contain a good mixture of data types, data
+          distributions, partitioning, and relational data suitable for join queries.
+        </p>
+      </section>
+    </conbody>
+  </concept>
+
+  <concept id="faq_prereq">
+
+    <title>Impala System Requirements</title>
+  <prolog>
+    <metadata>
+      <!-- Normally I don't categorize subtopics under FAQs. Making an exception to beef up the EC2 category,
+           and to judge whether it makes sense to relax that rule a bit. -->
+      <data name="Category" value="Amazon"/>
+      <data name="Category" value="EC2"/>
+    </metadata>
+  </prolog>
+
+    <conbody>
+
+      <p outputclass="toc inpage" audience="PDF">
+        FAQs in this category:
+      </p>
+
+      <section id="faq_prereqs">
+
+        <title>What are the software and hardware requirements for running Impala?</title>
+
+        <sectiondiv id="faq_system_reqs">
+
+          <p>
+            For information on Impala requirements, see <xref href="impala_prereqs.xml#prereqs"/>. Note that there
+            is often a minimum required level of Cloudera Manager for any given Impala version.
+          </p>
+
+        </sectiondiv>
+      </section>
+
+      <section id="faq_memory_prereq">
+
+        <title>How much memory is required?</title>
+
+        <sectiondiv id="faq_mem_req">
+
+          <!-- To do:
+            Prefer to have more examples / citations for larger memory sizes. What are the most
+            memory-intensive operations that require or benefit from large mem size?
+            Actually that info should go into impala_scalability.xml and be xref'ed from here.
+          -->
+
+          <p>
+            Although Impala is not an in-memory database, when dealing with large tables and large result sets, you
+            should expect to dedicate a substantial portion of physical memory to the <cmdname>impalad</cmdname>
+            daemon. Recommended physical memory for an Impala node is 128 GB or higher. If practical, devote
+            approximately 80% of physical memory to Impala.
+<!-- The machines we typically run on have approximately 32-48 GB. -->
+          </p>
+
+          <p>
+            The amount of memory required for an Impala operation depends on several factors:
+          </p>
+
+          <ul>
+            <li>
+              <p>
+                The file format of the table. Different file formats represent the same data in more or fewer data
+                files. The compression and encoding for each file format might require a different amount of
+                temporary memory to decompress the data for analysis.
+              </p>
+            </li>
+
+            <li>
+              <p>
+                Whether the operation is a <codeph>SELECT</codeph> or an <codeph>INSERT</codeph>. For example,
+                Parquet tables require relatively little memory to query, because Impala reads and decompresses
+                data in 8 MB chunks. Inserting into a Parquet table is a more memory-intensive operation because
+                the data for each data file (potentially <ph rev="parquet_block_size">hundreds of megabytes,
+                depending on the value of the <codeph>PARQUET_FILE_SIZE</codeph> query option</ph>) is stored in
+                memory until encoded, compressed, and written to disk.
+<!-- In 2.0, default might be smaller than maximum. -->
+              </p>
+            </li>
+
+            <li>
+              <p>
+                Whether the table is partitioned or not, and whether a query against a partitioned table can take
+                advantage of partition pruning.
+              </p>
+            </li>
+
+            <li>
+              <p>
+                Whether the final result set is sorted by the <codeph>ORDER BY</codeph> clause.
+<!--
+<ph rev="obwl">Remember, Impala requires that all <codeph>ORDER BY</codeph> queries include a
+<codeph>LIMIT</codeph> clause too, either in the query syntax or implicitly
+through the <codeph>DEFAULT_ORDER_BY_LIMIT</codeph> query option.</ph>
+-->
+                Each Impala node scans and filters a portion of the total data, and applies the
+                <codeph>LIMIT</codeph> to its own portion of the result set. <ph rev="1.4.0">In Impala 1.4.0 and
+                higher, if the sort operation requires more memory than is available on any particular host, Impala
+                uses a temporary disk work area to perform the sort.</ph> The intermediate result sets
+<!-- (each with a maximum size of <codeph>LIMIT</codeph> rows) -->
+                are all sent back to the coordinator node, which does the final sorting and then applies the
+                <codeph>LIMIT</codeph> clause to the final result set.
+              </p>
+              <p>
+                For example, if you execute the query:
+<codeblock>select * from giant_table order by some_column limit 1000;</codeblock>
+                and your cluster has 50 nodes, then each of those 50 nodes will transmit a maximum of 1000 rows
+                back to the coordinator node. The coordinator node needs enough memory to sort
+                (<codeph>LIMIT</codeph> * <varname>cluster_size</varname>) rows, although in the end the final
+                result set is at most <codeph>LIMIT</codeph> rows, 1000 in this case.
+              </p>
+              <p>
+                Likewise, if you execute the query:
+<codeblock>select * from giant_table where test_val &gt; 100 order by some_column;</codeblock>
+                then each node filters out a set of rows matching the <codeph>WHERE</codeph> conditions, sorts the
+                results (with no size limit), and sends the sorted intermediate rows back to the coordinator node.
+                The coordinator node might need substantial memory to sort the final result set, and so might use a
+                temporary disk work area for that final phase of the query.
+              </p>
+            </li>
+
+            <li>
+              <p>
+                Whether the query contains any join clauses, <codeph>GROUP BY</codeph> clauses, analytic functions,
+                or <codeph>DISTINCT</codeph> operators. These operations all require some in-memory work areas that
+                vary depending on the volume and distribution of data. In Impala 2.0 and later, these kinds of
+                operations utilize temporary disk work areas if memory usage grows too large to handle. See
+                <xref href="impala_scalability.xml#spill_to_disk"/> for details.
+              </p>
+            </li>
+
+            <li>
+              <p>
+                The size of the result set. When intermediate results are being passed around between nodes, the
+                amount of data depends on the number of columns returned by the query. For example, it is more
+                memory-efficient to query only the columns that are actually needed in the result set rather than
+                always issuing <codeph>SELECT *</codeph>.
+              </p>
+            </li>
+
+            <li>
+              <p>
+                The mechanism by which work is divided for a join query. You use the <codeph>COMPUTE STATS</codeph>
+                statement, and query hints in the most difficult cases, to help Impala pick the most efficient
+                execution plan. See <xref href="impala_perf_joins.xml#perf_joins"/> for details.
+              </p>
+            </li>
+          </ul>
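+
+          <p>
+            Given these factors, one rough way to gauge memory needs before running a statement is to check
+            the plan and, if necessary, cap per-host memory usage. The following is a minimal sketch only:
+            the table and column names are hypothetical, the exact <codeph>EXPLAIN</codeph> output varies by
+            release, and a suitable <codeph>MEM_LIMIT</codeph> value depends on your workload.
+          </p>
+
+<codeblock>-- Check the estimated per-host memory and row counts for a query before running it.
+EXPLAIN SELECT c1, COUNT(*) FROM big_table GROUP BY c1;
+
+-- Optionally cap the memory the query can use on each host (value in bytes here).
+SET MEM_LIMIT=2000000000;
+SELECT c1, COUNT(*) FROM big_table GROUP BY c1;</codeblock>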
+
+          <p>
+            See <xref href="impala_prereqs.xml#prereqs_hardware"/> for more details and recommendations about
+            Impala hardware prerequisites.
+          </p>
+
+        </sectiondiv>
+      </section>
+
+      <section id="faq_cpu_prereq">
+
+        <title>What processor type and speed does Cloudera recommend?</title>
+
+        <sectiondiv id="faq_cpu_req">
+
+          <p rev="CDH-24874">
+            Impala makes use of SSE 4.1 instructions.
+<!-- Commenting out of caution after IMPALA-160 and CDH-20937.
+            For best performance, use Nehalem or later for
+            Intel chips and Bulldozer or later for AMD chips.
+          Impala runs on older machines with the SSE3 instruction set,
+          but will not achieve the best performance.
+          -->
+          </p>
+
+        </sectiondiv>
+      </section>
+
+      <section id="faq_prereq_ec2">
+
+        <title>What EC2 instances are recommended for Impala?</title>
+
+        <p>
+          For large storage capacity and large I/O bandwidth, consider the <codeph>hs1.8xlarge</codeph> and
+          <codeph>cc2.8xlarge</codeph> instance types. Impala I/O patterns typically do not benefit enough from
+          SSD storage to make up for its smaller overall capacity. For performance and security considerations for deploying
+          CDH and its components on AWS, see
+          <xref href="http://www.cloudera.com/content/dam/cloudera/Resources/PDF/whitepaper/AWS_Reference_Architecture_Whitepaper.pdf" scope="external" format="html">Cloudera
+          Enterprise Reference Architecture for AWS Deployments</xref>.
+        </p>
+      </section>
+    </conbody>
+  </concept>
+
+  <concept id="faq_features">
+
+    <title>Supported and Unsupported Functionality In Impala</title>
+
+    <conbody>
+
+      <p outputclass="toc inpage" audience="PDF">
+        FAQs in this category:
+      </p>
+
+      <section id="features">
+
+        <title>What are the main features of Impala?</title>
+
+        <sectiondiv id="faq_features_sql">
+
+          <ul>
+            <li>
+              A large set of SQL statements, including <xref href="impala_select.xml#select">SELECT</xref> and
+              <xref href="impala_insert.xml#insert">INSERT</xref>, with
+              <xref href="impala_joins.xml#joins">joins</xref>, <xref href="impala_subqueries.xml#subqueries"/>,
+              and <xref href="impala_analytic_functions.xml#analytic_functions"/>. Highly compatible with HiveQL,
+              and also including some vendor extensions. For more information, see
+              <xref href="impala_langref.xml#langref"/>.
+            </li>
+
+            <li>
+              Distributed, high-performance queries. See <xref href="impala_performance.xml#performance"/> for
+              information about Impala performance optimizations and tuning techniques for queries.
+            </li>
+
+            <li>
+              Using Cloudera Manager, you can deploy and manage your Impala services. Cloudera Manager is the best
+              way to get started with Impala on your cluster.
+            </li>
+
+            <li>
+              Using Hue for queries.
+            </li>
+
+            <li>
+              Appending and inserting data into tables through the
+              <xref href="impala_insert.xml#insert">INSERT</xref> statement. See
+              <xref href="impala_file_formats.xml#file_formats"/> for the details about which operations are
+              supported for which file formats.
+            </li>
+
+            <li>
+              ODBC: Impala is certified to run against MicroStrategy and Tableau, with restrictions. For more
+              information, see <xref href="impala_odbc.xml#impala_odbc"/>.
+            </li>
+
+            <li>
+              Querying data stored in HDFS and HBase in a single query. See
+              <xref href="impala_hbase.xml#impala_hbase"/> for details.
+            </li>
+
+            <li rev="2.2.0">
+              In Impala 2.2.0 and higher, querying data stored in the Amazon Simple Storage Service (S3). See
+              <xref href="impala_s3.xml#s3"/> for details.
+            </li>
+
+            <li>
+              Concurrent client requests. Each Impala daemon can handle multiple concurrent client requests. The
+              effects on performance depend on your particular hardware and workload.
+            </li>
+
+            <li>
+              Kerberos authentication. For more information, see
+              <xref href="impala_security.xml#security"/>.
+            </li>
+
+            <li>
+              Partitions. With Impala SQL, you can create partitioned tables with the <codeph>CREATE TABLE</codeph>
+              statement, and add and drop partitions with the <codeph>ALTER TABLE</codeph> statement. Impala also
+              takes advantage of the partitioning present in Hive tables. See
+              <xref href="impala_partitioning.xml#partitioning"/> for details.
+            </li>
+          </ul>
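+
+          <p>
+            For the partitioning support mentioned in the last item above, a minimal sketch
+            (the table and column names are hypothetical):
+          </p>
+
+<codeblock>-- A partitioned table, with partitions added and dropped explicitly.
+CREATE TABLE logs (msg STRING)
+  PARTITIONED BY (year INT, month INT);
+
+ALTER TABLE logs ADD PARTITION (year=2014, month=1);
+ALTER TABLE logs DROP PARTITION (year=2013, month=12);</codeblock>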
+
+        </sectiondiv>
+      </section>
+
+      <section id="faq_unsupported">
+
+        <title>What features from relational databases or Hive are not available in Impala?</title>
+
+        <sectiondiv id="faq_unsupported_sql">
+
+          <!-- To do:
+            Good opportunity for a conref since there is a similar "unsupported" topic in the Language Reference section.
+          -->
+
+          <ul>
+            <li>
+              Querying streaming data.
+            </li>
+
+            <li>
+              Deleting individual rows. You delete data in bulk by overwriting an entire table or partition, or by
+              dropping a table.
+            </li>
+
+            <li>
+              Indexing (not currently). LZO-compressed text files can be indexed outside of Impala, as described in
+              <xref href="impala_txtfile.xml#lzo"/>.
+            </li>
+
+<!--
+          <li>
+            YARN integration (available when Impala is used with CDH 5).
+          </li>
+-->
+
+            <li>
+<!-- Former URL disappeared: cloudera.comcloudera/en/products/cdh/search.html -->
+<!-- Subscription URL doesn't seem appropriate: http://www.cloudera.com/content/cloudera/en/products-and-services/cloudera-enterprise/RTS-subscription.html -->
+              Full text search on text fields. The Cloudera Search product is appropriate for this use case.
+            </li>
+
+            <li>
+              Custom Hive Serializer/Deserializer classes (SerDes). Impala supports a set of common native file
+              formats that have built-in SerDes in CDH. See <xref href="impala_file_formats.xml#file_formats"/> for
+              details.
+            </li>
+
+            <li>
+              Checkpointing within a query. That is, Impala does not save intermediate results to disk during
+              long-running queries. Currently, Impala cancels a running query if any host on which that query is
+              executing fails. When one or more hosts are down, Impala reroutes future queries to only use the
+              available hosts, and Impala detects when the hosts come back up and begins using them again. Because
+              a query can be submitted through any Impala node, there is no single point of failure. In the future,
+              we will consider adding additional work allocation features to Impala, so that a running query would
+              complete even in the presence of host failures.
+            </li>
+
+<!--
+          <li>
+            Transforms.
+          </li>
+-->
+
+            <li>
+              Encryption of data transmitted between Impala daemons.
+            </li>
+
+<!--
+            <li>
+              Window functions.
+            </li>
+-->
+
+<!--
+          <li>
+            Hive UDFs.
+          </li>
+-->
+
+            <li>
+              Hive indexes.
+            </li>
+
+            <li>
+              Non-Hadoop data stores, such as relational databases.
+            </li>
+          </ul>
+
+          <p>
+            For the detailed list of features that are different between Impala and HiveQL, see
+            <xref href="impala_langref_unsupported.xml#langref_hiveql_delta"/>.
+          </p>
+
+        </sectiondiv>
+      </section>
+
+      <section id="faq_jdbc">
+
+        <title>Does Impala support generic JDBC?</title>
+
+        <sectiondiv id="faq_jdbc_sect">
+
+          <p>
+            Impala supports the HiveServer2 JDBC driver.
+          </p>
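+
+          <p>
+            As a sketch of what a client connection string typically looks like, assuming the default
+            <cmdname>impalad</cmdname> HiveServer2 port 21050, a hypothetical hostname, and no Kerberos
+            or LDAP authentication:
+          </p>
+
+<codeblock>jdbc:hive2://impala-host.example.com:21050/;auth=noSasl</codeblock>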
+
+        </sectiondiv>
+      </section>
+
+      <section id="faq_avro">
+
+        <title>Is Avro supported?</title>
+
+        <sectiondiv id="faq_avro_sect">
+
+          <p>
+            Yes, Avro is supported. Impala has always been able to query Avro tables. You can use the Impala
+            <codeph>LOAD DATA</codeph> statement to load existing Avro data files into a table. Starting with
+            Impala 1.4, you can create Avro tables with Impala. Currently, you still use the
+            <codeph>INSERT</codeph> statement in Hive to copy data from another table into an Avro table. See
+            <xref href="impala_avro.xml#avro"/> for details.
+          </p>
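+
+          <p>
+            The following is a minimal sketch of creating an Avro table and loading existing Avro data
+            files into it. The table name, HDFS path, and Avro schema are examples only, and the exact
+            <codeph>CREATE TABLE</codeph> forms accepted depend on the Impala release.
+          </p>
+
+<codeblock>CREATE TABLE avro_events (id INT, name STRING)
+  STORED AS AVRO
+  TBLPROPERTIES ('avro.schema.literal'=
+    '{"type":"record","name":"events","fields":[{"name":"id","type":"int"},{"name":"name","type":"string"}]}');
+
+-- Move existing Avro data files that match the schema into the table directory.
+LOAD DATA INPATH '/user/impala/staging/avro_events' INTO TABLE avro_events;</codeblock>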
+
+        </sectiondiv>
+      </section>
+
+      <section audience="Cloudera" id="faq_roadmap">
+
+<!-- Hidden to avoid RevRec implications. -->
+
+        <title>What's next for Impala?</title>
+
+        <sectiondiv id="faq_next">
+
+          <p>
+            See our blog post:
+            <xref href="http://blog.cloudera.com/blog/2013/09/whats-next-for-impala-after-release-1-1/" scope="external" format="html">http://blog.cloudera.com/blog/2012/12/whats-next-for-cloudera-impala/</xref>
+          </p>
+
+        </sectiondiv>
+      </section>
+    </conbody>
+  </concept>
+
+  <concept id="faq_tasks">
+
+    <title>How do I?</title>
+
+    <conbody>
+
+      <p outputclass="toc inpage" audience="PDF">
+        FAQs in this category:
+      </p>
+
+      <section id="faq_secure_sql_text">
+
+        <title>How do I prevent users from seeing the text of SQL queries?</title>
+
+        <p>
+          For instructions on making the Impala log files unreadable by unprivileged users, see
+          <xref href="impala_security_files.xml#secure_files"/>.
+        </p>
+
+        <p>
+          For instructions on password-protecting the web interface to the Impala log files and other internal
+          server information, see <xref href="impala_security_webui.xml#security_webui"/>.
+        </p>
+
+        <p rev="2.2.0">
+          In Impala 2.2 / CDH 5.4 and higher, you can use the log redaction feature
+          to obfuscate sensitive information in Impala log files.
+          See
+          <xref audience="integrated" href="sg_redaction.xml#log_redact"/><xref audience="standalone" href="http://www.cloudera.com/documentation/enterprise/latest/topics/sg_redaction.html" scope="external" format="html"/>
+          for details.
+        </p>
+
+      </section>
+
+      <section id="faq_num_nodes">
+
+        <title>How do I know how many Impala nodes are in my cluster?</title>
+
+        <p>
+          The Impala statestore keeps track of how many <cmdname>impalad</cmdname> nodes are currently available.
+          You can see this information through the statestore web interface. For example, at the URL
+          <codeph>http://<varname>statestore_host</varname>:25010/metrics</codeph> you might see lines like the
+          following:
+        </p>
+
+<codeblock>statestore.live-backends:3
+statestore.live-backends.list:[<varname>host1</varname>:22000, <varname>host1</varname>:26000, <varname>host2</varname>:22000]</codeblock>
+
+        <p>
+          The number of <cmdname>impalad</cmdname> nodes is the number of list items referring to port 22000, in
+          this case two. (Typically, this number is one less than the number reported by the
+          <codeph>statestore.live-backends</codeph> line.) If an <cmdname>impalad</cmdname> node became unavailable
+          or came back after an outage, the information reported on this page would change appropriately.
+        </p>
+
+        <!-- To do:
+          If there is a good CM technique, mention that here also.
+        -->
+      </section>
+
+    </conbody>
+  </concept>
+
+  <concept id="faq_performance">
+
+    <title>Impala Performance</title>
+
+    <conbody>
+
+<!-- Template for new FAQ entries.
+      <section>
+        <title></title>
+        <sectiondiv id="">
+        <p>
+        </p>
+        </sectiondiv>
+      </section>
+
+-->
+
+      <p outputclass="toc inpage" audience="PDF">
+        FAQs in this category:
+      </p>
+
+      <section id="faq_streaming">
+
+        <title>Are results returned as they become available, or all at once when a query completes?</title>
+
+        <sectiondiv id="faq_stream_results">
+
+          <p>
+            Impala streams results whenever they are available, when possible. Certain SQL operations (aggregation
+            or <codeph>ORDER BY</codeph>) require all of the input to be ready before Impala can return results.
+          </p>
+
+        </sectiondiv>
+      </section>
+
+      <section id="faq_slow_query">
+
+        <title>Why does my query run slowly?</title>
+
+        <sectiondiv id="faq_slow_query_sect">
+
+          <p>
+            There are many possible reasons why a given query could be slow. Use the following checklist to
+            diagnose performance issues with existing queries, and to avoid such issues when writing new queries,
+            setting up new nodes, creating new tables, or loading data.
+          </p>
+
+          <ul>
+            <li rev="1.4.0">
+              Immediately after the query finishes, issue a <codeph>SUMMARY</codeph> command in
+              <cmdname>impala-shell</cmdname>. You can check which phases of execution took the longest, and
+              compare estimated values for memory usage and number of rows with the actual values.
+            </li>
+
+            <li>
+              Immediately after the query finishes, issue a <codeph>PROFILE</codeph> command in
+              <cmdname>impala-shell</cmdname>. The numbers in the <codeph>BytesRead</codeph>,
+              <codeph>BytesReadLocal</codeph>, and <codeph>BytesReadShortCircuit</codeph> counters should be
+              identical for a specific node. For example:
+<codeblock>- BytesRead: 180.33 MB
+- BytesReadLocal: 180.33 MB
+- BytesReadShortCircuit: 180.33 MB</codeblock>
+              If <codeph>BytesReadLocal</codeph> is lower than <codeph>BytesRead</codeph>, something in your
+              cluster is misconfigured, such as the <cmdname>impalad</cmdname> daemon not running on all the data
+              nodes. If <codeph>BytesReadShortCircuit</codeph> is lower than <codeph>BytesRead</codeph>,
+              short-circuit reads are not enabled properly on that node; see
+              <xref href="impala_config_performance.xml#config_performance"/> for instructions.
+            </li>
+
+            <li>
+              If the table was just created, or this is the first query that accessed the table after an
+              <codeph>INVALIDATE METADATA</codeph> statement or after the <cmdname>impalad</cmdname> daemon was
+              restarted, there might be a one-time delay while the metadata for the table is loaded and cached.
+              Check whether the slowdown disappears when the query is run again. When doing performance
+              comparisons, consider issuing a <codeph>DESCRIBE <varname>table_name</varname></codeph> statement for
+              each table first, to make sure any timings only measure the actual query time and not the one-time
+              wait to load the table metadata.
+            </li>
+
+            <li>
+              Is the table data in uncompressed text format? Check by issuing a <codeph>DESCRIBE FORMATTED
+              <varname>table_name</varname></codeph> statement. A text table is indicated by the line:
+<codeblock>InputFormat: org.apache.hadoop.mapred.TextInputFormat</codeblock>
+              Although uncompressed text is the default format for a <codeph>CREATE TABLE</codeph> statement with
+              no <codeph>STORED AS</codeph> clauses, it is also the bulkiest format for disk storage and
+              consequently usually the slowest format for queries. For data where query performance is crucial,
+              particularly for tables that are frequently queried, consider starting with or converting to a
+              compact binary file format such as Parquet, Avro, RCFile, or SequenceFile. For details, see
+              <xref href="impala_file_formats.xml#file_formats"/>.
+            </li>
+
+            <li>
+              If your table has many columns, but the query refers to only a few columns, consider using the
+              Parquet file format. Its data files are organized with a column-oriented layout that lets queries
+              minimize the amount of I/O needed to retrieve, filter, and aggregate the values for specific columns.
+              See <xref href="impala_parquet.xml#parquet"/> for details.
+            </li>
+
+            <li>
+              If your query involves any joins, are the tables or subqueries ordered so that the one returning
+              the largest number of rows is listed first (leftmost), followed by the smallest (most selective),
+              the second smallest, and so on? That ordering allows Impala to optimize
+              the way work is distributed among the nodes and how intermediate results are routed from one node to
+              another. For example, all other things being equal, the following join order results in an efficient
+              query:
+<codeblock>select some_col from
+    huge_table join big_table join small_table join medium_table
+  where
+    huge_table.id = big_table.id
+    and big_table.id = medium_table.id
+    and medium_table.id = small_table.id;</codeblock>
+              See <xref href="impala_perf_joins.xml#perf_joins"/> for performance tips for join queries.
+            </li>
+
+            <li>
+              Also for join queries, do you have table statistics for the table, and column statistics for the
+              columns used in the join clauses? Column statistics let Impala better choose how to distribute the
+              work for the various pieces of a join query. See <xref href="impala_perf_stats.xml#perf_stats"/> for
+              details about gathering statistics. (A minimal sketch follows this checklist.)
+            </li>
+
+            <li>
+              Does your table consist of many small data files? Impala works most efficiently with data files in
+              the multi-megabyte range; Parquet, a format optimized for data warehouse-style queries, uses
+              <ph rev="parquet_block_size">large files (originally 1 GB, now 256 MB in Impala 2.0 and higher) with
+              a block size matching the file size</ph>. Use the <codeph>DESCRIBE FORMATTED
+              <varname>table_name</varname></codeph> statement in <cmdname>impala-shell</cmdname> to see where the
+              data for a table is located, and use the <cmdname>hadoop fs -ls</cmdname> or <cmdname>hdfs dfs
+              -ls</cmdname> Unix commands to see the files and their sizes. If you have thousands of small data
+              files, that is a signal that you should consolidate into a smaller number of large files. Use an
+              <codeph>INSERT ... SELECT</codeph> statement to copy the data to a new table, reorganizing into new
+              data files as part of the process. Prefer to construct large data files and import them in bulk
+              through the <codeph>LOAD DATA</codeph> or <codeph>CREATE EXTERNAL TABLE</codeph> statements, rather
+              than issuing many <codeph>INSERT ... VALUES</codeph> statements; each <codeph>INSERT ...
+              VALUES</codeph> statement creates a separate tiny data file. If you have thousands of files all in
+              the same directory, but each one is megabytes in size, consider using a partitioned table so that
+              each partition contains a smaller number of files. See the following point for more on partitioning.
+            </li>
+
+            <li>
+              If your data is easy to group according to time or geographic region, have you partitioned your table
+              based on the corresponding columns such as <codeph>YEAR</codeph>, <codeph>MONTH</codeph>, and/or
+              <codeph>DAY</codeph>? Partitioning a table based on certain columns allows queries that filter based
+              on those same columns to avoid reading the data files for irrelevant years, postal codes, and so on.
+              (Do not partition down to too fine a level; try to structure the partitions so that there is still
+              sufficient data in each one to take advantage of the multi-megabyte HDFS block size.) See
+              <xref href="impala_partitioning.xml#partitioning"/> for details.
+            </li>
+          </ul>
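+
+          <p>
+            For the statistics point in the checklist above, a minimal sketch (the table names are hypothetical):
+          </p>
+
+<codeblock>-- Gather the table and column statistics that the join planner relies on.
+COMPUTE STATS big_table;
+COMPUTE STATS small_table;
+
+-- Confirm that the statistics are now populated.
+SHOW TABLE STATS big_table;
+SHOW COLUMN STATS big_table;</codeblock>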
+
+        </sectiondiv>
+      </section>
+
+      <section id="failed_query">
+
+        <title>Why does my SELECT statement fail?</title>
+
+        <sectiondiv id="faq_select_fail">
+
+          <p>
+            When a <codeph>SELECT</codeph> statement fails, the cause usually falls into one of the following
+            categories:
+          </p>
+
+          <ul>
+            <li>
+              A timeout because of a performance, capacity, or network issue affecting one particular node.
+            </li>
+
+            <li>
+              Excessive memory use for a join query, resulting in automatic cancellation of the query.
+            </li>
+
+            <li>
+              A low-level issue affecting how native code is generated on each node to handle particular
+              <codeph>WHERE</codeph> clauses in the query. For example, a machine instruction could be generated
+              that is not supported by the processor of a certain node. If the error message in the log suggests
+              the cause was an illegal instruction, consider turning off native code generation temporarily, and
+              trying the query again.
+            </li>
+
+            <li>
+              Malformed input data, such as a text data file with an enormously long line, or with a delimiter that
+              does not match the character specified in the <codeph>FIELDS TERMINATED BY</codeph> clause of the
+              <codeph>CREATE TABLE</codeph> statement.
+            </li>
+          </ul>
+
+        </sectiondiv>
+      </section>
+
+      <section id="failed_insert">
+
+        <title>Why does my INSERT statement fail?</title>
+
+        <sectiondiv id="faq_insert_fail">
+
+          <p>
+            When an <codeph>INSERT</codeph> statement fails, it is usually the result of exceeding some limit
+            within a Hadoop component, typically HDFS.
+          </p>
+
+          <ul>
+            <li>
+              An <codeph>INSERT</codeph> into a partitioned table can be a strenuous operation due to the
+              possibility of opening many files and associated threads simultaneously in HDFS. Impala 1.1.1
+              includes some improvements to distribute the work more efficiently, so that the values for each
+              partition are written by a single node, rather than as a separate data file from each node.
+            </li>
+
+            <li>
+              Certain expressions in the <codeph>SELECT</codeph> part of the <codeph>INSERT</codeph> statement can
+              complicate the execution planning and result in an inefficient <codeph>INSERT</codeph> operation. Try
+              to make the column data types of the source and destination tables match up, for example by doing
+              <codeph>ALTER TABLE ... REPLACE COLUMNS</codeph> on the source table if necessary. Try to avoid
+              <codeph>CASE</codeph> expressions in the <codeph>SELECT</codeph> portion, because they make the
+              result values harder to predict than transferring a column unchanged or passing the column through a
+              built-in function.
+            </li>
+
+            <li>
+              Be prepared to raise some limits in the HDFS configuration settings, either temporarily during the
+              <codeph>INSERT</codeph> or permanently if you frequently run such <codeph>INSERT</codeph> statements
+              as part of your ETL pipeline.
+            </li>
+
+            <li>
+              The resource usage of an <codeph>INSERT</codeph> statement can vary depending on the file format of
+              the destination table. Inserting into a Parquet table is memory-intensive, because the data for each
+              partition is buffered in memory until it reaches the Parquet file size (<ph rev="parquet_block_size">1 GB
+              in releases before Impala 2.0, 256 MB in Impala 2.0 and higher</ph>), at which point the data file is
+              written to disk. Impala can distribute the work for an <codeph>INSERT</codeph> more efficiently when
+              statistics are available for the source table that is queried during the <codeph>INSERT</codeph>
+              statement. See <xref href="impala_perf_stats.xml#perf_stats"/> for details about gathering
+              statistics.
+            </li>
+          </ul>
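+
+          <p>
+            Building on the last two points, a minimal sketch of an <codeph>INSERT</codeph> into a
+            partitioned Parquet table (the table and column names are hypothetical):
+          </p>
+
+<codeblock>-- Gather statistics on the source table so Impala can plan the INSERT efficiently.
+COMPUTE STATS staging_table;
+
+-- Keep source and destination column types aligned; CAST explicitly where they differ.
+INSERT INTO parquet_sales PARTITION (year)
+  SELECT id, CAST(amount AS BIGINT), year
+  FROM staging_table;</codeblock>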
+
+        </sectiondiv>
+      </section>
+
+      <section id="faq_scalability">
+
+        <title>Does Impala performance improve as it is deployed to more hosts in a cluster in much the same way that Hadoop performance does?</title>
+
+        <sectiondiv id="faq_hosts">
+
+          <draft-comment translate="no">
+Like to combine this one with the DataNodes question a little later.
+</draft-comment>
+
+          <p>
+            Yes. Impala scales with the number of hosts. It is important to install Impala on all the DataNodes in
+            the cluster, because otherwise some of the nodes must do remote reads to retrieve data not available
+            for local reads. Data locality is an important architectural aspect for Impala performance. See
+            <xref href="http://blog.cloudera.com/blog/2014/01/impala-performance-dbms-class-speed/" scope="external" format="html">this
+            Impala performance blog post</xref> for background. Note that this blog post refers to benchmarks with
+            Impala 1.1.1; Impala has added even more performance features in the 1.2.x series.
+          </p>
+
+        </sectiondiv>
+      </section>
+
+      <section id="faq_hdfs_block_size">
+
+        <title>Is the HDFS block size reduced to achieve faster query results?</title>
+
+        <sectiondiv id="faq_block_size">
+
+          <p>
+            No. Impala does not make any changes to the HDFS or HBase data sets.
+          </p>
+
+          <p>
+            The default Parquet block size is relatively large (<ph rev="parquet_block_size">256 MB in Impala 2.0
+            and later; 1 GB in earlier releases</ph>). You can control the block size when creating Parquet files
+            using the <xref href="impala_parquet_file_size.xml#parquet_file_size">PARQUET_FILE_SIZE</xref> query
+            option.
+          </p>
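+
+          <p>
+            For example, a sketch of producing smaller Parquet data files for one particular
+            <codeph>INSERT</codeph> operation (the table names are hypothetical; the option value
+            here is in bytes):
+          </p>
+
+<codeblock>-- Produce roughly 128 MB Parquet data files for the following INSERT.
+SET PARQUET_FILE_SIZE=134217728;
+INSERT OVERWRITE parquet_table SELECT * FROM text_table;</codeblock>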
+
+        </sectiondiv>
+      </section>
+
+      <section id="faq_caching">
+
+        <title>Does Impala use caching?</title>
+
+        <sectiondiv>
+
+          <p id="caching">
+            Impala does not cache table data. It does cache some table and file metadata. Although queries might run
+            faster on subsequent iterations because the data set was cached in the OS buffer cache, Impala does not
+            explicitly control this.
+          </p>
+
+          <p rev="1.4.0">
+            Impala takes advantage of the HDFS caching feature in CDH 5. You can designate
+            which tables or partitions are cached through the <codeph>CACHED</codeph>
+            and <codeph>UNCACHED</codeph> clauses of the <codeph>CREATE TABLE</codeph>
+            and <codeph>ALTER TABLE</codeph> statements.
+            Impala can also take advantage of data that is pinned in the HDFS cache
+            through the <cmdname>hdfs cacheadmin</cmdname> command.
+            See <xref href="impala_perf_hdfs_caching.xml#hdfs_caching"/> for details.
+          </p>
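+
+          <p>
+            As a minimal sketch of designating cached data (the table, partition, and cache pool names
+            are hypothetical, and the pool must already exist in HDFS):
+          </p>
+
+<codeblock>-- Cache an entire table in an existing HDFS cache pool.
+CREATE TABLE lookup_codes (code INT, descr STRING)
+  CACHED IN 'four_gig_pool';
+
+-- Cache or uncache a single partition of an existing table.
+ALTER TABLE sales_history PARTITION (year=2014) SET CACHED IN 'four_gig_pool';
+ALTER TABLE sales_history PARTITION (year=2010) SET UNCACHED;</codeblock>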
+
+        </sectiondiv>
+      </section>
+    </conbody>
+  </concept>
+
+  <concept id="faq_use_cases">
+
+    <title>Impala Use Cases</title>
+    <prolog>
+      <metadata>
+        <data name="Category" value="Use Cases"/>
+      </metadata>
+    </prolog>
+
+    <conbody>
+
+      <p outputclass="toc inpage" audience="PDF">
+        FAQs in this category:
+      </p>
+
+      <section id="faq_impala_hive_mr">
+
+        <title>What are good use cases for Impala as opposed to Hive or MapReduce?</title>
+
+        <sectiondiv id="faq_impala_vs_hive">
+
+          <p>
+            Impala is well-suited to executing SQL queries for interactive exploratory analytics on large data
+            sets. Hive and MapReduce are appropriate for very long running, batch-oriented tasks such as ETL.
+          </p>
+
+        </sectiondiv>
+      </section>
+
+      <section id="faq_mapreduce">
+
+        <title>Is MapReduce required for Impala? Will Impala continue to work as expected if MapReduce is stopped?</title>
+
+        <sectiondiv id="faq_mapreduce_sect">
+
+          <p>
+            Impala does not use MapReduce at all.
+          </p>
+
+        </sectiondiv>
+      </section>
+
+      <section id="faq_cep">
+
+        <title>Can Impala be used for complex event processing?</title>
+
+        <sectiondiv id="faq_cep_sect">
+
+          <p>
+            For example, in an industrial environment, many agents may generate large amounts of data. Can Impala
+            be used to analyze this data, checking for notable changes in the environment?
+          </p>
+
+          <p>
+            Complex Event Processing (CEP) is usually performed by dedicated stream-processing systems. Impala is
+            not a stream-processing system; it most closely resembles a relational database.
+          </p>
+
+        </sectiondiv>
+      </section>
+
+      <section id="faq_ad_hoc">
+
+        <title>Is Impala intended to handle real time queries in low-latency applications or is it for ad hoc queries for the purpose of data exploration?</title>
+
+        <sectiondiv id="faq_real_time">
+
+          <p>
+            Ad hoc queries are the primary use case for Impala. We anticipate it also being used in many other
+            situations where low latency is required. Whether Impala is appropriate for any particular use case
+            depends on the workload, data size, and query volume. See <xref href="impala_intro.xml#benefits"/> for
+            the primary benefits you can expect when using Impala.
+          </p>
+
+        </sectiondiv>
+      </section>
+    </conbody>
+  </concept>
+
+  <concept id="faq_hive">
+
+    <title>Questions about Impala And Hive</title>
+
+    <conbody>
+
+      <p outputclass="toc inpage" audience="PDF">
+        FAQs in this category:
+      </p>
+
+      <draft-comment translate="no">
+Note: earlier question refers to Impala vs. Hive and MapReduce altogether.
+Should consolidate since makes sense to have one faq_hive ID.
+</draft-comment>
+
+      <section id="faq_hive_pig">
+
+        <title>How does Impala compare to Hive and Pig?</title>
+
+        <sectiondiv id="faq_hive_pig_sect">
+
+          <p>
+            Impala is different from Hive and Pig because it uses its own daemons that are spread across the
+            cluster for queries. Because Impala does not rely on MapReduce, it avoids the startup overhead of
+            MapReduce jobs, allowing Impala to return results in real time.
+          </p>
+
+        </sectiondiv>
+      </section>
+
+      <section id="faq_serdes">
+
+        <title>Can I do transforms or add new functionality?</title>
+
+        <sectiondiv id="faq_udf">
+
+          <p>
+            Impala adds support for UDFs in Impala 1.2. You can write your own functions in C++, or reuse existing
+            Java-based Hive UDFs. The UDF support includes scalar functions and user-defined aggregate functions
+            (UDAs). User-defined table functions (UDTFs) are not currently supported.
+          </p>
+
+          <p>
+            Impala does not currently support an extensible serialization-deserialization framework (SerDes), and
+            so adding extra functionality to Impala is not as straightforward as for Hive or Pig.
+          </p>
+
+        </sectiondiv>
+      </section>
+
+      <section id="faq_hive_compat">
+
+        <title>Can any Impala query also be executed in Hive?</title>
+
+        <sectiondiv id="faq_hiveql">
+
+          <p>
+            Yes. There are some minor differences in how some queries are handled, but Impala queries can also be
+            completed in Hive. Impala SQL is a subset of HiveQL, with some functional limitations such as the
+            lack of support for transforms. For details of the Impala SQL dialect, see
+            <xref href="impala_langref_sql.xml#langref_sql"/>. For the Impala built-in functions, see
+            <xref href="impala_functions.xml#builtins"/>. For the detailed list of differences between Impala and
+            HiveQL, see <xref href="impala_langref_unsupported.xml#langref_hiveql_delta"/>.
+          </p>
+
+        </sectiondiv>
+      </section>
+
+      <section id="faq_hive_hbase_import">
+
+        <title>Can I use Impala to query data already loaded into Hive and HBase?</title>
+
+        <sectiondiv id="faq_hive_hbase">
+
+          <p>
+            There are no additional steps to allow Impala to query tables managed by Hive, whether they are stored
+            in HDFS or HBase. Make sure that Impala is configured to access the Hive metastore correctly and you
+            should be ready to go. Keep in mind that <codeph>impalad</codeph>, by default, runs as the
+            <codeph>impala</codeph> user, so you might need to adjust some file permissions depending on how strict
+            your permissions are currently.
+          </p>
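+
+          <p>
+            For example, a quick way to check the ownership and permissions of the files behind a
+            Hive-managed table (the warehouse path shown is only the typical default location):
+          </p>
+
+<codeblock>hdfs dfs -ls /user/hive/warehouse/<varname>db_name</varname>.db/<varname>table_name</varname></codeblock>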
+
+          <p>
+            See <xref href="impala_hbase.xml#impala_hbase"/> for details about querying data in HBase.
+          </p>
+
+        </sectiondiv>
+      </section>
+
+      <section id="faq_hive_prereq">
+
+        <title>Is Hive an Impala requirement?</title>
+
+        <sectiondiv id="faq_hive_prereq_sect">
+
+          <p>
+            The Hive metastore service is a requirement. Impala shares the same metastore database as Hive,
+            allowing Impala and Hive to access the same tables transparently.
+          </p>
+
+          <p>
+            Hive itself is optional, and does not need to be installed on the same nodes as Impala. Currently,
+            Impala supports a wider variety of read (query) operations than write (insert) operations; you use Hive
+            to insert data into tables that use certain file formats. See
+            <xref href="impala_file_formats.xml#file_formats"/> for details.
+          </p>
+
+        </sectiondiv>
+      </section>
+    </conbody>
+  </concept>
+
+  <concept id="faq_ha">
+
+    <title>Impala Availability</title>
+
+    <conbody>
+
+      <p outputclass="toc inpage" audience="PDF">
+        FAQs in this category:
+      </p>
+
+      <section id="faq_production">
+
+        <title>Is Impala production ready?</title>
+
+        <sectiondiv id="faq_production_sect">
+
+          <p>
+            Impala has finished its beta release cycle, and the 1.0, 1.1, and 1.2 GA releases are production ready.
+            The 1.1.x series includes additional security features for authorization, an important requirement for
+            production use in many organizations. The 1.2.x series includes important performance features,
+            particularly for large join queries. Some Cloudera customers are already using Impala for large
+            workloads.
+          </p>
+
+          <p rev="1.3.0">
+            The Impala 1.3.0 and higher releases are bundled with corresponding levels of CDH 5.
+            The number of new features grows with each release.
+            See <xref href="impala_new_features.xml#new_features"/> for a full list.
+          </p>
+
+        </sectiondiv>
+      </section>
+
+      <section id="faq_ha_config">
+
+        <title>How do I configure Hadoop high availability (HA) for Impala?</title>
+
+        <sectiondiv id="faq_ha_config_sect">
+
+          <p rev="1.2.0">
+            You can set up a proxy server to relay requests back and forth to the Impala servers, for load
+            balancing and high availability. See <xref href="impala_proxy.xml#proxy"/> for details.
+          </p>
+
+          <p>
+            You can enable HDFS HA for the Hive metastore. See the
+<!-- Original URL: http://www.cloudera.com/content/cloudera-content/cloudera-docs/CDH5/latest/CDH5-High-Availability-Guide/cdh_hag_hdfs_ha_cdh_components_config.html -->
+            <xref href="http://www.cloudera.com/documentation/enterprise/latest/topics/cdh_hag_cdh_other_ha.html" scope="external" format="html">CDH5 High Availability Guide</xref>
+            or the
+            <xref href="http://www.cloudera.com/content/cloudera-content/cloudera-docs/CDH4/latest/CDH4-High-Availability-Guide/cdh4hag_topic_2_6.html" scope="external" format="html">CDH4 High Availability Guide</xref>
+            for details.
+          </p>
+
+        </sectiondiv>
+      </section>
+
+      <section id="faq_spof">
+
+        <title>What happens if there is an error in Impala?</title>
+
+        <sectiondiv id="faq_spof_sect">
+
+          <p>
+            There is not a single point of failure in Impala. All Impala daemons are fully able to handle incoming
+            queries. If a machine fails, however, all queries with fragments running on that machine will fail.
+            Because queries are expected to return quickly, you can just rerun the query if there is a failure. See
+            <xref href="impala_concepts.xml#concepts"/> for details about the Impala architecture.
+          </p>
+
+          <draft-comment translate="no">
+Clarify to what extent the catalog service could be seen as a single point of failure.
+</draft-comment>
+
+          <p>
+            The longer answer: Impala must be able to connect to the Hive metastore. Impala aggressively caches
+            metadata so the metastore host should have minimal load. Impala relies on the HDFS NameNode, and, in
+            CDH4, you can configure HA for HDFS. Impala also has centralized services, known as the
+            <xref href="impala_components.xml#intro_statestore">statestore</xref> and
+            <xref href="impala_components.xml#intro_catalogd">catalog</xref> services, that run on one host only.
+            Impala continues to execute queries if the statestore host is down, but it will not get state updates.
+            For example, if a host is added to the cluster while the statestore host is down, the existing
+            instances of <codeph>impalad</codeph> running on the other hosts will not find out about this new host.
+            Once the statestore process is restarted, all the information it serves is automatically reconstructed
+            from all running Impala daemons.
+          </p>
+
+        </sectiondiv>
+      </section>
+
+      <section id="faq_max_rows">
+
+        <title>What is the maximum number of rows in a table?</title>
+
+        <sectiondiv id="faq_max_rows_sect">
+
+          <p>
+            There is no defined maximum. Some customers have used Impala to query a table with over a trillion
+            rows.
+          </p>
+
+        </sectiondiv>
+      </section>
+
+      <section id="faq_contention">
+
+        <title>Can Impala and MapReduce jobs run on the same cluster without resource contention?</title>
+
+        <sectiondiv id="faq_mapreduce_contention">
+
+          <p>
+            Yes. See <xref href="impala_perf_resources.xml#mem_limits"/> for how to control Impala resource usage
+            using the Linux cgroup mechanism, and <xref href="impala_resource_management.xml#resource_management"/>
+            for how to use Impala with the YARN resource management framework. Impala is designed to run on the
+            DataNode hosts. Any contention depends mostly on the cluster setup and workload.
+          </p>
+
+          <p conref="../shared/impala_common.xml#common/impala_mr"/>
+
+        </sectiondiv>
+      </section>
+    </conbody>
+  </concept>
+
+  <concept id="faq_internals">
+
+    <title>Impala Internals</title>
+
+    <conbody>
+
+      <p outputclass="toc inpage" audience="PDF">
+        FAQs in this category:
+      </p>
+
+      <section id="faq_impalad_hosts">
+
+        <title>On which hosts does Impala run?</title>
+
+        <sectiondiv id="faq_data_nodes">
+
+          <p>
+            Cloudera strongly recommends running the <cmdname>impalad</cmdname> daemon on each DataNode for good
+            performance. Although this topology is not a hard requirement, if there are data blocks with no Impala
+            daemons running on any of the hosts containing replicas of those blocks, queries involving that data
+            could be very inefficient. In that case, the data must be transmitted from one host to another for
+            processing by <q>remote reads</q>, a condition Impala normally tries to avoid. See
+            <xref href="impala_concepts.xml#concepts"/> for details about the Impala architecture. Impala schedules
+            query fragments on all hosts holding data relevant to the query, if possible.
+          </p>
+
+          <p>
+            In cases where some hosts in the cluster have much greater CPU and memory capacity than others, or
+            where some hosts have extra CPU capacity because some CPU-intensive phases are single-threaded,
+            some users have run multiple <cmdname>impalad</cmdname> daemons on a single host to take advantage
+            of the extra CPU capacity. This configuration is only practical for specific workloads that
+            rely heavily on aggregation, and the physical hosts must have sufficient memory to accommodate
+            the requirements for multiple <cmdname>impalad</cmdname> instances.
+          </p>
+
+        </sectiondiv>
+      </section>
+
+      <section id="faq_join_internals">
+
+        <title>How are joins performed in Impala?</title>
+
+        <sectiondiv id="faq_joins">
+
+          <draft-comment translate="no">
+Will change with join order optimizations, now slated for 1.2.2.
+</draft-comment>
+
+          <p>
+            By default, Impala automatically determines the most efficient order in which to join tables using a
+            cost-based method, based on their overall size and number of rows. (This is a new feature in Impala
+            1.2.2 and higher.) The <codeph>COMPUTE STATS</codeph> statement gathers information about each table
+            that is crucial for efficient join performance.
+<!--
+          The order in which tables are joined is the same order in which tables appear in the
+          <codeph>SELECT</codeph> statement's
+          <codeph>FROM</codeph> clause. That is, there is no join order optimization
+          taking place at the moment. It is usually optimal for the smallest table to appear as the right-most table in
+          a <codeph>JOIN</codeph> clause.
+          -->
+            Impala chooses between two techniques for join queries, known as <q>broadcast joins</q> and
+            <q>partitioned joins</q>. See <xref href="impala_joins.xml#joins"/> for syntax details and
+            <xref href="impala_perf_joins.xml#perf_joins"/> for performance considerations.
+          </p>
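+
+          <p>
+            In the rare cases where the automatic choice is not optimal, you can override it with a join
+            hint. A minimal sketch (the table names are hypothetical):
+          </p>
+
+<codeblock>-- Force the smaller table to be broadcast to every node.
+SELECT big.id, small.descr
+  FROM big_table big JOIN [BROADCAST] small_table small
+    ON big.id = small.id;
+
+-- Force a partitioned join between two large tables.
+SELECT a.id, b.id
+  FROM big_table_a a JOIN [SHUFFLE] big_table_b b
+    ON a.id = b.id;</codeblock>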
+
+        </sectiondiv>
+      </section>
+
+      <section id="faq_join_sizes">
+
+        <title>How does Impala process join queries for large tables?</title>
+
+        <sectiondiv>
+
+          <p>
+            Impala utilizes multiple strategies to allow joins between tables and result sets of various sizes.
+            When joining a large table with a small one, the data from the small table is transmitted to each node
+            for intermediate processing. When joining two large tables, the data from one of the tables is divided
+            into pieces, and each node processes only selected pieces. See <xref href="impala_joins.xml#joins"/>
+            for details about join processing, <xref href="impala_perf_joins.xml#perf_joins"/> for performance
+            considerations, and <xref href="impala_hints.xml#hints"/> for how to fine-tune the join strategy.
+          </p>
+
+        </sectiondiv>
+      </section>
+
+      <section id="faq_aggregation_implementation">
+
+        <title>What is Impala's aggregation strategy?</title>
+
+        <sectiondiv id="faq_join_aggregation">
+
+          <p rev="2.0.0">
+            Impala currently only supports in-memory hash aggregation.
+            In Impala 2.0 and higher, if the memory requirements for a
+            join or aggregation operation exceed the memory limit for
+            a particular host, Impala uses a temporary work area on disk
+            to help the query complete successfully.
+          </p>
+
+        </sectiondiv>
+      </section>
+
+      <section id="faq_metadata_management">
+
+        <title>How is Impala metadata managed?</title>
+
+        <sectiondiv id="faq_metadata">
+
+          <draft-comment translate="no">
+Doesn't seem related to joins...
+</draft-comment>
+
+          <p>
+            Impala uses two pieces of metadata: the catalog information from the Hive metastore and the file
+            metadata from the NameNode. Currently, this metadata is lazily populated and cached when an
+            <codeph>impalad</codeph> needs it to plan a query.
+          </p>
+
+          <p>
+            The <xref href="impala_refresh.xml#refresh">REFRESH</xref> statement updates the metadata for a
+            particular table after loading new data through Hive. The
+            <xref href="impala_invalidate_metadata.xml#invalidate_metadata"/> statement refreshes all metadata, so
+            that Impala recognizes new tables or other DDL and DML changes performed through Hive.
+          </p>
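+
+          <p>
+            For example, after creating a table or loading data through Hive, a minimal sketch of the
+            statements to run in <cmdname>impala-shell</cmdname> (the table names are hypothetical):
+          </p>
+
+<codeblock>-- After creating a brand new table through Hive:
+INVALIDATE METADATA new_table_from_hive;
+
+-- After appending data files to an existing table through Hive or HDFS commands:
+REFRESH existing_table;</codeblock>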
+
+          <p rev="1.2.0">
+            In Impala 1.2 and higher, a dedicated <cmdname>catalogd</cmdname> daemon broadcasts metadata changes
+            due to Impala DDL or DML statements to all nodes, reducing or eliminating the need to use the
+            <codeph>REFRESH</codeph> and <codeph>INVALIDATE METADATA</codeph> statements.
+          </p>
+
+        </sectiondiv>
+      </section>
+
+      <section id="faq_namenode_overhead">
+
+        <title>What load do concurrent queries produce on the NameNode?</title>
+
+        <sectiondiv id="faq_namenode_load">
+
+          <p>
+            The load Impala generates is very similar to MapReduce. Impala contacts the NameNode during the
+            planning phase to get the file metadata (this is only run on the host the query was sent to). Every
+            <codeph>impalad</codeph> will read files as part of normal processing of the query.
+          </p>
+
+        </sectiondiv>
+      </section>
+
+      <section id="faq_perf_architecture">
+
+        <title>How does Impala achieve its performance improvements?</title>
+
+        <sectiondiv id="faq_performance_features">
+
+          <p>
+            These are the main factors in the performance of Impala versus that of other Hadoop components and
+            related technologies.
+          </p>
+
+          <p>
+            Impala avoids MapReduce. While MapReduce is a great general parallel processing model with many
+            benefits, it is not designed to execute SQL. Impala avoids the inefficiencies of MapReduce in these
+            ways:
+          </p>
+
+          <ul>
+            <li>
+              Impala does not materialize intermediate results to disk. SQL queries often map to multiple MapReduce
+              jobs with all intermediate data sets written to disk.
+            </li>
+
+            <li>
+              Impala avoids MapReduce start-up time. For interactive queries, the MapReduce start-up time becomes
+              very noticeable. Impala runs as a service and essentially has no start-up time.
+            </li>
+
+            <li>
+              Impala can more naturally disperse query plans instead of having to fit them into a pipeline of map
+              and reduce jobs. This enables Impala to parallelize multiple stages of a query and avoid overheads
+              such as sort and shuffle when unnecessary.
+            </li>
+          </ul>
+
+          <p>
+            Impala uses a more efficient execution engine by taking advantage of modern hardware and technologies:
+          </p>
+
+          <ul>
+            <li>
+              Impala generates runtime code. Impala uses LLVM to generate assembly code for the query that is being
+              run. Individual queries do not have to pay the overhead of running on a system that needs to be able
+              to execute arbitrary queries.
+            </li>
+
+            <li>
+              Impala uses available hardware instructions when possible. Impala uses the supplemental SSE3 (SSSE3)
+              instructions which can offer tremendous speedups in some cases. (Impala 2.0 and 2.1 required
+              the SSE4.1 instruction set; Impala 2.2 and higher relax the restriction again so only
+              SSSE3 is required.)
+            </li>
+
+            <li>
+              Impala uses better I/O scheduling. Impala is aware of the disk location of blocks and is able to
+              schedule the order to process blocks to keep all disks busy.
+            </li>
+
+            <li>
+              Impala is designed for performance. A lot of time has been spent in designing Impala with sound
+              performance-oriented fundamentals, such as tight inner loops, inlined function calls, minimal
+              branching, better use of cache, and minimal memory usage.
+            </li>
+          </ul>
+
+        </sectiondiv>
+      </section>
+
+      <section id="faq_memory_exceeded">
+
+        <title>What happens when the data set exceeds available memory?</title>
+
+        <sectiondiv id="faq_mem_limit_exceeded">
+
+          <p>
+            Currently, if the memory required to process intermediate results on a node exceeds the amount
+            available to Impala on that node, the query is cancelled. You can adjust the memory available to Impala
+            on each node, and you can fine-tune the join strategy to reduce the memory required for the biggest
+            queries. We do plan to support external (disk-based) joins and sorts in the future.
+          </p>
+
+          <p>
+            Keep in mind though that the memory usage is not directly based on the input data set size. For
+            aggregations, the memory usage is the number of rows <i>after</i> grouping. For joins, the memory usage
+            is the combined size of the tables <i>excluding</i> the biggest table, and Impala can use join
+            strategies that divide up large joined tables among the various nodes rather than transmitting the
+            entire table to each node.
+          </p>
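+
+          <p>
+            As a minimal illustration (the table, column, and alias names are hypothetical), a join hint can
+            request the partitioned strategy so that the right-hand table is divided among the nodes rather than
+            copied in full to each one:
+          </p>
+
+<codeblock>SELECT c.customer_id, sum(s.amount)
+  FROM sales s JOIN [SHUFFLE] customers c
+    ON s.customer_id = c.customer_id
+ GROUP BY c.customer_id;
+</codeblock>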
+
+        </sectiondiv>
+      </section>
+
+      <section id="faq_memory_pressure">
+
+        <title>What are the most memory-intensive operations?</title>
+
+        <sectiondiv id="faq_memory_fail">
+
+          <p>
+            If a query fails with an error indicating <q>memory limit exceeded</q>, you might suspect a memory
+            leak. The problem could actually be a query that is structured in a way that causes Impala to allocate
+            more memory than you expect, exceeding the memory allocated for Impala on a particular node. Some
+            examples of query or table structures that are especially memory-intensive are:
+          </p>
+
+          <ul>
+            <li>
+              <codeph>INSERT</codeph> statements using dynamic partitioning, into a table with many different
+              partitions. (Particularly for tables using Parquet format, where the data for each partition is held
+              in memory until it reaches <ph rev="parquet_block_size">the full block size</ph> in size before it is
+              written to disk.) Consider breaking up such operations into several different <codeph>INSERT</codeph>
+              statements, for example to load data one year at a time rather than for all years at once, as sketched after this list.
+            </li>
+
+            <li>
+              <codeph>GROUP BY</codeph> on a unique or high-cardinality column. Impala allocates some handler
+              structures for each different value in a <codeph>GROUP BY</codeph> query. Having millions of
+              different <codeph>GROUP BY</codeph> values could exceed the memory limit.
+            </li>
+
+            <li>
+              Queries involving very wide tables, with thousands of columns, particularly with many
+              <codeph>STRING</codeph> columns. Because Impala allows a <codeph>STRING</codeph> value to be up to 32
+              KB, the intermediate results during such queries could require substantial memory allocation.
+            </li>
+          </ul>
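+
+          <p>
+            The following sketch shows the per-partition approach for the <codeph>INSERT</codeph> case; the
+            table and column names are hypothetical:
+          </p>
+
+<codeblock>-- One statement creating all partitions at once can hold many Parquet
+-- buffers in memory simultaneously:
+--   INSERT INTO sales_parquet PARTITION (year)
+--     SELECT id, amount, year FROM staging_sales;
+
+-- Loading one year per statement limits how many partitions (and
+-- in-memory buffers) are open at any one time.
+INSERT INTO sales_parquet PARTITION (year=2012)
+  SELECT id, amount FROM staging_sales WHERE year = 2012;
+INSERT INTO sales_parquet PARTITION (year=2013)
+  SELECT id, amount FROM staging_sales WHERE year = 2013;
+</codeblock>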
+
+        </sectiondiv>
+      </section>
+
+      <section id="faq_memory_dealloc">
+
+        <title>When does Impala hold on to or return memory?</title>
+
+        <p>
+          Impala allocates memory using
+          <codeph><xref href="http://goog-perftools.sourceforge.net/doc/tcmalloc.html" scope="external" format="html">tcmalloc</xref></codeph>,
+          a memory allocator that is optimized for high concurrency. Once Impala allocates memory, it keeps that
+          memory reserved to use for future queries. Thus, it is normal for Impala to show high memory usage when
+          idle. If Impala detects that it is about to exceed its memory limit (defined by the
+          <codeph>-mem_limit</codeph> startup option or the <codeph>MEM_LIMIT</codeph> query option), it
+          deallocates memory not needed by the current queries.
+        </p>
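+
+        <p>
+          For example, a minimal sketch of adjusting the query option within a session (the value shown is
+          purely illustrative):
+        </p>
+
+<codeblock>-- Illustrative value only: cap per-node memory for subsequent queries
+-- in this session at roughly 2 GB (specified in bytes).
+SET MEM_LIMIT=2000000000;
+
+-- Setting the option back to 0 removes the session-level cap.
+SET MEM_LIMIT=0;
+</codeblock>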
+
+        <p>
+          When issuing queries through the JDBC or ODBC interfaces, make sure to call the appropriate close method
+          afterwards. Otherwise, some memory associated with the query is not freed.
+        </p>
+      </section>
+    </conbody>
+  </concept>
+
+  <concept id="faq_sql">
+
+    <title>SQL</title>
+
+    <conbody>
+
+      <p outputclass="toc inpage" audience="PDF">
+        FAQs in this category:
+      </p>
+
+      <section id="faq_update">
+
+        <title>Is there an UPDATE statement?</title>
+
+        <sectiondiv id="faq_update_sect">
+
+          <p>
+            Impala does not currently have an <codeph>UPDATE</codeph> statement, which would typically be used to
+            change a single row, a small group of rows, or a specific column. The HDFS-based files used by typical
+            Impala queries are optimized for bulk operations across many megabytes of data at a time, making
+            traditional <codeph>UPDATE</codeph> operations inefficient or impractical.
+          </p>
+
+          <p>
+            You can use the following techniques to achieve the same goals as the familiar <codeph>UPDATE</codeph>
+            statement, in a way that preserves efficient file layouts for subsequent queries:
+          </p>
+
+          <ul>
+            <li>
+              Replace the entire contents of a table or partition with updated data that you have already staged in
+              a different location, either using <codeph>INSERT OVERWRITE</codeph>, <codeph>LOAD DATA</codeph>, or
+              manual HDFS file operations followed by a <codeph>REFRESH</codeph> statement for the table.
+              Optionally, you can use built-in functions and expressions in the <codeph>INSERT</codeph> statement
+              to transform the copied data in the same way you would normally do in an <codeph>UPDATE</codeph>
+              statement, for example to turn a mixed-case string into all uppercase or all lowercase.
+            </li>
+
+            <li>
+              To update a single row, use an HBase table, and issue an <codeph>INSERT ... VALUES</codeph> statement
+              using the same key as the original row. Because HBase handles duplicate keys by only returning the
+              latest row with a particular key value, the newly inserted row effectively hides the previous one.
+            </li>
+          </ul>
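+
+          <p>
+            The following sketch illustrates the first technique; the table and column names, and the staging
+            approach, are assumptions for illustration only:
+          </p>
+
+<codeblock>-- Stage a transformed copy of the data, applying the change that an
+-- UPDATE statement would normally make (here, upper-casing a column).
+CREATE TABLE sales_staged AS
+  SELECT id, upper(customer_name) AS customer_name, amount FROM sales;
+
+-- Replace the contents of the original table with the transformed copy.
+INSERT OVERWRITE TABLE sales SELECT * FROM sales_staged;
+</codeblock>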
+
+        </sectiondiv>
+      </section>
+
+      <section id="faq_udfs">
+
+        <title>Can Impala do user-defined functions (UDFs)?</title>
+
+        <p>
+          Impala 1.2 and higher does support UDFs and UDAs. You can either write native Impala UDFs and UDAs in
+          C++, or reuse UDFs (but not UDAs) originally written in Java for use with Hive. See
+          <xref href="impala_udf.xml#udfs"/> for details.
+        </p>
+      </section>
+
+      <section id="faq_refresh">
+
+        <title>Why do I have to use REFRESH and INVALIDATE METADATA, what do they do?</title>
+
+        <p>
+          In Impala 1.2 and higher, there is much less need to use the <codeph>REFRESH</codeph> and
+          <codeph>INVALIDATE METADATA</codeph> statements:
+        </p>
+
+        <ul>
+          <li>
+            The new <codeph>impala-catalog</codeph> service, represented by the <cmdname>catalogd</cmdname> daemon,
+            broadcasts the results of Impala DDL statements to all Impala nodes. Thus, if you do a <codeph>CREATE
+            TABLE</codeph> statement in Impala while connected to one node, you do not need to do
+            <codeph>INVALIDATE METADATA</codeph> before issuing queries through a different node.
+          </li>
+
+          <li>
+            The catalog service only recognizes changes made through Impala, so you must still issue a
+            <codeph>REFRESH</codeph> statement if you load data through Hive or by manipulating files in HDFS, and
+            you must issue an <codeph>INVALIDATE METADATA</codeph> statement if you create a table, alter a table,
+            add or drop partitions, or do other DDL statements in Hive.
+          </li>
+
+          <li>
+            Because the catalog service broadcasts the results of <codeph>REFRESH</codeph> and <codeph>INVALIDATE
+            METADATA</codeph> statements to all nodes, in the cases where you do still need to issue those
+            statements, you can do that on a single node rather than on every node, and the changes will be
+            automatically recognized across the cluster, making it more convenient to load balance by issuing
+            queries through arbitrary Impala nodes rather than always using the same coordinator node.
+          </li>
+        </ul>
+      </section>
+
+      <section id="faq_drop_table_space">
+
+        <title>Why is space not freed up when I issue DROP TABLE?</title>
+
+        <p>
+          Impala deletes data files when you issue a <codeph>DROP TABLE</codeph> on an internal table, but not an
+          external one. By default, the <codeph>CREATE TABLE</codeph> statement creates internal tables, where the
+          files are managed by Impala. An external table is created with a <codeph>CREATE EXTERNAL TABLE</codeph>
+          statement, where the files reside in a location outside the control of Impala. Issue a <codeph>DESCRIBE
+          FORMATTED</codeph> statement to check whether a table is internal or external. The keyword
+          <codeph>MANAGED_TABLE</codeph> indicates an internal table, from which Impala can delete the data files.
+          The keyword <codeph>EXTERNAL_TABLE</codeph> indicates an external table, where Impala will leave the data
+          files untouched when you drop the table.
+        </p>
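+
+        <p>
+          For example (the table name is hypothetical):
+        </p>
+
+<codeblock>-- Look for MANAGED_TABLE or EXTERNAL_TABLE in the Table Type field.
+DESCRIBE FORMATTED sales;
+</codeblock>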
+
+        <p>
+          Even when you drop an internal table and the files are removed from their original location, you might
+          not get the hard drive space back immediately. By default, files that are deleted in HDFS go into a
+          special trashcan directory, from which they are purged after a period of time (by default, 6 hours). For
+          background information on the trashcan mechanism, see
+          <xref href="https://archive.cloudera.com/cdh4/cdh/4/hadoop/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html" scope="external" format="html"/>.
+          For information on purging files from the trashcan, see
+          <xref href="https://archive.cloudera.com/cdh4/cdh/4/hadoop/hadoop-project-dist/hadoop-common/FileSystemShell.html" scope="external" format="html"/>.
+        </p>
+
+        <p>
+          When Impala deletes files and they are moved to the HDFS trashcan, they go into an HDFS directory owned
+          by the <codeph>impala</codeph> user. If the <codeph>impala</codeph> user does not have an HDFS home
+          directory where a trashcan can be created, the files are not deleted or moved, as a safety measure. If
+          you issue a <codeph>DROP TABLE</codeph> statement and find that the table data files are left in their
+          original location, create an HDFS directory <filepath>/user/impala</filepath>, owned and writeable by
+          the <codeph>impala</codeph> user. For example, you might find that <filepath>/user/impala</filepath> is
+          owned by the <codeph>hdfs</codeph> user, in which case you would switch to the <codeph>hdfs</codeph> user
+          and issue a command such as:
+        </p>
+
+<codeblock>hdfs dfs -chown -R impala /user/impala</codeblock>
+      </section>
+
+      <section id="faq_dual">
+
+        <title>Is there a DUAL table?</title>
+
+        <p>
+          You might be used to running queries against a single-row table named <codeph>DUAL</codeph> to try out
+          expressions, built-in functions, and UDFs. Impala does not have a <codeph>DUAL</codeph> table. To achieve
+          the same result, you can issue a <codeph>SELECT</codeph> statement without any table name:
+        </p>
+
+<codeblock>select 2+2;
+select substr('hello',2,1);
+select pow(10,6);
+</codeblock>
+      </section>
+    </conbody>
+  </concept>
+
+  <concept id="faq_partitioning">
+
+    <title>Partitioned Tables</title>
+
+    <conbody>
+
+      <p outputclass="toc inpage" audience="PDF">
+        FAQs in this category:
+      </p>
+
+      <section id="faq_partition_csv_etl">
+
+        <title>How do I load a big CSV file into a partitioned table?</title>
+
+        <p>
+          To load a data file into a partitioned table, when the data file includes fields like year, month, and so
+          on that correspond to the partition key columns, use a two-stage process. First, use the <codeph>LOAD
+          DATA</codeph> or <codeph>CREATE EXTERNAL TABLE</codeph> statement to bring the data into an unpartitioned
+          text table. Then use an <codeph>INSERT ... SELECT</codeph> statement to copy the data from the
+          unpartitioned table to a partitioned one. Include a <codeph>PARTITION</codeph> clause in the
+          <codeph>INSERT</codeph> statement to specify the partition key columns. The <codeph>INSERT</codeph>
+          operation splits up the data into separate data files for each partition. For examples, see
+          <xref href="impala_partitioning.xml#partitioning"/>. For details about loading data into partitioned
+          Parquet tables, a popular choice for high-volume data, see <xref href="impala_parquet.xml#parquet_etl"/>.
+        </p>
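+
+        <p>
+          A minimal sketch of the two-stage process follows; the file path, table names, and column layout are
+          hypothetical:
+        </p>
+
+<codeblock>-- Stage 1: an unpartitioned text table matching the CSV layout.
+CREATE TABLE csv_staging (id INT, amount DOUBLE, year INT)
+  ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';
+
+LOAD DATA INPATH '/user/etl/sales_2013.csv' INTO TABLE csv_staging;
+
+-- Stage 2: copy into the partitioned table, letting the INSERT split
+-- the rows into separate data files for each year.
+CREATE TABLE sales_by_year (id INT, amount DOUBLE)
+  PARTITIONED BY (year INT) STORED AS PARQUET;
+
+INSERT INTO sales_by_year PARTITION (year)
+  SELECT id, amount, year FROM csv_staging;
+</codeblock>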
+      </section>
+
+      <section id="faq_partition_select_star">
+
+        <title>Can I do INSERT ... SELECT * into a partitioned table?</title>
+
+        <p>
+          When you use the <codeph>INSERT ... SELECT *</codeph> syntax to copy data into a partitioned table, the
+          columns corresponding to the partition key columns must appear last in the columns returned by the
+          <codeph>SELECT *</codeph>. You can create the table with the partition key columns defined last. Or, you
+          can use the <codeph>CREATE VIEW</codeph> statement to create a view that reorders the columns: put the
+          partition key columns last, then do the <codeph>INSERT ... SELECT *</codeph> from the view.
+        </p>
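+
+        <p>
+          A minimal sketch of the view technique, with hypothetical table and column names (the partition key
+          column <codeph>year</codeph> is stored first in the source table, so the view moves it last):
+        </p>
+
+<codeblock>CREATE VIEW staging_reordered AS
+  SELECT id, amount, year FROM staging_sales;
+
+INSERT INTO sales_by_year PARTITION (year)
+  SELECT * FROM staging_reordered;
+</codeblock>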
+      </section>
+    </conbody>
+  </concept>
+
+  <concept id="faq_hbase">
+
+    <title>HBase</title>
+
+    <conbody>
+
+      <p outputclass="toc inpage" audience="PDF">
+        FAQs in this category:
+      </p>
+
+      <section id="faq_hbase_use_cases">
+
+        <title>What kinds of Impala queries or data are best suited for HBase?</title>
+
+        <p>
+          HBase tables are ideal for queries where normally you would use a key-value store. That is, where you
+          retrieve a single row or a few rows, by testing a special unique key column using the <codeph>=</codeph>
+          or <codeph>IN</codeph> operators.
+        </p>
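+
+        <p>
+          For example, with a hypothetical HBase-backed table whose row key is mapped to the
+          <codeph>user_id</codeph> column:
+        </p>
+
+<codeblock>SELECT * FROM hbase_users WHERE user_id = 'alice123';
+SELECT * FROM hbase_users WHERE user_id IN ('alice123', 'bob456');
+</codeblock>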
+
+        <p>
+          HBase tables are not suitable for queries that produce large result sets with thousands of rows. HBase
+          tables are also not suitable for queries that perform full table scans because the <codeph>WHERE</codeph>
+          clause does not request specific values from the unique key column.
+        </p>
+
+        <p>
+          Use HBase tables for data that is inserted one row or a few rows at a time, such as by the <codeph>INSERT
+          ... VALUES</codeph> syntax. Loading data piecemeal like this into an HDFS-backed table produces many tiny
+          files, which is a very inefficient layout for HDFS data files.
+        </p>
+
+        <p>
+          If the lack of an <codeph>UPDATE</codeph> statement in Impala is a problem for you, you can simulate
+          single-row updates by doing an <codeph>INSERT ... VALUES</codeph> statement using an existing value for
+          the key column. The old row value is hidden; only the new row value is seen by queries.
+        </p>
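+
+        <p>
+          A minimal sketch of this technique, using the same hypothetical table as above:
+        </p>
+
+<codeblock>-- Re-inserting with an existing key value hides the earlier row,
+-- which acts like a single-row UPDATE.
+INSERT INTO hbase_users (user_id, plan, game_enabled)
+  VALUES ('alice123', 'premium', true);
+</codeblock>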
+
+        <p>
+          HBase tables are often wide (containing many columns) and sparse (with most column values
+          <codeph>NULL</codeph>). For example, you might record hundreds of different data points for each user of
+          an online service, such as whether the user had registered for an online game or enabled particular
+          account features. With Impala and HBase, you could look up all the information for a specific customer
+          efficiently in a single query. For any given customer, most of these columns might be
+          <codeph>NULL</codeph>, because a typical customer might not make use of most features of an online
+          service.
+        </p>
+      </section>
+    </conbody>
+  </concept>
+</concept>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1fcc8cee/docs/topics/impala_intro.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_intro.xml b/docs/topics/impala_intro.xml
new file mode 100644
index 0000000..c599bc5
--- /dev/null
+++ b/docs/topics/impala_intro.xml
@@ -0,0 +1,81 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="intro">
+
+  <title id="impala"><ph audience="standalone">Introducing Apache Impala (incubating)</ph><ph audience="integrated">Apache Impala (incubating) Overview</ph></title>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="Getting Started"/>
+      <data name="Category" value="Concepts"/>
+      <data name="Category" value="Data Analysts"/>
+      <data name="Category" value="Developers"/>
+    </metadata>
+  </prolog>
+
+  <conbody id="intro_body">
+
+      <p>
+        Impala provides fast, interactive SQL queries directly on your Apache Hadoop data stored in HDFS,
+        HBase, <ph rev="2.2.0">or the Amazon Simple Storage Service (S3)</ph>.
+        In addition to using the same unified storage platform,
+        Impala also uses the same metadata, SQL syntax (Hive SQL), ODBC driver, and user interface
+        (Impala query UI in Hue) as Apache Hive. This
+        provides a familiar and unified platform for real-time or batch-oriented queries.
+      </p>
+
+      <p>
+        Impala is an addition to tools available for querying big data. Impala does not replace the batch
+        processing frameworks built on MapReduce, such as Hive. Hive and other frameworks built on MapReduce are
+        best suited for long-running batch jobs, such as batch Extract, Transform, and Load (ETL) workloads.
+      </p>
+
+      <note>
+        Impala was accepted into the Apache incubator on December 2, 2015.
+        In places where the documentation formerly referred to <q>Cloudera Impala</q>,
+        now the official name is <q>Apache Impala (incubating)</q>.
+      </note>
+
+  </conbody>
+
+  <concept id="benefits">
+
+    <title>Impala Benefits</title>
+
+    <conbody>
+
+      <p conref="../shared/impala_common.xml#common/impala_benefits"/>
+
+    </conbody>
+  </concept>
+
+  <concept id="impala_cdh">
+
+    <title>How Impala Works with CDH</title>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Concepts"/>
+    </metadata>
+  </prolog>
+
+    <conbody>
+
+      <p conref="../shared/impala_common.xml#common/impala_overview_diagram"/>
+
+      <p conref="../shared/impala_common.xml#common/component_list"/>
+
+      <p conref="../shared/impala_common.xml#common/query_overview"/>
+    </conbody>
+  </concept>
+
+  <concept id="features">
+
+    <title>Primary Impala Features</title>
+
+    <conbody>
+
+      <p conref="../shared/impala_common.xml#common/feature_list"/>
+    </conbody>
+  </concept>
+</concept>


[3/7] incubator-impala git commit: New files needed to make PDF build happy.

Posted by jr...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1fcc8cee/docs/topics/impala_ports.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_ports.xml b/docs/topics/impala_ports.xml
new file mode 100644
index 0000000..ba57ede
--- /dev/null
+++ b/docs/topics/impala_ports.xml
@@ -0,0 +1,440 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="ports">
+
+  <title>Ports Used by Impala</title>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="Ports"/>
+      <data name="Category" value="Network"/>
+      <data name="Category" value="Administrators"/>
+      <data name="Category" value="Developers"/>
+      <data name="Category" value="Data Analysts"/>
+    </metadata>
+  </prolog>
+
+  <conbody id="conbody_ports">
+
+    <p>
+      <indexterm audience="Cloudera">ports</indexterm>
+      Impala uses the TCP ports listed in the following table. Before deploying Impala, ensure these ports are open
+      on each system.
+    </p>
+
+    <table>
+      <tgroup cols="5">
+        <colspec colname="1" colwidth="20*"/>
+        <colspec colname="2" colwidth="30*"/>
+        <colspec colname="3" colwidth="10*"/>
+        <colspec colname="4" colwidth="20*"/>
+        <colspec colname="5" colwidth="30*"/>
+        <thead>
+          <row>
+            <entry>
+              Component
+            </entry>
+            <entry>
+              Service
+            </entry>
+            <entry>
+              Port
+            </entry>
+            <entry>
+              Access Requirement
+            </entry>
+            <entry>
+              Comment
+            </entry>
+          </row>
+        </thead>
+        <tbody>
+          <row>
+            <entry>
+              <p>
+                Impala Daemon
+              </p>
+            </entry>
+            <entry>
+              <p>
+                Impala Daemon Frontend Port
+              </p>
+            </entry>
+            <entry>
+              <p>
+                21000
+              </p>
+            </entry>
+            <entry>
+              <p>
+                External
+              </p>
+            </entry>
+            <entry>
+              <p>
+                Used to transmit commands and receive results by <codeph>impala-shell</codeph> and
+                version 1.2 of the Cloudera ODBC driver.
+              </p>
+            </entry>
+          </row>
+          <row>
+            <entry>
+              <p>
+                Impala Daemon
+              </p>
+            </entry>
+            <entry>
+              <p>
+                Impala Daemon Frontend Port
+              </p>
+            </entry>
+            <entry>
+              <p>
+                21050
+              </p>
+            </entry>
+            <entry>
+              <p>
+                External
+              </p>
+            </entry>
+            <entry>
+              <p>
+                Used to transmit commands and receive results by applications, such as Business Intelligence tools,
+                using JDBC, the Beeswax query editor in Hue, and version 2.0 or higher of the Cloudera ODBC driver.
+              </p>
+            </entry>
+          </row>
+          <row>
+            <entry>
+              <p>
+                Impala Daemon
+              </p>
+            </entry>
+            <entry>
+              <p>
+                Impala Daemon Backend Port
+              </p>
+            </entry>
+            <entry>
+              <p>
+                22000
+              </p>
+            </entry>
+            <entry>
+              <p>
+                Internal
+              </p>
+            </entry>
+            <entry>
+              <p>
+                Internal use only. Impala daemons use this port to communicate with each other.
+              </p>
+            </entry>
+          </row>
+          <row>
+            <entry>
+              <p>
+                Impala Daemon
+              </p>
+            </entry>
+            <entry>
+              <p>
+                StateStoreSubscriber Service Port
+              </p>
+            </entry>
+            <entry>
+              <p>
+                23000
+              </p>
+            </entry>
+            <entry>
+              <p>
+                Internal
+              </p>
+            </entry>
+            <entry>
+              <p>
+                Internal use only. Impala daemons listen on this port for updates from the statestore daemon.
+              </p>
+            </entry>
+          </row>
+          <row rev="2.1.0">
+            <entry>
+              <p>
+                Catalog Daemon
+              </p>
+            </entry>
+            <entry>
+              <p>
+                StateStoreSubscriber Service Port
+              </p>
+            </entry>
+            <entry>
+              <p>
+                23020
+              </p>
+            </entry>
+            <entry>
+              <p>
+                Internal
+              </p>
+            </entry>
+            <entry>
+              <p>
+                Internal use only. The catalog daemon listens on this port for updates from the statestore daemon.
+              </p>
+            </entry>
+          </row>
+          <row>
+            <entry>
+              <p>
+                Impala Daemon
+              </p>
+            </entry>
+            <entry>
+              <p>
+                Impala Daemon HTTP Server Port
+              </p>
+            </entry>
+            <entry>
+              <p>
+                25000
+              </p>
+            </entry>
+            <entry>
+              <p>
+                External
+              </p>
+            </entry>
+            <entry>
+              <p>
+                Impala web interface for administrators to monitor and troubleshoot.
+              </p>
+            </entry>
+          </row>
+          <row>
+            <entry>
+              <p>
+                Impala StateStore Daemon
+              </p>
+            </entry>
+            <entry>
+              <p>
+                StateStore HTTP Server Port
+              </p>
+            </entry>
+            <entry>
+              <p>
+                25010
+              </p>
+            </entry>
+            <entry>
+              <p>
+                External
+              </p>
+            </entry>
+            <entry>
+              <p>
+                StateStore web interface for administrators to monitor and troubleshoot.
+              </p>
+            </entry>
+          </row>
+          <row rev="1.2">
+            <entry>
+              <p>
+                Impala Catalog Daemon
+              </p>
+            </entry>
+            <entry>
+              <p>
+                Catalog HTTP Server Port
+              </p>
+            </entry>
+            <entry>
+              <p>
+                25020
+              </p>
+            </entry>
+            <entry>
+              <p>
+                External
+              </p>
+            </entry>
+            <entry>
+              <p>
+                Catalog service web interface for administrators to monitor and troubleshoot. New in Impala 1.2 and
+                higher.
+              </p>
+            </entry>
+          </row>
+          <row>
+            <entry>
+              <p>
+                Impala StateStore Daemon
+              </p>
+            </entry>
+            <entry>
+              <p>
+                StateStore Service Port
+              </p>
+            </entry>
+            <entry>
+              <p>
+                24000
+              </p>
+            </entry>
+            <entry>
+              <p>
+                Internal
+              </p>
+            </entry>
+            <entry>
+              <p>
+                Internal use only. The statestore daemon listens on this port for registration/unregistration
+                requests.
+              </p>
+            </entry>
+          </row>
+          <row rev="1.2">
+            <entry>
+              <p>
+                Impala Catalog Daemon
+              </p>
+            </entry>
+            <entry>
+              <p>
+                StateStore Service Port
+              </p>
+            </entry>
+            <entry>
+              <p>
+                26000
+              </p>
+            </entry>
+            <entry>
+              <p>
+                Internal
+              </p>
+            </entry>
+            <entry>
+              <p>
+                Internal use only. The catalog service uses this port to communicate with the Impala daemons. New
+                in Impala 1.2 and higher.
+              </p>
+            </entry>
+          </row>
+          <row rev="1.3.0">
+            <entry>
+              <p>
+                Impala Daemon
+              </p>
+            </entry>
+            <entry>
+              <p>
+                Llama Callback Port
+              </p>
+            </entry>
+            <entry>
+              <p>
+                28000
+              </p>
+            </entry>
+            <entry>
+              <p>
+                Internal
+              </p>
+            </entry>
+            <entry>
+              <p>
+                Internal use only. Impala daemons use this port to communicate with Llama. New in CDH 5.0.0 and higher.
+              </p>
+            </entry>
+          </row>
+          <row rev="1.3.0">
+            <entry>
+              <p>
+                Impala Llama ApplicationMaster
+              </p>
+            </entry>
+            <entry>
+              <p>
+                Llama Thrift Admin Port
+              </p>
+            </entry>
+            <entry>
+              <p>
+                15002
+              </p>
+            </entry>
+            <entry>
+              <p>
+                Internal
+              </p>
+            </entry>
+            <entry>
+              <p>
+                Internal use only. New in CDH 5.0.0 and higher.
+              </p>
+            </entry>
+          </row>
+          <row rev="1.3.0">
+            <entry>
+              <p>
+                Impala Llama ApplicationMaster
+              </p>
+            </entry>
+            <entry>
+              <p>
+                Llama Thrift Port
+              </p>
+            </entry>
+            <entry>
+              <p>
+                15000
+              </p>
+            </entry>
+            <entry>
+              <p>
+                Internal
+              </p>
+            </entry>
+            <entry>
+              <p>
+                Internal use only. New in CDH 5.0.0 and higher.
+              </p>
+            </entry>
+          </row>
+          <row rev="1.3.0">
+            <entry>
+              <p>
+                Impala Llama ApplicationMaster
+              </p>
+            </entry>
+            <entry>
+              <p>
+                Llama HTTP Port
+              </p>
+            </entry>
+            <entry>
+              <p>
+                15001
+              </p>
+            </entry>
+            <entry>
+              <p>
+                External
+              </p>
+            </entry>
+            <entry>
+              <p>
+                Llama service web interface for administrators to monitor and troubleshoot. New in CDH 5.0.0 and
+                higher.
+              </p>
+            </entry>
+          </row>
+        </tbody>
+      </tgroup>
+    </table>
+  </conbody>
+</concept>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1fcc8cee/docs/topics/impala_proxy.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_proxy.xml b/docs/topics/impala_proxy.xml
new file mode 100644
index 0000000..84511c7
--- /dev/null
+++ b/docs/topics/impala_proxy.xml
@@ -0,0 +1,635 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="proxy">
+
+  <title>Using Impala through a Proxy for High Availability</title>
+  <titlealts audience="PDF"><navtitle>Load-Balancing Proxy for HA</navtitle></titlealts>
+  <prolog>
+    <metadata>
+      <data name="Category" value="High Availability"/>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="Network"/>
+      <data name="Category" value="Proxy"/>
+      <data name="Category" value="Administrators"/>
+      <data name="Category" value="Developers"/>
+      <data name="Category" value="Data Analysts"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+    <p>
+      For most clusters that have multiple users and production availability requirements, you might set up a proxy
+      server to relay requests to and from Impala.
+    </p>
+
+    <p>
+      Currently, the Impala statestore mechanism does not include such proxying and load-balancing features. Set up
+      a software package of your choice to perform these functions.
+    </p>
+
+    <note>
+      <p conref="../shared/impala_common.xml#common/statestored_catalogd_ha_blurb"/>
+    </note>
+
+    <p outputclass="toc inpage"/>
+
+  </conbody>
+
+  <concept id="proxy_overview">
+
+    <title>Overview of Proxy Usage and Load Balancing for Impala</title>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Concepts"/>
+    </metadata>
+  </prolog>
+
+    <conbody>
+
+      <p>
+        Using a load-balancing proxy server for Impala has the following advantages:
+      </p>
+
+      <ul>
+        <li>
+          Applications connect to a single well-known host and port, rather than keeping track of the hosts where
+          the <cmdname>impalad</cmdname> daemon is running.
+        </li>
+
+        <li>
+          If any host running the <cmdname>impalad</cmdname> daemon becomes unavailable, application connection
+          requests still succeed because you always connect to the proxy server rather than a specific host running
+          the <cmdname>impalad</cmdname> daemon.
+        </li>
+
+        <li>
+          The coordinator node for each Impala query potentially requires more memory and CPU cycles than the other
+          nodes that process the query. The proxy server can issue queries using round-robin scheduling, so that
+          each connection uses a different coordinator node. This load-balancing technique lets the Impala nodes
+          share this additional work, rather than concentrating it on a single machine.
+        </li>
+      </ul>
+
+      <p>
+        The following setup steps are a general outline that apply to any load-balancing proxy software:
+      </p>
+
+      <ol>
+        <li>
+          Download the load-balancing proxy software. It should only need to be installed and configured on a
+          single host. Pick a host other than the DataNodes where <cmdname>impalad</cmdname> is running,
+          because the intention is to protect against the possibility of one or more of these DataNodes becoming unavailable.
+        </li>
+
+        <li>
+          Configure the load balancer (typically by editing a configuration file).
+          In particular:
+          <ul>
+            <li>
+              <p>
+                Set up a port that the load balancer will listen on to relay Impala requests back and forth.
+              </p>
+            </li>
+            <li>
+              <p rev="DOCS-690">
+                Consider enabling <q>sticky sessions</q>. Cloudera recommends enabling this setting
+                so that stateless client applications such as <cmdname>impala-shell</cmdname> and Hue
+                are not disconnected from long-running queries. Evaluate whether this setting is
+                appropriate for your combination of workload and client applications.
+              </p>
+            </li>
+            <li>
+              <p>
+                For Kerberized clusters, follow the instructions in <xref href="impala_proxy.xml#proxy_kerberos"/>.
+              </p>
+            </li>
+          </ul>
+        </li>
+
+        <li>
+          Specify the host and port settings for each Impala node. These are the hosts that the load balancer will
+          choose from when relaying each Impala query. See <xref href="impala_ports.xml#ports"/> for when to use
+          port 21000, 21050, or another value depending on what type of connections you are load balancing.
+          <note rev="CDH-30399">
+            <p rev="CDH-30399">
+              In particular, if you are using Hue or JDBC-based applications,
+              you typically set up load balancing for both ports 21000 and 21050, because
+              these client applications connect through port 21050 while the <cmdname>impala-shell</cmdname>
+              command connects through port 21000.
+            </p>
+          </note>
+        </li>
+
+        <li>
+          Run the load-balancing proxy server, pointing it at the configuration file that you set up.
+        </li>
+
+        <li>
+          On systems managed by Cloudera Manager, on the page
+          <menucascade><uicontrol>Impala</uicontrol><uicontrol>Configuration</uicontrol><uicontrol>Impala Daemon
+          Default Group</uicontrol></menucascade>, specify a value for the <uicontrol>Impala Daemons Load
+          Balancer</uicontrol> field. Specify the address of the load balancer in
+          <codeph><varname>host</varname>:<varname>port</varname></codeph> format. This setting lets Cloudera
+          Manager route all appropriate Impala-related operations through the proxy server.
+        </li>
+
+        <li>
+          For any scripts, jobs, or configuration settings for applications that formerly connected to a specific
+          datanode to run Impala SQL statements, change the connection information (such as the <codeph>-i</codeph>
+          option in <cmdname>impala-shell</cmdname>) to point to the load balancer instead.
+        </li>
+      </ol>
+
+      <note>
+        The following sections use the HAProxy software as a representative example of a load balancer
+        that you can use with Impala.
+        For information specifically about using Impala with the F5 BIG-IP load balancer, see
+        <xref href="http://www.cloudera.com/documentation/other/reference-architecture/PDF/Impala-HA-with-F5-BIG-IP.pdf" scope="external" format="html">Impala HA with F5 BIG-IP</xref>.
+      </note>
+
+    </conbody>
+
+  </concept>
+
+  <concept id="proxy_balancing" rev="CDH-33836 DOCS-349 CDH-39925 CDH-36812" audience="Cloudera">
+    <title>Choosing the Load-Balancing Algorithm</title>
+    <conbody>
+      <p>
+        Load-balancing software offers a number of algorithms to distribute requests.
+        Each algorithm has its own characteristics that make it suitable in some situations
+        but not others.
+      </p>
+
+      <dl>
+        <dlentry>
+          <dt>leastconn</dt>
+          <dd>
+            Connects sessions to the coordinator with the fewest connections, to balance the load evenly.
+            Typically used for workloads consisting of many independent, short-running queries.
+            In configurations with only a few client machines, this setting can avoid having all
+            requests go to only a small set of coordinators.
+          </dd>
+        </dlentry>
+        <dlentry>
+          <dt>source affinity</dt>
+          <dd>
+            Sessions from the same IP address always go to the same coordinator.
+            A good choice for Impala workloads containing a mix of queries and
+            DDL statements, such as <codeph>CREATE TABLE</codeph> and <codeph>ALTER TABLE</codeph>.
+            Because the metadata changes from a DDL statement take time to propagate across the cluster,
+            prefer to use source affinity in this case. If necessary, run the DDL and subsequent
+            queries that depend on the results of the DDL through the same session, for example
+            by running <codeph>impala-shell -f <varname>script_file</varname></codeph> to submit
+            several statements through a single session.
+            An alternative is to set the query option <codeph>SYNC_DDL=1</codeph>
+            to hold back subsequent queries until the results of a DDL operation have propagated
+            throughout the cluster, but that is a relatively expensive setting.
+            Recommended for use with Hue.
+          </dd>
+        </dlentry>
+        <dlentry>
+          <dt>sticky</dt>
+          <dd>
+            Similar to source affinity. Sessions from the same IP address always go to the same coordinator.
+            The maintenance overhead for the <q>stick tables</q> can cause long-running Hue sessions
+            to disconnect; therefore, source affinity is often a better choice.
+          </dd>
+        </dlentry>
+        <dlentry>
+          <dt>round-robin</dt>
+          <dd>
+            Distributes connections to all coordinator nodes.
+            Typically not recommended for Impala.
+          </dd>
+        </dlentry>
+      </dl>
+
+      <p>
+        You might need to perform benchmarks and load testing to determine which setting is optimal for your
+        use case. If some client applications have special characteristics, such as long-running Hue queries
+        working best with source affinity, you might configure multiple virtual IP addresses with a
+        different load-balancing algorithm for each.
+      </p>
+
+    </conbody>
+  </concept>
+
+  <concept id="proxy_kerberos">
+
+    <title>Special Proxy Considerations for Clusters Using Kerberos</title>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Security"/>
+      <data name="Category" value="Kerberos"/>
+      <data name="Category" value="Authentication"/>
+      <data name="Category" value="Proxy"/>
+    </metadata>
+  </prolog>
+
+    <conbody>
+
+      <p>
+        In a cluster using Kerberos, applications check host credentials to verify that the host they are
+        connecting to is the same one that is actually processing the request, to prevent man-in-the-middle
+        attacks. To clarify that the load-balancing proxy server is legitimate, perform these extra Kerberos setup
+        steps:
+      </p>
+
+      <ol>
+        <li>
+          This section assumes you are starting with a Kerberos-enabled cluster. See
+          <xref href="impala_kerberos.xml#kerberos"/> for instructions for setting up Impala with Kerberos. See the
+          <cite>CDH Security Guide</cite> for
+          <xref href="http://www.cloudera.com/documentation/enterprise/latest/topics/cdh_sg_kerberos_prin_keytab_deploy.html" scope="external" format="html">general steps to set up Kerberos</xref>.
+        </li>
+
+        <li>
+          Choose the host you will use for the proxy server. Based on the Kerberos setup procedure, it should
+          already have an entry <codeph>impala/<varname>proxy_host</varname>@<varname>realm</varname></codeph> in
+          its keytab. If not, go back over the initial Kerberos configuration steps for the keytab on each host
+          running the <cmdname>impalad</cmdname> daemon.
+        </li>
+
+        <li rev="CDH-40363">
+          For a cluster managed by Cloudera Manager (5.4.2 or higher), fill in the Impala configuration setting
+          <uicontrol>Impala Daemons Load Balancer</uicontrol> with the appropriate host:port combination.
+          Then restart the Impala service.
+          For systems using a recent level of Cloudera Manager, this is all the configuration you need; you can skip the remaining steps in this procedure.
+        </li>
+
+        <li>
+          On systems not managed by Cloudera Manager, or systems using Cloudera Manager earlier than 5.4.2:
+
+        <ol>
+          <li>
+            Copy the keytab file from the proxy host to all other hosts in the cluster that run the
+            <cmdname>impalad</cmdname> daemon. (For optimal performance, <cmdname>impalad</cmdname> should be running
+            on all DataNodes in the cluster.) Put the keytab file in a secure location on each of these other hosts.
+          </li>
+
+          <li>
+            Add an entry <codeph>impala/<varname>actual_hostname</varname>@<varname>realm</varname></codeph> to the keytab on each
+            host running the <cmdname>impalad</cmdname> daemon.
+          </li>
+
+          <li>
+            For each impalad node, merge the existing keytab with the proxy's keytab using
+            <cmdname>ktutil</cmdname>, producing a new keytab file. For example:
+  <codeblock>$ ktutil
+  ktutil: read_kt proxy.keytab
+  ktutil: read_kt impala.keytab
+  ktutil: write_kt proxy_impala.keytab
+  ktutil: quit</codeblock>
+            <note>
+              On systems managed by Cloudera Manager 5.1.0 and later, the keytab merging happens automatically. To
+              verify that Cloudera Manager has merged the keytabs, run the command:
+  <codeblock>klist -k <varname>keytabfile</varname></codeblock>
+              which lists the credentials for both <codeph>principal</codeph> and <codeph>be_principal</codeph> on
+              all nodes.
+            </note>
+          </li>
+
+          <li>
+            Make sure that the <codeph>impala</codeph> user has permission to read this merged keytab file.
+          </li>
+
+          <li>
+            Change some configuration settings for each host in the cluster that participates in the load balancing.
+            Follow the appropriate steps depending on whether you use Cloudera Manager or not:
+            <ul>
+              <li> In the <cmdname>impalad</cmdname> option definition, or the advanced
+                configuration snippet, add: <codeblock>--principal=impala/<varname>proxy_host</varname>@<varname>realm</varname>
+  --be_principal=impala/<varname>actual_host</varname>@<varname>realm</varname>
+  --keytab_file=<varname>path_to_merged_keytab</varname></codeblock>
+                <note>
+                  <p>On a cluster managed by Cloudera Manager 5.1 (or higher),
+                    when you set up Kerberos authentication using the wizard, you
+                    can choose to allow Cloudera Manager to deploy the
+                      <systemoutput>krb5.conf</systemoutput> on your cluster. In
+                    such a case, you do not need to explicitly modify safety valve
+                    parameters as directed above. </p>
+                  <p>Every host has a different <codeph>--be_principal</codeph>
+                    because the actual hostname is different on each host. </p>
+                  <p> Specify the fully qualified domain name (FQDN) for the proxy
+                    host, not the IP address. Use the exact FQDN as returned by a
+                    reverse DNS lookup for the associated IP address. </p>
+                </note>
+              </li>
+
+              <li>
+                On a cluster managed by Cloudera Manager, create a role group to set the configuration values from
+                the preceding step on a per-host basis.
+              </li>
+
+              <li>
+                On a cluster not managed by Cloudera Manager, see
+                <xref href="impala_config_options.xml#config_options"/> for the procedure to modify the startup
+                options.
+              </li>
+            </ul>
+          </li>
+
+          <li>
+            Restart Impala to make the changes take effect. Follow the appropriate steps depending on whether you use
+            Cloudera Manager or not:
+            <ul>
+              <li>
+                On a cluster managed by Cloudera Manager, restart the Impala service.
+              </li>
+
+              <li>
+                On a cluster not managed by Cloudera Manager, restart the <cmdname>impalad</cmdname> daemons on all
+                hosts in the cluster, as well as the <cmdname>statestored</cmdname> and <cmdname>catalogd</cmdname>
+                daemons.
+              </li>
+            </ul>
+          </li>
+        </ol>
+        </li>
+      </ol>
+
+<!--
+We basically want to merge the keytab from the proxy host to all the impalad host's keytab file. To merge two keytab files, we first need to ship the proxy keytab to all the impalad node, then merge keytab files using MIT Kerberos "ktutil" command line tool.
+
+<codeblock>$ ktutil
+ktutil: read_kt krb5.keytab
+ktutil: read_kt proxy-host.keytab
+ktutil: write_kt krb5.keytab
+ktutil: quit</codeblock>
+
+The setup of the -principal and -be_principal has to be set through safety valve.
+-->
+
+    </conbody>
+
+  </concept>
+
+  <concept id="tut_proxy">
+
+    <title>Example of Configuring HAProxy Load Balancer for Impala</title>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Configuring"/>
+    </metadata>
+  </prolog>
+
+    <conbody>
+
+      <p>
+        If you are not already using a load-balancing proxy, you can experiment with
+        <xref href="http://haproxy.1wt.eu/" scope="external" format="html">HAProxy</xref> a free, open source load
+        balancer. This example shows how you might install and configure that load balancer on a Red Hat Enterprise
+        Linux system.
+      </p>
+
+      <ul>
+        <li>
+          <p>
+            Install the load balancer: <codeph>yum install haproxy</codeph>
+          </p>
+        </li>
+
+        <li>
+          <p>
+            Set up the configuration file: <filepath>/etc/haproxy/haproxy.cfg</filepath>. See the following section
+            for a sample configuration file.
+          </p>
+        </li>
+
+        <li>
+          <p>
+            Run the load balancer (on a single host, preferably one not running <cmdname>impalad</cmdname>):
+          </p>
+<codeblock>/usr/sbin/haproxy -f /etc/haproxy/haproxy.cfg</codeblock>
+        </li>
+
+        <li>
+          <p>
+            In <cmdname>impala-shell</cmdname>, JDBC applications, or ODBC applications, connect to the listener
+            port of the proxy host, rather than port 21000 or 21050 on a host actually running <cmdname>impalad</cmdname>.
+            The sample configuration file sets haproxy to listen on port 25003, so you would send all
+            requests to <codeph><varname>haproxy_host</varname>:25003</codeph>.
+          </p>
+        </li>
+      </ul>
+
+      <p>
+        This is the sample <filepath>haproxy.cfg</filepath> used in this example:
+      </p>
+
+<codeblock>global
+    # To have these messages end up in /var/log/haproxy.log you will
+    # need to:
+    #
+    # 1) configure syslog to accept network log events.  This is done
+    #    by adding the '-r' option to the SYSLOGD_OPTIONS in
+    #    /etc/sysconfig/syslog
+    #
+    # 2) configure local2 events to go to the /var/log/haproxy.log
+    #   file. A line like the following can be added to
+    #   /etc/sysconfig/syslog
+    #
+    #    local2.*                       /var/log/haproxy.log
+    #
+    log         127.0.0.1 local0
+    log         127.0.0.1 local1 notice
+    chroot      /var/lib/haproxy
+    pidfile     /var/run/haproxy.pid
+    maxconn     4000
+    user        haproxy
+    group       haproxy
+    daemon
+
+    # turn on stats unix socket
+    #stats socket /var/lib/haproxy/stats
+
+#---------------------------------------------------------------------
+# common defaults that all the 'listen' and 'backend' sections will
+# use if not designated in their block
+#
+# You might need to adjust timing values to prevent timeouts.
+#---------------------------------------------------------------------
+defaults
+    mode                    http
+    log                     global
+    option                  httplog
+    option                  dontlognull
+    option http-server-close
+    option forwardfor       except 127.0.0.0/8
+    option                  redispatch
+    retries                 3
+    maxconn                 3000
+    contimeout 5000
+    clitimeout 50000
+    srvtimeout 50000
+
+#
+# This sets up the admin page for HA Proxy at port 25002.
+#
+listen stats :25002
+    balance
+    mode http
+    stats enable
+    stats auth <varname>username</varname>:<varname>password</varname>
+
+# This is the setup for Impala. Impala client connect to load_balancer_host:25003.
+# HAProxy will balance connections among the list of servers listed below.
+# The impalad servers listed below listen on port 21000 for Beeswax (impala-shell) or the original ODBC driver.
+# For JDBC or ODBC version 2.x driver, use port 21050 instead of 21000.
+listen impala :25003
+    mode tcp
+    option tcplog
+    balance leastconn
+
+    server <varname>symbolic_name_1</varname> impala-host-1.example.com:21000
+    server <varname>symbolic_name_2</varname> impala-host-2.example.com:21000
+    server <varname>symbolic_name_3</varname> impala-host-3.example.com:21000
+    server <varname>symbolic_name_4</varname> impala-host-4.example.com:21000
+
+# Setup for Hue or other JDBC-enabled applications.
+# In particular, Hue requires sticky sessions.
+# The application connects to load_balancer_host:21051, and HAProxy balances
+# connections to the associated hosts, where Impala listens for JDBC
+# requests on port 21050.
+listen impalajdbc :21051
+    mode tcp
+    option tcplog
+    balance source
+    server <varname>symbolic_name_5</varname> impala-host-1.example.com:21050
+    server <varname>symbolic_name_6</varname> impala-host-2.example.com:21050
+    server <varname>symbolic_name_7</varname> impala-host-3.example.com:21050
+    server <varname>symbolic_name_8</varname> impala-host-4.example.com:21050
+</codeblock>
+
+      <note conref="../shared/impala_common.xml#common/proxy_jdbc_caveat"/>
+
+      <p audience="Cloudera">
+        The following example shows extra steps needed for a cluster using Kerberos authentication:
+      </p>
+
+<codeblock audience="Cloudera">$ klist
+$ impala-shell -k
+$ kinit -r 1d -kt /systest/keytabs/hdfs.keytab hdfs
+$ impala-shell -i c2104.hal.cloudera.com:21000
+$ impala-shell -i c2104.hal.cloudera.com:25003
+[root@c2104 alan]# ps -ef |grep impalad
+root      6442  6428  0 12:21 pts/0    00:00:00 grep impalad
+impala   30577 22192 99 Nov14 ?        3-16:42:32 /usr/lib/impala/sbin-debug/impalad --flagfile=/var/run/cloudera-scm-agent/process/10342-impala-IMPALAD/impala-conf/impalad_flags
+[root@c2104 alan]# vi /var/run/cloudera-scm-agent/process/10342-impala-IMPALAD/impala-conf/impalad_flags
+$ klist -k /var/run/cloudera-scm-agent/process/10342-impala-IMPALAD/impala.keytab
+Keytab name: FILE:/var/run/cloudera-scm-agent/process/10342-impala-IMPALAD/impala.keytab
+KVNO Principal
+---- --------------------------------------------------------------------------
+   2 impala/c2104.hal.cloudera.com@HAL17.CLOUDERA.COM
+   2 impala/c2104.hal.cloudera.com@HAL17.CLOUDERA.COM
+   2 impala/c2104.hal.cloudera.com@HAL17.CLOUDERA.COM
+   2 impala/c2104.hal.cloudera.com@HAL17.CLOUDERA.COM
+   2 HTTP/c2104.hal.cloudera.com@HAL17.CLOUDERA.COM
+   2 HTTP/c2104.hal.cloudera.com@HAL17.CLOUDERA.COM
+   2 HTTP/c2104.hal.cloudera.com@HAL17.CLOUDERA.COM
+   2 HTTP/c2104.hal.cloudera.com@HAL17.CLOUDERA.COM
+$ klist
+Ticket cache: FILE:/tmp/krb5cc_4028
+Default principal: hdfs@HAL17.CLOUDERA.COM
+
+Valid starting     Expires            Service principal
+11/15/13 12:17:17  11/15/13 12:32:17  krbtgt/HAL17.CLOUDERA.COM@HAL17.CLOUDERA.COM
+        renew until 11/16/13 12:17:17
+11/15/13 12:17:21  11/15/13 12:32:17  impala/c2104.hal.cloudera.com@HAL17.CLOUDERA.COM
+        renew until 11/16/13 12:17:17
+$ kinit -r 1d -kt /systest/keytabs/hdfs.keytab hdfs
+$ kinit -R
+$ impala-shell -k -i c2106.hal.cloudera.com:21000
+Starting Impala Shell using Kerberos authentication
+Using service name 'impala'
+Connected to c2106.hal.cloudera.com:21000
+$ impala-shell -i c2104.hal.cloudera.com:25003
+$ impala-shell -k -i c2104.hal.cloudera.com:25003
+Starting Impala Shell using Kerberos authentication
+Using service name 'impala'
+Connected to c2104.hal.cloudera.com:25003
+[c2104.hal.cloudera.com:25003] &gt; create table alan_tmp(a int);
+Query: create table alan_tmp(a int)
+ERROR: InternalException: Got exception: org.apache.hadoop.ipc.RemoteException User: hive/c2102.hal.cloudera.com@HAL17.CLOUDERA.COM is not allowed to impersonate impala/c2104.hal.cloudera.com@HAL17.CLOUDERA.COM
+$ kdestroy
+$ kinit -r 1d -kt /systest/keytabs/hdfs.keytab hdfs
+$ impala-shell -k -i c2104.hal.cloudera.com:25003
+# klist -k c2104.keytab
+Keytab name: FILE:c2104.keytab
+KVNO Principal
+---- --------------------------------------------------------------------------
+   2 impala/c2104.hal.cloudera.com@HAL17.CLOUDERA.COM
+   2 impala/c2104.hal.cloudera.com@HAL17.CLOUDERA.COM
+   2 impala/c2104.hal.cloudera.com@HAL17.CLOUDERA.COM
+   2 impala/c2104.hal.cloudera.com@HAL17.CLOUDERA.COM
+   2 HTTP/c2104.hal.cloudera.com@HAL17.CLOUDERA.COM
+   2 HTTP/c2104.hal.cloudera.com@HAL17.CLOUDERA.COM
+   2 HTTP/c2104.hal.cloudera.com@HAL17.CLOUDERA.COM
+   2 HTTP/c2104.hal.cloudera.com@HAL17.CLOUDERA.COM
+$ klist -k -t c2106.keytab
+Keytab name: FILE:c2106.keytab
+KVNO Timestamp         Principal
+---- ----------------- --------------------------------------------------------
+   2 02/14/13 12:12:22 HTTP/c2106.hal.cloudera.com@HAL17.CLOUDERA.COM
+   2 02/14/13 12:12:22 HTTP/c2106.hal.cloudera.com@HAL17.CLOUDERA.COM
+   2 02/14/13 12:12:22 HTTP/c2106.hal.cloudera.com@HAL17.CLOUDERA.COM
+   2 02/14/13 12:12:22 HTTP/c2106.hal.cloudera.com@HAL17.CLOUDERA.COM
+   2 02/14/13 12:12:22 impala/c2106.hal.cloudera.com@HAL17.CLOUDERA.COM
+   2 02/14/13 12:12:22 impala/c2106.hal.cloudera.com@HAL17.CLOUDERA.COM
+   2 02/14/13 12:12:22 impala/c2106.hal.cloudera.com@HAL17.CLOUDERA.COM
+   2 02/14/13 12:12:22 impala/c2106.hal.cloudera.com@HAL17.CLOUDERA.COM
+$ ktutil
+ktutil:  rkt c2104.keytab
+ktutil:  rkt c2106.keytab
+ktutil:  wkt my_test.keytab
+ktutil:  q
+$ klist -k -t my_test.keytab
+Keytab name: FILE:my_test.keytab
+KVNO Timestamp         Principal
+---- ----------------- --------------------------------------------------------
+   2 11/21/13 16:22:40 impala/c2104.hal.cloudera.com@HAL17.CLOUDERA.COM
+   2 11/21/13 16:22:40 impala/c2104.hal.cloudera.com@HAL17.CLOUDERA.COM
+   2 11/21/13 16:22:40 impala/c2104.hal.cloudera.com@HAL17.CLOUDERA.COM
+   2 11/21/13 16:22:40 impala/c2104.hal.cloudera.com@HAL17.CLOUDERA.COM
+   2 11/21/13 16:22:40 HTTP/c2104.hal.cloudera.com@HAL17.CLOUDERA.COM
+   2 11/21/13 16:22:40 HTTP/c2104.hal.cloudera.com@HAL17.CLOUDERA.COM
+   2 11/21/13 16:22:40 HTTP/c2104.hal.cloudera.com@HAL17.CLOUDERA.COM
+   2 11/21/13 16:22:40 HTTP/c2104.hal.cloudera.com@HAL17.CLOUDERA.COM
+   2 11/21/13 16:22:40 HTTP/c2106.hal.cloudera.com@HAL17.CLOUDERA.COM
+   2 11/21/13 16:22:41 HTTP/c2106.hal.cloudera.com@HAL17.CLOUDERA.COM
+   2 11/21/13 16:22:41 HTTP/c2106.hal.cloudera.com@HAL17.CLOUDERA.COM
+   2 11/21/13 16:22:41 HTTP/c2106.hal.cloudera.com@HAL17.CLOUDERA.COM
+   2 11/21/13 16:22:41 impala/c2106.hal.cloudera.com@HAL17.CLOUDERA.COM
+   2 11/21/13 16:22:41 impala/c2106.hal.cloudera.com@HAL17.CLOUDERA.COM
+   2 11/21/13 16:22:41 impala/c2106.hal.cloudera.com@HAL17.CLOUDERA.COM
+   2 11/21/13 16:22:41 impala/c2106.hal.cloudera.com@HAL17.CLOUDERA.COM
+$ kdestroy
+$ kinit -r 1d -kt /systest/keytabs/hdfs.keytab hdfs
+$ vi README
+$ kinit -R
+$ impala-shell -k -i c2104.hal.cloudera.com:25003
+Starting Impala Shell using Kerberos authentication
+Using service name 'impala'
+Connected to c2104.hal.cloudera.com:25003
+<ph conref="../shared/ImpalaVariables.xml#impala_vars/ImpaladBanner"/>
+Welcome to the Impala shell. Press TAB twice to see a list of available commands.
+
+Copyright (c) 2012 Cloudera, Inc. All rights reserved.
+
+<ph conref="../shared/ImpalaVariables.xml#impala_vars/ShellBanner"/>
+[c2104.hal.cloudera.com:25003] &gt; show tables;
+Query: show tables
+ERROR: AnalysisException: This Impala daemon is not ready to accept user requests. Status: Waiting for catalog update from the StateStore.
+[c2104.hal.cloudera.com:25003] &gt; quit;</codeblock>
+
+      <!--
+        At that point in the walkthrough with Alan Choi, we could never get Impala to accept any requests through the catalog server.
+        So I have not seen a 100% successful proxy setup process to verify all the details.
+      -->
+
+    </conbody>
+
+  </concept>
+
+</concept>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1fcc8cee/docs/topics/impala_rcfile.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_rcfile.xml b/docs/topics/impala_rcfile.xml
new file mode 100644
index 0000000..1bfab8c
--- /dev/null
+++ b/docs/topics/impala_rcfile.xml
@@ -0,0 +1,244 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="rcfile">
+
+  <title>Using the RCFile File Format with Impala Tables</title>
+  <titlealts audience="PDF"><navtitle>RCFile Data Files</navtitle></titlealts>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala"/>
+      <!-- <data name="Category" value="RCFile"/> -->
+      <data name="Category" value="File Formats"/>
+      <data name="Category" value="Tables"/>
+      <data name="Category" value="Developers"/>
+      <data name="Category" value="Data Analysts"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+    <p>
+      <indexterm audience="Cloudera">RCFile support in Impala</indexterm>
+      Impala supports using RCFile data files.
+    </p>
+
+    <table>
+      <title>RCFile Format Support in Impala</title>
+      <tgroup cols="5">
+        <colspec colname="1" colwidth="10*"/>
+        <colspec colname="2" colwidth="10*"/>
+        <colspec colname="3" colwidth="20*"/>
+        <colspec colname="4" colwidth="30*"/>
+        <colspec colname="5" colwidth="30*"/>
+        <thead>
+          <row>
+            <entry>
+              File Type
+            </entry>
+            <entry>
+              Format
+            </entry>
+            <entry>
+              Compression Codecs
+            </entry>
+            <entry>
+              Impala Can CREATE?
+            </entry>
+            <entry>
+              Impala Can INSERT?
+            </entry>
+          </row>
+        </thead>
+        <tbody>
+          <row conref="impala_file_formats.xml#file_formats/rcfile_support">
+            <entry/>
+          </row>
+        </tbody>
+      </tgroup>
+    </table>
+
+    <p outputclass="toc inpage"/>
+  </conbody>
+
+  <concept id="rcfile_create">
+
+    <title>Creating RCFile Tables and Loading Data</title>
+  <prolog>
+    <metadata>
+      <data name="Category" value="ETL"/>
+    </metadata>
+  </prolog>
+
+    <conbody>
+
+      <p>
+        If you do not have an existing data file to use, begin by creating one in the appropriate format.
+      </p>
+
+      <p>
+        <b>To create an RCFile table:</b>
+      </p>
+
+      <p>
+        In the <codeph>impala-shell</codeph> interpreter, issue a command similar to:
+      </p>
+
+<codeblock>create table rcfile_table (<varname>column_specs</varname>) stored as rcfile;</codeblock>
+
+      <p>
+        Because Impala can query some kinds of tables that it cannot currently write to, after creating tables of
+        certain file formats, you might use the Hive shell to load the data. See
+        <xref href="impala_file_formats.xml#file_formats"/> for details. After loading data into a table through
+        Hive or other mechanism outside of Impala, issue a <codeph>REFRESH <varname>table_name</varname></codeph>
+        statement the next time you connect to the Impala node, before querying the table, to make Impala recognize
+        the new data.
+      </p>
+
+      <note type="important">
+        See <xref href="impala_known_issues.xml#known_issues"/> for potential compatibility issues with
+        RCFile tables created in Hive 0.12, due to a change in the default RCFile SerDe for Hive.
+      </note>
+
+      <p>
+        For example, here is how you might create some RCFile tables in Impala (by specifying the columns
+        explicitly, or cloning the structure of another table), load data through Hive, and query them through
+        Impala:
+      </p>
+
+<codeblock>$ impala-shell -i localhost
+[localhost:21000] &gt; create table rcfile_table (x int) stored as rcfile;
+[localhost:21000] &gt; create table rcfile_clone like some_other_table stored as rcfile;
+[localhost:21000] &gt; quit;
+
+$ hive
+hive&gt; insert into table rcfile_table select x from some_other_table;
+3 Rows loaded to rcfile_table
+Time taken: 19.015 seconds
+hive&gt; quit;
+
+$ impala-shell -i localhost
+[localhost:21000] &gt; select * from rcfile_table;
+Returned 0 row(s) in 0.23s
+[localhost:21000] &gt; -- Make Impala recognize the data loaded through Hive;
+[localhost:21000] &gt; refresh rcfile_table;
+[localhost:21000] &gt; select * from rcfile_table;
++---+
+| x |
++---+
+| 1 |
+| 2 |
+| 3 |
++---+
+Returned 3 row(s) in 0.23s</codeblock>
+
+      <p conref="../shared/impala_common.xml#common/complex_types_unsupported_filetype"/>
+
+    </conbody>
+  </concept>
+
+  <concept id="rcfile_compression">
+
+    <title>Enabling Compression for RCFile Tables</title>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Snappy"/>
+      <data name="Category" value="Compression"/>
+    </metadata>
+  </prolog>
+
+    <conbody>
+
+      <p>
+        <indexterm audience="Cloudera">compression</indexterm>
+        You may want to enable compression on existing tables. Enabling compression provides performance gains in
+        most cases and is supported for RCFile tables. For example, to enable Snappy compression, you would specify
+        the following additional settings when loading data through the Hive shell:
+      </p>
+
+<codeblock>hive&gt; SET hive.exec.compress.output=true;
+hive&gt; SET mapred.max.split.size=256000000;
+hive&gt; SET mapred.output.compression.type=BLOCK;
+hive&gt; SET mapred.output.compression.codec=org.apache.hadoop.io.compress.SnappyCodec;
+hive&gt; INSERT OVERWRITE TABLE <varname>new_table</varname> SELECT * FROM <varname>old_table</varname>;</codeblock>
+
+      <p>
+        If you are converting partitioned tables, you must complete additional steps. In such a case, specify
+        additional settings similar to the following:
+      </p>
+
+<codeblock>hive&gt; CREATE TABLE <varname>new_table</varname> (<varname>your_cols</varname>) PARTITIONED BY (<varname>partition_cols</varname>) STORED AS <varname>new_format</varname>;
+hive&gt; SET hive.exec.dynamic.partition.mode=nonstrict;
+hive&gt; SET hive.exec.dynamic.partition=true;
+hive&gt; INSERT OVERWRITE TABLE <varname>new_table</varname> PARTITION(<varname>comma_separated_partition_cols</varname>) SELECT * FROM <varname>old_table</varname>;</codeblock>
+
+      <p>
+        Remember that Hive does not require you to specify a source format for the existing data. Consider the case of
+        converting a table with two partition columns called <codeph>year</codeph> and <codeph>month</codeph> to a
+        Snappy compressed RCFile. Combining the components outlined previously to complete this table conversion,
+        you would specify settings similar to the following:
+      </p>
+
+<codeblock>hive&gt; CREATE TABLE tbl_rc (int_col INT, string_col STRING) STORED AS RCFILE;
+hive&gt; SET hive.exec.compress.output=true;
+hive&gt; SET mapred.max.split.size=256000000;
+hive&gt; SET mapred.output.compression.type=BLOCK;
+hive&gt; SET mapred.output.compression.codec=org.apache.hadoop.io.compress.SnappyCodec;
+hive&gt; SET hive.exec.dynamic.partition.mode=nonstrict;
+hive&gt; SET hive.exec.dynamic.partition=true;
+hive&gt; INSERT OVERWRITE TABLE tbl_rc SELECT * FROM tbl;</codeblock>
+
+      <p>
+        To complete a similar process for a table that includes partitions, you would specify settings similar to
+        the following:
+      </p>
+
+<codeblock>hive&gt; CREATE TABLE tbl_rc (int_col INT, string_col STRING) PARTITIONED BY (year INT) STORED AS RCFILE;
+hive&gt; SET hive.exec.compress.output=true;
+hive&gt; SET mapred.max.split.size=256000000;
+hive&gt; SET mapred.output.compression.type=BLOCK;
+hive&gt; SET mapred.output.compression.codec=org.apache.hadoop.io.compress.SnappyCodec;
+hive&gt; SET hive.exec.dynamic.partition.mode=nonstrict;
+hive&gt; SET hive.exec.dynamic.partition=true;
+hive&gt; INSERT OVERWRITE TABLE tbl_rc PARTITION(year) SELECT * FROM tbl;</codeblock>
+
+      <note>
+        <p>
+          The compression type is specified in the following command:
+        </p>
+<codeblock>SET mapred.output.compression.codec=org.apache.hadoop.io.compress.SnappyCodec;</codeblock>
+        <p>
+          You could elect to specify alternative codecs such as <codeph>GzipCodec</codeph> here.
+        </p>
+      </note>
+    </conbody>
+  </concept>
+
+  <concept id="rcfile_performance">
+
+    <title>Query Performance for Impala RCFile Tables</title>
+
+    <conbody>
+
+      <p>
+        In general, expect query performance with RCFile tables to be
+        faster than with tables using text data, but slower than with
+        Parquet tables. See <xref href="impala_parquet.xml#parquet"/>
+        for information about using the Parquet file format for
+        high-performance analytic queries.
+      </p>
+
+      <p conref="../shared/impala_common.xml#common/s3_block_splitting"/>
+
+    </conbody>
+  </concept>
+
+  <concept audience="Cloudera" id="rcfile_data_types">
+
+    <title>Data Type Considerations for RCFile Tables</title>
+
+    <conbody>
+
+      <p></p>
+    </conbody>
+  </concept>
+</concept>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1fcc8cee/docs/topics/impala_release_notes.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_release_notes.xml b/docs/topics/impala_release_notes.xml
new file mode 100644
index 0000000..65a3997
--- /dev/null
+++ b/docs/topics/impala_release_notes.xml
@@ -0,0 +1,17 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="impala_release_notes">
+
+  <title>Impala Release Notes</title>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="Release Notes"/>
+      <data name="Category" value="Administrators"/>
+      <data name="Category" value="Developers"/>
+      <data name="Category" value="Data Analysts"/>
+    </metadata>
+  </prolog>
+
+  <conbody conref="impala_relnotes.xml#relnotes/relnotes_intro"/>
+</concept>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1fcc8cee/docs/topics/impala_schema_design.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_schema_design.xml b/docs/topics/impala_schema_design.xml
new file mode 100644
index 0000000..4d08de5
--- /dev/null
+++ b/docs/topics/impala_schema_design.xml
@@ -0,0 +1,222 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="schema_design">
+
+  <title>Guidelines for Designing Impala Schemas</title>
+  <titlealts audience="PDF"><navtitle>Designing Schemas</navtitle></titlealts>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="Planning"/>
+      <data name="Category" value="Sectionated Pages"/>
+      <data name="Category" value="Proof of Concept"/>
+      <data name="Category" value="Checklists"/>
+      <data name="Category" value="Guidelines"/>
+      <data name="Category" value="Best Practices"/>
+      <data name="Category" value="Performance"/>
+      <data name="Category" value="Compression"/>
+      <data name="Category" value="Tables"/>
+      <data name="Category" value="Schemas"/>
+      <data name="Category" value="SQL"/>
+      <data name="Category" value="Porting"/>
+      <data name="Category" value="Administrators"/>
+      <data name="Category" value="Developers"/>
+      <data name="Category" value="Data Analysts"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+    <p>
+      The guidelines in this topic help you to construct an optimized and scalable schema, one that integrates well
+      with your existing data management processes. Use these guidelines as a checklist when doing any
+      proof-of-concept work, porting exercise, or before deploying to production.
+    </p>
+
+    <p>
+      If you are adapting an existing database or Hive schema for use with Impala, read the guidelines in this
+      section and then see <xref href="impala_porting.xml#porting"/> for specific porting and compatibility tips.
+    </p>
+
+    <p outputclass="toc inpage"/>
+
+    <section id="schema_design_text_vs_binary">
+
+      <title>Prefer binary file formats over text-based formats.</title>
+
+      <p>
+        To save space and improve memory usage and query performance, use binary file formats for any large or
+        intensively queried tables. The Parquet file format is the most efficient for data warehouse-style analytic
+        queries. Avro is the other binary file format that Impala supports, and one that you might already have as
+        part of a Hadoop ETL pipeline.
+      </p>
+
+      <p>
+        Although Impala can create and query tables with the RCFile and SequenceFile file formats, such tables are
+        relatively bulky due to the text-based nature of those formats, and are not optimized for data
+        warehouse-style queries due to their row-oriented layout. Impala does not support <codeph>INSERT</codeph>
+        operations for tables with these file formats.
+      </p>
+
+      <p>
+        Guidelines:
+      </p>
+
+      <ul>
+        <li>
+          For an efficient and scalable format for large, performance-critical tables, use the Parquet file format.
+        </li>
+
+        <li>
+          To deliver intermediate data during the ETL process, in a format that can also be used by other Hadoop
+          components, Avro is a reasonable choice.
+        </li>
+
+        <li>
+          For convenient import of raw data, use a text table instead of RCFile or SequenceFile, and convert to
+          Parquet in a later stage of the ETL process.
+        </li>
+      </ul>
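+
+      <p>
+        As a sketch of the last guideline, the following hypothetical example (the table and column names are
+        placeholders, not taken from any real schema) lands raw comma-separated data in a text table and then
+        rewrites it as Parquet in a later ETL stage:
+      </p>
+
+<codeblock>-- Convenient landing table for raw comma-separated data.
+create table staging_events (event_id bigint, event_time string, payload string)
+  row format delimited fields terminated by ','
+  stored as textfile;
+
+-- Later ETL stage: rewrite the same data in the more efficient Parquet format.
+create table events stored as parquet as select * from staging_events;</codeblock>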
+    </section>
+
+    <section id="schema_design_compression">
+
+      <title>Use Snappy compression where practical.</title>
+
+      <p>
+        Snappy compression involves low CPU overhead to decompress, while still providing substantial space
+        savings. In cases where you have a choice of compression codecs, such as with the Parquet and Avro file
+        formats, use Snappy compression unless you find a compelling reason to use a different codec.
+      </p>
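+
+      <p>
+        For example, here is a minimal sketch (with hypothetical table names) of writing Snappy-compressed Parquet
+        data from <cmdname>impala-shell</cmdname>. Snappy is already the default codec, so the explicit
+        <codeph>SET</codeph> statement is shown only for clarity:
+      </p>
+
+<codeblock>[localhost:21000] &gt; set COMPRESSION_CODEC=snappy;
+[localhost:21000] &gt; insert into parquet_table select * from text_table;</codeblock>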
+    </section>
+
+    <section id="schema_design_numeric_types">
+
+      <title>Prefer numeric types over strings.</title>
+
+      <p>
+        If you have numeric values that you could treat as either strings or numbers (such as
+        <codeph>YEAR</codeph>, <codeph>MONTH</codeph>, and <codeph>DAY</codeph> for partition key columns), define
+        them as the smallest applicable integer types. For example, <codeph>YEAR</codeph> can be
+        <codeph>SMALLINT</codeph>, <codeph>MONTH</codeph> and <codeph>DAY</codeph> can be <codeph>TINYINT</codeph>.
+        Although you might not see any difference in the way partitioned tables or text files are laid out on disk,
+        using numeric types will save space in binary formats such as Parquet, and in memory when doing queries,
+        particularly resource-intensive queries such as joins.
+      </p>
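+
+      <p>
+        For example, a table definition following this guideline might look like the following sketch (the table
+        and column names are placeholders):
+      </p>
+
+<codeblock>create table web_logs (msg string)
+  partitioned by (year smallint, month tinyint, day tinyint)
+  stored as parquet;</codeblock>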
+    </section>
+
+<!-- Alan suggests not making this recommendation.
+<section id="schema_design_decimal">
+<title>Prefer DECIMAL types over FLOAT and DOUBLE.</title>
+<p>
+</p>
+</section>
+-->
+
+    <section id="schema_design_partitioning">
+
+      <title>Partition, but do not over-partition.</title>
+
+      <p>
+        Partitioning is an important aspect of performance tuning for Impala. Follow the procedures in
+        <xref href="impala_partitioning.xml#partitioning"/> to set up partitioning for your biggest, most
+        intensively queried tables.
+      </p>
+
+      <p>
+        If you are moving to Impala from a traditional database system, or just getting started in the Big Data
+        field, you might not have enough data volume to take advantage of Impala parallel queries with your
+        existing partitioning scheme. For example, if you have only a few tens of megabytes of data per day,
+        partitioning by <codeph>YEAR</codeph>, <codeph>MONTH</codeph>, and <codeph>DAY</codeph> columns might be
+        too granular. Most of your cluster might be sitting idle during queries that target a single day, or each
+        node might have very little work to do. Consider reducing the number of partition key columns so that each
+        partition directory contains several gigabytes worth of data.
+      </p>
+
+      <p rev="parquet_block_size">
+        For example, consider a Parquet table where each data file is 1 HDFS block, with a maximum block size of 1
+        GB. (In Impala 2.0 and later, the default Parquet block size is reduced to 256 MB. For this exercise, let's
+        assume you have bumped the size back up to 1 GB by setting the query option
+        <codeph>PARQUET_FILE_SIZE=1g</codeph>.) If you have a 10-node cluster, you need 10 data files (up to 10 GB)
+        to give each node some work to do for a query. But each core on each machine can process a separate data
+        block in parallel. With 16-core machines on a 10-node cluster, a query could process up to 160 GB fully in
+        parallel. If there are only a few data files per partition, not only are most cluster nodes sitting idle
+        during queries, but so are most cores on those machines.
+      </p>
+
+      <p>
+        You can reduce the Parquet block size to as low as 128 MB or 64 MB to increase the number of files per
+        partition and improve parallelism. But also consider reducing the level of partitioning so that analytic
+        queries have enough data to work with.
+      </p>
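+
+      <p rev="parquet_block_size">
+        For example, the following sketch (with hypothetical table names) uses only <codeph>YEAR</codeph> and
+        <codeph>MONTH</codeph> as partition key columns and bumps the Parquet block size back up to 1 GB before
+        loading:
+      </p>
+
+<codeblock>create table sales_by_month (item string, amount decimal(10,2))
+  partitioned by (year smallint, month tinyint)
+  stored as parquet;
+
+set PARQUET_FILE_SIZE=1g;
+
+insert into sales_by_month partition (year, month)
+  select item, amount, year, month from sales_by_day;</codeblock>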
+    </section>
+
+    <section id="schema_design_compute_stats">
+
+      <title>Always compute stats after loading data.</title>
+
+      <p>
+        Impala makes extensive use of statistics about data in the overall table and in each column, to help plan
+        resource-intensive operations such as join queries and inserting into partitioned Parquet tables. Because
+        this information is only available after data is loaded, run the <codeph>COMPUTE STATS</codeph> statement
+        on a table after loading or replacing data in a table or partition.
+      </p>
+
+      <p>
+        Having accurate statistics can make the difference between a successful operation and one that fails due to
+        an out-of-memory error or a timeout. When you encounter performance or capacity issues, always use the
+        <codeph>SHOW TABLE STATS</codeph> and <codeph>SHOW COLUMN STATS</codeph> statements to check whether
+        statistics are present and up-to-date for all tables in the query.
+      </p>
+
+      <p>
+        When doing a join query, Impala consults the statistics for each joined table to determine their relative
+        sizes and to estimate the number of rows produced in each join stage. When doing an <codeph>INSERT</codeph>
+        into a Parquet table, Impala consults the statistics for the source table to determine how to distribute
+        the work of constructing the data files for each partition.
+      </p>
+
+      <p>
+        See <xref href="impala_compute_stats.xml#compute_stats"/> for the syntax of the <codeph>COMPUTE
+        STATS</codeph> statement, and <xref href="impala_perf_stats.xml#perf_stats"/> for all the performance
+        considerations for table and column statistics.
+      </p>
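+
+      <p>
+        For example, assuming a hypothetical table name, the sequence after each data load might look like:
+      </p>
+
+<codeblock>[localhost:21000] &gt; compute stats sales_by_month;
+[localhost:21000] &gt; show table stats sales_by_month;
+[localhost:21000] &gt; show column stats sales_by_month;</codeblock>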
+    </section>
+
+    <section id="schema_design_explain">
+
+      <title>Verify sensible execution plans with EXPLAIN and SUMMARY.</title>
+
+      <p>
+        Before executing a resource-intensive query, use the <codeph>EXPLAIN</codeph> statement to get an overview
+        of how Impala intends to parallelize the query and distribute the work. If you see that the query plan is
+        inefficient, you can take tuning steps such as changing file formats, using partitioned tables, running the
+        <codeph>COMPUTE STATS</codeph> statement, or adding query hints. For information about all of these
+        techniques, see <xref href="impala_performance.xml#performance"/>.
+      </p>
+
+      <p>
+        After you run a query, you can see performance-related information about how it actually ran by issuing the
+        <codeph>SUMMARY</codeph> command in <cmdname>impala-shell</cmdname>. Prior to Impala 1.4, you would use
+        the <codeph>PROFILE</codeph> command, but its highly technical output was only useful for the most
+        experienced users. <codeph>SUMMARY</codeph>, new in Impala 1.4, summarizes the most useful information for
+        all stages of execution, for all nodes rather than splitting out figures for each node.
+      </p>
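+
+      <p>
+        For example, a typical <cmdname>impala-shell</cmdname> sequence with a hypothetical query might be:
+      </p>
+
+<codeblock>[localhost:21000] &gt; explain select count(*) from sales_by_month where year = 2013;
+[localhost:21000] &gt; select count(*) from sales_by_month where year = 2013;
+[localhost:21000] &gt; summary;</codeblock>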
+    </section>
+
+<!--
+<section id="schema_design_mem_limits">
+<title>Allocate resources Between Impala and batch jobs (MapReduce, Hive, Pig).</title>
+<p>
+</p>
+</section>
+
+<section id="schema_design_cm">
+<title>Use Cloudera Manager to monitor queries and overall performance.</title>
+<p>
+</p>
+</section>
+-->
+  </conbody>
+</concept>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1fcc8cee/docs/topics/impala_security_files.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_security_files.xml b/docs/topics/impala_security_files.xml
new file mode 100644
index 0000000..befe696
--- /dev/null
+++ b/docs/topics/impala_security_files.xml
@@ -0,0 +1,67 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="secure_files">
+
+  <title>Securing Impala Data and Log Files</title>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="Security"/>
+      <data name="Category" value="Logs"/>
+      <data name="Category" value="HDFS"/>
+      <data name="Category" value="Administrators"/>
+      <!-- To do for John: mention redaction as a fallback to keep sensitive info out of the log files. -->
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+    <p>
+      One aspect of security is to protect files from unauthorized access at the filesystem level. For example, if
+      you store sensitive data in HDFS, you specify permissions on the associated files and directories in HDFS to
+      restrict read and write permissions to the appropriate users and groups.
+    </p>
+
+    <p>
+      If you issue queries containing sensitive values in the <codeph>WHERE</codeph> clause, such as financial
+      account numbers, those values are stored in Impala log files in the Linux filesystem and you must secure
+      those files also. For the locations of Impala log files, see <xref href="impala_logging.xml#logging"/>.
+    </p>
+
+    <p>
+      All Impala read and write operations are performed under the filesystem privileges of the
+      <codeph>impala</codeph> user. The <codeph>impala</codeph> user must be able to read all directories and data
+      files that you query, and write into all the directories and data files for <codeph>INSERT</codeph> and
+      <codeph>LOAD DATA</codeph> statements. At a minimum, make sure the <codeph>impala</codeph> user is in the
+      <codeph>hive</codeph> group so that it can access files and directories shared between Impala and Hive. See
+      <xref href="impala_prereqs.xml#prereqs_account"/> for more details.
+    </p>
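+
+    <p>
+      For example, here is a minimal sketch of restricting an HDFS database directory (the path and group names are
+      hypothetical; adjust them to your own warehouse layout):
+    </p>
+
+<codeblock>$ hdfs dfs -chown -R impala:hive /user/hive/warehouse/sensitive_db.db
+$ hdfs dfs -chmod -R 770 /user/hive/warehouse/sensitive_db.db
+$ hdfs dfs -ls -d /user/hive/warehouse/sensitive_db.db</codeblock>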
+
+    <p>
+      Setting file permissions is necessary for Impala to function correctly, but is not an effective security
+      practice by itself:
+    </p>
+
+    <ul>
+      <li>
+      <p>
+        The way to ensure that only authorized users can submit requests for databases and tables they are allowed
+        to access is to set up Sentry authorization, as explained in
+        <xref href="impala_authorization.xml#authorization"/>. With authorization enabled, the checking of the user
+        ID and group is done by Impala, and unauthorized access is blocked by Impala itself. The actual low-level
+        read and write requests are still done by the <codeph>impala</codeph> user, so you must have appropriate
+        file and directory permissions for that user ID.
+      </p>
+      </li>
+
+      <li>
+      <p>
+        You must also set up Kerberos authentication, as described in <xref href="impala_kerberos.xml#kerberos"/>,
+        so that users can only connect from trusted hosts. With Kerberos enabled, if someone connects a new host to
+        the network and creates user IDs that match your privileged IDs, they will be blocked from connecting to
+        Impala at all from that host.
+      </p>
+      </li>
+    </ul>
+  </conbody>
+</concept>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1fcc8cee/docs/topics/impala_security_guidelines.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_security_guidelines.xml b/docs/topics/impala_security_guidelines.xml
new file mode 100644
index 0000000..e7713ff
--- /dev/null
+++ b/docs/topics/impala_security_guidelines.xml
@@ -0,0 +1,108 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept rev="1.1" id="security_guidelines">
+
+  <title>Security Guidelines for Impala</title>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Security"/>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="Planning"/>
+      <data name="Category" value="Guidelines"/>
+      <data name="Category" value="Best Practices"/>
+      <data name="Category" value="Administrators"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+    <p>
+      The following are the major steps to harden a cluster running Impala against accidents and mistakes, or
+      malicious attackers trying to access sensitive data:
+    </p>
+
+    <ul>
+      <li>
+      <p>
+        Secure the <codeph>root</codeph> account. The <codeph>root</codeph> user can tamper with the
+        <cmdname>impalad</cmdname> daemon, read and write the data files in HDFS, log into other user accounts, and
+        access other system services that are beyond the control of Impala.
+      </p>
+      </li>
+
+      <li>
+      <p>
+        Restrict membership in the <codeph>sudoers</codeph> list (in the <filepath>/etc/sudoers</filepath> file).
+        The users who can run the <codeph>sudo</codeph> command can do many of the same things as the
+        <codeph>root</codeph> user.
+      </p>
+      </li>
+
+      <li>
+      <p>
+        Ensure the Hadoop ownership and permissions for Impala data files are restricted.
+      </p>
+      </li>
+
+      <li>
+      <p>
+        Ensure the Hadoop ownership and permissions for Impala log files are restricted.
+      </p>
+      </li>
+
+      <li>
+      <p>
+        Ensure that the Impala web UI (available by default on port 25000 on each Impala node) is
+        password-protected. See <xref href="impala_webui.xml#webui"/> for details.
+      </p>
+      </li>
+
+      <li>
+      <p>
+        Create a policy file that specifies which Impala privileges are available to users in particular Hadoop
+        groups (which by default map to Linux OS groups). Create the associated Linux groups using the
+        <cmdname>groupadd</cmdname> command if necessary.
+      </p>
+      </li>
+
+      <li>
+      <p>
+        The Impala authorization feature makes use of the HDFS file ownership and permissions mechanism; for
+        background information, see the
+        <xref href="https://archive.cloudera.com/cdh/3/hadoop/hdfs_permissions_guide.html" scope="external" format="html">CDH
+        HDFS Permissions Guide</xref>. Set up users and assign them to groups at the OS level, corresponding to the
+        different categories of users with different access levels for various databases, tables, and HDFS
+        locations (URIs). Create the associated Linux users using the <cmdname>useradd</cmdname> command if
+        necessary, and add them to the appropriate groups with the <cmdname>usermod</cmdname> command, as shown
+        in the sketch after this list.
+      </p>
+      </li>
+
+      <li>
+      <p>
+        Design your databases, tables, and views so that authorization policy rules can be kept simple and
+        consistent. For example, if all tables related to an application are inside a single
+        database, you can assign privileges for that database and use the <codeph>*</codeph> wildcard for the table
+        name. If you are creating views with different privileges than the underlying base tables, you might put
+        the views in a separate database so that you can use the <codeph>*</codeph> wildcard for the database
+        containing the base tables, while specifying the precise names of the individual views. (For specifying
+        table or database names, you either specify the exact name or <codeph>*</codeph> to mean all the databases
+        on a server, or all the tables and views in a database.)
+      </p>
+      </li>
+
+      <li>
+      <p>
+        Enable authorization by running the <codeph>impalad</codeph> daemons with the <codeph>-server_name</codeph>
+        and <codeph>-authorization_policy_file</codeph> options on all nodes. (The authorization feature does not
+        apply to the <cmdname>statestored</cmdname> daemon, which has no access to schema objects or data files.)
+      </p>
+      </li>
+
+      <li>
+      <p>
+        Set up authentication using Kerberos, to make sure users really are who they say they are.
+      </p>
+      </li>
+    </ul>
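+
+    <p>
+      The following is a minimal sketch of the OS-level group and user setup mentioned in the list above; the group
+      and user names are hypothetical examples only:
+    </p>
+
+<codeblock>$ sudo groupadd analysts
+$ sudo groupadd etl_jobs
+$ sudo useradd -G analysts alice
+$ sudo usermod -a -G etl_jobs bob</codeblock>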
+  </conbody>
+</concept>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1fcc8cee/docs/topics/impala_security_install.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_security_install.xml b/docs/topics/impala_security_install.xml
new file mode 100644
index 0000000..56d34bc
--- /dev/null
+++ b/docs/topics/impala_security_install.xml
@@ -0,0 +1,24 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="security_install">
+
+  <title>Installation Considerations for Impala Security</title>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Security"/>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="Administrators"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+    <p>
+      Impala 1.1 comes set up with all the software and settings needed to enable security when you run the
+      <cmdname>impalad</cmdname> daemon with the new security-related options (<codeph>-server_name</codeph> and
+      <codeph>-authorization_policy_file</codeph>). You do not need to change any environment variables or install
+      any additional JAR files. In a cluster managed by Cloudera Manager, you do not need to change any settings in
+      Cloudera Manager.
+    </p>
+  </conbody>
+</concept>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1fcc8cee/docs/topics/impala_security_metastore.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_security_metastore.xml b/docs/topics/impala_security_metastore.xml
new file mode 100644
index 0000000..246333f
--- /dev/null
+++ b/docs/topics/impala_security_metastore.xml
@@ -0,0 +1,40 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="security_metastore">
+
+  <title>Securing the Hive Metastore Database</title>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="Hive"/>
+      <data name="Category" value="Security"/>
+      <data name="Category" value="Metastore"/>
+      <data name="Category" value="Databases"/>
+      <data name="Category" value="Administrators"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+<!-- Some of this copied from earlier. Split out both instances into conrefs. -->
+
+    <p>
+      It is important to secure the Hive metastore, so that users cannot access the names or other information
+      about databases and tables through the Hive client or by querying the metastore database. Do this by
+      turning on Hive metastore security, using the instructions in the
+      <xref href="http://www.cloudera.com/documentation/enterprise/latest/topics/cdh_sg_hive_security.html" scope="external" format="html">CDH 5 Security Guide</xref>
+      for securing different Hive components:
+    </p>
+
+    <ul>
+      <li>
+        Secure the Hive Metastore.
+      </li>
+
+      <li>
+        In addition, allow access to the metastore only from the HiveServer2 server, and then disable local access
+        to the HiveServer2 server.
+      </li>
+    </ul>
+  </conbody>
+</concept>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1fcc8cee/docs/topics/impala_security_webui.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_security_webui.xml b/docs/topics/impala_security_webui.xml
new file mode 100644
index 0000000..7ebd2ef
--- /dev/null
+++ b/docs/topics/impala_security_webui.xml
@@ -0,0 +1,66 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept rev="1.1" id="security_webui">
+
+  <title>Securing the Impala Web User Interface</title>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="Troubleshooting"/>
+      <data name="Category" value="Security"/>
+      <data name="Category" value="Administrators"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+    <p>
+      The instructions in this section presume you are familiar with the
+      <xref href="http://en.wikipedia.org/wiki/.htpasswd" scope="external" format="html">
+      <filepath>.htpasswd</filepath> mechanism</xref> commonly used to password-protect pages on web servers.
+    </p>
+
+    <p>
+      Password-protect the Impala web UI that listens on port 25000 by default. Set up a
+      <filepath>.htpasswd</filepath> file in the <codeph>$IMPALA_HOME</codeph> directory, or start both the
+      <cmdname>impalad</cmdname> and <cmdname>statestored</cmdname> daemons with the
+      <codeph>--webserver_password_file</codeph> option to specify a different location (including the filename).
+    </p>
+
+    <p>
+      This file should only be readable by the Impala process and machine administrators, because it contains
+      (hashed) versions of passwords. The username / password pairs are not derived from Unix usernames, Kerberos
+      users, or any other system. The <codeph>domain</codeph> field in the password file must match the domain
+      supplied to Impala by the new command-line option <codeph>--webserver_authentication_domain</codeph>. The
+      default is <codeph>mydomain.com</codeph>.
+<!-- Password generator cited by Henry: <xref href="http://www.askapache.com/online-tools/htpasswd-generator/" scope="external" format="html"/> -->
+    </p>
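+
+    <p>
+      For example, here is a sketch of how the relevant startup options fit together. The file path is hypothetical,
+      and in a managed cluster you typically supply these flags through your normal <cmdname>impalad</cmdname>
+      startup configuration rather than on the command line:
+    </p>
+
+<codeblock>impalad --webserver_password_file=/etc/impala/.htpasswd \
+  --webserver_authentication_domain=mydomain.com <varname>other_flags</varname></codeblock>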
+
+    <p>
+      Impala also supports using HTTPS for secure web traffic. To do so, set
+      <codeph>--webserver_certificate_file</codeph> to refer to a valid <codeph>.pem</codeph> TLS/SSL certificate file.
+      Impala will automatically start using HTTPS once the TLS/SSL certificate has been read and validated. A
+      <codeph>.pem</codeph> file is basically a private key, followed by a signed TLS/SSL certificate; make sure to
+      concatenate both parts when constructing the <codeph>.pem</codeph> file.
+<!-- Certificate info cited by Henry: <xref href="http://www.akadia.com/services/ssh_test_certificate.html" scope="external" format="html"/>
+This page was very useful for creating a certificate and private key file;
+the last step which was missing was to append one file to the other to make the <codeph>.pem</codeph> file. -->
+    </p>
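+
+    <p>
+      For example, here is a sketch of constructing the <codeph>.pem</codeph> file and pointing Impala at it; the
+      file names and path are hypothetical:
+    </p>
+
+<codeblock>$ cat impala-key.pem impala-cert.pem &gt; impala.pem
+$ impalad --webserver_certificate_file=/etc/impala/impala.pem <varname>other_flags</varname></codeblock>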
+
+    <p>
+      If Impala cannot find or parse the <codeph>.pem</codeph> file, it prints an error message and quits.
+    </p>
+
+    <note>
+      <p>
+        If the private key is encrypted using a passphrase, Impala will ask for that passphrase on startup, which
+        is not useful for a large cluster. In that case, remove the passphrase and make the <codeph>.pem</codeph>
+        file readable only by Impala and administrators.
+      </p>
+      <p>
+        When you turn on TLS/SSL for the Impala web UI, the associated URLs change from <codeph>http://</codeph>
+        prefixes to <codeph>https://</codeph>. Adjust any bookmarks or application code that refers to those URLs.
+      </p>
+    </note>
+  </conbody>
+</concept>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1fcc8cee/docs/topics/impala_seqfile.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_seqfile.xml b/docs/topics/impala_seqfile.xml
new file mode 100644
index 0000000..860007e
--- /dev/null
+++ b/docs/topics/impala_seqfile.xml
@@ -0,0 +1,239 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="seqfile">
+
+  <title id="sequencefile">Using the SequenceFile File Format with Impala Tables</title>
+  <titlealts audience="PDF"><navtitle>SequenceFile Data Files</navtitle></titlealts>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala"/>
+      <!-- <data name="Category" value="SequenceFile"/> -->
+      <data name="Category" value="File Formats"/>
+      <data name="Category" value="Developers"/>
+      <data name="Category" value="Data Analysts"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+    <p>
+      <indexterm audience="Cloudera">SequenceFile support in Impala</indexterm>
+      Impala supports using SequenceFile data files.
+    </p>
+
+    <table>
+      <title>SequenceFile Format Support in Impala</title>
+      <tgroup cols="5">
+        <colspec colname="1" colwidth="10*"/>
+        <colspec colname="2" colwidth="10*"/>
+        <colspec colname="3" colwidth="20*"/>
+        <colspec colname="4" colwidth="30*"/>
+        <colspec colname="5" colwidth="30*"/>
+        <thead>
+          <row>
+            <entry>
+              File Type
+            </entry>
+            <entry>
+              Format
+            </entry>
+            <entry>
+              Compression Codecs
+            </entry>
+            <entry>
+              Impala Can CREATE?
+            </entry>
+            <entry>
+              Impala Can INSERT?
+            </entry>
+          </row>
+        </thead>
+        <tbody>
+          <row conref="impala_file_formats.xml#file_formats/sequencefile_support">
+            <entry/>
+          </row>
+        </tbody>
+      </tgroup>
+    </table>
+
+    <p outputclass="toc inpage"/>
+  </conbody>
+
+  <concept id="seqfile_create">
+
+    <title>Creating SequenceFile Tables and Loading Data</title>
+  <prolog>
+    <metadata>
+      <data name="Category" value="ETL"/>
+    </metadata>
+  </prolog>
+
+    <conbody>
+
+      <p>
+        If you do not have an existing data file to use, begin by creating one in the appropriate format.
+      </p>
+
+      <p>
+        <b>To create a SequenceFile table:</b>
+      </p>
+
+      <p>
+        In the <codeph>impala-shell</codeph> interpreter, issue a command similar to:
+      </p>
+
+<codeblock>create table sequencefile_table (<varname>column_specs</varname>) stored as sequencefile;</codeblock>
+
+      <p>
+        Because Impala can query some kinds of tables that it cannot currently write to, after creating tables of
+        certain file formats, you might use the Hive shell to load the data. See
+        <xref href="impala_file_formats.xml#file_formats"/> for details. After loading data into a table through
+        Hive or other mechanism outside of Impala, issue a <codeph>REFRESH <varname>table_name</varname></codeph>
+        statement the next time you connect to the Impala node, before querying the table, to make Impala recognize
+        the new data.
+      </p>
+
+      <p>
+        For example, here is how you might create some SequenceFile tables in Impala (by specifying the columns
+        explicitly, or cloning the structure of another table), load data through Hive, and query them through
+        Impala:
+      </p>
+
+<codeblock>$ impala-shell -i localhost
+[localhost:21000] &gt; create table seqfile_table (x int) stored as sequencefile;
+[localhost:21000] &gt; create table seqfile_clone like some_other_table stored as sequencefile;
+[localhost:21000] &gt; quit;
+
+$ hive
+hive&gt; insert into table seqfile_table select x from some_other_table;
+3 Rows loaded to seqfile_table
+Time taken: 19.047 seconds
+hive&gt; quit;
+
+$ impala-shell -i localhost
+[localhost:21000] &gt; select * from seqfile_table;
+Returned 0 row(s) in 0.23s
+[localhost:21000] &gt; -- Make Impala recognize the data loaded through Hive;
+[localhost:21000] &gt; refresh seqfile_table;
+[localhost:21000] &gt; select * from seqfile_table;
++---+
+| x |
++---+
+| 1 |
+| 2 |
+| 3 |
++---+
+Returned 3 row(s) in 0.23s</codeblock>
+
+      <p conref="../shared/impala_common.xml#common/complex_types_unsupported_filetype"/>
+
+    </conbody>
+  </concept>
+
+  <concept id="seqfile_compression">
+
+    <title>Enabling Compression for SequenceFile Tables</title>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Compression"/>
+      <data name="Category" value="Snappy"/>
+    </metadata>
+  </prolog>
+
+    <conbody>
+
+      <p>
+        <indexterm audience="Cloudera">compression</indexterm>
+        You may want to enable compression on existing tables. Enabling compression provides performance gains in
+        most cases and is supported for SequenceFile tables. For example, to enable Snappy compression, you would
+        specify the following additional settings when loading data through the Hive shell:
+      </p>
+
+<codeblock>hive&gt; SET hive.exec.compress.output=true;
+hive&gt; SET mapred.max.split.size=256000000;
+hive&gt; SET mapred.output.compression.type=BLOCK;
+hive&gt; SET mapred.output.compression.codec=org.apache.hadoop.io.compress.SnappyCodec;
+hive&gt; insert overwrite table <varname>new_table</varname> select * from <varname>old_table</varname>;</codeblock>
+
+      <p>
+        If you are converting partitioned tables, you must complete additional steps. In such a case, specify
+        additional settings similar to the following:
+      </p>
+
+<codeblock>hive&gt; create table <varname>new_table</varname> (<varname>your_cols</varname>) partitioned by (<varname>partition_cols</varname>) stored as <varname>new_format</varname>;
+hive&gt; SET hive.exec.dynamic.partition.mode=nonstrict;
+hive&gt; SET hive.exec.dynamic.partition=true;
+hive&gt; insert overwrite table <varname>new_table</varname> partition(<varname>comma_separated_partition_cols</varname>) select * from <varname>old_table</varname>;</codeblock>
+
+      <p>
+        Remember that Hive does not require you to specify a source format for the existing data. Consider the case of
+        converting a table with two partition columns called <codeph>year</codeph> and <codeph>month</codeph> to a
+        Snappy compressed SequenceFile. Combining the components outlined previously to complete this table
+        conversion, you would specify settings similar to the following:
+      </p>
+
+<codeblock>hive&gt; create table TBL_SEQ (int_col int, string_col string) STORED AS SEQUENCEFILE;
+hive&gt; SET hive.exec.compress.output=true;
+hive&gt; SET mapred.max.split.size=256000000;
+hive&gt; SET mapred.output.compression.type=BLOCK;
+hive&gt; SET mapred.output.compression.codec=org.apache.hadoop.io.compress.SnappyCodec;
+hive&gt; SET hive.exec.dynamic.partition.mode=nonstrict;
+hive&gt; SET hive.exec.dynamic.partition=true;
+hive&gt; INSERT OVERWRITE TABLE tbl_seq SELECT * FROM tbl;</codeblock>
+
+      <p>
+        To complete a similar process for a table that includes partitions, you would specify settings similar to
+        the following:
+      </p>
+
+<codeblock>hive&gt; CREATE TABLE tbl_seq (int_col INT, string_col STRING) PARTITIONED BY (year INT) STORED AS SEQUENCEFILE;
+hive&gt; SET hive.exec.compress.output=true;
+hive&gt; SET mapred.max.split.size=256000000;
+hive&gt; SET mapred.output.compression.type=BLOCK;
+hive&gt; SET mapred.output.compression.codec=org.apache.hadoop.io.compress.SnappyCodec;
+hive&gt; SET hive.exec.dynamic.partition.mode=nonstrict;
+hive&gt; SET hive.exec.dynamic.partition=true;
+hive&gt; INSERT OVERWRITE TABLE tbl_seq PARTITION(year) SELECT * FROM tbl;</codeblock>
+
+      <note>
+        <p>
+          The compression type is specified in the following command:
+        </p>
+<codeblock>SET mapred.output.compression.codec=org.apache.hadoop.io.compress.SnappyCodec;</codeblock>
+        <p>
+          You could elect to specify alternative codecs such as <codeph>GzipCodec</codeph> here.
+        </p>
+      </note>
+    </conbody>
+  </concept>
+
+  <concept audience="Cloudera" id="seqfile_data_types">
+
+    <title>Data Type Considerations for SequenceFile Tables</title>
+
+    <conbody>
+
+      <p></p>
+    </conbody>
+  </concept>
+
+  <concept id="seqfile_performance">
+
+    <title>Query Performance for Impala SequenceFile Tables</title>
+
+    <conbody>
+
+      <p>
+        In general, expect query performance with SequenceFile tables to be
+        faster than with tables using text data, but slower than with
+        Parquet tables. See <xref href="impala_parquet.xml#parquet"/>
+        for information about using the Parquet file format for
+        high-performance analytic queries.
+      </p>
+
+      <p conref="../shared/impala_common.xml#common/s3_block_splitting"/>
+
+    </conbody>
+  </concept>
+
+</concept>


[2/7] incubator-impala git commit: New files needed to make PDF build happy.

Posted by jr...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1fcc8cee/docs/topics/impala_shell_commands.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_shell_commands.xml b/docs/topics/impala_shell_commands.xml
new file mode 100644
index 0000000..6d6f720
--- /dev/null
+++ b/docs/topics/impala_shell_commands.xml
@@ -0,0 +1,399 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="shell_commands">
+
+  <title>impala-shell Command Reference</title>
+  <titlealts audience="PDF"><navtitle>Command Reference</navtitle></titlealts>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="impala-shell"/>
+      <data name="Category" value="SQL"/>
+      <data name="Category" value="Data Analysts"/>
+      <data name="Category" value="Developers"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+    <p>
+      <indexterm audience="Cloudera">impala-shell</indexterm>
+      Use the following commands within <codeph>impala-shell</codeph> to pass requests to the
+      <codeph>impalad</codeph> daemon that the shell is connected to. You can enter a command interactively at the
+      prompt, or pass it as the argument to the <codeph>-q</codeph> option of <codeph>impala-shell</codeph>. Most
+      of these commands are passed to the Impala daemon as SQL statements; refer to the corresponding
+      <xref href="impala_langref_sql.xml#langref_sql">SQL language reference sections</xref> for full syntax
+      details.
+    </p>
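+
+    <p>
+      For example, either of the following runs the same command, interactively or through the <codeph>-q</codeph>
+      option (the host name is a placeholder):
+    </p>
+
+<codeblock>$ impala-shell -i impala-host-1.example.com
+[impala-host-1.example.com:21000] &gt; show databases;
+
+$ impala-shell -i impala-host-1.example.com -q 'show databases;'</codeblock>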
+
+    <table>
+      <tgroup cols="2">
+        <colspec colname="1" colwidth="10*"/>
+        <colspec colname="2" colwidth="40*"/>
+        <thead>
+          <row>
+            <entry>
+              Command
+            </entry>
+            <entry>
+              Explanation
+            </entry>
+          </row>
+        </thead>
+        <tbody>
+          <row id="alter_cmd">
+            <entry>
+              <p>
+                <codeph>alter</codeph>
+              </p>
+            </entry>
+            <entry>
+              <p>
+                Changes the underlying structure or settings of an Impala table, or a table shared between Impala
+                and Hive. See <xref href="impala_alter_table.xml#alter_table"/> and
+                <xref href="impala_alter_view.xml#alter_view"/> for details.
+              </p>
+            </entry>
+          </row>
+          <row rev="1.2.2" id="compute_cmd">
+            <entry>
+              <p>
+                <codeph>compute stats</codeph>
+              </p>
+            </entry>
+            <entry>
+              <p>
+                Gathers important performance-related information for a table, used by Impala to optimize queries.
+                See <xref href="impala_compute_stats.xml#compute_stats"/> for details.
+              </p>
+            </entry>
+          </row>
+          <row id="connect_cmd">
+            <entry>
+              <p>
+                <codeph>connect</codeph>
+              </p>
+            </entry>
+            <entry>
+              <p>
+                Connects to the specified instance of <codeph>impalad</codeph>. The default port of 21000 is
+                assumed unless you provide another value. You can connect to any host in your cluster that is
+                running <codeph>impalad</codeph>. If you connect to an instance of <codeph>impalad</codeph> that
+                was started with an alternate port specified by the <codeph>--fe_port</codeph> flag, you must
+                provide that alternate port. See <xref href="impala_connecting.xml#connecting"/> for examples.
+              </p>
+
+              <p conref="../shared/impala_common.xml#common/set_vs_connect"/>
+            </entry>
+          </row>
+          <row id="describe_cmd">
+            <entry>
+              <p>
+                <codeph>describe</codeph>
+              </p>
+            </entry>
+            <entry>
+              <p>
+                Shows the columns, column data types, and any column comments for a specified table.
+                <codeph>DESCRIBE FORMATTED</codeph> shows additional information such as the HDFS data directory,
+                partitions, and internal properties for the table. See <xref href="impala_describe.xml#describe"/>
+                for details about the basic <codeph>DESCRIBE</codeph> output and the <codeph>DESCRIBE
+                FORMATTED</codeph> variant. You can use <codeph>DESC</codeph> as shorthand for the
+                <codeph>DESCRIBE</codeph> command.
+              </p>
+            </entry>
+          </row>
+          <row id="drop_cmd">
+            <entry>
+              <p>
+                <codeph>drop</codeph>
+              </p>
+            </entry>
+            <entry>
+              <p>
+                Removes a schema object, and in some cases its associated data files. See
+                <xref href="impala_drop_table.xml#drop_table"/>, <xref href="impala_drop_view.xml#drop_view"/>,
+                <xref href="impala_drop_database.xml#drop_database"/>, and
+                <xref href="impala_drop_function.xml#drop_function"/> for details.
+              </p>
+            </entry>
+          </row>
+          <row id="explain_cmd">
+            <entry>
+              <p>
+                <codeph>explain</codeph>
+              </p>
+            </entry>
+            <entry>
+              <p>
+                Provides the execution plan for a query. <codeph>EXPLAIN</codeph> represents a query as a series of
+                steps. For example, these steps might be HDFS scans, join and aggregation operations, or data
+                exchanges between the nodes of the cluster. See <xref href="impala_explain.xml#explain"/> and
+                <xref href="impala_explain_plan.xml#perf_explain"/> for details.
+              </p>
+            </entry>
+          </row>
+          <row id="help_cmd">
+            <entry>
+              <p>
+                <codeph>help</codeph>
+              </p>
+            </entry>
+            <entry>
+              <p>
+                Displays a list of all available commands and options.
+              </p>
+            </entry>
+          </row>
+          <row id="history_cmd">
+            <entry>
+              <p>
+                <codeph>history</codeph>
+              </p>
+            </entry>
+            <entry>
+              <p>
+                Maintains an enumerated cross-session command history. This history is stored in the
+                <filepath>~/.impalahistory</filepath> file.
+              </p>
+            </entry>
+          </row>
+          <row id="insert_cmd">
+            <entry>
+              <p>
+                <codeph>insert</codeph>
+              </p>
+            </entry>
+            <entry>
+              <p>
+                Writes the results of a query to a specified table. This either overwrites table data or appends
+                data to the existing table content. See <xref href="impala_insert.xml#insert"/> for details.
+              </p>
+            </entry>
+          </row>
+          <row id="invalidate_metadata_cmd">
+            <entry>
+              <p>
+                <codeph>invalidate metadata</codeph>
+              </p>
+            </entry>
+            <entry>
+              <p>
+                Updates <cmdname>impalad</cmdname> metadata for table existence and structure. Use this command
+                after creating, dropping, or altering databases, tables, or partitions in Hive. See
+                <xref href="impala_invalidate_metadata.xml#invalidate_metadata"/> for details.
+              </p>
+            </entry>
+          </row>
+          <row id="profile_cmd">
+            <entry>
+              <p>
+                <codeph>profile</codeph>
+              </p>
+            </entry>
+            <entry>
+              <p>
+                Displays low-level information about the most recent query. Used for performance diagnosis and
+                tuning. <ph rev="1.4.0"> The report starts with the same information as produced by the
+                <codeph>EXPLAIN</codeph> statement and the <codeph>SUMMARY</codeph> command.</ph> See
+                <xref href="impala_explain_plan.xml#perf_profile"/> for details.
+              </p>
+            </entry>
+          </row>
+          <row id="quit_cmd">
+            <entry>
+              <p>
+                <codeph>quit</codeph>
+              </p>
+            </entry>
+            <entry>
+              <p>
+                Exits the shell. Remember to include the final semicolon so that the shell recognizes the end of
+                the command.
+              </p>
+            </entry>
+          </row>
+          <row id="refresh_cmd">
+            <entry>
+              <p>
+                <codeph>refresh</codeph>
+              </p>
+            </entry>
+            <entry>
+              <p>
+                Refreshes <cmdname>impalad</cmdname> metadata for the locations of HDFS blocks corresponding to
+                Impala data files. Use this command after loading new data files into an Impala table through Hive
+                or through HDFS commands. See <xref href="impala_refresh.xml#refresh"/> for details.
+              </p>
+            </entry>
+          </row>
+          <row id="select_cmd">
+            <entry>
+              <p>
+                <codeph>select</codeph>
+              </p>
+            </entry>
+            <entry>
+              <p>
+                Specifies the data set on which to complete some action. All information returned from
+                <codeph>select</codeph> can be sent to an output such as the console or a file, or can be used to
+                complete some other element of a query. See <xref href="impala_select.xml#select"/> for details.
+              </p>
+            </entry>
+          </row>
+          <row id="set_cmd">
+            <entry>
+              <p>
+                <codeph>set</codeph>
+              </p>
+            </entry>
+            <entry>
+              <p>
+                Manages query options for an <cmdname>impala-shell</cmdname> session. The available options are the
+                ones listed in <xref href="impala_query_options.xml#query_options"/>. These options are used for
+                query tuning and troubleshooting. Issue <codeph>SET</codeph> with no arguments to see the current
+                query options, either based on the <cmdname>impalad</cmdname> defaults, as specified by you at
+                <cmdname>impalad</cmdname> startup, or based on earlier <codeph>SET</codeph> statements in the same
+                session. To modify option values, issue commands with the syntax <codeph>set
+                <varname>option</varname>=<varname>value</varname></codeph>. To restore an option to its default,
+                use the <codeph>unset</codeph> command. Some options take Boolean values of <codeph>true</codeph>
+                and <codeph>false</codeph>. Others take numeric arguments, or quoted string values.
+              </p>
+
+              <p conref="../shared/impala_common.xml#common/set_vs_connect"/>
+
+              <p rev="2.0.0">
+                In Impala 2.0 and later, <codeph>SET</codeph> is available as a SQL statement for any kind of
+                application, not only through <cmdname>impala-shell</cmdname>. See
+                <xref href="impala_set.xml#set"/> for details.
+              </p>
+
+              <p rev="2.5.0 IMPALA-2180">
+                In Impala 2.5 and later, you can use <codeph>SET</codeph> to define your own substitution variables
+                within an <cmdname>impala-shell</cmdname> session.
+                Within a SQL statement, you substitute the value by using the notation <codeph>${var:<varname>variable_name</varname>}</codeph>.
+              </p>
+            </entry>
+          </row>
+          <row id="shell_cmd">
+            <entry>
+              <p>
+                <codeph>shell</codeph>
+              </p>
+            </entry>
+            <entry>
+              <p>
+                Executes the specified command in the operating system shell without exiting
+                <codeph>impala-shell</codeph>. You can use the <codeph>!</codeph> character as shorthand for the
+                <codeph>shell</codeph> command.
+              </p>
+
+              <note>
+                Quote any instances of the <codeph>--</codeph> or <codeph>/*</codeph> tokens to avoid them being
+                interpreted as the start of a comment. To embed comments within <codeph>source</codeph> or
+                <codeph>!</codeph> commands, use the shell comment character <codeph>#</codeph> before the comment
+                portion of the line.
+              </note>
+            </entry>
+          </row>
+          <row id="show_cmd">
+            <entry>
+              <p>
+                <codeph>show</codeph>
+              </p>
+            </entry>
+            <entry>
+              <p>
+                Displays metastore data for schema objects created and accessed through Impala, Hive, or both.
+                <codeph>show</codeph> can be used to gather information about objects such as databases, tables, and functions.
+                See <xref href="impala_show.xml#show"/> for details.
+              </p>
+            </entry>
+          </row>
+          <row id="source_cmd" rev="IMPALA-3397 CDH-40097">
+            <entry>
+              <p>
+                <codeph>source</codeph> or <codeph>src</codeph>
+              </p>
+            </entry>
+            <entry>
+              <p>
+                Executes one or more statements residing in a specified file from the local filesystem.
+                Allows you to perform the same kinds of batch operations as with the <codeph>-f</codeph> option,
+                but interactively within the interpreter. The file can contain SQL statements and other
+                <cmdname>impala-shell</cmdname> commands, including additional <codeph>SOURCE</codeph> commands
+                to perform a flexible sequence of actions. Each command or statement, except the last one in the file,
+                must end with a semicolon.
+                See <xref href="impala_shell_running_commands.xml#shell_running_commands"/> for examples.
+              </p>
+            </entry>
+          </row>
+          <row rev="1.4.0" id="summary_cmd">
+            <entry>
+              <p>
+                <codeph>summary</codeph>
+              </p>
+            </entry>
+            <entry>
+              <p>
+                Summarizes the work performed in various stages of a query. It provides a higher-level view of the
+                information displayed by the <codeph>EXPLAIN</codeph> command. Added in Impala 1.4.0. See
+                <xref href="impala_explain_plan.xml#perf_summary"/> for details about the report format
+                and how to interpret it.
+              </p>
+              <p rev="2.3.0">
+                In CDH 5.5 / Impala 2.3 and higher, you can see a continuously updated report of
+                the summary information while a query is in progress.
+                See <xref href="impala_live_summary.xml#live_summary"/> for details.
+              </p>
+            </entry>
+          </row>
+          <row id="unset_cmd">
+            <entry>
+              <p>
+                <codeph>unset</codeph>
+              </p>
+            </entry>
+            <entry>
+              <p>
+                Removes any user-specified value for a query option and returns the option to its default value.
+                See <xref href="impala_query_options.xml#query_options"/> for the available query options.
+              </p>
+              <p rev="2.5.0 IMPALA-2180">
+                In CDH 5.7 / Impala 2.5 and higher, it can also remove user-specified substitution variables
+                using the notation <codeph>UNSET VAR:<varname>variable_name</varname></codeph>.
+              </p>
+            </entry>
+          </row>
+          <row id="use_cmd">
+            <entry>
+              <p>
+                <codeph>use</codeph>
+              </p>
+            </entry>
+            <entry>
+              <p>
+                Indicates the database against which to execute subsequent commands. Lets you avoid using fully
+                qualified names when referring to tables in databases other than <codeph>default</codeph>. See
+                <xref href="impala_use.xml#use"/> for details. Not effective with the <codeph>-q</codeph> option,
+                because that option only allows a single statement in the argument.
+              </p>
+            </entry>
+          </row>
+          <row id="version_cmd">
+            <entry>
+              <p>
+                <codeph>version</codeph>
+              </p>
+            </entry>
+            <entry>
+              <p>
+                Returns Impala version information.
+              </p>
+            </entry>
+          </row>
+        </tbody>
+      </tgroup>
+    </table>
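+
+    <p>
+      For example, the following <cmdname>impala-shell</cmdname> session uses several of the commands
+      from the preceding table in sequence. The hostname, database, and table names are illustrative,
+      and the query output is omitted:
+    </p>
+
+<codeblock>connect impala-host.example.com;
+use analytics_db;
+show tables;
+describe web_logs;
+set explain_level=3;
+select count(*) from web_logs;
+summary;
+profile;
+unset explain_level;
+quit;
+</codeblock>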
+  </conbody>
+</concept>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1fcc8cee/docs/topics/impala_shell_running_commands.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_shell_running_commands.xml b/docs/topics/impala_shell_running_commands.xml
new file mode 100644
index 0000000..013b23d
--- /dev/null
+++ b/docs/topics/impala_shell_running_commands.xml
@@ -0,0 +1,265 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="shell_running_commands">
+
+  <title>Running Commands and SQL Statements in impala-shell</title>
+  <titlealts audience="PDF"><navtitle>Running Commands and SQL Statements</navtitle></titlealts>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="impala-shell"/>
+      <data name="Category" value="SQL"/>
+      <data name="Category" value="Data Analysts"/>
+      <data name="Category" value="Developers"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+    <p>
+      For information on available commands, see
+      <xref href="impala_shell_commands.xml#shell_commands"/>. You can see the full set of available
+      commands by pressing TAB twice, for example:
+    </p>
+
+<codeblock>[impalad-host:21000] &gt;
+connect   describe  explain   help      history   insert    quit      refresh   select    set       shell     show      use       version
+[impalad-host:21000] &gt;</codeblock>
+
+    <note>
+      Commands must be terminated by a semicolon. A command can span multiple lines.
+    </note>
+
+    <p>
+      For example:
+    </p>
+
+<codeblock>[localhost:21000] &gt; select *
+                  &gt; from t1
+                  &gt; limit 5;
++---------+-----------+
+| s1      | s2        |
++---------+-----------+
+| hello   | world     |
+| goodbye | cleveland |
++---------+-----------+
+</codeblock>
+
+    <p>
+      A comment is considered part of the statement it precedes, so when you enter a <codeph>--</codeph> or
+      <codeph>/* */</codeph> comment, you get a continuation prompt until you finish entering a statement ending
+      with a semicolon:
+    </p>
+
+<codeblock>[localhost:21000] &gt; -- This is a test comment
+                  &gt; show tables like 't*';
++--------+
+| name   |
++--------+
+| t1     |
+| t2     |
+| tab1   |
+| tab2   |
+| tab3   |
+| text_t |
++--------+
+</codeblock>
+
+    <p>
+      Use the up-arrow and down-arrow keys to cycle through and edit previous commands.
+      <cmdname>impala-shell</cmdname> uses the <codeph>readline</codeph> library and so supports a standard set of
+      keyboard shortcuts for editing and cursor movement, such as <codeph>Ctrl-A</codeph> for beginning of line and
+      <codeph>Ctrl-E</codeph> for end of line.
+    </p>
+
+    <p rev="2.5.0 IMPALA-2179 IMPALA-2180">
+      In CDH 5.7 / Impala 2.5 and higher, you can define substitution variables to be used within SQL statements
+      processed by <cmdname>impala-shell</cmdname>. On the command line, you specify the option
+      <codeph>--var=<varname>variable_name</varname>=<varname>value</varname></codeph>.
+      Within an interactive session or a script file processed by the <codeph>-f</codeph> option, you specify
+      a <codeph>SET</codeph> command using the notation <codeph>SET VAR:<varname>variable_name</varname>=<varname>value</varname></codeph>.
+      Within a SQL statement, you substitute the value by using the notation <codeph>${var:<varname>variable_name</varname>}</codeph>.
+    </p>
+
+    <note>
+      Because this feature is part of <cmdname>impala-shell</cmdname> rather than the <cmdname>impalad</cmdname>
+      backend, make sure the client system you are connecting from has the most recent <cmdname>impala-shell</cmdname>.
+      You can use this feature with a new <cmdname>impala-shell</cmdname> connecting to an older <cmdname>impalad</cmdname>,
+      but not the reverse.
+    </note>
+
+    <p rev="2.5.0 IMPALA-2179 IMPALA-2180">
+      For example, here are some <cmdname>impala-shell</cmdname> commands that define substitution variables and then
+      use them in SQL statements executed through the <codeph>-q</codeph> and <codeph>-f</codeph> options.
+      Notice how the <codeph>-q</codeph> argument strings are single-quoted to prevent shell expansion of the
+      <codeph>${var:value}</codeph> notation, and any string literals within the queries are enclosed by double quotation marks.
+    </p>
+
+<codeblock rev="2.5.0 IMPALA-2179 IMPALA-2180">
+$ impala-shell --var=tname=table1 --var=colname=x --var=coltype=string -q 'create table ${var:tname} (${var:colname} ${var:coltype}) stored as parquet'
+Starting Impala Shell without Kerberos authentication
+Connected to <varname>hostname</varname>
+Server version: <varname>impalad_version</varname>
+Query: create table table1 (x string) stored as parquet
+
+$ NEW_STRING="hello world"
+$ impala-shell --var=tname=table1 --var=insert_val="$NEW_STRING" -q 'insert into ${var:tname} values ("${var:insert_val}")'
+Starting Impala Shell without Kerberos authentication
+Connected to <varname>hostname</varname>
+Server version: <varname>impalad_version</varname>
+Query: insert into table1 values ("hello world")
+Inserted 1 row(s) in 1.40s
+
+$ for VAL in foo bar bletch
+do
+  impala-shell --var=tname=table1 --var=insert_val="$VAL" -q 'insert into ${var:tname} values ("${var:insert_val}")'
+done
+...
+Query: insert into table1 values ("foo")
+Inserted 1 row(s) in 0.22s
+Query: insert into table1 values ("bar")
+Inserted 1 row(s) in 0.11s
+Query: insert into table1 values ("bletch")
+Inserted 1 row(s) in 0.21s
+
+$ echo "Search for what substring?" ; read answer
+Search for what substring?
+b
+$ impala-shell --var=tname=table1 -q 'select x from ${var:tname} where x like "%${var:answer}%"'
+Starting Impala Shell without Kerberos authentication
+Connected to <varname>hostname</varname>
+Server version: <varname>impalad_version</varname>
+Query: select x from table1 where x like "%b%"
++--------+
+| x      |
++--------+
+| bletch |
+| bar    |
++--------+
+Fetched 2 row(s) in 0.83s
+</codeblock>
+
+    <p rev="2.5.0 IMPALA-2179 IMPALA-2180">
+      Here is a substitution variable passed in by the <codeph>--var</codeph> option,
+      and then referenced by statements issued interactively. Then the variable is
+      cleared with the <codeph>UNSET</codeph> command, and defined again with the
+      <codeph>SET</codeph> command.
+    </p>
+
+<codeblock rev="2.5.0 IMPALA-2179 IMPALA-2180">
+$ impala-shell --quiet --var=tname=table1
+Starting Impala Shell without Kerberos authentication
+***********************************************************************************
+<varname>banner_message</varname>
+***********************************************************************************
+[<varname>hostname</varname>:21000] > select count(*) from ${var:tname};
++----------+
+| count(*) |
++----------+
+| 4        |
++----------+
+[<varname>hostname</varname>:21000] > unset var:tname;
+Unsetting variable TNAME
+[<varname>hostname</varname>:21000] > select count(*) from ${var:tname};
+Error: Unknown variable TNAME
+[<varname>hostname</varname>:21000] > set var:tname=table1;
+[<varname>hostname</varname>:21000] > select count(*) from ${var:tname};
++----------+
+| count(*) |
++----------+
+| 4        |
++----------+
+</codeblock>
+
+    <p rev="IMPALA-3397 CDH-40097">
+      The following example shows how the <codeph>SOURCE</codeph> command can execute
+      a series of statements from a file:
+    </p>
+
+<codeblock rev="IMPALA-3397 CDH-40097">
+$ cat commands.sql
+show databases;
+show tables in default;
+show functions in _impala_builtins like '*minute*';
+
+$ impala-shell -i localhost
+...
+[localhost:21000] > source commands.sql;
+Query: show databases
++------------------+----------------------------------------------+
+| name             | comment                                      |
++------------------+----------------------------------------------+
+| _impala_builtins | System database for Impala builtin functions |
+| default          | Default Hive database                        |
++------------------+----------------------------------------------+
+Fetched 2 row(s) in 0.06s
+Query: show tables in default
++-----------+
+| name      |
++-----------+
+| customers |
+| sample_07 |
+| sample_08 |
+| web_logs  |
++-----------+
+Fetched 4 row(s) in 0.02s
+Query: show functions in _impala_builtins like '*minute*'
++-------------+--------------------------------+-------------+---------------+
+| return type | signature                      | binary type | is persistent |
++-------------+--------------------------------+-------------+---------------+
+| INT         | minute(TIMESTAMP)              | BUILTIN     | true          |
+| TIMESTAMP   | minutes_add(TIMESTAMP, BIGINT) | BUILTIN     | true          |
+| TIMESTAMP   | minutes_add(TIMESTAMP, INT)    | BUILTIN     | true          |
+| TIMESTAMP   | minutes_sub(TIMESTAMP, BIGINT) | BUILTIN     | true          |
+| TIMESTAMP   | minutes_sub(TIMESTAMP, INT)    | BUILTIN     | true          |
++-------------+--------------------------------+-------------+---------------+
+Fetched 5 row(s) in 0.03s
+</codeblock>
+
+    <p rev="IMPALA-3397 CDH-40097">
+      The following example shows how a file that is run by the <codeph>SOURCE</codeph> command,
+      or through the <codeph>-q</codeph> or <codeph>-f</codeph> options of <cmdname>impala-shell</cmdname>,
+      can contain additional <codeph>SOURCE</codeph> commands.
+      The first file, <filepath>nested1.sql</filepath>, runs an <cmdname>impala-shell</cmdname> command
+      and then also runs the commands from <filepath>nested2.sql</filepath>.
+      This ability for scripts to call each other is often useful for code that sets up schemas for applications
+      or test environments.
+    </p>
+
+<codeblock rev="IMPALA-3397 CDH-40097">
+$ cat nested1.sql
+show functions in _impala_builtins like '*minute*';
+source nested2.sql
+$ cat nested2.sql
+show functions in _impala_builtins like '*hour*'
+
+$ impala-shell -i localhost -f nested1.sql
+Starting Impala Shell without Kerberos authentication
+Connected to localhost:21000
+...
+Query: show functions in _impala_builtins like '*minute*'
++-------------+--------------------------------+-------------+---------------+
+| return type | signature                      | binary type | is persistent |
++-------------+--------------------------------+-------------+---------------+
+| INT         | minute(TIMESTAMP)              | BUILTIN     | true          |
+| TIMESTAMP   | minutes_add(TIMESTAMP, BIGINT) | BUILTIN     | true          |
+| TIMESTAMP   | minutes_add(TIMESTAMP, INT)    | BUILTIN     | true          |
+| TIMESTAMP   | minutes_sub(TIMESTAMP, BIGINT) | BUILTIN     | true          |
+| TIMESTAMP   | minutes_sub(TIMESTAMP, INT)    | BUILTIN     | true          |
++-------------+--------------------------------+-------------+---------------+
+Fetched 5 row(s) in 0.01s
+Query: show functions in _impala_builtins like '*hour*'
++-------------+------------------------------+-------------+---------------+
+| return type | signature                    | binary type | is persistent |
++-------------+------------------------------+-------------+---------------+
+| INT         | hour(TIMESTAMP)              | BUILTIN     | true          |
+| TIMESTAMP   | hours_add(TIMESTAMP, BIGINT) | BUILTIN     | true          |
+| TIMESTAMP   | hours_add(TIMESTAMP, INT)    | BUILTIN     | true          |
+| TIMESTAMP   | hours_sub(TIMESTAMP, BIGINT) | BUILTIN     | true          |
+| TIMESTAMP   | hours_sub(TIMESTAMP, INT)    | BUILTIN     | true          |
++-------------+------------------------------+-------------+---------------+
+Fetched 5 row(s) in 0.01s
+</codeblock>
+
+  </conbody>
+</concept>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1fcc8cee/docs/topics/impala_ssl.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_ssl.xml b/docs/topics/impala_ssl.xml
new file mode 100644
index 0000000..56e86a0
--- /dev/null
+++ b/docs/topics/impala_ssl.xml
@@ -0,0 +1,256 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="ssl">
+
+  <title id="tls">Configuring TLS/SSL for Impala</title>
+
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="Security"/>
+      <data name="Category" value="SSL"/>
+      <data name="Category" value="Encryption"/>
+      <data name="Category" value="Configuring"/>
+      <data name="Category" value="Administrators"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+    <p>
+      <indexterm audience="Cloudera">SSL</indexterm>
+      Impala supports TLS/SSL network encryption, between Impala and client programs, and between the Impala-related daemons running on
+      different nodes in the cluster. This feature is important when you also use other features such as Kerberos authentication or Sentry
+      authorization, where credentials are being transmitted back and forth.
+      <note conref="../shared/CDHVariables.xml#xd_583c10bfdbd326ba-3ca24a24-13d80143249--7f9a/CMCDH_EitherOK"
+      />
+    </p>
+
+  </conbody>
+
+  <concept id="concept_gnk_2tt_qp">
+
+    <title>Using Cloudera Manager</title>
+
+    <prolog>
+      <metadata>
+        <data name="Category" value="Cloudera Manager"/>
+      </metadata>
+    </prolog>
+
+    <conbody>
+
+      <p>
+        To configure Impala to listen for Beeswax and HiveServer2 requests on TLS/SSL-secured ports:
+        <ol id="ol_rnf_ftt_qp">
+          <li>
+            Open the Cloudera Manager Admin Console and go to the <uicontrol>Impala</uicontrol> service.
+          </li>
+
+          <li
+            conref="../shared/cm_common_elements.xml#cm/config_edit"/>
+
+          <li>
+            Select <menucascade><uicontrol>Scope</uicontrol><uicontrol>Impala (Service-Wide)</uicontrol></menucascade>.
+          </li>
+
+          <li>
+            Select <menucascade><uicontrol>Category</uicontrol><uicontrol>Security</uicontrol></menucascade>.
+          </li>
+
+          <li>
+            Edit the following properties:
+            <table frame="all"
+              id="table_drf_ftt_qp">
+              <title>Impala SSL Properties</title>
+              <tgroup cols="2">
+                <colspec colname="c1" colnum="1" colwidth="1*"/>
+                <colspec colname="c2" colnum="2" colwidth="2.5*"/>
+                <thead>
+                  <row>
+                    <entry>
+                      Property
+                    </entry>
+                    <entry>
+                      Description
+                    </entry>
+                  </row>
+                </thead>
+                <tbody>
+                  <row>
+                    <entry>
+                      <b>Enable TLS/SSL for Impala Client Services</b>
+                    </entry>
+                    <entry>
+                      Encrypt communication between clients (like ODBC, JDBC, and the Impala shell) and the Impala daemon using Transport
+                      Layer Security (TLS) (formerly known as Secure Socket Layer (SSL)).
+                    </entry>
+                  </row>
+                  <row>
+                    <entry>
+                      <b>SSL/TLS Certificate for Clients</b>
+                    </entry>
+                    <entry>
+                      Local path to the X509 certificate that identifies the Impala daemon to clients during TLS/SSL connections. This
+                      file must be in PEM format.
+                    </entry>
+                  </row>
+                  <row>
+                    <entry>
+                      <b>SSL/TLS Private Key for Clients</b>
+                    </entry>
+                    <entry>
+                      Local path to the private key that matches the certificate specified in the Certificate for Clients. This file must be
+                      in PEM format.
+                    </entry>
+                  </row>
+                  <row>
+                    <entry>
+                      <b>SSL/TLS Private Key Password for Clients</b>
+                    </entry>
+                    <entry>
+                      A shell command for Impala to run on startup to retrieve the password for a password-protected private key file.
+                      The output of the command is truncated to a maximum of 1024 bytes, and any trailing whitespace (such as spaces
+                      or newline characters) is trimmed. If the command exits with an error, Impala does not start. If the password
+                      is incorrect, clients cannot connect to the server regardless of whether the public key is correct.
+                    </entry>
+                  </row>
+                  <row>
+                    <entry>
+                      <b>SSL/TLS CA Certificate</b>
+                    </entry>
+                    <entry>
+                      Must be specified for TLS/SSL encryption to be enabled for communication
+                      between internal Impala components.
+                    </entry>
+                  </row>
+                  <row>
+                    <entry>
+                      <b>SSL/TLS Certificate for <varname>Impala component</varname> Webserver</b>
+                    </entry>
+                    <entry>
+                      There are three of these configuration settings, one each for <q>Impala Daemon</q>,
+                      <q>Catalog Server</q>, and <q>Statestore</q>.
+                      Each of these Impala components has its own internal web server that powers the
+                      associated web UI with diagnostic information.
+                      The configuration setting represents the local path to the X509 certificate that
+                      identifies the web server to clients during TLS/SSL connections. This
+                      file must be in PEM format.
+                    </entry>
+                  </row>
+                </tbody>
+              </tgroup>
+            </table>
+          </li>
+
+          <li conref="../shared/cm_common_elements.xml#cm/save_changes_short"/>
+
+          <li>
+            Restart the Impala service.
+          </li>
+        </ol>
+      </p>
+
+      <p>
+        For information on configuring TLS/SSL communication with the <codeph>impala-shell</codeph> interpreter, see
+        <xref href="#concept_q1p_j2d_rp/secref"/>.
+      </p>
+
+    </conbody>
+
+  </concept>
+
+  <concept id="concept_q1p_j2d_rp">
+
+    <title>Using the Command Line</title>
+
+    <conbody>
+
+<!--
+Info from Henry, from https://docs.google.com/a/cloudera.com/document/d/1u00CJ8WRzXR-1AK_WnQlR6LMtY-7Rc3eHaKNgw3IZvA/edit
+-->
+
+      <p>
+        To enable SSL when client applications connect to Impala, add both of the following flags to the <cmdname>impalad</cmdname> startup options, as shown in the example after this list:
+      </p>
+
+      <ul id="ul_i2p_m2d_rp">
+        <li>
+          <codeph>--ssl_server_certificate</codeph>: the full path to the server certificate, on the local filesystem.
+        </li>
+
+        <li>
+          <codeph>--ssl_private_key</codeph>: the full path to the server private key, on the local filesystem.
+        </li>
+      </ul>
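+
+      <p>
+        For example, the added startup flags might look like the following. The paths shown here are
+        placeholders for the locations of your own certificate and key files:
+      </p>
+
+<codeblock>--ssl_server_certificate=/full/path/to/server-cert.pem
+--ssl_private_key=/full/path/to/server-key.pem
+</codeblock>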
+
+      <p rev="2.3.0">
+        In CDH 5.5 / Impala 2.3 and higher, Impala can also use SSL for its own internal communication between the
+        <cmdname>impalad</cmdname>, <codeph>statestored</codeph>, and <codeph>catalogd</codeph> daemons.
+        To enable this additional SSL encryption, set the <codeph>--ssl_server_certificate</codeph>
+        and <codeph>--ssl_private_key</codeph> flags in the startup options for
+        <cmdname>impalad</cmdname>, <cmdname>catalogd</cmdname>, and <cmdname>statestored</cmdname>,
+        and also add the <codeph>--ssl_client_ca_certificate</codeph> flag for all three of those daemons.
+      </p>
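+
+      <p>
+        For example, to enable this internal encryption, each of the three daemons might be started with
+        flags such as the following, where the certificate, key, and CA certificate paths are placeholders
+        for your own files:
+      </p>
+
+<codeblock>--ssl_server_certificate=/full/path/to/server-cert.pem
+--ssl_private_key=/full/path/to/server-key.pem
+--ssl_client_ca_certificate=/full/path/to/ca-cert.pem
+</codeblock>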
+
+      <note conref="../shared/impala_common.xml#common/impala_kerberos_ssl_caveat"/>
+
+      <p>
+        If either of these flags is set, both must be set. In that case, Impala starts listening for Beeswax and HiveServer2 requests on
+        SSL-secured ports only. (The port numbers stay the same; see <xref href="impala_ports.xml#ports"/> for details.)
+      </p>
+
+      <p>
+        Since Impala uses passphrase-less certificates in PEM format, you can reuse a host's existing Java keystore by converting it to the
+        PEM format. For instructions, see
+        <xref audience="integrated" href="cm_sg_openssl_jks.xml#concept_ek3_sdl_rp"/><xref audience="standalone" href="http://www.cloudera.com/documentation/enterprise/latest/topics/cm_sg_openssl_jks.html" scope="external" format="html"/>.
+      </p>
+
+      <section id="secref">
+
+        <title>Configuring TLS/SSL Communication for the Impala Shell</title>
+
+        <p>
+          Typically, a client program has corresponding configuration properties in Cloudera Manager to verify that it is connecting to the
+          right server. For example, with SSL enabled for Impala, you use the following options when starting the
+          <cmdname>impala-shell</cmdname> interpreter:
+        </p>
+
+        <ul id="ul_kgp_m2d_rp">
+          <li>
+            <codeph>--ssl</codeph>: enables TLS/SSL for <cmdname>impala-shell</cmdname>.
+          </li>
+
+          <li>
+            <codeph>--ca_cert</codeph>: the local pathname pointing to the third-party CA certificate, or to a copy of the server
+            certificate for self-signed server certificates.
+          </li>
+        </ul>
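+
+        <p>
+          A typical invocation might look like the following, where the hostname and the CA certificate
+          path are illustrative:
+        </p>
+
+<codeblock>$ impala-shell --ssl --ca_cert=/full/path/to/ca-cert.pem -i impala-host.example.com
+</codeblock>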
+
+        <p>
+          If <codeph>--ca_cert</codeph> is not set, <cmdname>impala-shell</cmdname> enables TLS/SSL, but does not validate the server
+          certificate. This is useful for connecting to a known-good Impala that is only running over TLS/SSL, when a copy of the
+          certificate is not available (such as when debugging customer installations).
+        </p>
+
+      </section>
+
+    </conbody>
+
+  </concept>
+
+  <concept id="ssl_jdbc_odbc">
+    <title>Using TLS/SSL with Business Intelligence Tools</title>
+    <conbody>
+      <p>
+        You can use Kerberos authentication, TLS/SSL encryption, or both to secure
+        connections from JDBC and ODBC applications to Impala.
+        See <xref href="impala_jdbc.xml#impala_jdbc"/> and <xref href="impala_odbc.xml#impala_odbc"/>
+        for details.
+      </p>
+
+      <p conref="../shared/impala_common.xml#common/hive_jdbc_ssl_kerberos_caveat"/>
+    </conbody>
+  </concept>
+
+</concept>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1fcc8cee/docs/topics/impala_troubleshooting.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_troubleshooting.xml b/docs/topics/impala_troubleshooting.xml
new file mode 100644
index 0000000..f7ebe4e
--- /dev/null
+++ b/docs/topics/impala_troubleshooting.xml
@@ -0,0 +1,447 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="troubleshooting">
+
+  <title>Troubleshooting Impala</title>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="Troubleshooting"/>
+      <data name="Category" value="Administrators"/>
+      <data name="Category" value="Developers"/>
+      <data name="Category" value="Data Analysts"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+    <p>
+      <indexterm audience="Cloudera">troubleshooting</indexterm>
+      Troubleshooting for Impala requires being able to diagnose and debug problems
+      with performance, network connectivity, out-of-memory conditions, disk space usage,
+      and crash or hang conditions in any of the Impala-related daemons.
+    </p>
+
+    <p outputclass="toc inpage" audience="PDF">
+      The following sections describe the general troubleshooting procedures to diagnose
+      different kinds of problems:
+    </p>
+
+  </conbody>
+
+  <concept id="trouble_sql">
+
+    <title>Troubleshooting Impala SQL Syntax Issues</title>
+
+    <conbody>
+
+      <p>
+        In general, if queries issued against Impala fail, you can try running these same queries against Hive.
+      </p>
+
+      <ul>
+        <li>
+          If a query fails against both Impala and Hive, it is likely that there is a problem with your query or
+          other elements of your CDH environment:
+          <ul>
+            <li>
+              Review the <xref href="impala_langref.xml#langref">Language Reference</xref> to ensure your query is
+              valid.
+            </li>
+
+            <li>
+              Check <xref href="impala_reserved_words.xml#reserved_words"/> to see if any database, table,
+              column, or other object names in your query conflict with Impala reserved words.
+              If so, quote those names with backticks (<codeph>``</codeph>), as shown in the example
+              following this list.
+            </li>
+
+            <li>
+              Check <xref href="impala_functions.xml#builtins"/> to confirm whether Impala supports all the
+              built-in functions being used by your query, and whether argument and return types are the
+              same as you expect.
+            </li>
+
+            <li>
+              Review the <xref href="impala_logging.xml#logs_debug">contents of the Impala logs</xref> for any information that may be useful in identifying the
+              source of the problem.
+            </li>
+          </ul>
+        </li>
+
+        <li>
+          If a query fails against Impala but not Hive, it is likely that there is a problem with your Impala
+          installation.
+        </li>
+      </ul>
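+
+      <p>
+        For example, if a table includes a column whose name conflicts with a reserved word, such as a
+        hypothetical column named <codeph>date</codeph>, enclose that name in backticks:
+      </p>
+
+<codeblock>-- Without the backticks, the reserved word causes a syntax error.
+select `date`, count(*) from t1 group by `date`;
+</codeblock>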
+    </conbody>
+  </concept>
+
+  <concept id="trouble_io" rev="CDH-19201">
+    <title>Troubleshooting I/O Capacity Problems</title>
+    <conbody>
+      <p>
+        Impala queries are typically I/O-intensive. If there is an I/O problem with storage devices,
+        or with HDFS itself, Impala queries could show slow response times with no obvious cause
+        on the Impala side. Slow I/O on even a single DataNode could result in an overall slowdown, because
+        queries involving clauses such as <codeph>ORDER BY</codeph>, <codeph>GROUP BY</codeph>, or <codeph>JOIN</codeph>
+        do not start returning results until all DataNodes have finished their work.
+      </p>
+      <p>
+        To test whether the Linux I/O system itself is performing as expected, run Linux commands like
+        the following on each DataNode:
+      </p>
+<codeblock>
+$ sudo sysctl -w vm.drop_caches=3 vm.drop_caches=0
+vm.drop_caches = 3
+vm.drop_caches = 0
+$ sudo dd if=/dev/sda bs=1M of=/dev/null count=1k 
+1024+0 records in
+1024+0 records out
+1073741824 bytes (1.1 GB) copied, 5.60373 s, 192 MB/s
+$ sudo dd if=/dev/sdb bs=1M of=/dev/null count=1k
+1024+0 records in
+1024+0 records out
+1073741824 bytes (1.1 GB) copied, 5.51145 s, 195 MB/s
+$ sudo dd if=/dev/sdc bs=1M of=/dev/null count=1k
+1024+0 records in
+1024+0 records out
+1073741824 bytes (1.1 GB) copied, 5.58096 s, 192 MB/s
+$ sudo dd if=/dev/sdd bs=1M of=/dev/null count=1k
+1024+0 records in
+1024+0 records out
+1073741824 bytes (1.1 GB) copied, 5.43924 s, 197 MB/s
+</codeblock>
+      <p>
+        On modern hardware, a throughput rate of less than 100 MB/s typically indicates
+        a performance issue with the storage device. Correct the hardware problem before
+        continuing with Impala tuning or benchmarking.
+      </p>
+    </conbody>
+  </concept>
+
+
+  <concept id="trouble_cookbook">
+
+    <title>Impala Troubleshooting Quick Reference</title>
+
+    <conbody>
+
+      <p>
+        The following table lists common problems and potential solutions.
+      </p>
+
+      <table>
+        <tgroup cols="3">
+          <colspec colname="1" colwidth="10*"/>
+          <colspec colname="2" colwidth="30*"/>
+          <colspec colname="3" colwidth="30*"/>
+          <thead>
+            <row>
+              <entry>
+                Symptom
+              </entry>
+              <entry>
+                Explanation
+              </entry>
+              <entry>
+                Recommendation
+              </entry>
+            </row>
+          </thead>
+          <tbody>
+            <row>
+              <entry>
+                Impala takes a long time to start.
+              </entry>
+              <entry>
+                Impala instances with large numbers of tables, partitions, or data files take longer to start
+                because the metadata for these objects is broadcast to all <cmdname>impalad</cmdname> nodes and
+                cached.
+              </entry>
+              <entry>
+                Adjust the statestore timeout and related metadata-loading settings. See
+                <xref href="impala_timeouts.xml#statestore_timeout"/>.
+              </entry>
+            </row>
+            <row>
+              <entry>
+                <p>
+                  Joins fail to complete.
+                </p>
+              </entry>
+              <entry>
+                <p>
+                  There may be insufficient memory. During a join, data from the second and subsequent tables
+                  being joined is loaded into memory. If Impala chooses an inefficient join order or join mechanism,
+                  the query could exceed the total memory available.
+                </p>
+              </entry>
+              <entry>
+                <p>
+                  Start by gathering statistics with the <codeph>COMPUTE STATS</codeph> statement for each table
+                  involved in the join. Consider specifying the <codeph>[SHUFFLE]</codeph> hint so that data from
+                  the joined tables is split up between nodes rather than broadcast to each node. (A short example
+                  appears after this table.) If tuning at the SQL level is not sufficient, add more memory to your
+                </p>
+              </entry>
+            </row>
+            <row>
+              <entry>
+                <p>
+                  Queries return incorrect results.
+                </p>
+              </entry>
+              <entry>
+                <p>
+                  Impala metadata may be outdated after changes are performed in Hive.
+                </p>
+              </entry>
+              <entry>
+                <p>
+                  Where possible, use the appropriate Impala statement (<codeph>INSERT</codeph>, <codeph>LOAD
+                  DATA</codeph>, <codeph>CREATE TABLE</codeph>, <codeph>ALTER TABLE</codeph>, <codeph>COMPUTE
+                  STATS</codeph>, and so on) rather than switching back and forth between Impala and Hive. Impala
+                  automatically broadcasts the results of DDL and DML operations to all Impala nodes in the
+                  cluster, but does not automatically recognize when such changes are made through Hive. After
+                  inserting data, adding a partition, or other operation in Hive, refresh the metadata for the
+                  table as described in <xref href="impala_refresh.xml#refresh"/>.
+                </p>
+              </entry>
+            </row>
+            <row>
+              <entry>
+                <p>
+                  Queries are slow to return results.
+                </p>
+              </entry>
+              <entry>
+                <p>
+                  Some <codeph>impalad</codeph> instances may not have started. Using a browser, connect to the
+                  host running the Impala state store. Connect using an address of the form
+                  <codeph>http://<varname>hostname</varname>:<varname>port</varname>/metrics</codeph>.
+                </p>
+
+                <p>
+                  <note>
+                    Replace <varname>hostname</varname> and <varname>port</varname> with the hostname and port of
+                    your Impala state store host machine and web server port. The default port is 25010.
+                  </note>
+                  The number of <codeph>impalad</codeph> instances listed should match the expected number of
+                  <codeph>impalad</codeph> instances installed in the cluster. There should also be one
+                  <codeph>impalad</codeph> instance installed on each DataNode.
+                </p>
+              </entry>
+              <entry>
+                <p>
+                  Ensure Impala is installed on all DataNodes. Start any <codeph>impalad</codeph> instances that
+                  are not running.
+                </p>
+              </entry>
+            </row>
+            <row>
+              <entry>
+                <p>
+                  Queries are slow to return results.
+                </p>
+              </entry>
+              <entry>
+                <p>
+                  Impala may not be configured to use native checksumming. Native checksumming uses
+                  machine-specific instructions to compute checksums over HDFS data very quickly. Review Impala
+                  logs. If you find instances of "<codeph>Unable to load native-hadoop library for your
+                  platform... using builtin-java classes where applicable</codeph>" messages, native checksumming is not enabled.
+                </p>
+              </entry>
+              <entry>
+                <p>
+                  Ensure Impala is configured to use native checksumming as described in
+                  <xref href="impala_config_performance.xml#config_performance"/>.
+                </p>
+              </entry>
+            </row>
+            <row>
+              <entry>
+                <p>
+                  Queries are slow to return results.
+                </p>
+              </entry>
+              <entry>
+                <p>
+                  Impala may not be configured to use data locality tracking.
+                </p>
+              </entry>
+              <entry>
+                <p>
+                  Test Impala for data locality tracking and make configuration changes as necessary. Information
+                  on this process can be found in <xref href="impala_config_performance.xml#config_performance"/>.
+                </p>
+              </entry>
+            </row>
+            <row>
+              <entry>
+                <p>
+                  Attempts to complete Impala tasks such as INSERT-SELECT statements fail. The Impala logs
+                  include messages stating that files could not be opened because permission was denied.
+                </p>
+              </entry>
+              <entry>
+                <p>
+                  This can be the result of permissions issues. For example, you could use the Hive shell as the
+                  hive user to create a table. After creating this table, you could attempt to complete some
+                  action, such as an INSERT-SELECT on the table. Because the table was created using one user and
+                  the INSERT-SELECT is attempted by another, this action may fail due to permissions issues.
+                </p>
+              </entry>
+              <entry>
+                <p>
+                  In general, ensure the Impala user has sufficient permissions. In the preceding example, ensure
+                  the Impala user has sufficient permissions to access the table that the Hive user created.
+                </p>
+              </entry>
+            </row>
+            <row rev="IMP-1210">
+              <entry>
+                <p>
+                  Impala fails to start up, with the <cmdname>impalad</cmdname> logs referring to errors connecting
+                  to the statestore service and attempts to re-register.
+                </p>
+              </entry>
+              <entry>
+                <p>
+                  A large number of databases, tables, partitions, and so on can require metadata synchronization,
+                  particularly on startup, that takes longer than the default timeout for the statestore service.
+                </p>
+              </entry>
+              <entry>
+                <p>
+                  Configure the statestore timeout value and possibly other settings related to the frequency of
+                  statestore updates and metadata loading. See
+                  <xref href="impala_timeouts.xml#statestore_timeout"/> and
+                  <xref href="impala_scalability.xml#statestore_scalability"/>.
+                </p>
+              </entry>
+            </row>
+          </tbody>
+        </tgroup>
+      </table>
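+
+      <p>
+        For example, for the join-related symptom in the preceding table, an initial tuning pass might
+        look like the following, where the table and column names are illustrative:
+      </p>
+
+<codeblock>compute stats big_table;
+compute stats small_table;
+-- Request a partitioned (shuffle) join instead of a broadcast join.
+select b.id, count(*)
+  from big_table b join [SHUFFLE] small_table s on b.id = s.id
+ group by b.id;
+</codeblock>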
+
+      <p audience="Cloudera">
+        Some or all of these settings might also be useful.
+<codeblock>NUM_SCANNER_THREADS: 0
+ABORT_ON_DEFAULT_LIMIT_EXCEEDED: 0
+MAX_IO_BUFFERS: 0
+DEFAULT_ORDER_BY_LIMIT: -1
+BATCH_SIZE: 0
+NUM_NODES: 0
+DISABLE_CODEGEN: 0
+MAX_ERRORS: 0
+ABORT_ON_ERROR: 0
+MAX_SCAN_RANGE_LENGTH: 0
+ALLOW_UNSUPPORTED_FORMATS: 0
+SUPPORT_START_OVER: false
+DEBUG_ACTION:
+MEM_LIMIT: 0
+</codeblock>
+      </p>
+    </conbody>
+  </concept>
+
+  <concept audience="Cloudera" id="core_dumps">
+
+    <title>Enabling Core Dumps for Impala</title>
+
+    <conbody>
+
+        <p>
+          Fill in details, then unhide.
+        </p>
+
+        <p>
+          From Nong:
+        </p>
+
+        <p>
+          In a CM-managed cluster, search for "core" from the impala configuration page. You should see the "enable
+          core dump" config.
+        </p>
+
+        <p>
+          From <xref href="impala_config_options.xml#config_options"/>:
+        </p>
+
+<codeblock>export ENABLE_CORE_DUMPS=${ENABLE_CORE_DUMPS:-false}</codeblock>
+
+        <note conref="../shared/impala_common.xml#common/core_dump_considerations"/>
+
+      <p></p>
+    </conbody>
+  </concept>
+
+  <concept audience="Cloudera" id="io_throughput">
+    <title>Verifying I/O Throughput</title>
+    <conbody>
+      <p>
+        Optimal Impala query performance depends on being able to perform I/O across multiple storage devices
+        in parallel, with the data transferred at or close to the maximum throughput for each device.
+        If a hardware or configuration issue causes a reduction in I/O throughput, even if the problem only
+        affects a subset of storage devices, you might experience
+        slow query performance that cannot be improved by using regular SQL tuning techniques.
+      </p>
+      <p>
+        As a general guideline, expect each commodity storage device (for example, a standard rotational
+        hard drive) to be able to transfer approximately 100 MB per second. If you see persistently slow query
+        performance, examine the Impala logs to check the I/O scan rate reported for each disk.
+      </p>
+
+<codeblock>
+<![CDATA[
+Useful test from beta at Visa.
+SME: Jayant@
+
+Symptoms:
+* Queries running slow
+* Scan rate of IO in Impala logs show noticeably less than expected IO rate for each disk (typical commodity disk should provide ~100 MB/s
+
+Actions:
+* Validate disk read from OS to confirm no issue at hardware or OS level
+* Validate disk read at HDFS to see if issue at HDFS config
+
+Specifics:
+Testing Linux and hardware IO:
+# First running:
+sudo sysctl -w vm.drop_caches=3 vm.drop_caches=0
+
+# Then Running:
+sudo dd if=/dev/sda bs=1M of=/dev/null count=1k
+& sudo dd if=/dev/sdb bs=1M of=/dev/null count=1k
+& sudo dd if=/dev/sdc bs=1M of=/dev/null count=1k
+& sudo dd if=/dev/sdd bs=1M of=/dev/null count=1k & wait
+
+Testing HDFS IO:
+# You can use TestDFSIO. Its documented here ; http://answers.oreilly.com/topic/460-how-to-benchmark-a-hadoop-cluster/
+# You can also use sar, dd and iostat for monitoring the disk.
+
+# writes 10 files each of 1000 MB
+hadoop jar $HADOOP_INSTALL/hadoop-*-test.jar TestDFSIO -write -nrFiles 10 -fileSize 1000
+
+# run the read benchmark
+hadoop jar $HADOOP_INSTALL/hadoop-*-test.jar TestDFSIO -read -nrFiles 10 -fileSize 1000
+
+# clean up the data
+hadoop jar $HADOOP_INSTALL/hadoop-*-test.jar TestDFSIO -clean
+]]>
+</codeblock>
+
+    </conbody>
+  </concept>
+
+  <concept id="webui_snippet" audience="PDF">
+    <title conref="impala_webui.xml#webui/webui_title"/>
+    <conbody>
+      <p conref="impala_webui.xml#webui/webui_intro"/>
+      <p>
+        For full details, see <xref href="impala_webui.xml#webui"/>.
+      </p>
+    </conbody>
+  </concept>
+
+</concept>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1fcc8cee/docs/topics/impala_webui.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_webui.xml b/docs/topics/impala_webui.xml
new file mode 100644
index 0000000..38a5f00
--- /dev/null
+++ b/docs/topics/impala_webui.xml
@@ -0,0 +1,650 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="webui">
+
+  <title id="webui_title">Impala Web User Interface for Debugging</title>
+  <titlealts audience="PDF"><navtitle>Web User Interface</navtitle></titlealts>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="Troubleshooting"/>
+      <data name="Category" value="Administrators"/>
+      <data name="Category" value="Developers"/>
+      <data name="Category" value="Data Analysts"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+    <p id="webui_intro">
+      <indexterm audience="Cloudera">web UI</indexterm>
+      <indexterm audience="Cloudera">debug UI</indexterm>
+      Each of the Impala daemons (<cmdname>impalad</cmdname>, <cmdname>statestored</cmdname>,
+      and <cmdname>catalogd</cmdname>) includes a built-in web server that displays
+      diagnostic and status information:
+      <ul>
+      <li>
+        <p>
+          The <cmdname>impalad</cmdname> web UI (default port: 25000) includes
+          information about configuration settings, running and completed queries, and associated performance and
+          resource usage for queries. In particular, the <uicontrol>Details</uicontrol> link for each query displays
+          alternative views of the query including a graphical representation of the plan, and the
+          output of the <codeph>EXPLAIN</codeph>, <codeph>SUMMARY</codeph>, and <codeph>PROFILE</codeph>
+          statements from <cmdname>impala-shell</cmdname>.
+          Each host that runs the <cmdname>impalad</cmdname> daemon has
+          its own instance of the web UI, with details about those queries for which that
+          host served as the coordinator. To get a consolidated view for all queries,
+          it is usually more convenient to use the charts, graphs, and other monitoring
+          features in Cloudera Manager. The <cmdname>impalad</cmdname> web UI is mainly
+          for diagnosing query problems that can be traced to a particular node.
+        </p>
+      </li>
+      <li>
+        <p>
+          The <cmdname>statestored</cmdname> web UI (default port: 25010) includes
+          information about memory usage, configuration settings, and ongoing health checks
+          performed by this daemon. Because there is only a single instance of this
+          daemon within any cluster, you view the web UI only on the particular host
+          that serves as the Impala Statestore.
+        </p>
+      </li>
+      <li>
+        <p>
+          The <cmdname>catalogd</cmdname> web UI (default port: 25020) includes
+          information about the databases, tables, and other objects managed by Impala,
+          in addition to the resource usage and configuration settings of the daemon itself.
+          The catalog information is represented as the underlying Thrift data structures.
+          Because there is only a single instance of this daemon within any cluster, you view the
+          web UI only on the particular host that serves as the Impala Catalog Server.
+        </p>
+      </li>
+      </ul>
+    </p>
+
+    <note>
+      <p>
+        The web user interface is primarily for problem diagnosis and troubleshooting. The items listed and their
+        formats are subject to change. To monitor Impala health, particularly across the entire cluster at once, use
+        the Cloudera Manager interface.
+      </p>
+    </note>
+
+    <p outputclass="toc inpage"/>
+  </conbody>
+
+  <concept id="webui_impalad">
+
+    <title>Debug Web UI for impalad</title>
+
+    <conbody>
+
+      <p>
+        To debug and troubleshoot the <cmdname>impalad</cmdname> daemon using a web-based interface, open the URL
+        <codeph>http://<varname>impala-server-hostname</varname>:25000/</codeph> in a browser. (For secure
+        clusters, use the prefix <codeph>https://</codeph> instead of <codeph>http://</codeph>.) Because each
+        Impala node produces its own set of debug information, choose a specific node that you are curious about or
+        suspect is having problems.
+      </p>
+
+      <note>
+        To get a convenient picture of the health of all Impala nodes in a cluster, use the Cloudera Manager
+        interface, which collects the low-level operational information from all Impala nodes, and presents a
+        unified view of the entire cluster.
+      </note>
+    </conbody>
+
+    <concept audience="Cloudera" id="webui_impalad_disabling">
+
+      <title>Turning off the Web UI for impalad</title>
+
+      <conbody>
+
+        <p></p>
+      </conbody>
+    </concept>
+
+    <concept id="webui_impalad_main">
+
+      <title>Main Page</title>
+
+      <conbody>
+
+        <p>
+          By default, the main page of the debug web UI is at
+          <codeph>http://<varname>impala-server-hostname</varname>:25000/</codeph> (non-secure cluster) or
+          <codeph>https://<varname>impala-server-hostname</varname>:25000/</codeph> (secure cluster).
+        </p>
+
+        <p>
+          This page lists the version of the <cmdname>impalad</cmdname> daemon, plus basic hardware and software
+          information about the corresponding host, such as information about the CPU, memory, disks, and operating
+          system version.
+        </p>
+      </conbody>
+    </concept>
+
+    <concept id="webui_impalad_backends">
+
+      <title>Backends Page</title>
+
+      <conbody>
+
+        <p>
+          By default, the <uicontrol>backends</uicontrol> page of the debug web UI is at
+          <codeph>http://<varname>impala-server-hostname</varname>:25000/backends</codeph> (non-secure cluster) or
+          <codeph>https://<varname>impala-server-hostname</varname>:25000/backends</codeph> (secure cluster).
+        </p>
+
+        <p>
+          This page lists the host and port info for each of the <cmdname>impalad</cmdname> nodes in the cluster.
+          Because each <cmdname>impalad</cmdname> daemon knows about every other <cmdname>impalad</cmdname> daemon
+          through the statestore, this information should be the same regardless of which node you select. Links
+          take you to the corresponding debug web pages for any of the other nodes in the cluster.
+        </p>
+      </conbody>
+    </concept>
+
+    <concept id="webui_impalad_catalog">
+
+      <title>Catalog Page</title>
+
+      <conbody>
+
+        <p>
+          By default, the <uicontrol>catalog</uicontrol> page of the debug web UI is at
+          <codeph>http://<varname>impala-server-hostname</varname>:25000/catalog</codeph> (non-secure cluster) or
+          <codeph>https://<varname>impala-server-hostname</varname>:25000/catalog</codeph> (secure cluster).
+        </p>
+
+        <p>
+          This page displays a list of databases and associated tables recognized by this instance of
+          <cmdname>impalad</cmdname>. You can use this page to determine which database a table is in, check the
+          exact spelling of a database or table name, look for identical table names in multiple databases, and so
+          on.
+        </p>
+      </conbody>
+    </concept>
+
+    <concept id="webui_impalad_logs">
+
+      <title>Logs Page</title>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Logs"/>
+    </metadata>
+  </prolog>
+
+      <conbody>
+
+        <p>
+          By default, the <uicontrol>logs</uicontrol> page of the debug web UI is at
+          <codeph>http://<varname>impala-server-hostname</varname>:25000/logs</codeph> (non-secure cluster) or
+          <codeph>https://<varname>impala-server-hostname</varname>:25000/logs</codeph> (secure cluster).
+        </p>
+
+        <p>
+          This page shows the last portion of the <filepath>impalad.INFO</filepath> log file, the most detailed of
+          the info, warning, and error logs for the <cmdname>impalad</cmdname> daemon. You can refer here to see
+          the details of the most recent operations, whether the operations succeeded or encountered errors. This
+          central page can be more convenient than searching the filesystem for the log files, which might be
+          in different locations depending on whether or not the cluster is managed by Cloudera Manager.
+        </p>
+      </conbody>
+    </concept>
+
+    <concept id="webui_impalad_memz">
+
+      <title>Memz Page</title>
+
+      <conbody>
+
+        <p>
+          By default, the <uicontrol>memz</uicontrol> page of the debug web UI is at
+          <codeph>http://<varname>impala-server-hostname</varname>:25000/memz</codeph> (non-secure cluster) or
+          <codeph>https://<varname>impala-server-hostname</varname>:25000/memz</codeph> (secure cluster).
+        </p>
+
+        <p>
+          This page displays summary and detailed information about memory usage by the <cmdname>impalad</cmdname>
+          daemon. You can see the memory limit in effect for the node, and how much of that memory Impala is
+          currently using.
+        </p>
+      </conbody>
+    </concept>
+
+    <concept id="webui_impalad_metrics">
+
+      <title>Metrics Page</title>
+
+      <conbody>
+
+        <p>
+          By default, the <uicontrol>metrics</uicontrol> page of the debug web UI is at
+          <codeph>http://<varname>impala-server-hostname</varname>:25000/metrics</codeph> (non-secure cluster) or
+          <codeph>https://<varname>impala-server-hostname</varname>:25000/metrics</codeph> (secure cluster).
+        </p>
+
+        <p>
+          This page displays the current set of metrics: counters and flags representing various aspects of
+          <cmdname>impalad</cmdname> internal operation. For the meanings of these metrics, see
+          <xref href="http://www.cloudera.com/documentation/enterprise/latest/topics/cm_metrics_impala.html" scope="external" format="html">Impala
+          Metrics</xref> in the Cloudera Manager documentation.
+        </p>
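+
+        <p>
+          Because the values on this page represent a single point in time, one way to investigate
+          a problem is to capture the page before and after reproducing the issue and compare the
+          two copies. The following sketch shows one way to do that from the command line; the
+          hostname is a placeholder, and on a secure cluster you would use an
+          <codeph>https://</codeph> URL:
+        </p>
+
+        <codeblock>$ curl -s http://impalad-host.example.com:25000/metrics &gt; metrics_before.html
+# ... reproduce the problem, then take a second snapshot ...
+$ curl -s http://impalad-host.example.com:25000/metrics &gt; metrics_after.html
+$ diff metrics_before.html metrics_after.html</codeblock>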
+      </conbody>
+    </concept>
+
+    <concept id="webui_impalad_queries">
+
+      <title>Queries Page</title>
+
+      <conbody>
+
+        <p>
+          By default, the <uicontrol>queries</uicontrol> page of the debug web UI is at
+          <codeph>http://<varname>impala-server-hostname</varname>:25000/queries</codeph> (non-secure cluster) or
+          <codeph>https://<varname>impala-server-hostname</varname>:25000/queries</codeph> (secure cluster).
+        </p>
+
+        <p>
+          This page lists all currently running queries, plus any completed queries whose details still reside in
+          memory. The queries are listed in reverse chronological order, with the most recent at the top. (You can
+          control the amount of memory devoted to completed queries by specifying the
+          <codeph>--query_log_size</codeph> startup option for <cmdname>impalad</cmdname>.)
+        </p>
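+
+        <p>
+          For example, to keep details for more completed queries in memory than the default allows,
+          you might specify a larger value for this option. How you set <cmdname>impalad</cmdname>
+          startup options depends on how the cluster is managed; the following snippet is only an
+          illustration of the flag syntax, using 100 as an arbitrary value:
+        </p>
+
+        <codeblock># Illustration only: retain details for the last 100 completed queries.
+# On package-based installations not managed by Cloudera Manager, impalad startup
+# flags are typically appended to IMPALA_SERVER_ARGS in /etc/default/impala;
+# adapt this to your own configuration mechanism.
+IMPALA_SERVER_ARGS="${IMPALA_SERVER_ARGS} --query_log_size=100"</codeblock>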
+
+        <p>
+          On this page, you can see at a glance how many SQL statements are failing (<codeph>State</codeph> value
+          of <codeph>EXCEPTION</codeph>), how large the result sets are (<codeph># rows fetched</codeph>), and how
+          long each statement took (<codeph>Start Time</codeph> and <codeph>End Time</codeph>).
+        </p>
+
+        <p>
+          Each query has an associated link that displays the detailed query profile, which you can examine to
+          understand the performance characteristics of that query. See
+          <xref href="impala_explain_plan.xml#perf_profile"/> for details.
+        </p>
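+
+        <p>
+          The same kind of profile output is also available directly in
+          <cmdname>impala-shell</cmdname>, immediately after running a statement through that
+          coordinator. In the following sketch, the hostname and table name are placeholders:
+        </p>
+
+        <codeblock>$ impala-shell -i impalad-host.example.com
+[impalad-host.example.com:21000] &gt; SELECT COUNT(*) FROM sample_table;
+[impalad-host.example.com:21000] &gt; PROFILE;</codeblock>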
+      </conbody>
+    </concept>
+
+    <concept id="webui_impalad_sessions">
+
+      <title>Sessions Page</title>
+
+      <conbody>
+
+        <p>
+          By default, the <uicontrol>sessions</uicontrol> page of the debug web UI is at
+          <codeph>http://<varname>impala-server-hostname</varname>:25000/sessions</codeph> (non-secure cluster) or
+          <codeph>https://<varname>impala-server-hostname</varname>:25000/sessions</codeph> (secure cluster).
+        </p>
+
+        <p>
+          This page displays information about the sessions currently connected to this <cmdname>impalad</cmdname>
+          instance. For example, sessions could include connections from the <cmdname>impala-shell</cmdname>
+          command, JDBC or ODBC applications, or the Impala Query UI in the Hue web interface.
+        </p>
+      </conbody>
+    </concept>
+
+    <concept id="webui_impalad_threadz">
+
+      <title>Threadz Page</title>
+
+      <conbody>
+
+        <p>
+          By default, the <uicontrol>threadz</uicontrol> page of the debug web UI is at
+          <codeph>http://<varname>impala-server-hostname</varname>:25000/threadz</codeph> (non-secure cluster) or
+          <codeph>https://<varname>impala-server-hostname</varname>:25000/threadz</codeph> (secure cluster).
+        </p>
+
+        <p>
+          This page displays information about the threads used by this instance of <cmdname>impalad</cmdname>,
+          and shows which categories they are grouped into. Making use of this information requires substantial
+          knowledge about Impala internals.
+        </p>
+      </conbody>
+    </concept>
+
+    <concept id="webui_impalad_varz">
+
+      <title>Varz Page</title>
+
+      <conbody>
+
+        <p>
+          By default, the <uicontrol>varz</uicontrol> page of the debug web UI is at
+          <codeph>http://<varname>impala-server-hostname</varname>:25000/varz</codeph> (non-secure cluster) or
+          <codeph>https://<varname>impala-server-hostname</varname>:25000/varz</codeph> (secure cluster).
+        </p>
+
+        <p>
+          This page shows the configuration settings in effect when this instance of <cmdname>impalad</cmdname>
+          communicates with other Hadoop components such as HDFS and YARN. These settings are collected from a set
+          of configuration files; Impala might not actually make use of all settings.
+        </p>
+
+        <p>
+          The bottom of this page also lists all the command-line settings in effect for this instance of
+          <cmdname>impalad</cmdname>. See <xref href="impala_config_options.xml#config_options"/> for information
+          about modifying these values.
+        </p>
+      </conbody>
+    </concept>
+  </concept>
+
+  <concept audience="Cloudera" id="webui_statestored">
+
+    <title>Debug Web UI for statestored</title>
+
+    <conbody>
+
+      <p></p>
+    </conbody>
+
+    <concept audience="Cloudera" id="webui_statestored_disabling">
+
+      <title>Turning off the Web UI for statestored</title>
+
+      <conbody>
+
+        <p></p>
+      </conbody>
+    </concept>
+
+    <concept id="webui_statestored_main">
+
+      <title>Main Page</title>
+
+      <conbody>
+
+        <p>
+          By default, the main page of the debug web UI is at
+          <codeph>http://<varname>impala-server-hostname</varname>:25010/</codeph> (non-secure cluster) or
+          <codeph>https://<varname>impala-server-hostname</varname>:25010/</codeph> (secure cluster).
+        </p>
+
+        <p>
+          This page lists the version of the <cmdname>statestored</cmdname> daemon, plus basic hardware and software
+          information about the corresponding host, such as information about the CPU, memory, disks, and operating
+          system version.
+        </p>
+      </conbody>
+    </concept>
+
+    <concept id="webui_statestored_logs">
+
+      <title>Logs Page</title>
+
+      <conbody>
+
+        <p>
+          By default, the <uicontrol>logs</uicontrol> page of the debug web UI is at
+          <codeph>http://<varname>impala-server-hostname</varname>:25010/logs</codeph> (non-secure cluster) or
+          <codeph>https://<varname>impala-server-hostname</varname>:25010/logs</codeph> (secure cluster).
+        </p>
+
+        <p>
+          This page shows the last portion of the <filepath>statestored.INFO</filepath> log file, the most detailed
+          of the info, warning, and error logs for the <cmdname>statestored</cmdname> daemon. You can refer here to
+          see the details of the most recent operations, whether the operations succeeded or encountered errors.
+          This central page can be more convenient than searching the filesystem for the log files, which might be
+          in different locations depending on whether or not the cluster is managed by Cloudera Manager.
+        </p>
+      </conbody>
+    </concept>
+
+    <concept id="webui_statestored_memz">
+
+      <title>Memz Page</title>
+
+      <conbody>
+
+        <p>
+          By default, the <uicontrol>memz</uicontrol> page of the debug web UI is at
+          <codeph>http://<varname>impala-server-hostname</varname>:25010/memz</codeph> (non-secure cluster) or
+          <codeph>https://<varname>impala-server-hostname</varname>:25010/memz</codeph> (secure cluster).
+        </p>
+
+        <p>
+          This page displays summary and detailed information about memory usage by the <cmdname>statestored</cmdname>
+          daemon. You can see the memory limit in effect for the node, and how much of that memory Impala is
+          currently using.
+        </p>
+      </conbody>
+    </concept>
+
+    <concept id="webui_statestored_metrics">
+
+      <title>Metrics Page</title>
+
+      <conbody>
+
+        <p>
+          By default, the <uicontrol>metrics</uicontrol> page of the debug web UI is at
+          <codeph>http://<varname>impala-server-hostname</varname>:25010/metrics</codeph> (non-secure cluster) or
+          <codeph>https://<varname>impala-server-hostname</varname>:25010/metrics</codeph> (secure cluster).
+        </p>
+
+        <p>
+          This page displays the current set of metrics: counters and flags representing various aspects of
+          <cmdname>statestored</cmdname> internal operation. For the meanings of these metrics, see
+          <xref href="http://www.cloudera.com/documentation/enterprise/latest/topics/cm_metrics_impala.html" scope="external" format="html">Impala
+          Metrics</xref> in the Cloudera Manager documentation.
+        </p>
+      </conbody>
+    </concept>
+
+    <concept id="webui_statestored_subscribers">
+
+      <title>Subscribers Page</title>
+
+      <conbody>
+
+        <p>
+          By default, the <uicontrol>subscribers</uicontrol> page of the debug web UI is at
+          <codeph>http://<varname>impala-server-hostname</varname>:25010/subscribers</codeph> (non-secure cluster)
+          or <codeph>https://<varname>impala-server-hostname</varname>:25010/subscribers</codeph> (secure cluster).
+        </p>
+
+        <p>
+          This page displays information about the daemons that have registered with the statestore
+          as subscribers, such as the <cmdname>impalad</cmdname> instances in the cluster, along with
+          the topics that each subscriber receives updates for.
+        </p>
+      </conbody>
+    </concept>
+
+    <concept id="webui_statestored_threadz">
+
+      <title>Threadz Page</title>
+
+      <conbody>
+
+        <p>
+          By default, the <uicontrol>threadz</uicontrol> page of the debug web UI is at
+          <codeph>http://<varname>impala-server-hostname</varname>:25010/threadz</codeph> (non-secure cluster) or
+          <codeph>https://<varname>impala-server-hostname</varname>:25010/threadz</codeph> (secure cluster).
+        </p>
+
+        <p>
+          This page displays information about the threads used by this instance of <cmdname>statestored</cmdname>,
+          and shows which categories they are grouped into. Making use of this information requires substantial
+          knowledge about Impala internals.
+        </p>
+      </conbody>
+    </concept>
+
+    <concept id="webui_statestored_topics">
+
+      <title>Topics Page</title>
+
+      <conbody>
+
+        <p>
+          By default, the <uicontrol>topics</uicontrol> page of the debug web UI is at
+          <codeph>http://<varname>impala-server-hostname</varname>:25010/topics</codeph> (non-secure cluster) or
+          <codeph>https://<varname>impala-server-hostname</varname>:25010/topics</codeph> (secure cluster).
+        </p>
+
+        <p>
+          This page displays information about the topics that the statestore tracks and distributes
+          to its subscribers, such as cluster membership and catalog update information.
+        </p>
+      </conbody>
+    </concept>
+
+    <concept id="webui_statestored_varz">
+
+      <title>Varz Page</title>
+
+      <conbody>
+
+        <p>
+          By default, the <uicontrol>varz</uicontrol> page of the debug web UI is at
+          <codeph>http://<varname>impala-server-hostname</varname>:25010/varz</codeph> (non-secure cluster) or
+          <codeph>https://<varname>impala-server-hostname</varname>:25010/varz</codeph> (secure cluster).
+        </p>
+
+        <p>
+          This page shows the configuration settings in effect when this instance of <cmdname>statestored</cmdname>
+          communicates with other Hadoop components such as HDFS and YARN. These settings are collected from a set
+          of configuration files; Impala might not actually make use of all settings.
+        </p>
+
+        <p>
+          The bottom of this page also lists all the command-line settings in effect for this instance of
+          <cmdname>statestored</cmdname>. See <xref href="impala_config_options.xml#config_options"/> for information
+          about modifying these values.
+        </p>
+      </conbody>
+    </concept>
+  </concept>
+
+  <concept audience="Cloudera" id="webui_catalogd">
+
+    <title>Debug Web UI for catalogd</title>
+
+    <conbody>
+
+      <p></p>
+    </conbody>
+
+    <concept audience="Cloudera" id="webui_catalogd_disabling">
+
+      <title>Turning off the Web UI for catalogd</title>
+
+      <conbody>
+
+        <p></p>
+      </conbody>
+    </concept>
+
+    <concept id="webui_catalogd_main">
+
+      <title>Main Page</title>
+
+      <conbody>
+
+        <p>
+          By default, the main page of the debug web UI is at
+          <codeph>http://<varname>impala-server-hostname</varname>:25020/</codeph> (non-secure cluster) or
+          <codeph>https://<varname>impala-server-hostname</varname>:25020/</codeph> (secure cluster).
+        </p>
+
+        <p>
+          This page lists the version of the <cmdname>catalogd</cmdname> daemon, plus basic hardware and software
+          information about the corresponding host, such as information about the CPU, memory, disks, and operating
+          system version.
+        </p>
+      </conbody>
+    </concept>
+
+    <concept id="webui_catalogd_catalog">
+
+      <title>Catalog Page</title>
+
+      <conbody>
+
+        <p>
+          By default, the <uicontrol>catalog</uicontrol> page of the debug web UI is at
+          <codeph>http://<varname>impala-server-hostname</varname>:25020/catalog</codeph> (non-secure cluster) or
+          <codeph>https://<varname>impala-server-hostname</varname>:25020/catalog</codeph> (secure cluster).
+        </p>
+
+        <p>
+          This page displays a list of databases and associated tables recognized by this instance of
+          <cmdname>catalogd</cmdname>. You can use this page to determine which database a table is in, check the
+          exact spelling of a database or table name, look for identical table names in multiple databases, and so
+          on.
+        </p>
+      </conbody>
+    </concept>
+
+    <concept id="webui_catalogd_logs">
+
+      <title>Logs Page</title>
+
+      <conbody>
+
+        <p>
+          By default, the <uicontrol>logs</uicontrol> page of the debug web UI is at
+          <codeph>http://<varname>impala-server-hostname</varname>:25020/logs</codeph> (non-secure cluster) or
+          <codeph>https://<varname>impala-server-hostname</varname>:25020/logs</codeph> (secure cluster).
+        </p>
+
+        <p>
+          This page shows the last portion of the <filepath>catalogd.INFO</filepath> log file, the most detailed
+          of the info, warning, and error logs for the <cmdname>catalogd</cmdname> daemon. You can refer here to
+          see the details of the most recent operations, whether the operations succeeded or encountered errors.
+          This central page can be more convenient than searching the filesystem for the log files, which might be
+          in different locations depending on whether or not the cluster is managed by Cloudera Manager.
+        </p>
+      </conbody>
+    </concept>
+
+    <concept id="webui_catalogd_metrics">
+
+      <title>Metrics Page</title>
+
+      <conbody>
+
+        <p>
+          By default, the <uicontrol>metrics</uicontrol> page of the debug web UI is at
+          <codeph>http://<varname>impala-server-hostname</varname>:25020/metrics</codeph> (non-secure cluster) or
+          <codeph>https://<varname>impala-server-hostname</varname>:25020/metrics</codeph> (secure cluster).
+        </p>
+
+        <p>
+          This page displays the current set of metrics: counters and flags representing various aspects of
+          <cmdname>catalogd</cmdname> internal operation. For the meanings of these metrics, see
+          <xref href="http://www.cloudera.com/documentation/enterprise/latest/topics/cm_metrics_impala.html" scope="external" format="html">Impala
+          Metrics</xref> in the Cloudera Manager documentation.
+        </p>
+      </conbody>
+    </concept>
+
+    <concept id="webui_catalogd_varz">
+
+      <title>Varz Page</title>
+
+      <conbody>
+
+        <p>
+          By default, the <uicontrol>varz</uicontrol> page of the debug web UI is at
+          <codeph>http://<varname>impala-server-hostname</varname>:25020/varz</codeph> (non-secure cluster) or
+          <codeph>https://<varname>impala-server-hostname</varname>:25020/varz</codeph> (secure cluster).
+        </p>
+
+        <p>
+          This page shows the configuration settings in effect when this instance of <cmdname>catalogd</cmdname>
+          communicates with other Hadoop components such as HDFS and YARN. These settings are collected from a set
+          of configuration files; Impala might not actually make use of all settings.
+        </p>
+
+        <p>
+          The bottom of this page also lists all the command-line settings in effect for this instance of
+          <cmdname>catalogd</cmdname>. See <xref href="impala_config_options.xml#config_options"/> for information
+          about modifying these values.
+        </p>
+      </conbody>
+    </concept>
+  </concept>
+</concept>