You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by jo...@apache.org on 2019/12/10 23:45:47 UTC

[impala] 01/02: [DOCS] Update impala_proxy.xml with the latest info

This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 4687885b808c9856e792f2439435dbcf2bedf7d1
Author: Alex Rodoni <ar...@cloudera.com>
AuthorDate: Wed Dec 4 11:52:05 2019 -0800

    [DOCS] Update impala_proxy.xml with the latest info
    
    Change-Id: Ia9d80e21abb385704eea863d221e333441af9a39
    Reviewed-on: http://gerrit.cloudera.org:8080/14857
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
    Reviewed-by: Balazs Jeszenszky <je...@gmail.com>
    Reviewed-by: Vincent Tran <vt...@cloudera.com>
    Reviewed-by: Alex Rodoni <ar...@cloudera.com>
---
 docs/topics/impala_jdbc.xml  | 424 +++++++++++++++----------------------------
 docs/topics/impala_proxy.xml | 164 ++++++++++-------
 2 files changed, 240 insertions(+), 348 deletions(-)

diff --git a/docs/topics/impala_jdbc.xml b/docs/topics/impala_jdbc.xml
index 8dc3707..0711f9a 100644
--- a/docs/topics/impala_jdbc.xml
+++ b/docs/topics/impala_jdbc.xml
@@ -19,9 +19,7 @@ under the License.
 -->
 <!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
 <concept id="impala_jdbc">
-
   <title id="jdbc">Configuring Impala to Work with JDBC</title>
-
   <prolog>
     <metadata>
       <data name="Category" value="Impala"/>
@@ -34,181 +32,106 @@ under the License.
       <data name="Category" value="Developers"/>
     </metadata>
   </prolog>
-
   <conbody>
-
-    <p>
-      <indexterm audience="hidden">JDBC</indexterm>
-      Impala supports the standard JDBC interface, allowing access from commercial Business
-      Intelligence tools and custom software written in Java or other programming languages. The
-      JDBC driver allows you to access Impala from a Java program that you write, or a Business
-      Intelligence or similar tool that uses JDBC to communicate with various database products.
-    </p>
-
-    <p>
-      Setting up a JDBC connection to Impala involves the following steps:
-    </p>
-
+    <p> Impala supports the standard JDBC interface, allowing access from
+      commercial Business Intelligence tools and custom software written in Java
+      or other programming languages. The JDBC driver allows you to access
+      Impala from a Java program that you write, or a Business Intelligence or
+      similar tool that uses JDBC to communicate with various database products. </p>
+    <p> Setting up a JDBC connection to Impala involves the following steps: </p>
     <ul>
-      <li>
-        Verifying the communication port where the Impala daemons in your cluster are listening
-        for incoming JDBC requests.
-      </li>
-
-      <li>
-        Installing the JDBC driver on every system that runs the JDBC-enabled application.
-      </li>
-
-      <li>
-        Specifying a connection string for the JDBC application to access one of the servers
-        running the <cmdname>impalad</cmdname> daemon, with the appropriate security settings.
-      </li>
+      <li> Verifying the communication port where the Impala daemons in your
+        cluster are listening for incoming JDBC requests. </li>
+      <li> Installing the JDBC driver on every system that runs the JDBC-enabled
+        application. </li>
+      <li> Specifying a connection string for the JDBC application to access one
+        of the servers running the <cmdname>impalad</cmdname> daemon, with the
+        appropriate security settings. </li>
     </ul>
-
     <p outputclass="toc inpage"/>
-
   </conbody>
-
   <concept id="jdbc_port">
-
     <title>Configuring the JDBC Port</title>
-
     <conbody>
-
-      <p>
-        The following are the default ports that Impala server accepts JDBC connections through:
-        <simpletable frame="all"
+      <p> The following are the default ports that Impala server accepts JDBC
+        connections through: <simpletable frame="all"
           relcolwidth="1.0* 1.03* 2.38*" id="simpletable_tr2_gnt_43b">
-
           <strow>
-
             <stentry><b>Protocol</b>
-
             </stentry>
-
             <stentry><b>Default Port</b>
-
             </stentry>
-
             <stentry><b>Flag to Specify an Alternate Port</b>
-
             </stentry>
-
           </strow>
-
           <strow>
-
             <stentry>HTTP</stentry>
-
             <stentry>28000</stentry>
-
             <stentry><codeph>&#8209;&#8209;hs2_http_port</codeph>
-
             </stentry>
-
           </strow>
-
           <strow>
-
             <stentry>Binary TCP</stentry>
-
             <stentry>21050</stentry>
-
             <stentry><codeph>&#8209;&#8209;hs2_port</codeph>
-
             </stentry>
-
           </strow>
-
         </simpletable>
       </p>
-
-      <p>
-        Make sure the port for the protocol you are using is available for communication with
-        clients, for example, that it is not blocked by firewall software.
-      </p>
-
-      <p>
-        If your JDBC client software connects to a different port, specify that alternative port
-        number with the flag in the above table when starting the <codeph>impalad</codeph>.
-      </p>
-
+      <p> Make sure the port for the protocol you are using is available for
+        communication with clients, for example, that it is not blocked by
+        firewall software. </p>
+      <p> If your JDBC client software connects to a different port, specify
+        that alternative port number with the flag in the above table when
+        starting the <codeph>impalad</codeph>. </p>
     </conbody>
-
   </concept>
-
   <concept id="jdbc_driver_choice">
-
     <title>Choosing the JDBC Driver</title>
-
     <prolog>
       <metadata>
         <data name="Category" value="Planning"/>
       </metadata>
     </prolog>
-
     <conbody>
-
-      <p>
-        In Impala 2.0 and later, you can use the Hive 0.13 JDBC driver. If you are already using
-        JDBC applications with an earlier Impala release, you should update your JDBC driver,
-        because the Hive 0.12 driver that was formerly the only choice is not compatible with
-        Impala 2.0 and later.
-      </p>
-
-      <p>
-        The Hive JDBC driver provides a substantial speed increase for JDBC applications with
-        Impala 2.0 and higher, for queries that return large result sets.
-      </p>
-
+      <p> In Impala 2.0 and later, you can use the Hive 0.13 or higher JDBC
+        driver. If you are already using JDBC applications with an earlier
+        Impala release, you should update your JDBC driver, because the Hive
+        0.12 driver that was formerly the only choice is not compatible with
+        Impala 2.0 and later. </p>
+      <p> The Hive JDBC driver provides a substantial speed increase for JDBC
+        applications with Impala 2.0 and higher, for queries that return large
+        result sets. </p>
     </conbody>
-
   </concept>
-
   <concept id="jdbc_setup">
-
     <title>Enabling Impala JDBC Support on Client Systems</title>
-
     <prolog>
       <metadata>
         <data name="Category" value="Installing"/>
       </metadata>
     </prolog>
-
     <conbody>
-
       <section id="install_hive_driver">
-
         <title>Using the Hive JDBC Driver</title>
-
-        <p>
-          You install the Hive JDBC driver (<codeph>hive-jdbc</codeph> package) through the
-          Linux package manager, on hosts within the cluster. The driver consists of several
-          Java JAR files. The same driver can be used by Impala and Hive.
-        </p>
-
-        <p>
-          To get the JAR files, install the Hive JDBC driver on each host in the cluster that
-          will run JDBC applications.
-<!-- TODO: Find a URL to point to for instructions and downloads -->
-        </p>
-
-        <note>
-          The latest JDBC driver, corresponding to Hive 0.13, provides substantial performance
-          improvements for Impala queries that return large result sets. Impala 2.0 and later
-          are compatible with the Hive 0.13 driver. If you already have an older JDBC driver
-          installed, and are running Impala 2.0 or higher, consider upgrading to the latest Hive
-          JDBC driver for best performance with JDBC applications.
-        </note>
-
-        <p>
-          If you are using JDBC-enabled applications on hosts outside the cluster, you cannot
-          use the the same install procedure on the hosts. Install the JDBC driver on at least
-          one cluster host using the preceding procedure. Then download the JAR files to each
-          client machine that will use JDBC with Impala:
-        </p>
-
-<codeblock>commons-logging-X.X.X.jar
+        <p> You install the Hive JDBC driver (<codeph>hive-jdbc</codeph>
+          package) through the Linux package manager, on hosts within the
+          cluster. The driver consists of several JAR files. The same driver can
+          be used by Impala and Hive. </p>
+        <p> To get the JAR files, install the Hive JDBC driver on each host in
+          the cluster that will run JDBC applications.  </p>
+        <note> The latest JDBC driver, corresponding to Hive 0.13, provides
+          substantial performance improvements for Impala queries that return
+          large result sets. Impala 2.0 and later are compatible with the Hive
+          0.13 driver. If you already have an older JDBC driver installed, and
+          are running Impala 2.0 or higher, consider upgrading to the latest
+          Hive JDBC driver for best performance with JDBC applications. </note>
+        <p> If you are using JDBC-enabled applications on hosts outside the
+          cluster, you cannot use the the same install procedure on the hosts.
+          Install the JDBC driver on at least one cluster host using the
+          preceding procedure. Then download the JAR files to each client
+          machine that will use JDBC with Impala: </p>
+        <codeblock>commons-logging-X.X.X.jar
   hadoop-common.jar
   hive-common-X.XX.X.jar
   hive-jdbc-X.XX.X.jar
@@ -222,185 +145,136 @@ under the License.
   slf4j-api-X.X.X.jar
   slf4j-logXjXX-X.X.X.jar
   </codeblock>
-
         <p>
-          <b>To enable JDBC support for Impala on the system where you run the JDBC
-          application:</b>
+          <b>To enable JDBC support for Impala on the system where you run the
+            JDBC application:</b>
         </p>
-
         <ol>
-          <li>
-            Download the JAR files listed above to each client machine.
-            <note>
-              For Maven users, see <xref keyref="Impala-JDBC-Example">this sample github
-              page</xref> for an example of the dependencies you could add to a
-              <codeph>pom</codeph> file instead of downloading the individual JARs.
-            </note>
+          <li> Download the JAR files listed above to each client machine.
+              <note> For Maven users, see <xref keyref="Impala-JDBC-Example"
+                >this sample github page</xref> for an example of the
+              dependencies you could add to a <codeph>pom</codeph> file instead
+              of downloading the individual JARs. </note>
           </li>
-
-          <li>
-            Store the JAR files in a location of your choosing, ideally a directory already
-            referenced in your <codeph>CLASSPATH</codeph> setting. For example:
-            <ul>
-              <li>
-                On Linux, you might use a location such as <codeph>/opt/jars/</codeph>.
-              </li>
-
-              <li>
-                On Windows, you might use a subdirectory underneath <filepath>C:\Program
-                Files</filepath>.
-              </li>
+          <li> Store the JAR files in a location of your choosing, ideally a
+            directory already referenced in your <codeph>CLASSPATH</codeph>
+            setting. For example: <ul>
+              <li> On Linux, you might use a location such as
+                  <codeph>/opt/jars/</codeph>. </li>
+              <li> On Windows, you might use a subdirectory underneath
+                  <filepath>C:\Program Files</filepath>. </li>
             </ul>
           </li>
-
-          <li>
-            To successfully load the Impala JDBC driver, client programs must be able to locate
-            the associated JAR files. This often means setting the <codeph>CLASSPATH</codeph>
-            for the client process to include the JARs. Consult the documentation for your JDBC
-            client for more details on how to install new JDBC drivers, but some examples of how
-            to set <codeph>CLASSPATH</codeph> variables include:
-            <ul>
-              <li>
-                On Linux, if you extracted the JARs to <codeph>/opt/jars/</codeph>, you might
-                issue the following command to prepend the JAR files path to an existing
-                classpath:
-<codeblock>export CLASSPATH=/opt/jars/*.jar:$CLASSPATH</codeblock>
+          <li> To successfully load the Impala JDBC driver, client programs must
+            be able to locate the associated JAR files. This often means setting
+            the <codeph>CLASSPATH</codeph> for the client process to include the
+            JARs. Consult the documentation for your JDBC client for more
+            details on how to install new JDBC drivers, but some examples of how
+            to set <codeph>CLASSPATH</codeph> variables include: <ul>
+              <li> On Linux, if you extracted the JARs to
+                  <codeph>/opt/jars/</codeph>, you might issue the following
+                command to prepend the JAR files path to an existing classpath:
+                <codeblock>export CLASSPATH=/opt/jars/*.jar:$CLASSPATH</codeblock>
               </li>
-
-              <li>
-                On Windows, use the <b>System Properties</b> control panel item to modify the
-                <b>Environment Variables</b> for your system. Modify the environment variables
-                to include the path to which you extracted the files.
-                <note>
-                  If the existing <codeph>CLASSPATH</codeph> on your client machine refers to
-                  some older version of the Hive JARs, ensure that the new JARs are the first
-                  ones listed. Either put the new JAR files earlier in the listings, or delete
-                  the other references to Hive JAR files.
-                </note>
+              <li> On Windows, use the <b>System Properties</b> control panel
+                item to modify the <b>Environment Variables</b> for your system.
+                Modify the environment variables to include the path to which
+                you extracted the files. <note> If the existing
+                    <codeph>CLASSPATH</codeph> on your client machine refers to
+                  some older version of the Hive JARs, ensure that the new JARs
+                  are the first ones listed. Either put the new JAR files
+                  earlier in the listings, or delete the other references to
+                  Hive JAR files. </note>
               </li>
             </ul>
           </li>
         </ol>
-
       </section>
-
     </conbody>
-
   </concept>
-
   <concept id="jdbc_connect">
-
     <title>Establishing JDBC Connections</title>
-
     <conbody>
-
-      <p>
-        The JDBC driver class depends on which driver you select.
-      </p>
-
+      <p> The JDBC driver class depends on which driver you select. </p>
       <note conref="../shared/impala_common.xml#common/proxy_jdbc_caveat"/>
-
       <section id="class_hive_driver">
-
         <title>Using the Hive JDBC Driver</title>
-
-        <p>
-          For example, with the Hive JDBC driver, the class name is
-          <codeph>org.apache.hive.jdbc.HiveDriver</codeph>. Once you have configured Impala to
-          work with JDBC, you can establish connections between the two. To do so for a cluster
-          that does not use Kerberos authentication, use a connection string of the form
-          <codeph>jdbc:hive2://<varname>host</varname>:<varname>port</varname>/;auth=noSasl</codeph>.
-<!--
+        <p> For example, with the Hive JDBC driver, the class name is
+            <codeph>org.apache.hive.jdbc.HiveDriver</codeph>. Once you have
+          configured Impala to work with JDBC, you can establish connections
+          between the two. To do so for a cluster that does not use Kerberos
+          authentication, use a connection string of the form
+              <codeph>jdbc:hive2://<varname>host</varname>:<varname>port</varname>/;auth=noSasl</codeph>.
+          <!--
         Include the <codeph>auth=noSasl</codeph> argument
         only when connecting to a non-Kerberos cluster; if Kerberos is enabled, omit the <codeph>auth</codeph> argument.
 -->
-          For example, you might use:
-        </p>
-
-<codeblock>jdbc:hive2://myhost.example.com:21050/;auth=noSasl</codeblock>
-
-        <p>
-          To connect to an instance of Impala that requires Kerberos authentication, use a
-          connection string of the form
-          <codeph>jdbc:hive2://<varname>host</varname>:<varname>port</varname>/;principal=<varname>principal_name</varname></codeph>.
-          The principal must be the same user principal you used when starting Impala. For
-          example, you might use:
+          For example, you might use: </p>
+        <codeblock>jdbc:hive2://myhost.example.com:21050/;auth=noSasl</codeblock>
+        <p> To connect to an instance of Impala that requires Kerberos
+          authentication, use a connection string of the form
+              <codeph>jdbc:hive2://<varname>host</varname>:<varname>port</varname>/;principal=<varname>principal_name</varname></codeph>.
+          The principal must be the same user principal you used when starting
+          Impala. For example, you might use: </p>
+        <codeblock>jdbc:hive2://myhost.example.com:21050/;principal=impala/myhost.example.com@H2.EXAMPLE.COM</codeblock>
+        <p> To connect to an instance of Impala that requires LDAP
+          authentication, use a connection string of the form
+              <codeph>jdbc:hive2://<varname>host</varname>:<varname>port</varname>/<varname>db_name</varname>;user=<varname>ldap_userid</varname>;password=<varname>ldap_password</varname></codeph>.
+          For example, you might use: </p>
+        <codeblock>jdbc:hive2://myhost.example.com:21050/test_db;user=fred;password=xyz123</codeblock>
+        <p> To connect to an instance of Impala over HTTP, specify the HTTP
+          port, 28000 by default, and <codeph>transportMode=http</codeph> in the
+          connection string. For example:
+          <codeblock>jdbc:hive2://myhost.example.com:28000/;transportMode=http</codeblock>
         </p>
-
-<codeblock>jdbc:hive2://myhost.example.com:21050/;principal=impala/myhost.example.com@H2.EXAMPLE.COM</codeblock>
-
-        <p>
-          To connect to an instance of Impala that requires LDAP authentication, use a
-          connection string of the form
-          <codeph>jdbc:hive2://<varname>host</varname>:<varname>port</varname>/<varname>db_name</varname>;user=<varname>ldap_userid</varname>;password=<varname>ldap_password</varname></codeph>.
-          For example, you might use:
-        </p>
-
-<codeblock>jdbc:hive2://myhost.example.com:21050/test_db;user=fred;password=xyz123</codeblock>
-
-        <p>
-          To connect to an instance of Impala over HTTP, specify the HTTP port, 28000 by
-          default, and <codeph>transportMode=http</codeph> in the connection string. For
-          example:
-<codeblock>jdbc:hive2://myhost.example.com:28000/;transportMode=http</codeblock>
-        </p>
-
         <note>
-          <p conref="../shared/impala_common.xml#common/hive_jdbc_ssl_kerberos_caveat"/>
+          <p
+            conref="../shared/impala_common.xml#common/hive_jdbc_ssl_kerberos_caveat"
+          />
         </note>
-
       </section>
-
     </conbody>
-
   </concept>
-
   <concept rev="2.3.0" id="jdbc_odbc_notes">
-
-    <title>Notes about JDBC and ODBC Interaction with Impala SQL Features</title>
-
+    <title>Notes about JDBC and ODBC Interaction with Impala SQL
+      Features</title>
     <conbody>
-
-      <p>
-        Most Impala SQL features work equivalently through the <cmdname>impala-shell</cmdname>
-        interpreter of the JDBC or ODBC APIs. The following are some exceptions to keep in mind
-        when switching between the interactive shell and applications using the APIs:
-      </p>
-
+      <p> Most Impala SQL features work equivalently through the
+          <cmdname>impala-shell</cmdname> interpreter of the JDBC or ODBC APIs.
+        The following are some exceptions to keep in mind when switching between
+        the interactive shell and applications using the APIs: </p>
       <ul>
         <li>
           <p conref="../shared/impala_common.xml#common/complex_types_blurb"/>
           <ul>
             <li>
-              <p>
-                Queries involving the complex types (<codeph>ARRAY</codeph>,
-                <codeph>STRUCT</codeph>, and <codeph>MAP</codeph>) require notation that might
-                not be available in all levels of JDBC and ODBC drivers. If you have trouble
-                querying such a table due to the driver level or inability to edit the queries
-                used by the application, you can create a view that exposes a <q>flattened</q>
-                version of the complex columns and point the application at the view. See
-                <xref href="impala_complex_types.xml#complex_types"/> for details.
+              <p> Queries involving the complex types (<codeph>ARRAY</codeph>,
+                  <codeph>STRUCT</codeph>, and <codeph>MAP</codeph>) require
+                notation that might not be available in all levels of JDBC and
+                ODBC drivers. If you have trouble querying such a table due to
+                the driver level or inability to edit the queries used by the
+                application, you can create a view that exposes a
+                  <q>flattened</q> version of the complex columns and point the
+                application at the view. See <xref
+                  href="impala_complex_types.xml#complex_types"/> for details.
               </p>
             </li>
-
             <li>
-              <p>
-                The complex types available in <keyword keyref="impala23_full"/> and higher are
-                supported by the JDBC <codeph>getColumns()</codeph> API. Both
-                <codeph>MAP</codeph> and <codeph>ARRAY</codeph> are reported as the JDBC SQL
-                Type <codeph>ARRAY</codeph>, because this is the closest matching Java SQL type.
-                This behavior is consistent with Hive. <codeph>STRUCT</codeph> types are
-                reported as the JDBC SQL Type <codeph>STRUCT</codeph>.
-              </p>
-
-              <p>
-                To be consistent with Hive's behavior, the TYPE_NAME field is populated with the
-                primitive type name for scalar types, and with the full <codeph>toSql()</codeph>
-                for complex types. The resulting type names are somewhat inconsistent, because
-                nested types are printed differently than top-level types. For example, the
-                following list shows how <codeph>toSQL()</codeph> for Impala types are
-                translated to <codeph>TYPE_NAME</codeph> values:
-<codeblock><![CDATA[DECIMAL(10,10)         becomes  DECIMAL
+              <p> The complex types available in <keyword keyref="impala23_full"
+                /> and higher are supported by the JDBC
+                  <codeph>getColumns()</codeph> API. Both <codeph>MAP</codeph>
+                and <codeph>ARRAY</codeph> are reported as the JDBC SQL Type
+                  <codeph>ARRAY</codeph>, because this is the closest matching
+                Java SQL type. This behavior is consistent with Hive.
+                  <codeph>STRUCT</codeph> types are reported as the JDBC SQL
+                Type <codeph>STRUCT</codeph>. </p>
+              <p> To be consistent with Hive's behavior, the TYPE_NAME field is
+                populated with the primitive type name for scalar types, and
+                with the full <codeph>toSql()</codeph> for complex types. The
+                resulting type names are somewhat inconsistent, because nested
+                types are printed differently than top-level types. For example,
+                the following list shows how <codeph>toSQL()</codeph> for Impala
+                types are translated to <codeph>TYPE_NAME</codeph> values: <codeblock><![CDATA[DECIMAL(10,10)         becomes  DECIMAL
 CHAR(10)               becomes  CHAR
 VARCHAR(10)            becomes  VARCHAR
 ARRAY<DECIMAL(10,10)>  becomes  ARRAY<DECIMAL(10,10)>
@@ -413,27 +287,17 @@ ARRAY<VARCHAR(10)>     becomes  ARRAY<VARCHAR(10)>
           </ul>
         </li>
       </ul>
-
     </conbody>
-
   </concept>
-
   <concept id="jdbc_kudu">
-
     <title>Kudu Considerations for DML Statements</title>
-
     <conbody>
-
-      <p>
-        Currently, Impala <codeph>INSERT</codeph>, <codeph>UPDATE</codeph>, or other DML
-        statements issued through the JDBC interface against a Kudu table do not return JDBC
-        error codes for conditions such as duplicate primary key columns. Therefore, for
-        applications that issue a high volume of DML statements, prefer to use the Kudu Java API
-        directly rather than a JDBC application.
-      </p>
-
+      <p> Currently, Impala <codeph>INSERT</codeph>, <codeph>UPDATE</codeph>, or
+        other DML statements issued through the JDBC interface against a Kudu
+        table do not return JDBC error codes for conditions such as duplicate
+        primary key columns. Therefore, for applications that issue a high
+        volume of DML statements, prefer to use the Kudu Java API directly
+        rather than a JDBC application. </p>
     </conbody>
-
   </concept>
-
 </concept>
diff --git a/docs/topics/impala_proxy.xml b/docs/topics/impala_proxy.xml
index 453c485..ca67885 100644
--- a/docs/topics/impala_proxy.xml
+++ b/docs/topics/impala_proxy.xml
@@ -48,9 +48,7 @@ under the License.
     </p>
 
     <p>
-      Currently, the Impala statestore mechanism does not include such proxying and
-      load-balancing features. Set up a software package of your choice to perform these
-      functions.
+      Set up a software package of your choice to perform these functions.
     </p>
 
     <note>
@@ -107,9 +105,7 @@ under the License.
         <li>
           Select and download the load-balancing proxy software or other load-balancing hardware
           appliance. It should only need to be installed and configured on a single host,
-          typically on an edge node. Pick a host other than the DataNodes where
-          <cmdname>impalad</cmdname> is running, because the intention is to protect against the
-          possibility of one or more of these DataNodes becoming unavailable.
+          typically on an edge node.
         </li>
 
         <li>
@@ -117,13 +113,15 @@ under the License.
           particular:
           <ul>
             <li>
-              Set up a port that the load balancer will listen on to relay Impala requests back
-              and forth.
+              To relay Impala requests back and forth, set up a port that the load balancer will
+              listen on.
             </li>
 
             <li>
-              See <xref href="#proxy_balancing" format="dita"/> for load balancing algorithm
-              options.
+              Select a load balancing algorithm. See
+              <xref
+                href="#proxy_balancing" format="dita"/> for load balancing
+              algorithm options.
             </li>
 
             <li>
@@ -136,7 +134,7 @@ under the License.
 
         <li>
           If you are using Hue or JDBC-based applications, you typically set up load balancing
-          for both ports 21000 and 21050, because these client applications connect through port
+          for both ports 21000 and 21050 because these client applications connect through port
           21050 while the <cmdname>impala-shell</cmdname> command connects through port 21000.
           See <xref href="impala_ports.xml#ports"/> for when to use port 21000, 21050, or
           another value depending on what type of connections you are load balancing.
@@ -149,8 +147,8 @@ under the License.
 
         <li>
           For any scripts, jobs, or configuration settings for applications that formerly
-          connected to a specific DataNode to run Impala SQL statements, change the connection
-          information (such as the <codeph>-i</codeph> option in
+          connected to a specific <cmdname>impalad</cmdname> to run Impala SQL statements,
+          change the connection information (such as the <codeph>-i</codeph> option in
           <cmdname>impala-shell</cmdname>) to point to the load balancer instead.
         </li>
       </ol>
@@ -231,10 +229,8 @@ under the License.
           </dt>
 
           <dd>
-            <p>
-              Distributes connections to all coordinator nodes. Typically not recommended for
-              Impala.
-            </p>
+            Distributes connections to all coordinator nodes. Typically not recommended for
+            Impala.
           </dd>
 
         </dlentry>
@@ -267,8 +263,7 @@ under the License.
 
       <p>
         In a cluster using Kerberos, applications check host credentials to verify that the host
-        they are connecting to is the same one that is actually processing the request, to
-        prevent man-in-the-middle attacks.
+        they are connecting to is the same one that is actually processing the request.
       </p>
 
       <p>
@@ -278,13 +273,12 @@ under the License.
       </p>
 
       <p>
-        In <keyword keyref="impala212_full">Impala 2.12</keyword> and higher, if you enable a
-        proxy server in a Kerberized cluster, users have an option to connect to Impala daemons
-        directly from <cmdname>impala-shell</cmdname> using the <codeph>-b</codeph> /
-        <codeph>--kerberos_host_fqdn</codeph> option when you start
-        <cmdname>impala-shell</cmdname>. This option can be used for testing or troubleshooting
-        purposes, but not recommended for live production environments as it defeats the purpose
-        of a load balancer/proxy.
+        In <keyword keyref="impala212_full">Impala 2.12</keyword> and higher versions, when you
+        enable a proxy server in a Kerberized cluster, users have an option to connect to Impala
+        daemons directly from <cmdname>impala-shell</cmdname> using the <codeph>-b</codeph> /
+        <codeph>--kerberos_host_fqdn</codeph> <cmdname>impala-shell</cmdname> flag. This option
+        can be used for testing or troubleshooting purposes, but not recommended for live
+        production environments as it defeats the purpose of a load balancer/proxy.
       </p>
 
       <p>
@@ -305,8 +299,7 @@ impala-shell -i impalad-1.mydomain.com -k -b loadbalancer-1.mydomain.com
       </p>
 
       <p>
-        To clarify that the load-balancing proxy server is legitimate, perform these extra
-        Kerberos setup steps:
+        To validate the load-balancing proxy server, perform these extra Kerberos setup steps:
       </p>
 
       <ol>
@@ -321,26 +314,29 @@ impala-shell -i impalad-1.mydomain.com -k -b loadbalancer-1.mydomain.com
           Choose the host you will use for the proxy server. Based on the Kerberos setup
           procedure, it should already have an entry
           <codeph>impala/<varname>proxy_host</varname>@<varname>realm</varname></codeph> in its
-          keytab. If not, go back over the initial Kerberos configuration steps for the keytab
-          on each host running the <cmdname>impalad</cmdname> daemon.
+          <filepath>keytab</filepath>. If not, go back over the initial Kerberos configuration
+          steps for the <filepath>keytab</filepath> on each host running the
+          <cmdname>impalad</cmdname> daemon.
         </li>
 
         <li>
-          Copy the keytab file from the proxy host to all other hosts in the cluster that run
-          the <cmdname>impalad</cmdname> daemon. (For optimal performance,
-          <cmdname>impalad</cmdname> should be running on all DataNodes in the cluster.) Put the
-          keytab file in a secure location on each of these other hosts.
+          Copy the <filepath>keytab</filepath> file from the proxy host to all other hosts in
+          the cluster that run the <cmdname>impalad</cmdname> daemon. Put the
+          <filepath>keytab</filepath> file in a secure location on each of these other hosts.
         </li>
 
         <li>
           Add an entry
           <codeph>impala/<varname>actual_hostname</varname>@<varname>realm</varname></codeph> to
-          the keytab on each host running the <cmdname>impalad</cmdname> daemon.
+          the <filepath>keytab</filepath> on each host running the <cmdname>impalad</cmdname>
+          daemon.
         </li>
 
         <li>
-          For each impalad node, merge the existing keytab with the proxy’s keytab using
-          <cmdname>ktutil</cmdname>, producing a new keytab file. For example:
+          For each <cmdname>impalad</cmdname> node, merge the existing
+          <filepath>keytab</filepath> with the proxy’s <filepath>keytab</filepath> using
+          <cmdname>ktutil</cmdname>, producing a new <filepath>keytab</filepath> file. For
+          example:
 <codeblock>$ ktutil
   ktutil: read_kt proxy.keytab
   ktutil: read_kt impala.keytab
@@ -349,44 +345,39 @@ impala-shell -i impalad-1.mydomain.com -k -b loadbalancer-1.mydomain.com
         </li>
 
         <li>
-          To verify that the keytabs are merged, run the command:
+          To verify that the <filepath>keytabs</filepath> are merged, run the command:
 <codeblock>
 klist -k <varname>keytabfile</varname>
 </codeblock>
-          which lists the credentials for both <codeph>principal</codeph> and
+          The command lists the credentials for both <codeph>principal</codeph> and
           <codeph>be_principal</codeph> on all nodes.
         </li>
 
         <li>
-          Make sure that the <codeph>impala</codeph> user has permission to read this merged
-          keytab file.
+          Make sure that the <codeph>impala</codeph> user has the permission to read this merged
+          <filepath>keytab</filepath> file.
         </li>
 
         <li>
-          Change the following configuration settings for each host in the cluster that
-          participates in the load balancing:
-          <ul>
-            <li>
-              In the <cmdname>impalad</cmdname> option definition, add:
+          For each coordinator <codeph>impalad</codeph> host in the cluster that participates in
+          the load balancing, add the following configuration options to receive client
+          connections coming through the load balancer proxy server:
 <codeblock>
---principal=impala/<i>proxy_host@realm</i>
-  --be_principal=impala/<i>actual_host@realm</i>
-  --keytab_file=<i>path_to_merged_keytab</i>
+--principal=impala/<varname>proxy_host@realm</varname>
+  --be_principal=impala/<varname>actual_host@realm</varname>
+  --keytab_file=<varname>path_to_merged_keytab</varname>
 </codeblock>
-              <note>
-                Every host has different <codeph>--be_principal</codeph> because the actual
-                hostname is different on each host. Specify the fully qualified domain name
-                (FQDN) for the proxy host, not the IP address. Use the exact FQDN as returned by
-                a reverse DNS lookup for the associated IP address.
-              </note>
-            </li>
+          <p>
+            The <codeph>--principal</codeph> setting prevents a client from connecting to a
+            coordinator <codeph>impalad</codeph> using a principal other than the one specified.
+          </p>
 
-            <li>
-              Modify the startup options. See
-              <xref href="impala_config_options.xml#config_options"/> for the procedure to
-              modify the startup options.
-            </li>
-          </ul>
+          <note>
+            Every host has different <codeph>--be_principal</codeph> because the actual host
+            name is different on each host. Specify the fully qualified domain name (FQDN) for
+            the proxy host, not the IP address. Use the exact FQDN as returned by a reverse DNS
+            lookup for the associated IP address.
+          </note>
         </li>
 
         <li>
@@ -396,6 +387,40 @@ klist -k <varname>keytabfile</varname>
         </li>
       </ol>
 
+      <section id="section_fjz_mfn_yjb">
+
+        <title>Client Connection to Proxy Server in Kerberized Clusters</title>
+
+        <p>
+          When a client connects to Impala, the service principal specified by the client must
+          match the <codeph>--principal</codeph> setting of the Impala proxy server, and the
+          client should connect to the proxy server port.
+        </p>
+
+        <p>
+          In <filepath>hue.ini</filepath>, set the following to configure Hue to
+          automatically connect to the proxy server:
+        </p>
+
+<codeblock>[impala]
+server_host=<varname>proxy_host</varname>
+impala_principal=impala/<varname>proxy_host</varname></codeblock>
+
+        <p>
+          The following are the JDBC connection string formats when connecting through the load
+          balancer with the load balancer's host name in the principal:
+        </p>
+
+<codeblock>jdbc:hive2://<varname>proxy_host</varname>:<varname>load_balancer_port</varname>/;principal=impala/_HOST@<varname>realm</varname>
+jdbc:hive2://<varname>proxy_host</varname>:<varname>load_balancer_port</varname>/;principal=impala/<varname>proxy_host</varname>@<varname>realm</varname></codeblock>
+
+        <p>
+          When starting <cmdname>impala-shell</cmdname>, specify the service principal via the
+          <codeph>-b</codeph> or <codeph>--kerberos_host_fqdn</codeph> flag.
+        </p>
+
+      </section>
+
     </conbody>
 
   </concept>
@@ -512,8 +537,9 @@ klist -k <varname>keytabfile</varname>
       <ul>
         <li>
           <p>
-            Install the load balancer: <codeph>yum install haproxy</codeph>
+            Install the load balancer:
           </p>
+<codeblock>yum install haproxy</codeblock>
         </li>
 
         <li>
@@ -604,7 +630,8 @@ listen stats :25002
     stats enable
     stats auth <varname>username</varname>:<varname>password</varname>
 
-# This is the setup for Impala. Impala client connect to load_balancer_host:25003.
+# Setup for Impala.
+# Impala clients connect to load_balancer_host:25003.
 # HAProxy will balance connections among the list of servers listed below.
 # The list of Impalad is listening at port 21000 for beeswax (impala-shell) or original ODBC driver.
 # For JDBC or ODBC version 2.x driver, use port 21050 instead of 21000.
@@ -621,12 +648,13 @@ listen impala :25003
 # Setup for Hue or other JDBC-enabled applications.
 # In particular, Hue requires sticky sessions.
 # The application connects to load_balancer_host:21051, and HAProxy balances
-# connections to the associated hosts, where Impala listens for JDBC
-# requests on port 21050.
+# connections to the associated hosts, where Impala listens for
+# JDBC requests at port 21050.
 listen impalajdbc :21051
     mode tcp
     option tcplog
     balance source
+
     server <varname>symbolic_name_5</varname> impala-host-1.example.com:21050 check
     server <varname>symbolic_name_6</varname> impala-host-2.example.com:21050 check
     server <varname>symbolic_name_7</varname> impala-host-3.example.com:21050 check
@@ -635,8 +663,8 @@ listen impalajdbc :21051
 
       <note type="important">
         Hue requires the <codeph>check</codeph> option at end of each line in the above file to
-        ensure HAProxy can detect any unreachable Impalad server, and failover can be
-        successful. Without the TCP check, you may hit an error when the
+        ensure HAProxy can detect any unreachable <cmdname>Impalad</cmdname> server, and
+        failover can be successful. Without the TCP check, you may hit an error when the
         <cmdname>impalad</cmdname> daemon to which Hue tries to connect is down.
       </note>