You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by jr...@apache.org on 2016/07/26 23:05:04 UTC
[12/22] incubator-impala git commit: First try at porting over the
source files necessary for the Impala SQL Reference.
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_double.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_double.xml b/docs/topics/impala_double.xml
new file mode 100644
index 0000000..f1d1756
--- /dev/null
+++ b/docs/topics/impala_double.xml
@@ -0,0 +1,100 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="double">
+
+ <title>DOUBLE Data Type</title>
+ <titlealts><navtitle>DOUBLE</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="Impala Data Types"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Data Analysts"/>
+ <data name="Category" value="Developers"/>
+ <data name="Category" value="Schemas"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ A double precision floating-point data type used in <codeph>CREATE TABLE</codeph> and <codeph>ALTER
+ TABLE</codeph> statements.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+ <p>
+ In the column definition of a <codeph>CREATE TABLE</codeph> statement:
+ </p>
+
+<codeblock><varname>column_name</varname> DOUBLE</codeblock>
+
+ <p>
+ <b>Range:</b> 4.94065645841246544e-324 .. 1.79769313486231570e+308, positive or negative
+ </p>
+
+ <p>
+ <b>Precision:</b> 15 to 17 significant digits, depending on usage. The number of significant digits does
+ not depend on the position of the decimal point.
+ </p>
+
+ <p>
+ <b>Representation:</b> The values are stored in 8 bytes, using
+ <xref href="https://en.wikipedia.org/wiki/Double-precision_floating-point_format" scope="external" format="html">IEEE 754 Double Precision Binary Floating Point</xref> format.
+ </p>
+
+ <p>
+ <b>Conversions:</b> Impala does not automatically convert <codeph>DOUBLE</codeph> to any other type. You can
+ use <codeph>CAST()</codeph> to convert <codeph>DOUBLE</codeph> values to <codeph>FLOAT</codeph>,
+ <codeph>TINYINT</codeph>, <codeph>SMALLINT</codeph>, <codeph>INT</codeph>, <codeph>BIGINT</codeph>,
+ <codeph>STRING</codeph>, <codeph>TIMESTAMP</codeph>, or <codeph>BOOLEAN</codeph>. You can use exponential
+ notation in <codeph>DOUBLE</codeph> literals or when casting from <codeph>STRING</codeph>, for example
+ <codeph>1.0e6</codeph> to represent one million.
+ <ph conref="../shared/impala_common.xml#common/cast_int_to_timestamp"/>
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+
+ <p>
+ The data type <codeph>REAL</codeph> is an alias for <codeph>DOUBLE</codeph>.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+<codeblock>CREATE TABLE t1 (x DOUBLE);
+SELECT CAST(1000.5 AS DOUBLE);
+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/partitioning_imprecise"/>
+
+ <p conref="../shared/impala_common.xml#common/hbase_ok"/>
+
+ <p conref="../shared/impala_common.xml#common/parquet_ok"/>
+
+ <p conref="../shared/impala_common.xml#common/text_bulky"/>
+
+<!-- <p conref="/Content/impala_common_xi44078.xml#common/compatibility_blurb"/> -->
+
+ <p conref="../shared/impala_common.xml#common/internals_8_bytes"/>
+
+<!-- <p conref="/Content/impala_common_xi44078.xml#common/added_in_20"/> -->
+
+ <p conref="../shared/impala_common.xml#common/column_stats_constant"/>
+
+ <p conref="../shared/impala_common.xml#common/restrictions_blurb"/>
+
+<!-- This conref appears under SUM(), AVG(), FLOAT, and DOUBLE topics. -->
+
+ <p conref="../shared/impala_common.xml#common/sum_double"/>
+
+ <p conref="../shared/impala_common.xml#common/float_double_decimal_caveat"/>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ <xref href="impala_literals.xml#numeric_literals"/>, <xref href="impala_math_functions.xml#math_functions"/>,
+ <xref href="impala_float.xml#float"/>
+ </p>
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_drop_database.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_drop_database.xml b/docs/topics/impala_drop_database.xml
new file mode 100644
index 0000000..c6a1b64
--- /dev/null
+++ b/docs/topics/impala_drop_database.xml
@@ -0,0 +1,124 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="drop_database">
+
+ <title>DROP DATABASE Statement</title>
+ <titlealts><navtitle>DROP DATABASE</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Databases"/>
+ <data name="Category" value="DDL"/>
+ <data name="Category" value="Schemas"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">DROP DATABASE statement</indexterm>
+ Removes a database from the system. The physical operations involve removing the metadata for the database
+ from the metastore, and deleting the corresponding <codeph>*.db</codeph> directory from HDFS.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock>DROP (DATABASE|SCHEMA) [IF EXISTS] <varname>database_name</varname> <ph rev="2.3.0">[RESTRICT | CASCADE]</ph>;</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/ddl_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+
+ <p>
+ By default, the database must be empty before it can be dropped, to avoid losing any data.
+ </p>
+
+ <p rev="2.3.0">
+ In CDH 5.5 / Impala 2.3 and higher, you can include the <codeph>CASCADE</codeph>
+ clause to make Impala drop all tables and other objects in the database before dropping the database itself.
+ The <codeph>RESTRICT</codeph> clause enforces the original requirement that the database be empty
+ before being dropped. Because the <codeph>RESTRICT</codeph> behavior is still the default, this
+ clause is optional.
+ </p>
+
+ <p rev="2.3.0">
+ The automatic dropping resulting from the <codeph>CASCADE</codeph> clause follows the same rules as the
+ corresponding <codeph>DROP TABLE</codeph>, <codeph>DROP VIEW</codeph>, and <codeph>DROP FUNCTION</codeph> statements.
+ In particular, the HDFS directories and data files for any external tables are left behind when the
+ tables are removed.
+ </p>
+
+ <p>
+ When you do not use the <codeph>CASCADE</codeph> clause, drop or move all the objects inside the database manually
+ before dropping the database itself:
+ </p>
+
+ <ul>
+ <li>
+ <p>
+ Use the <codeph>SHOW TABLES</codeph> statement to locate all tables and views in the database,
+ and issue <codeph>DROP TABLE</codeph> and <codeph>DROP VIEW</codeph> statements to remove them all.
+ </p>
+ </li>
+ <li>
+ <p>
+ Use the <codeph>SHOW FUNCTIONS</codeph> and <codeph>SHOW AGGREGATE FUNCTIONS</codeph> statements
+ to locate all user-defined functions in the database, and issue <codeph>DROP FUNCTION</codeph>
+ and <codeph>DROP AGGREGATE FUNCTION</codeph> statements to remove them all.
+ </p>
+ </li>
+ <li>
+ <p>
+ To keep tables or views contained by a database while removing the database itself, use
+ <codeph>ALTER TABLE</codeph> and <codeph>ALTER VIEW</codeph> to move the relevant
+ objects to a different database before dropping the original database.
+ </p>
+ </li>
+ </ul>
+
+ <p>
+ You cannot drop the current database, that is, the database your session connected to
+ either through the <codeph>USE</codeph> statement or the <codeph>-d</codeph> option of <cmdname>impala-shell</cmdname>.
+ Issue a <codeph>USE</codeph> statement to switch to a different database first.
+ Because the <codeph>default</codeph> database is always available, issuing
+ <codeph>USE default</codeph> is a convenient way to leave the current database
+ before dropping it.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/hive_blurb"/>
+
+ <p>
+ When you drop a database in Impala, the database can no longer be used by Hive.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+<!-- Better to conref the same examples in both places. -->
+
+ <p>
+ See <xref href="impala_create_database.xml#create_database"/> for examples covering <codeph>CREATE
+ DATABASE</codeph>, <codeph>USE</codeph>, and <codeph>DROP DATABASE</codeph>.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/cancel_blurb_no"/>
+
+ <p conref="../shared/impala_common.xml#common/permissions_blurb"/>
+ <p rev="CDH-19187">
+ The user ID that the <cmdname>impalad</cmdname> daemon runs under,
+ typically the <codeph>impala</codeph> user, must have write
+ permission for the directory associated with the database.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+ <codeblock conref="../shared/impala_common.xml#common/create_drop_db_example"/>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ <xref href="impala_databases.xml#databases"/>, <xref href="impala_create_database.xml#create_database"/>,
+ <xref href="impala_use.xml#use"/>, <xref href="impala_show.xml#show_databases"/>, <xref href="impala_drop_table.xml#drop_table"/>
+ </p>
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_drop_function.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_drop_function.xml b/docs/topics/impala_drop_function.xml
new file mode 100644
index 0000000..51a4d90
--- /dev/null
+++ b/docs/topics/impala_drop_function.xml
@@ -0,0 +1,60 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept rev="1.2" id="drop_function">
+
+ <title>DROP FUNCTION Statement</title>
+ <titlealts><navtitle>DROP FUNCTION</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="DDL"/>
+ <data name="Category" value="Impala Functions"/>
+ <data name="Category" value="UDFs"/>
+ <data name="Category" value="Schemas"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">DROP FUNCTION statement</indexterm>
+ Removes a user-defined function (UDF), so that it is not available for execution during Impala
+ <codeph>SELECT</codeph> or <codeph>INSERT</codeph> operations.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock>DROP [AGGREGATE] FUNCTION [IF EXISTS] [<varname>db_name</varname>.]<varname>function_name</varname>(<varname>type</varname>[, <varname>type</varname>...])</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/ddl_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+
+ <p>
+ Because the same function name could be overloaded with different argument signatures, you specify the
+ argument types to identify the exact function to drop.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/restrictions_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/udf_persistence_restriction"/>
+
+ <p conref="../shared/impala_common.xml#common/cancel_blurb_no"/>
+
+ <p conref="../shared/impala_common.xml#common/permissions_blurb"/>
+ <p rev="CDH-19187">
+ The user ID that the <cmdname>impalad</cmdname> daemon runs under,
+ typically the <codeph>impala</codeph> user, does not need any
+ particular HDFS permissions to perform this statement.
+ All read and write operations are on the metastore database,
+ not HDFS files and directories.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ <xref href="impala_udf.xml#udfs"/>, <xref href="impala_create_function.xml#create_function"/>
+ </p>
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_drop_role.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_drop_role.xml b/docs/topics/impala_drop_role.xml
new file mode 100644
index 0000000..35d2157
--- /dev/null
+++ b/docs/topics/impala_drop_role.xml
@@ -0,0 +1,67 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept rev="1.4.0" id="drop_role">
+
+ <title>DROP ROLE Statement (CDH 5.2 or higher only)</title>
+ <titlealts><navtitle>DROP ROLE (CDH 5.2 or higher only)</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="DDL"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Sentry"/>
+ <data name="Category" value="Roles"/>
+ <!-- Consider whether to go deeper into categories like Security for the Sentry-related statements. -->
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">DROP ROLE statement</indexterm>
+<!-- Copied from Sentry docs. Turn into conref. I did some rewording for clarity. -->
+ The <codeph>DROP ROLE</codeph> statement removes a role from the metastore database. Once dropped, the role
+ is revoked for all users to whom it was previously assigned, and all privileges granted to that role are
+ revoked. Queries that are already executing are not affected. Impala verifies the role information
+ approximately every 60 seconds, so the effects of <codeph>DROP ROLE</codeph> might not be visible to new
+ Impala queries for a brief period.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock>DROP ROLE <varname>role_name</varname>
+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/privileges_blurb"/>
+
+ <p>
+ Only administrative users (initially, a predefined set of users specified in the Sentry service configuration
+ file) can use this statement.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/compatibility_blurb"/>
+
+ <p>
+ Impala makes use of any roles and privileges specified by the <codeph>GRANT</codeph> and
+ <codeph>REVOKE</codeph> statements in Hive, and Hive makes use of any roles and privileges specified by the
+ <codeph>GRANT</codeph> and <codeph>REVOKE</codeph> statements in Impala. The Impala <codeph>GRANT</codeph>
+ and <codeph>REVOKE</codeph> statements for privileges do not require the <codeph>ROLE</codeph> keyword to be
+ repeated before each role name, unlike the equivalent Hive statements.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ <xref href="impala_authorization.xml#authorization"/>, <xref href="impala_grant.xml#grant"/>,
+ <xref href="impala_revoke.xml#revoke"/>, <xref href="impala_create_role.xml#create_role"/>,
+ <xref href="impala_show.xml#show"/>
+ </p>
+
+<!-- To do: nail down the new SHOW syntax, e.g. SHOW ROLES, SHOW CURRENT ROLES, SHOW GROUPS. -->
+
+ <p conref="../shared/impala_common.xml#common/cancel_blurb_no"/>
+
+ <p conref="../shared/impala_common.xml#common/permissions_blurb_no"/>
+
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_drop_stats.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_drop_stats.xml b/docs/topics/impala_drop_stats.xml
new file mode 100644
index 0000000..56697f4
--- /dev/null
+++ b/docs/topics/impala_drop_stats.xml
@@ -0,0 +1,275 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept rev="2.1.0" id="drop_stats" xml:lang="en-US">
+
+ <title>DROP STATS Statement</title>
+ <titlealts><navtitle>DROP STATS</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="DDL"/>
+ <data name="Category" value="Tables"/>
+ <data name="Category" value="Performance"/>
+ <data name="Category" value="Scalability"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">DROP STATS statement</indexterm>
+ Removes the specified statistics from a table or partition. The statistics were originally created by the
+ <codeph>COMPUTE STATS</codeph> or <codeph>COMPUTE INCREMENTAL STATS</codeph> statement.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock rev="2.1.0">DROP STATS [<varname>database_name</varname>.]<varname>table_name</varname>
+DROP INCREMENTAL STATS [<varname>database_name</varname>.]<varname>table_name</varname> PARTITION (<varname>partition_spec</varname>)
+
+<varname>partition_spec</varname> ::= <varname>partition_col</varname>=<varname>constant_value</varname>
+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/incremental_partition_spec"/>
+
+ <p>
+ <codeph>DROP STATS</codeph> removes all statistics from the table, whether created by <codeph>COMPUTE
+ STATS</codeph> or <codeph>COMPUTE INCREMENTAL STATS</codeph>.
+ </p>
+
+ <p rev="2.1.0">
+ <codeph>DROP INCREMENTAL STATS</codeph> only affects incremental statistics for a single partition, specified
+ through the <codeph>PARTITION</codeph> clause. The incremental stats are marked as outdated, so that they are
+ recomputed by the next <codeph>COMPUTE INCREMENTAL STATS</codeph> statement.
+ </p>
+
+<!-- To do: what release was this added in? -->
+
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+
+ <p>
+ You typically use this statement when the statistics for a table or a partition have become stale due to data
+ files being added to or removed from the associated HDFS data directories, whether by manual HDFS operations
+ or <codeph>INSERT</codeph>, <codeph>INSERT OVERWRITE</codeph>, or <codeph>LOAD DATA</codeph> statements, or
+ adding or dropping partitions.
+ </p>
+
+ <p>
+ When a table or partition has no associated statistics, Impala treats it as essentially zero-sized when
+ constructing the execution plan for a query. In particular, the statistics influence the order in which
+ tables are joined in a join query. To ensure proper query planning and good query performance and
+ scalability, make sure to run <codeph>COMPUTE STATS</codeph> or <codeph>COMPUTE INCREMENTAL STATS</codeph> on
+ the table or partition after removing any stale statistics.
+ </p>
+
+ <p>
+ Dropping the statistics is not required for an unpartitioned table or a partitioned table covered by the
+ original type of statistics. A subsequent <codeph>COMPUTE STATS</codeph> statement replaces any existing
+ statistics with new ones, for all partitions, regardless of whether the old ones were outdated. Therefore,
+ this statement was rarely used before the introduction of incremental statistics.
+ </p>
+
+ <p>
+ Dropping the statistics is required for a partitioned table containing incremental statistics, to make a
+ subsequent <codeph>COMPUTE INCREMENTAL STATS</codeph> statement rescan an existing partition. See
+ <xref href="impala_perf_stats.xml#perf_stats"/> for information about incremental statistics, a new feature
+ available in Impala 2.1.0 and higher.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/ddl_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/cancel_blurb_no"/>
+
+ <p conref="../shared/impala_common.xml#common/permissions_blurb"/>
+ <p rev="CDH-19187">
+ The user ID that the <cmdname>impalad</cmdname> daemon runs under,
+ typically the <codeph>impala</codeph> user, does not need any
+ particular HDFS permissions to perform this statement.
+ All read and write operations are on the metastore database,
+ not HDFS files and directories.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+ <p>
+ The following example shows a partitioned table that has associated statistics produced by the
+ <codeph>COMPUTE INCREMENTAL STATS</codeph> statement, and how the situation evolves as statistics are dropped
+ from specific partitions, then the entire table.
+ </p>
+
+ <p>
+ Initially, all table and column statistics are filled in.
+ </p>
+
+<!-- Note: chopped off any excess characters at position 87 and after,
+ to avoid weird wrapping in PDF.
+ Applies to any subsequent examples with output from SHOW ... STATS too. -->
+
+<codeblock>show table stats item_partitioned;
++-------------+-------+--------+----------+--------------+---------+------------------
+| i_category | #Rows | #Files | Size | Bytes Cached | Format | Incremental stats
++-------------+-------+--------+----------+--------------+---------+------------------
+| Books | 1733 | 1 | 223.74KB | NOT CACHED | PARQUET | true
+| Children | 1786 | 1 | 230.05KB | NOT CACHED | PARQUET | true
+| Electronics | 1812 | 1 | 232.67KB | NOT CACHED | PARQUET | true
+| Home | 1807 | 1 | 232.56KB | NOT CACHED | PARQUET | true
+| Jewelry | 1740 | 1 | 223.72KB | NOT CACHED | PARQUET | true
+| Men | 1811 | 1 | 231.25KB | NOT CACHED | PARQUET | true
+| Music | 1860 | 1 | 237.90KB | NOT CACHED | PARQUET | true
+| Shoes | 1835 | 1 | 234.90KB | NOT CACHED | PARQUET | true
+| Sports | 1783 | 1 | 227.97KB | NOT CACHED | PARQUET | true
+| Women | 1790 | 1 | 226.27KB | NOT CACHED | PARQUET | true
+| Total | 17957 | 10 | 2.25MB | 0B | |
++-------------+-------+--------+----------+--------------+---------+------------------
+show column stats item_partitioned;
++------------------+-----------+------------------+--------+----------+---------------
+| Column | Type | #Distinct Values | #Nulls | Max Size | Avg Size
++------------------+-----------+------------------+--------+----------+---------------
+| i_item_sk | INT | 19443 | -1 | 4 | 4
+| i_item_id | STRING | 9025 | -1 | 16 | 16
+| i_rec_start_date | TIMESTAMP | 4 | -1 | 16 | 16
+| i_rec_end_date | TIMESTAMP | 3 | -1 | 16 | 16
+| i_item_desc | STRING | 13330 | -1 | 200 | 100.3028030395
+| i_current_price | FLOAT | 2807 | -1 | 4 | 4
+| i_wholesale_cost | FLOAT | 2105 | -1 | 4 | 4
+| i_brand_id | INT | 965 | -1 | 4 | 4
+| i_brand | STRING | 725 | -1 | 22 | 16.17760086059
+| i_class_id | INT | 16 | -1 | 4 | 4
+| i_class | STRING | 101 | -1 | 15 | 7.767499923706
+| i_category_id | INT | 10 | -1 | 4 | 4
+| i_manufact_id | INT | 1857 | -1 | 4 | 4
+| i_manufact | STRING | 1028 | -1 | 15 | 11.32950019836
+| i_size | STRING | 8 | -1 | 11 | 4.334599971771
+| i_formulation | STRING | 12884 | -1 | 20 | 19.97999954223
+| i_color | STRING | 92 | -1 | 10 | 5.380899906158
+| i_units | STRING | 22 | -1 | 7 | 4.186900138854
+| i_container | STRING | 2 | -1 | 7 | 6.992599964141
+| i_manager_id | INT | 105 | -1 | 4 | 4
+| i_product_name | STRING | 19094 | -1 | 25 | 18.02330017089
+| i_category | STRING | 10 | 0 | -1 | -1
++------------------+-----------+------------------+--------+----------+---------------
+</codeblock>
+
+ <p>
+ To remove statistics for particular partitions, use the <codeph>DROP INCREMENTAL STATS</codeph> statement.
+ After removing statistics for two partitions, the table-level statistics reflect that change in the
+ <codeph>#Rows</codeph> and <codeph>Incremental stats</codeph> fields. The counts, maximums, and averages of
+ the column-level statistics are unaffected.
+ </p>
+
+ <note>
+ (It is possible that the row count might be preserved in the future after a <codeph>DROP INCREMENTAL
+ STATS</codeph> statement. Check the resolution of the issue
+ <xref href="https://issues.cloudera.org/browse/IMPALA-1615" scope="external" format="html">IMPALA-1615</xref>.)
+ </note>
+
+<codeblock>drop incremental stats item_partitioned partition (i_category='Sports');
+drop incremental stats item_partitioned partition (i_category='Electronics');
+
+show table stats item_partitioned
++-------------+-------+--------+----------+--------------+---------+------------------
+| i_category | #Rows | #Files | Size | Bytes Cached | Format | Incremental stats
++-------------+-------+--------+----------+--------------+---------+------------------
+| Books | 1733 | 1 | 223.74KB | NOT CACHED | PARQUET | true
+| Children | 1786 | 1 | 230.05KB | NOT CACHED | PARQUET | true
+| Electronics | -1 | 1 | 232.67KB | NOT CACHED | PARQUET | false
+| Home | 1807 | 1 | 232.56KB | NOT CACHED | PARQUET | true
+| Jewelry | 1740 | 1 | 223.72KB | NOT CACHED | PARQUET | true
+| Men | 1811 | 1 | 231.25KB | NOT CACHED | PARQUET | true
+| Music | 1860 | 1 | 237.90KB | NOT CACHED | PARQUET | true
+| Shoes | 1835 | 1 | 234.90KB | NOT CACHED | PARQUET | true
+| Sports | -1 | 1 | 227.97KB | NOT CACHED | PARQUET | false
+| Women | 1790 | 1 | 226.27KB | NOT CACHED | PARQUET | true
+| Total | 17957 | 10 | 2.25MB | 0B | |
++-------------+-------+--------+----------+--------------+---------+------------------
+show column stats item_partitioned
++------------------+-----------+------------------+--------+----------+---------------
+| Column | Type | #Distinct Values | #Nulls | Max Size | Avg Size
++------------------+-----------+------------------+--------+----------+---------------
+| i_item_sk | INT | 19443 | -1 | 4 | 4
+| i_item_id | STRING | 9025 | -1 | 16 | 16
+| i_rec_start_date | TIMESTAMP | 4 | -1 | 16 | 16
+| i_rec_end_date | TIMESTAMP | 3 | -1 | 16 | 16
+| i_item_desc | STRING | 13330 | -1 | 200 | 100.3028030395
+| i_current_price | FLOAT | 2807 | -1 | 4 | 4
+| i_wholesale_cost | FLOAT | 2105 | -1 | 4 | 4
+| i_brand_id | INT | 965 | -1 | 4 | 4
+| i_brand | STRING | 725 | -1 | 22 | 16.17760086059
+| i_class_id | INT | 16 | -1 | 4 | 4
+| i_class | STRING | 101 | -1 | 15 | 7.767499923706
+| i_category_id | INT | 10 | -1 | 4 | 4
+| i_manufact_id | INT | 1857 | -1 | 4 | 4
+| i_manufact | STRING | 1028 | -1 | 15 | 11.32950019836
+| i_size | STRING | 8 | -1 | 11 | 4.334599971771
+| i_formulation | STRING | 12884 | -1 | 20 | 19.97999954223
+| i_color | STRING | 92 | -1 | 10 | 5.380899906158
+| i_units | STRING | 22 | -1 | 7 | 4.186900138854
+| i_container | STRING | 2 | -1 | 7 | 6.992599964141
+| i_manager_id | INT | 105 | -1 | 4 | 4
+| i_product_name | STRING | 19094 | -1 | 25 | 18.02330017089
+| i_category | STRING | 10 | 0 | -1 | -1
++------------------+-----------+------------------+--------+----------+---------------
+</codeblock>
+
+ <p>
+ To remove all statistics from the table, whether produced by <codeph>COMPUTE STATS</codeph> or
+ <codeph>COMPUTE INCREMENTAL STATS</codeph>, use the <codeph>DROP STATS</codeph> statement without the
+ <codeph>INCREMENTAL</codeph> clause. Now, both table-level and column-level statistics are reset.
+ </p>
+
+<codeblock>drop stats item_partitioned;
+
+show table stats item_partitioned
++-------------+-------+--------+----------+--------------+---------+------------------
+| i_category | #Rows | #Files | Size | Bytes Cached | Format | Incremental stats
++-------------+-------+--------+----------+--------------+---------+------------------
+| Books | -1 | 1 | 223.74KB | NOT CACHED | PARQUET | false
+| Children | -1 | 1 | 230.05KB | NOT CACHED | PARQUET | false
+| Electronics | -1 | 1 | 232.67KB | NOT CACHED | PARQUET | false
+| Home | -1 | 1 | 232.56KB | NOT CACHED | PARQUET | false
+| Jewelry | -1 | 1 | 223.72KB | NOT CACHED | PARQUET | false
+| Men | -1 | 1 | 231.25KB | NOT CACHED | PARQUET | false
+| Music | -1 | 1 | 237.90KB | NOT CACHED | PARQUET | false
+| Shoes | -1 | 1 | 234.90KB | NOT CACHED | PARQUET | false
+| Sports | -1 | 1 | 227.97KB | NOT CACHED | PARQUET | false
+| Women | -1 | 1 | 226.27KB | NOT CACHED | PARQUET | false
+| Total | -1 | 10 | 2.25MB | 0B | |
++-------------+-------+--------+----------+--------------+---------+------------------
+show column stats item_partitioned
++------------------+-----------+------------------+--------+----------+----------+
+| Column | Type | #Distinct Values | #Nulls | Max Size | Avg Size |
++------------------+-----------+------------------+--------+----------+----------+
+| i_item_sk | INT | -1 | -1 | 4 | 4 |
+| i_item_id | STRING | -1 | -1 | -1 | -1 |
+| i_rec_start_date | TIMESTAMP | -1 | -1 | 16 | 16 |
+| i_rec_end_date | TIMESTAMP | -1 | -1 | 16 | 16 |
+| i_item_desc | STRING | -1 | -1 | -1 | -1 |
+| i_current_price | FLOAT | -1 | -1 | 4 | 4 |
+| i_wholesale_cost | FLOAT | -1 | -1 | 4 | 4 |
+| i_brand_id | INT | -1 | -1 | 4 | 4 |
+| i_brand | STRING | -1 | -1 | -1 | -1 |
+| i_class_id | INT | -1 | -1 | 4 | 4 |
+| i_class | STRING | -1 | -1 | -1 | -1 |
+| i_category_id | INT | -1 | -1 | 4 | 4 |
+| i_manufact_id | INT | -1 | -1 | 4 | 4 |
+| i_manufact | STRING | -1 | -1 | -1 | -1 |
+| i_size | STRING | -1 | -1 | -1 | -1 |
+| i_formulation | STRING | -1 | -1 | -1 | -1 |
+| i_color | STRING | -1 | -1 | -1 | -1 |
+| i_units | STRING | -1 | -1 | -1 | -1 |
+| i_container | STRING | -1 | -1 | -1 | -1 |
+| i_manager_id | INT | -1 | -1 | 4 | 4 |
+| i_product_name | STRING | -1 | -1 | -1 | -1 |
+| i_category | STRING | 10 | 0 | -1 | -1 |
++------------------+-----------+------------------+--------+----------+----------+
+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ <xref href="impala_compute_stats.xml#compute_stats"/>, <xref href="impala_show.xml#show_table_stats"/>,
+ <xref href="impala_show.xml#show_column_stats"/>, <xref href="impala_perf_stats.xml#perf_stats"/>
+ </p>
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_drop_table.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_drop_table.xml b/docs/topics/impala_drop_table.xml
new file mode 100644
index 0000000..33cb726
--- /dev/null
+++ b/docs/topics/impala_drop_table.xml
@@ -0,0 +1,142 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="drop_table">
+
+ <title>DROP TABLE Statement</title>
+ <titlealts><navtitle>DROP TABLE</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="DDL"/>
+ <data name="Category" value="Tables"/>
+ <data name="Category" value="Schemas"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">DROP TABLE statement</indexterm>
+ Removes an Impala table. Also removes the underlying HDFS data files for internal tables, although not for
+ external tables.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock>DROP TABLE [IF EXISTS] [<varname>db_name</varname>.]<varname>table_name</varname> <ph rev="2.3.0">[PURGE]</ph></codeblock>
+
+ <p>
+ <b>IF EXISTS clause:</b>
+ </p>
+
+ <p>
+ The optional <codeph>IF EXISTS</codeph> clause makes the statement succeed whether or not the table exists.
+ If the table does exist, it is dropped; if it does not exist, the statement has no effect. This capability is
+ useful in standardized setup scripts that remove existing schema objects and create new ones. By using some
+ combination of <codeph>IF EXISTS</codeph> for the <codeph>DROP</codeph> statements and <codeph>IF NOT
+ EXISTS</codeph> clauses for the <codeph>CREATE</codeph> statements, the script can run successfully the first
+ time you run it (when the objects do not exist yet) and subsequent times (when some or all of the objects do
+ already exist).
+ </p>
+
+ <p rev="2.3.0">
+ <b>PURGE clause:</b>
+ </p>
+
+ <p rev="2.3.0">
+ The optional <codeph>PURGE</codeph> keyword, available in CDH 5.5 / Impala 2.3 and higher,
+ causes Impala to remove the associated HDFS data files
+ immediately, rather than going through the HDFS trashcan mechanism. Use this keyword when dropping
+ a table if it is crucial to remove the data as quickly as possible to free up space, or if there is
+ a problem with the trashcan, such as the trashcan not being configured or being in a different
+ HDFS encryption zone than the data files.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/ddl_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+
+ <p>
+ By default, Impala removes the associated HDFS directory and data files for the table. If you issue a
+ <codeph>DROP TABLE</codeph> and the data files are not deleted, it might be for the following reasons:
+ </p>
+
+ <ul>
+ <li>
+ If the table was created with the
+ <codeph><xref href="impala_tables.xml#external_tables">EXTERNAL</xref></codeph> clause, Impala leaves all
+ files and directories untouched. Use external tables when the data is under the control of other Hadoop
+ components, and Impala is only used to query the data files from their original locations.
+ </li>
+
+ <li>
+ Impala might leave the data files behind unintentionally, if there is no HDFS location available to hold
+ the HDFS trashcan for the <codeph>impala</codeph> user. See
+ <xref href="impala_prereqs.xml#prereqs_account"/> for the procedure to set up the required HDFS home
+ directory.
+ </li>
+ </ul>
+
+ <p>
+ Make sure that you are in the correct database before dropping a table, either by issuing a
+ <codeph>USE</codeph> statement first or by using a fully qualified name
+ <codeph><varname>db_name</varname>.<varname>table_name</varname></codeph>.
+ </p>
+
+ <p>
+ If you intend to issue a <codeph>DROP DATABASE</codeph> statement, first issue <codeph>DROP TABLE</codeph>
+ statements to remove all the tables in that database.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+<codeblock>create database temporary;
+use temporary;
+create table unimportant (x int);
+create table trivial (s string);
+-- Drop a table in the current database.
+drop table unimportant;
+-- Switch to a different database.
+use default;
+-- To drop a table in a different database...
+drop table trivial;
+<i>ERROR: AnalysisException: Table does not exist: default.trivial</i>
+-- ...use a fully qualified name.
+drop table temporary.trivial;</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/disk_space_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/s3_blurb"/>
+ <p rev="2.2.0">
+ Although Impala cannot write new data to a table stored in the Amazon
+ S3 filesystem, the <codeph>DROP TABLE</codeph> statement can remove data files from S3
+ if the associated S3 table is an internal table.
+ See <xref href="impala_s3.xml#s3"/> for details about working with S3 tables.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/cancel_blurb_no"/>
+
+ <p conref="../shared/impala_common.xml#common/permissions_blurb"/>
+ <p rev="CDH-19187">
+ For an internal table, the user ID that the <cmdname>impalad</cmdname> daemon runs under,
+ typically the <codeph>impala</codeph> user, must have write
+ permission for all the files and directories that make up the table.
+ </p>
+ <p>
+ For an external table, dropping the table only involves changes to metadata in the metastore database.
+ Because Impala does not remove any HDFS files or directories when external tables are dropped,
+ no particular permissions are needed for the associated HDFS files or directories.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ <xref href="impala_tables.xml#tables"/>,
+ <xref href="impala_alter_table.xml#alter_table"/>, <xref href="impala_create_table.xml#create_table"/>,
+ <xref href="impala_partitioning.xml#partitioning"/>, <xref href="impala_tables.xml#internal_tables"/>,
+ <xref href="impala_tables.xml#external_tables"/>
+ </p>
+
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_drop_view.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_drop_view.xml b/docs/topics/impala_drop_view.xml
new file mode 100644
index 0000000..edcab58
--- /dev/null
+++ b/docs/topics/impala_drop_view.xml
@@ -0,0 +1,48 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept rev="1.1" id="drop_view">
+
+ <title>DROP VIEW Statement</title>
+ <titlealts><navtitle>DROP VIEW</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="DDL"/>
+ <data name="Category" value="Schemas"/>
+ <data name="Category" value="Tables"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">DROP VIEW statement</indexterm>
+ Removes the specified view, which was originally created by the <codeph>CREATE VIEW</codeph> statement.
+ Because a view is purely a logical construct (an alias for a query) with no physical data behind it,
+ <codeph>DROP VIEW</codeph> only involves changes to metadata in the metastore database, not any data files in
+ HDFS.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock>DROP VIEW [IF EXISTS] [<varname>db_name</varname>.]<varname>view_name</varname></codeblock>
+
+ <p conref="../shared/impala_common.xml#common/ddl_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/cancel_blurb_no"/>
+
+ <p conref="../shared/impala_common.xml#common/permissions_blurb_no"/>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/create_drop_view_examples"/>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ <xref href="impala_views.xml#views"/>, <xref href="impala_create_view.xml#create_view"/>,
+ <xref href="impala_alter_view.xml#alter_view"/>
+ </p>
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_exec_single_node_rows_threshold.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_exec_single_node_rows_threshold.xml b/docs/topics/impala_exec_single_node_rows_threshold.xml
new file mode 100644
index 0000000..fa3007d
--- /dev/null
+++ b/docs/topics/impala_exec_single_node_rows_threshold.xml
@@ -0,0 +1,91 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept rev="2.0.0" id="exec_single_node_rows_threshold" xml:lang="en-US">
+
+ <title>EXEC_SINGLE_NODE_ROWS_THRESHOLD Query Option</title>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="Impala Query Options"/>
+ <data name="Category" value="Scalability"/>
+ <data name="Category" value="Performance"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">EXEC_SINGLE_NODE_ROWS_THRESHOLD query option</indexterm>
+ This setting controls the cutoff point (in terms of number of rows scanned) below which Impala treats a query
+ as a <q>small</q> query, turning off optimizations such as parallel execution and native code generation. The
+ overhead for these optimizations is justified for queries involving substantial amounts of data, but it
+ makes sense to skip them for queries involving tiny amounts of data. Reducing the overhead for small queries
+ allows Impala to complete them more quickly, keeping YARN resources, admission control slots, and so on
+ available for data-intensive queries.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock>SET EXEC_SINGLE_NODE_ROWS_THRESHOLD=<varname>number_of_rows</varname></codeblock>
+
+ <p>
+ <b>Type:</b> numeric
+ </p>
+
+ <p>
+ <b>Default:</b> 100
+ </p>
+
+ <p>
+ <b>Usage notes:</b> Typically, you increase the default value to make this optimization apply to more queries.
+ If incorrect or corrupted table and column statistics cause Impala to apply this optimization
+ incorrectly to queries that actually involve substantial work, you might see the queries being slower as a
+ result of remote reads. In that case, recompute statistics with the <codeph>COMPUTE STATS</codeph>
+ or <codeph>COMPUTE INCREMENTAL STATS</codeph> statement. If there is a problem collecting accurate
+ statistics, you can turn this feature off by setting the value to -1.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/internals_blurb"/>
+
+ <p>
+ This setting applies to query fragments where the amount of data to scan can be accurately determined, either
+ through table and column statistics, or by the presence of a <codeph>LIMIT</codeph> clause. If Impala cannot
+ accurately estimate the size of the input data, this setting does not apply.
+ </p>
+
+ <p rev="2.3.0">
+ In CDH 5.5 / Impala 2.3 and higher, where Impala supports the complex data types <codeph>STRUCT</codeph>,
+ <codeph>ARRAY</codeph>, and <codeph>MAP</codeph>, if a query refers to any column of those types,
+ the small-query optimization is turned off for that query regardless of the
+ <codeph>EXEC_SINGLE_NODE_ROWS_THRESHOLD</codeph> setting.
+ </p>
+
+ <p>
+ For a query that is determined to be <q>small</q>, all work is performed on the coordinator node. This might
+ result in some I/O being performed by remote reads. The savings from not distributing the query work and not
+ generating native code are expected to outweigh any overhead from the remote reads.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+ <p>
+ A common use case is to query just a few rows from a table to inspect typical data values. In this example,
+ Impala does not parallelize the query or perform native code generation because the result set is guaranteed
+ to be smaller than the threshold value from this query option:
+ </p>
+
+<codeblock>SET EXEC_SINGLE_NODE_ROWS_THRESHOLD=500;
+SELECT * FROM enormous_table LIMIT 300;
+</codeblock>
+
+<!-- Don't have any other places that tie into this particular optimization technique yet.
+Potentially: conceptual topics about code generation, distributed queries
+
+<p conref="/Content/impala_common_xi44078.xml#common/related_info"/>
+<p>
+</p>
+-->
+
+ </conbody>
+
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_explain.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_explain.xml b/docs/topics/impala_explain.xml
new file mode 100644
index 0000000..c9e8846
--- /dev/null
+++ b/docs/topics/impala_explain.xml
@@ -0,0 +1,224 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="explain">
+
+ <title>EXPLAIN Statement</title>
+ <titlealts><navtitle>EXPLAIN</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Querying"/>
+ <data name="Category" value="Reports"/>
+ <data name="Category" value="Planning"/>
+ <data name="Category" value="Performance"/>
+ <data name="Category" value="Troubleshooting"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">EXPLAIN statement</indexterm>
+ Returns the execution plan for a statement, showing the low-level mechanisms that Impala will use to read the
+ data, divide the work among nodes in the cluster, and transmit intermediate and final results across the
+ network. Use <codeph>explain</codeph> followed by a complete <codeph>SELECT</codeph> query. For example:
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock>EXPLAIN { <varname>select_query</varname> | <varname>ctas_stmt</varname> | <varname>insert_stmt</varname> }
+</codeblock>
+
+ <p>
+ The <varname>select_query</varname> is a <codeph>SELECT</codeph> statement, optionally prefixed by a
+ <codeph>WITH</codeph> clause. See <xref href="impala_select.xml#select"/> for details.
+ </p>
+
+ <p>
+ The <varname>insert_stmt</varname> is an <codeph>INSERT</codeph> statement that inserts into or overwrites an
+ existing table. It can use either the <codeph>INSERT ... SELECT</codeph> or <codeph>INSERT ...
+ VALUES</codeph> syntax. See <xref href="impala_insert.xml#insert"/> for details.
+ </p>
+
+ <p>
+ The <varname>ctas_stmt</varname> is a <codeph>CREATE TABLE</codeph> statement using the <codeph>AS
+ SELECT</codeph> clause, typically abbreviated as a <q>CTAS</q> operation. See
+ <xref href="impala_create_table.xml#create_table"/> for details.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+
+ <p>
+ You can interpret the output to judge whether the query is performing efficiently, and adjust the query
+ and/or the schema if not. For example, you might change the tests in the <codeph>WHERE</codeph> clause, add
+ hints to make join operations more efficient, introduce subqueries, change the order of tables in a join, add
+ or change partitioning for a table, collect column statistics and/or table statistics in Hive, or any other
+ performance tuning steps.
+ </p>
+
+ <p>
+ The <codeph>EXPLAIN</codeph> output reminds you if table or column statistics are missing from any table
+ involved in the query. These statistics are important for optimizing queries involving large tables or
+ multi-table joins. See <xref href="impala_compute_stats.xml#compute_stats"/> for how to gather statistics,
+ and <xref href="impala_perf_stats.xml#perf_stats"/> for how to use this information for query tuning.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/explain_interpret"/>
+
+ <p>
+ If you come from a traditional database background and are not familiar with data warehousing, keep in mind
+ that Impala is optimized for full table scans across very large tables. The structure and distribution of
+ this data is typically not suitable for the kind of indexing and single-row lookups that are common in OLTP
+ environments. Seeing a query scan entirely through a large table is common, not necessarily an indication of
+ an inefficient query. Of course, if you can reduce the volume of scanned data by orders of magnitude, for
+ example by using a query that affects only certain partitions within a partitioned table, then you might be
+ able to optimize a query so that it executes in seconds rather than minutes.
+ </p>
+
+ <p>
+ For more information and examples to help you interpret <codeph>EXPLAIN</codeph> output, see
+ <xref href="impala_explain_plan.xml#perf_explain"/>.
+ </p>
+
+ <p rev="1.2">
+ <b>Extended EXPLAIN output:</b>
+ </p>
+
+ <p rev="1.2">
+ For performance tuning of complex queries, and capacity planning (such as using the admission control and
+ resource management features), you can enable more detailed and informative output for the
+ <codeph>EXPLAIN</codeph> statement. In the <cmdname>impala-shell</cmdname> interpreter, issue the command
+ <codeph>SET EXPLAIN_LEVEL=<varname>level</varname></codeph>, where <varname>level</varname> is an integer
+ from 0 to 3 or corresponding mnemonic values <codeph>minimal</codeph>, <codeph>standard</codeph>,
+ <codeph>extended</codeph>, or <codeph>verbose</codeph>.
+ </p>
+
+ <p rev="1.2">
+ When extended <codeph>EXPLAIN</codeph> output is enabled, <codeph>EXPLAIN</codeph> statements print
+ information about estimated memory requirements, minimum number of virtual cores, and so on that you can use
+ to fine-tune the resource management options explained in
+ <xref href="impala_resource_management.xml#rm_options"/>. (The estimated memory requirements are
+ intentionally on the high side, to allow a margin for error, to avoid cancelling a query unnecessarily if you
+ set the <codeph>MEM_LIMIT</codeph> option to the estimated memory figure.)
+ </p>
+
+ <p>
+ See <xref href="impala_explain_level.xml#explain_level"/> for details and examples.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+ <p>
+ This example shows how the standard <codeph>EXPLAIN</codeph> output moves from the lowest (physical) level to
+ the higher (logical) levels. The query begins by scanning a certain amount of data; each node performs an
+ aggregation operation (evaluating <codeph>COUNT(*)</codeph>) on some subset of data that is local to that
+ node; the intermediate results are transmitted back to the coordinator node (labelled here as the
+ <codeph>EXCHANGE</codeph> node); lastly, the intermediate results are summed to display the final result.
+ </p>
+
+<codeblock id="explain_plan_simple">[impalad-host:21000] > explain select count(*) from customer_address;
++----------------------------------------------------------+
+| Explain String |
++----------------------------------------------------------+
+| Estimated Per-Host Requirements: Memory=42.00MB VCores=1 |
+| |
+| 03:AGGREGATE [MERGE FINALIZE] |
+| | output: sum(count(*)) |
+| | |
+| 02:EXCHANGE [PARTITION=UNPARTITIONED] |
+| | |
+| 01:AGGREGATE |
+| | output: count(*) |
+| | |
+| 00:SCAN HDFS [default.customer_address] |
+| partitions=1/1 size=5.25MB |
++----------------------------------------------------------+
+</codeblock>
+
+ <p>
+ These examples show how the extended <codeph>EXPLAIN</codeph> output becomes more accurate and informative as
+ statistics are gathered by the <codeph>COMPUTE STATS</codeph> statement. Initially, much of the information
+ about data size and distribution is marked <q>unavailable</q>. Impala can determine the raw data size, but
+ not the number of rows or number of distinct values for each column without additional analysis. The
+ <codeph>COMPUTE STATS</codeph> statement performs this analysis, so a subsequent <codeph>EXPLAIN</codeph>
+ statement has additional information to use in deciding how to optimize the distributed query.
+ </p>
+
+ <draft-comment translate="no">
+Re-run these examples with more substantial tables populated with data.
+</draft-comment>
+
+<codeblock rev="1.2">[localhost:21000] > set explain_level=extended;
+EXPLAIN_LEVEL set to extended
+[localhost:21000] > explain select x from t1;
++----------------------------------------------------------+
+| Explain String |
++----------------------------------------------------------+
+| Estimated Per-Host Requirements: Memory=32.00MB VCores=1 |
+| |
+| 01:EXCHANGE [PARTITION=UNPARTITIONED] |
+| | hosts=1 per-host-mem=unavailable |
+<b>| | tuple-ids=0 row-size=4B cardinality=unavailable |</b>
+| | |
+| 00:SCAN HDFS [default.t1, PARTITION=RANDOM] |
+| partitions=1/1 size=36B |
+<b>| table stats: unavailable |</b>
+<b>| column stats: unavailable |</b>
+| hosts=1 per-host-mem=32.00MB |
+<b>| tuple-ids=0 row-size=4B cardinality=unavailable |</b>
++----------------------------------------------------------+
+</codeblock>
+
+<codeblock rev="1.2">[localhost:21000] > compute stats t1;
++-----------------------------------------+
+| summary |
++-----------------------------------------+
+| Updated 1 partition(s) and 1 column(s). |
++-----------------------------------------+
+[localhost:21000] > explain select x from t1;
++----------------------------------------------------------+
+| Explain String |
++----------------------------------------------------------+
+| Estimated Per-Host Requirements: Memory=64.00MB VCores=1 |
+| |
+| 01:EXCHANGE [PARTITION=UNPARTITIONED] |
+| | hosts=1 per-host-mem=unavailable |
+| | tuple-ids=0 row-size=4B cardinality=0 |
+| | |
+| 00:SCAN HDFS [default.t1, PARTITION=RANDOM] |
+| partitions=1/1 size=36B |
+<b>| table stats: 0 rows total |</b>
+<b>| column stats: all |</b>
+| hosts=1 per-host-mem=64.00MB |
+<b>| tuple-ids=0 row-size=4B cardinality=0 |</b>
++----------------------------------------------------------+
+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/security_blurb"/>
+ <p conref="../shared/impala_common.xml#common/redaction_yes"/>
+
+ <p conref="../shared/impala_common.xml#common/cancel_blurb_no"/>
+
+ <p conref="../shared/impala_common.xml#common/permissions_blurb"/>
+ <p rev="CDH-19187">
+ <!-- Doublecheck these details. Does EXPLAIN really need any permissions? -->
+ The user ID that the <cmdname>impalad</cmdname> daemon runs under,
+ typically the <codeph>impala</codeph> user, must have read
+ and execute permissions for all applicable directories in all source tables
+ for the query that is being explained.
+ (A <codeph>SELECT</codeph> operation could read files from multiple different HDFS directories
+ if the source table is partitioned.)
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+ <p>
+ <xref href="impala_select.xml#select"/>,
+ <xref href="impala_insert.xml#insert"/>,
+ <xref href="impala_create_table.xml#create_table"/>,
+ <xref href="impala_explain_plan.xml#explain_plan"/>
+ </p>
+
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_explain_level.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_explain_level.xml b/docs/topics/impala_explain_level.xml
new file mode 100644
index 0000000..f54e8a8
--- /dev/null
+++ b/docs/topics/impala_explain_level.xml
@@ -0,0 +1,338 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept rev="1.2" id="explain_level">
+
+ <title>EXPLAIN_LEVEL Query Option</title>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="Impala Query Options"/>
+ <data name="Category" value="Troubleshooting"/>
+ <data name="Category" value="Querying"/>
+ <data name="Category" value="Performance"/>
+ <data name="Category" value="Reports"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">EXPLAIN_LEVEL query option</indexterm>
+ Controls the amount of detail provided in the output of the <codeph>EXPLAIN</codeph> statement. The basic
+ output can help you identify high-level performance issues such as scanning a higher volume of data or more
+ partitions than you expect. The higher levels of detail show how intermediate results flow between nodes and
+ how different SQL operations such as <codeph>ORDER BY</codeph>, <codeph>GROUP BY</codeph>, joins, and
+ <codeph>WHERE</codeph> clauses are implemented within a distributed query.
+ </p>
+
+ <p>
+ <b>Type:</b> <codeph>STRING</codeph> or <codeph>INT</codeph>
+ </p>
+
+ <p>
+ <b>Default:</b> <codeph>1</codeph>
+ </p>
+
+ <p>
+ <b>Arguments:</b>
+ </p>
+
+ <p>
+ The allowed range of numeric values for this option is 0 to 3:
+ </p>
+
+ <ul>
+ <li>
+ <codeph>0</codeph> or <codeph>MINIMAL</codeph>: A barebones list, one line per operation. Primarily useful
+ for checking the join order in very long queries where the regular <codeph>EXPLAIN</codeph> output is too
+ long to read easily.
+ </li>
+
+ <li>
+ <codeph>1</codeph> or <codeph>STANDARD</codeph>: The default level of detail, showing the logical way that
+ work is split up for the distributed query.
+ </li>
+
+ <li>
+ <codeph>2</codeph> or <codeph>EXTENDED</codeph>: Includes additional detail about how the query planner
+ uses statistics in its decision-making process, to understand how a query could be tuned by gathering
+ statistics, using query hints, adding or removing predicates, and so on.
+ </li>
+
+ <li>
+ <codeph>3</codeph> or <codeph>VERBOSE</codeph>: The maximum level of detail, showing how work is split up
+ within each node into <q>query fragments</q> that are connected in a pipeline. This extra detail is
+ primarily useful for low-level performance testing and tuning within Impala itself, rather than for
+ rewriting the SQL code at the user level.
+ </li>
+ </ul>
+
+ <note>
+ Prior to Impala 1.3, the allowed argument range for <codeph>EXPLAIN_LEVEL</codeph> was 0 to 1: level 0 had
+ the mnemonic <codeph>NORMAL</codeph>, and level 1 was <codeph>VERBOSE</codeph>. In Impala 1.3 and higher,
+ <codeph>NORMAL</codeph> is not a valid mnemonic value, and <codeph>VERBOSE</codeph> still applies to the
+ highest level of detail but now corresponds to level 3. You might need to adjust the values if you have any
+ older <codeph>impala-shell</codeph> script files that set the <codeph>EXPLAIN_LEVEL</codeph> query option.
+ </note>
+
+ <p>
+ Changing the value of this option controls the amount of detail in the output of the <codeph>EXPLAIN</codeph>
+ statement. The extended information from level 2 or 3 is especially useful during performance tuning, when
+ you need to confirm whether the work for the query is distributed the way you expect, particularly for the
+ most resource-intensive operations such as join queries against large tables, queries against tables with
+ large numbers of partitions, and insert operations for Parquet tables. The extended information also helps to
+ check estimated resource usage when you use the admission control or resource management features explained
+ in <xref href="impala_resource_management.xml#resource_management"/>. See
+ <xref href="impala_explain.xml#explain"/> for the syntax of the <codeph>EXPLAIN</codeph> statement, and
+ <xref href="impala_explain_plan.xml#perf_explain"/> for details about how to use the extended information.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+
+ <p>
+ As always, read the <codeph>EXPLAIN</codeph> output from bottom to top. The lowest lines represent the
+ initial work of the query (scanning data files), the lines in the middle represent calculations done on each
+ node and how intermediate results are transmitted from one node to another, and the topmost lines represent
+ the final results being sent back to the coordinator node.
+ </p>
+
+ <p>
+ The numbers in the left column are generated internally during the initial planning phase and do not
+ represent the actual order of operations, so it is not significant if they appear out of order in the
+ <codeph>EXPLAIN</codeph> output.
+ </p>
+
+ <p>
+ At all <codeph>EXPLAIN</codeph> levels, the plan contains a warning if any tables in the query are missing
+ statistics. Use the <codeph>COMPUTE STATS</codeph> statement to gather statistics for each table and suppress
+ this warning. See <xref href="impala_perf_stats.xml#perf_stats"/> for details about how the statistics help
+ query performance.
+ </p>
+
+ <p>
+ The <codeph>PROFILE</codeph> command in <cmdname>impala-shell</cmdname> always starts with an explain plan
+ showing full detail, the same as with <codeph>EXPLAIN_LEVEL=3</codeph>. <ph rev="1.4.0">After the explain
+ plan comes the executive summary, the same output as produced by the <codeph>SUMMARY</codeph> command in
+ <cmdname>impala-shell</cmdname>.</ph>
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+ <p>
+ These examples use a trivial, empty table to illustrate how the essential aspects of query planning are shown
+ in <codeph>EXPLAIN</codeph> output:
+ </p>
+
+<codeblock>[localhost:21000] > create table t1 (x int, s string);
+[localhost:21000] > set explain_level=1;
+[localhost:21000] > explain select count(*) from t1;
++------------------------------------------------------------------------------------+
+| Explain String |
++------------------------------------------------------------------------------------+
+| Estimated Per-Host Requirements: Memory=10.00MB VCores=1 |
+| WARNING: The following tables are missing relevant table and/or column statistics. |
+| explain_plan.t1 |
+| |
+| 03:AGGREGATE [MERGE FINALIZE] |
+| | output: sum(count(*)) |
+| | |
+| 02:EXCHANGE [PARTITION=UNPARTITIONED] |
+| | |
+| 01:AGGREGATE |
+| | output: count(*) |
+| | |
+| 00:SCAN HDFS [explain_plan.t1] |
+| partitions=1/1 size=0B |
++------------------------------------------------------------------------------------+
+[localhost:21000] > explain select * from t1;
++------------------------------------------------------------------------------------+
+| Explain String |
++------------------------------------------------------------------------------------+
+| Estimated Per-Host Requirements: Memory=-9223372036854775808B VCores=0 |
+| WARNING: The following tables are missing relevant table and/or column statistics. |
+| explain_plan.t1 |
+| |
+| 01:EXCHANGE [PARTITION=UNPARTITIONED] |
+| | |
+| 00:SCAN HDFS [explain_plan.t1] |
+| partitions=1/1 size=0B |
++------------------------------------------------------------------------------------+
+[localhost:21000] > set explain_level=2;
+[localhost:21000] > explain select * from t1;
++------------------------------------------------------------------------------------+
+| Explain String |
++------------------------------------------------------------------------------------+
+| Estimated Per-Host Requirements: Memory=-9223372036854775808B VCores=0 |
+| WARNING: The following tables are missing relevant table and/or column statistics. |
+| explain_plan.t1 |
+| |
+| 01:EXCHANGE [PARTITION=UNPARTITIONED] |
+| | hosts=0 per-host-mem=unavailable |
+| | tuple-ids=0 row-size=19B cardinality=unavailable |
+| | |
+| 00:SCAN HDFS [explain_plan.t1, PARTITION=RANDOM] |
+| partitions=1/1 size=0B |
+| table stats: unavailable |
+| column stats: unavailable |
+| hosts=0 per-host-mem=0B |
+| tuple-ids=0 row-size=19B cardinality=unavailable |
++------------------------------------------------------------------------------------+
+[localhost:21000] > set explain_level=3;
+[localhost:21000] > explain select * from t1;
++------------------------------------------------------------------------------------+
+| Explain String |
++------------------------------------------------------------------------------------+
+| Estimated Per-Host Requirements: Memory=-9223372036854775808B VCores=0 |
+<b>| WARNING: The following tables are missing relevant table and/or column statistics. |</b>
+<b>| explain_plan.t1 |</b>
+| |
+| F01:PLAN FRAGMENT [PARTITION=UNPARTITIONED] |
+| 01:EXCHANGE [PARTITION=UNPARTITIONED] |
+| hosts=0 per-host-mem=unavailable |
+| tuple-ids=0 row-size=19B cardinality=unavailable |
+| |
+| F00:PLAN FRAGMENT [PARTITION=RANDOM] |
+| DATASTREAM SINK [FRAGMENT=F01, EXCHANGE=01, PARTITION=UNPARTITIONED] |
+| 00:SCAN HDFS [explain_plan.t1, PARTITION=RANDOM] |
+| partitions=1/1 size=0B |
+<b>| table stats: unavailable |</b>
+<b>| column stats: unavailable |</b>
+| hosts=0 per-host-mem=0B |
+| tuple-ids=0 row-size=19B cardinality=unavailable |
++------------------------------------------------------------------------------------+
+</codeblock>
+
+ <p>
+ As the warning message demonstrates, most of the information needed for Impala to do efficient query
+ planning, and for you to understand the performance characteristics of the query, is only available after
+ you run the <codeph>COMPUTE STATS</codeph> statement for the table:
+ </p>
+
+<codeblock>[localhost:21000] > compute stats t1;
++-----------------------------------------+
+| summary |
++-----------------------------------------+
+| Updated 1 partition(s) and 2 column(s). |
++-----------------------------------------+
+[localhost:21000] > explain select * from t1;
++------------------------------------------------------------------------+
+| Explain String |
++------------------------------------------------------------------------+
+| Estimated Per-Host Requirements: Memory=-9223372036854775808B VCores=0 |
+| |
+| F01:PLAN FRAGMENT [PARTITION=UNPARTITIONED] |
+| 01:EXCHANGE [PARTITION=UNPARTITIONED] |
+| hosts=0 per-host-mem=unavailable |
+| tuple-ids=0 row-size=20B cardinality=0 |
+| |
+| F00:PLAN FRAGMENT [PARTITION=RANDOM] |
+| DATASTREAM SINK [FRAGMENT=F01, EXCHANGE=01, PARTITION=UNPARTITIONED] |
+| 00:SCAN HDFS [explain_plan.t1, PARTITION=RANDOM] |
+| partitions=1/1 size=0B |
+<b>| table stats: 0 rows total |</b>
+<b>| column stats: all |</b>
+| hosts=0 per-host-mem=0B |
+| tuple-ids=0 row-size=20B cardinality=0 |
++------------------------------------------------------------------------+
+</codeblock>
+
+ <p>
+ Joins and other complicated, multi-part queries are the ones where you most commonly need to examine the
+ <codeph>EXPLAIN</codeph> output and customize the amount of detail it shows. This example shows the
+ default <codeph>EXPLAIN</codeph> output for a three-way join query, then the equivalent output with a
+ <codeph>[SHUFFLE]</codeph> hint to change the join mechanism between the first two tables from a broadcast
+ join to a shuffle join.
+ </p>
+
+<codeblock>[localhost:21000] > set explain_level=1;
+[localhost:21000] > explain select one.*, two.*, three.* from t1 one, t1 two, t1 three where one.x = two.x and two.x = three.x;
++------------------------------------------------------------------------------------+
+| Explain String |
++------------------------------------------------------------------------------------+
+| Estimated Per-Host Requirements: Memory=4.00GB VCores=3 |
+| |
+| 07:EXCHANGE [PARTITION=UNPARTITIONED] |
+| | |
+<b>| 04:HASH JOIN [INNER JOIN, BROADCAST] |</b>
+| | hash predicates: two.x = three.x |
+| | |
+<b>| |--06:EXCHANGE [BROADCAST] |</b>
+| | | |
+| | 02:SCAN HDFS [explain_plan.t1 three] |
+| | partitions=1/1 size=0B |
+| | |
+<b>| 03:HASH JOIN [INNER JOIN, BROADCAST] |</b>
+| | hash predicates: one.x = two.x |
+| | |
+<b>| |--05:EXCHANGE [BROADCAST] |</b>
+| | | |
+| | 01:SCAN HDFS [explain_plan.t1 two] |
+| | partitions=1/1 size=0B |
+| | |
+| 00:SCAN HDFS [explain_plan.t1 one] |
+| partitions=1/1 size=0B |
++------------------------------------------------------------------------------------+
+[localhost:21000] > explain select one.*, two.*, three.* from t1 one join [shuffle] t1 two join t1 three where one.x = two.x and two.x = three.x;
++------------------------------------------------------------------------------------+
+| Explain String |
++------------------------------------------------------------------------------------+
+| Estimated Per-Host Requirements: Memory=4.00GB VCores=3 |
+| |
+| 08:EXCHANGE [PARTITION=UNPARTITIONED] |
+| | |
+<b>| 04:HASH JOIN [INNER JOIN, BROADCAST] |</b>
+| | hash predicates: two.x = three.x |
+| | |
+<b>| |--07:EXCHANGE [BROADCAST] |</b>
+| | | |
+| | 02:SCAN HDFS [explain_plan.t1 three] |
+| | partitions=1/1 size=0B |
+| | |
+<b>| 03:HASH JOIN [INNER JOIN, PARTITIONED] |</b>
+| | hash predicates: one.x = two.x |
+| | |
+<b>| |--06:EXCHANGE [PARTITION=HASH(two.x)] |</b>
+| | | |
+| | 01:SCAN HDFS [explain_plan.t1 two] |
+| | partitions=1/1 size=0B |
+| | |
+<b>| 05:EXCHANGE [PARTITION=HASH(one.x)] |</b>
+| | |
+| 00:SCAN HDFS [explain_plan.t1 one] |
+| partitions=1/1 size=0B |
++------------------------------------------------------------------------------------+
+</codeblock>
+
+ <p>
+ For a join involving many different tables, the default <codeph>EXPLAIN</codeph> output might stretch over
+ several pages, and the only details you care about might be the join order and the mechanism (broadcast or
+ shuffle) for joining each pair of tables. In that case, you might set <codeph>EXPLAIN_LEVEL</codeph> to its
+ lowest value of 0, to focus on just the join order and join mechanism for each stage. The following example
+ shows how the rows from the first and second joined tables are hashed and divided among the nodes of the
+ cluster for further filtering; then the entire contents of the third table are broadcast to all nodes for the
+ final stage of join processing.
+ </p>
+
+<codeblock>[localhost:21000] > set explain_level=0;
+[localhost:21000] > explain select one.*, two.*, three.* from t1 one join [shuffle] t1 two join t1 three where one.x = two.x and two.x = three.x;
++---------------------------------------------------------+
+| Explain String |
++---------------------------------------------------------+
+| Estimated Per-Host Requirements: Memory=4.00GB VCores=3 |
+| |
+| 08:EXCHANGE [PARTITION=UNPARTITIONED] |
+<b>| 04:HASH JOIN [INNER JOIN, BROADCAST] |</b>
+<b>| |--07:EXCHANGE [BROADCAST] |</b>
+| | 02:SCAN HDFS [explain_plan.t1 three] |
+<b>| 03:HASH JOIN [INNER JOIN, PARTITIONED] |</b>
+<b>| |--06:EXCHANGE [PARTITION=HASH(two.x)] |</b>
+| | 01:SCAN HDFS [explain_plan.t1 two] |
+<b>| 05:EXCHANGE [PARTITION=HASH(one.x)] |</b>
+| 00:SCAN HDFS [explain_plan.t1 one] |
++---------------------------------------------------------+
+</codeblock>
+
+<!-- Consider adding a related info section to collect the xrefs earlier on this page. -->
+
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_float.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_float.xml b/docs/topics/impala_float.xml
new file mode 100644
index 0000000..51e3311
--- /dev/null
+++ b/docs/topics/impala_float.xml
@@ -0,0 +1,94 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="float">
+
+ <title>FLOAT Data Type</title>
+ <titlealts><navtitle>FLOAT</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="Impala Data Types"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Data Analysts"/>
+ <data name="Category" value="Developers"/>
+ <data name="Category" value="Schemas"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ A single-precision floating-point data type used in <codeph>CREATE TABLE</codeph> and <codeph>ALTER
+ TABLE</codeph> statements.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+ <p>
+ In the column definition of a <codeph>CREATE TABLE</codeph> statement:
+ </p>
+
+<codeblock><varname>column_name</varname> FLOAT</codeblock>
+
+ <p>
+ <b>Range:</b> 1.40129846432481707e-45 .. 3.40282346638528860e+38, positive or negative
+ </p>
+
+ <p>
+ <b>Precision:</b> 6 to 9 significant digits, depending on usage. The number of significant digits does
+ not depend on the position of the decimal point.
+ </p>
+
+ <p>
+ <b>Representation:</b> The values are stored in 4 bytes, using
+ <xref href="https://en.wikipedia.org/wiki/Single-precision_floating-point_format" scope="external" format="html">IEEE 754 Single Precision Binary Floating Point</xref> format.
+ </p>
+
+ <p>
+ <b>Conversions:</b> Impala automatically converts <codeph>FLOAT</codeph> to more precise
+ <codeph>DOUBLE</codeph> values, but not the other way around. You can use <codeph>CAST()</codeph> to convert
+ <codeph>FLOAT</codeph> values to <codeph>TINYINT</codeph>, <codeph>SMALLINT</codeph>, <codeph>INT</codeph>,
+ <codeph>BIGINT</codeph>, <codeph>STRING</codeph>, <codeph>TIMESTAMP</codeph>, or <codeph>BOOLEAN</codeph>.
+ You can use exponential notation in <codeph>FLOAT</codeph> literals or when casting from
+ <codeph>STRING</codeph>, for example <codeph>1.0e6</codeph> to represent one million.
+ <ph conref="../shared/impala_common.xml#common/cast_int_to_timestamp"/>
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+<codeblock>CREATE TABLE t1 (x FLOAT);
+SELECT CAST(1000.5 AS FLOAT);
+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/partitioning_imprecise"/>
+
+ <p conref="../shared/impala_common.xml#common/hbase_ok"/>
+
+ <p conref="../shared/impala_common.xml#common/parquet_ok"/>
+
+ <p conref="../shared/impala_common.xml#common/text_bulky"/>
+
+<!-- <p conref="/Content/impala_common_xi44078.xml#common/compatibility_blurb"/> -->
+
+ <p conref="../shared/impala_common.xml#common/internals_4_bytes"/>
+
+<!-- <p conref="/Content/impala_common_xi44078.xml#common/added_in_20"/> -->
+
+ <p conref="../shared/impala_common.xml#common/column_stats_constant"/>
+
+ <p conref="../shared/impala_common.xml#common/restrictions_blurb"/>
+
+<!-- This conref appears under SUM(), AVG(), FLOAT, and DOUBLE topics. -->
+
+ <p conref="../shared/impala_common.xml#common/sum_double"/>
+
+ <p conref="../shared/impala_common.xml#common/float_double_decimal_caveat"/>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ <xref href="impala_literals.xml#numeric_literals"/>, <xref href="impala_math_functions.xml#math_functions"/>,
+ <xref href="impala_double.xml#double"/>
+ </p>
+ </conbody>
+</concept>