You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by jr...@apache.org on 2016/07/26 23:04:53 UTC
[01/22] incubator-impala git commit: First try at porting over the
source files necessary for the Impala SQL Reference.
Repository: incubator-impala
Updated Branches:
refs/heads/doc_prototype 0ad935b63 -> 463ddf924
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_varchar.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_varchar.xml b/docs/topics/impala_varchar.xml
new file mode 100644
index 0000000..32db4ae
--- /dev/null
+++ b/docs/topics/impala_varchar.xml
@@ -0,0 +1,215 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="varchar" rev="2.0.0">
+
+ <title>VARCHAR Data Type (CDH 5.2 or higher only)</title>
+ <titlealts><navtitle>VARCHAR (CDH 5.2 or higher only)</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="Impala Data Types"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Data Analysts"/>
+ <data name="Category" value="Developers"/>
+ <data name="Category" value="Schemas"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">VARCHAR data type</indexterm>
+ A variable-length character type, truncated during processing if necessary to fit within the specified
+ length.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+ <p>
+ In the column definition of a <codeph>CREATE TABLE</codeph> statement:
+ </p>
+
+<codeblock><varname>column_name</varname> VARCHAR(<varname>max_length</varname>)</codeblock>
+
+ <p>
+ The maximum length you can specify is 65,535.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/partitioning_bad"/>
+
+<!--
+<p>
+This type can be used for partition key columns.
+Because of the efficiency advantage of numeric values over character-based values,
+if the partition key is a string representation of a number,
+prefer to use an integer data type with sufficient range (<codeph>INT</codeph>,
+<codeph>BIGINT</codeph>, and so on) rather than this type.
+</p>
+-->
+
+ <p conref="../shared/impala_common.xml#common/hbase_no"/>
+
+ <p conref="../shared/impala_common.xml#common/parquet_blurb"/>
+
+ <ul>
+ <li>
+ This type can be read from and written to Parquet files.
+ </li>
+
+ <li>
+ There is no requirement for a particular level of Parquet.
+ </li>
+
+ <li>
+ Parquet files generated by Impala and containing this type can be freely interchanged with other components
+ such as Hive and MapReduce.
+ </li>
+
+ <li>
+ Parquet data files can contain values that are longer than allowed by the
+ <codeph>VARCHAR(<varname>n</varname>)</codeph> length limit. Impala ignores any extra trailing characters
+ when it processes those values during a query.
+ </li>
+ </ul>
+
+ <p conref="../shared/impala_common.xml#common/text_blurb"/>
+
+ <p>
+ Text data files can contain values that are longer than allowed by the
+ <codeph>VARCHAR(<varname>n</varname>)</codeph> length limit. Any extra trailing characters are ignored when
+ Impala processes those values during a query.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/schema_evolution_blurb"/>
+
+ <p>
+ You can use <codeph>ALTER TABLE ... CHANGE</codeph> to switch column data types to and from
+ <codeph>VARCHAR</codeph>. You can convert from <codeph>STRING</codeph> to
+ <codeph>VARCHAR(<varname>n</varname>)</codeph>, or from <codeph>VARCHAR(<varname>n</varname>)</codeph> to
+ <codeph>STRING</codeph>, or from <codeph>CHAR(<varname>n</varname>)</codeph> to
+ <codeph>VARCHAR(<varname>n</varname>)</codeph>, or from <codeph>VARCHAR(<varname>n</varname>)</codeph> to
+ <codeph>CHAR(<varname>n</varname>)</codeph>. When switching back and forth between <codeph>VARCHAR</codeph>
+ and <codeph>CHAR</codeph>, you can also change the length value. This schema evolution works the same for
+ tables using any file format. If a table contains values longer than the maximum length defined for a
+ <codeph>VARCHAR</codeph> column, Impala does not return an error. Any extra trailing characters are ignored
+ when Impala processes those values during a query.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/compatibility_blurb"/>
+
+ <p>
+ This type is available using Impala 2.0 or higher under CDH 4, or with Impala on CDH 5.2 or higher. There are
+ no compatibility issues with other components when exchanging data files or running Impala on CDH 4.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/internals_min_bytes"/>
+
+ <p conref="../shared/impala_common.xml#common/added_in_20"/>
+
+ <p conref="../shared/impala_common.xml#common/column_stats_variable"/>
+
+ <p conref="../shared/impala_common.xml#common/restrictions_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/blobs_are_strings"/>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+ <p>
+ The following examples show how long and short <codeph>VARCHAR</codeph> values are treated. Values longer
+ than the maximum specified length are truncated by <codeph>CAST()</codeph>, or when queried from existing
+ data files. Values shorter than the maximum specified length are represented as the actual length of the
+ value, with no extra padding as seen with <codeph>CHAR</codeph> values.
+ </p>
+
+<codeblock>create table varchar_1 (s varchar(1));
+create table varchar_4 (s varchar(4));
+create table varchar_20 (s varchar(20));
+
+insert into varchar_1 values (cast('a' as varchar(1))), (cast('b' as varchar(1))), (cast('hello' as varchar(1))), (cast('world' as varchar(1)));
+insert into varchar_4 values (cast('a' as varchar(4))), (cast('b' as varchar(4))), (cast('hello' as varchar(4))), (cast('world' as varchar(4)));
+insert into varchar_20 values (cast('a' as varchar(20))), (cast('b' as varchar(20))), (cast('hello' as varchar(20))), (cast('world' as varchar(20)));
+
+select * from varchar_1;
++---+
+| s |
++---+
+| a |
+| b |
+| h |
+| w |
++---+
+select * from varchar_4;
++------+
+| s |
++------+
+| a |
+| b |
+| hell |
+| worl |
++------+
+[localhost:21000] > select * from varchar_20;
++-------+
+| s |
++-------+
+| a |
+| b |
+| hello |
+| world |
++-------+
+select concat('[',s,']') as s from varchar_20;
++---------+
+| s |
++---------+
+| [a] |
+| [b] |
+| [hello] |
+| [world] |
++---------+
+</codeblock>
+
+ <p>
+ The following example shows how identical <codeph>VARCHAR</codeph> values compare as equal, even if the
+ columns are defined with different maximum lengths. Both tables contain <codeph>'a'</codeph> and
+ <codeph>'b'</codeph> values. The longer <codeph>'hello'</codeph> and <codeph>'world'</codeph> values from the
+ <codeph>VARCHAR_20</codeph> table were truncated when inserted into the <codeph>VARCHAR_1</codeph> table.
+ </p>
+
+<codeblock>select s from varchar_1 join varchar_20 using (s);
++-------+
+| s |
++-------+
+| a |
+| b |
++-------+
+</codeblock>
+
+ <p>
+ The following examples show how <codeph>VARCHAR</codeph> values are freely interchangeable with
+ <codeph>STRING</codeph> values in contexts such as comparison operators and built-in functions:
+ </p>
+
+<codeblock>select length(cast('foo' as varchar(100))) as length;
++--------+
+| length |
++--------+
+| 3 |
++--------+
+select cast('xyz' as varchar(5)) > cast('abc' as varchar(10)) as greater;
++---------+
+| greater |
++---------+
+| true |
++---------+
+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/udf_blurb_no"/>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ <xref href="impala_string.xml#string"/>, <xref href="impala_char.xml#char"/>,
+ <xref href="impala_literals.xml#string_literals"/>,
+ <xref href="impala_string_functions.xml#string_functions"/>
+ </p>
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_variance.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_variance.xml b/docs/topics/impala_variance.xml
new file mode 100644
index 0000000..e0c5d02
--- /dev/null
+++ b/docs/topics/impala_variance.xml
@@ -0,0 +1,127 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept rev="1.4" id="variance">
+
+ <title>VARIANCE, VARIANCE_SAMP, VARIANCE_POP, VAR_SAMP, VAR_POP Functions</title>
+ <titlealts><navtitle>VARIANCE, VARIANCE_SAMP, VARIANCE_POP, VAR_SAMP, VAR_POP</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Impala Functions"/>
+ <data name="Category" value="Aggregate Functions"/>
+ <data name="Category" value="Querying"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">variance() function</indexterm>
+ <indexterm audience="Cloudera">variance_samp() function</indexterm>
+ <indexterm audience="Cloudera">variance_pop() function</indexterm>
+ <indexterm audience="Cloudera">var_samp() function</indexterm>
+ <indexterm audience="Cloudera">var_pop() function</indexterm>
+ An aggregate function that returns the
+ <xref href="http://en.wikipedia.org/wiki/Variance" scope="external" format="html">variance</xref> of a set of
+ numbers. This is a mathematical property that signifies how far the values spread apart from the mean. The
+ return value can be zero (if the input is a single value, or a set of identical values), or a positive number
+ otherwise.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock>{ VARIANCE | VAR[IANCE]_SAMP | VAR[IANCE]_POP } ([DISTINCT | ALL] <varname>expression</varname>)</codeblock>
+
+ <p>
+ This function works with any numeric data type.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/former_odd_return_type_string"/>
+
+ <p>
+ This function is typically used in mathematical formulas related to probability distributions.
+ </p>
+
+ <p>
+ The <codeph>VARIANCE_SAMP()</codeph> and <codeph>VARIANCE_POP()</codeph> functions compute the sample
+ variance and population variance, respectively, of the input values. (<codeph>VARIANCE()</codeph> is an alias
+ for <codeph>VARIANCE_SAMP()</codeph>.) Both functions evaluate all input rows matched by the query. The
+ difference is that <codeph>VARIANCE_SAMP()</codeph> is scaled by <codeph>1/(N-1)</codeph> while
+ <codeph>VARIANCE_POP()</codeph> is scaled by <codeph>1/N</codeph>.
+ </p>
+
+ <p rev="2.0.0">
+ The functions <codeph>VAR_SAMP()</codeph> and <codeph>VAR_POP()</codeph> are the same as
+ <codeph>VARIANCE_SAMP()</codeph> and <codeph>VARIANCE_POP()</codeph>, respectively. These aliases are
+ available in Impala 2.0 and later.
+ </p>
+
+ <p>
+ If no input rows match the query, the result of any of these functions is <codeph>NULL</codeph>. If a single
+ input row matches the query, the result of any of these functions is <codeph>"0.0"</codeph>.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+ <p>
+ This example demonstrates how <codeph>VARIANCE()</codeph> and <codeph>VARIANCE_SAMP()</codeph> return the
+ same result, while <codeph>VARIANCE_POP()</codeph> uses a slightly different calculation to reflect that the
+ input data is considered part of a larger <q>population</q>.
+ </p>
+
+<codeblock>[localhost:21000] > select variance(score) from test_scores;
++-----------------+
+| variance(score) |
++-----------------+
+| 812.25 |
++-----------------+
+[localhost:21000] > select variance_samp(score) from test_scores;
++----------------------+
+| variance_samp(score) |
++----------------------+
+| 812.25 |
++----------------------+
+[localhost:21000] > select variance_pop(score) from test_scores;
++---------------------+
+| variance_pop(score) |
++---------------------+
+| 811.438 |
++---------------------+
+</codeblock>
+
+ <p>
+ This example demonstrates that, because the return value of these aggregate functions is a
+ <codeph>STRING</codeph>, you convert the result with <codeph>CAST</codeph> if you need to do further
+ calculations as a numeric value.
+ </p>
+
+<codeblock>[localhost:21000] > create table score_stats as select cast(stddev(score) as decimal(7,4)) `standard_deviation`, cast(variance(score) as decimal(7,4)) `variance` from test_scores;
++-------------------+
+| summary |
++-------------------+
+| Inserted 1 row(s) |
++-------------------+
+[localhost:21000] > desc score_stats;
++--------------------+--------------+---------+
+| name | type | comment |
++--------------------+--------------+---------+
+| standard_deviation | decimal(7,4) | |
+| variance | decimal(7,4) | |
++--------------------+--------------+---------+
+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/restrictions_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/analytic_not_allowed_caveat"/>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ The <codeph>STDDEV()</codeph>, <codeph>STDDEV_POP()</codeph>, and <codeph>STDDEV_SAMP()</codeph> functions
+ compute the standard deviation (square root of the variance) based on the results of
+ <codeph>VARIANCE()</codeph>, <codeph>VARIANCE_POP()</codeph>, and <codeph>VARIANCE_SAMP()</codeph>
+ respectively. See <xref href="impala_stddev.xml#stddev"/> for details about the standard deviation property.
+ </p>
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_views.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_views.xml b/docs/topics/impala_views.xml
new file mode 100644
index 0000000..a6c1a41
--- /dev/null
+++ b/docs/topics/impala_views.xml
@@ -0,0 +1,185 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept rev="1.1" id="views">
+
+ <title>Overview of Impala Views</title>
+ <titlealts><navtitle>Views</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Data Analysts"/>
+ <data name="Category" value="Developers"/>
+ <data name="Category" value="Querying"/>
+ <data name="Category" value="Tables"/>
+ <data name="Category" value="Schemas"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ Views are lightweight logical constructs that act as aliases for queries. You can specify a view name in a
+ query (a <codeph>SELECT</codeph> statement or the <codeph>SELECT</codeph> portion of an
+ <codeph>INSERT</codeph> statement) where you would usually specify a table name.
+ </p>
+
+ <p>
+ A view lets you:
+ </p>
+
+ <ul>
+ <li>
+ Issue complicated queries with compact and simple syntax:
+<codeblock>-- Take a complicated reporting query, plug it into a CREATE VIEW statement...
+create view v1 as select c1, c2, avg(c3) from t1 group by c3 order by c1 desc limit 10;
+-- ... and now you can produce the report with 1 line of code.
+select * from v1;</codeblock>
+ </li>
+
+ <li>
+ Reduce maintenance, by avoiding the duplication of complicated queries across multiple applications in
+ multiple languages:
+<codeblock>create view v2 as select t1.c1, t1.c2, t2.c3 from t1 join t2 on (t1.id = t2.id);
+-- This simple query is safer to embed in reporting applications than the longer query above.
+-- The view definition can remain stable even if the structure of the underlying tables changes.
+select c1, c2, c3 from v2;</codeblock>
+ </li>
+
+ <li>
+ Build a new, more refined query on top of the original query by adding new clauses, select-list
+ expressions, function calls, and so on:
+<codeblock>create view average_price_by_category as select category, avg(price) as avg_price from products group by category;
+create view expensive_categories as select category, avg_price from average_price_by_category order by avg_price desc limit 10000;
+create view top_10_expensive_categories as select category, avg_price from expensive_categories limit 10;</codeblock>
+ This technique lets you build up several more or less granular variations of the same query, and switch
+ between them when appropriate.
+<!-- My original assumption was confirmed correct by Alex: outer ORDER BY not actually needed.
+In this case, we put an <codeph>ORDER BY</codeph> clause on the <q>top 10</q> view, even though there was already an <codeph>ORDER BY</codeph>
+on the <q>top 10000</q> view, because when a query is executed in parallel and distributed among multiple nodes, the ordering is only
+guaranteed if there is an <codeph>ORDER BY</codeph> clause at the outermost level.
+-->
+ </li>
+
+ <li>
+ Set up aliases with intuitive names for tables, columns, result sets from joins, and so on:
+<codeblock>-- The original tables might have cryptic names inherited from a legacy system.
+create view action_items as select rrptsk as assignee, treq as due_date, dmisc as notes from vxy_t1_br;
+-- You can leave original names for compatibility, build new applications using more intuitive ones.
+select assignee, due_date, notes from action_items;</codeblock>
+ </li>
+
+ <li>
+ Swap tables with others that use different file formats, partitioning schemes, and so on without any
+ downtime for data copying or conversion:
+<codeblock>create table slow (x int, s string) stored as textfile;
+create view report as select s from slow where x between 20 and 30;
+-- Query is kind of slow due to inefficient table definition, but it works.
+select * from report;
+
+create table fast (s string) partitioned by (x int) stored as parquet;
+-- ...Copy data from SLOW to FAST. Queries against REPORT view continue to work...
+
+-- After changing the view definition, queries will be faster due to partitioning,
+-- binary format, and compression in the new table.
+alter view report as select s from fast where x between 20 and 30;
+select * from report;</codeblock>
+ </li>
+
+ <li>
+ Avoid coding lengthy subqueries and repeating the same subquery text in many other queries.
+ </li>
+
+ <li rev="2.3.0 collevelauth">
+ Set up fine-grained security where a user can query some columns from a table but not other columns.
+ Because CDH 5.5 / Impala 2.3 and higher support column-level authorization, this technique is no longer
+ required. If you formerly implemented column-level security through views, see
+ <xref href="sg_hive_sql.xml#concept_c2q_4qx_p4/col_level_auth_sentry"/> for details about the
+ column-level authorization feature.
+ <!-- See <xref href="impala_authorization.xml#security_examples/sec_ex_views"/> for details. -->
+ </li>
+ </ul>
+
+ <p>
+ The SQL statements that configure views are <xref href="impala_create_view.xml#create_view"/>,
+ <xref href="impala_alter_view.xml#alter_view"/>, and <xref href="impala_drop_view.xml#drop_view"/>. You can
+ specify view names when querying data (<xref href="impala_select.xml#select"/>) and copying data from one
+ table to another (<xref href="impala_insert.xml#insert"/>). The <xref href="impala_with.xml#with">WITH</xref>
+ clause creates an inline view that exists only for the duration of a single query.
+ </p>
+
+<codeblock>[localhost:21000] > create view trivial as select * from customer;
+[localhost:21000] > create view some_columns as select c_first_name, c_last_name, c_login from customer;
+[localhost:21000] > select * from some_columns limit 5;
+Query finished, fetching results ...
++--------------+-------------+---------+
+| c_first_name | c_last_name | c_login |
++--------------+-------------+---------+
+| Javier | Lewis | |
+| Amy | Moses | |
+| Latisha | Hamilton | |
+| Michael | White | |
+| Robert | Moran | |
++--------------+-------------+---------+
+[localhost:21000] > create view ordered_results as select * from some_columns order by c_last_name desc, c_first_name desc limit 1000;
+[localhost:21000] > select * from ordered_results limit 5;
+Query: select * from ordered_results limit 5
+Query finished, fetching results ...
++--------------+-------------+---------+
+| c_first_name | c_last_name | c_login |
++--------------+-------------+---------+
+| Thomas | Zuniga | |
+| Sarah | Zuniga | |
+| Norma | Zuniga | |
+| Lloyd | Zuniga | |
+| Lisa | Zuniga | |
++--------------+-------------+---------+
+Returned 5 row(s) in 0.48s</codeblock>
+
+ <p>
+ The previous example uses descending order for <codeph>ORDERED_RESULTS</codeph> because in the sample TPC-DS
+ data, there are some rows with empty strings for both <codeph>C_FIRST_NAME</codeph> and
+ <codeph>C_LAST_NAME</codeph>, making the lowest-ordered names not useful in a sample query.
+ </p>
+
+<codeblock>create view visitors_by_day as select day, count(distinct visitors) as howmany from web_traffic group by day;
+create view top_10_days as select day, howmany from visitors_by_day order by howmany limit 10;
+select * from top_10_days;</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/describe_formatted_view"/>
+
+ <p conref="../shared/impala_common.xml#common/create_table_like_view"/>
+
+ <p conref="../shared/impala_common.xml#common/complex_types_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/complex_types_views"/>
+
+ <p conref="../shared/impala_common.xml#common/restrictions_blurb"/>
+
+ <ul>
+ <li>
+ <p>
+ You cannot insert into an Impala view. (In some database systems, this operation is allowed and inserts
+ rows into the base table.) You can use a view name on the right-hand side of an <codeph>INSERT</codeph>
+ statement, in the <codeph>SELECT</codeph> part.
+ </p>
+ </li>
+
+ <li>
+<!-- This same text is conref'ed in the #views and the #partition_pruning topics. -->
+ <p conref="../shared/impala_common.xml#common/partitions_and_views"/>
+ </li>
+
+ <li rev="1.4.0">
+ <p conref="../shared/impala_common.xml#common/order_by_view_restriction"/>
+ </li>
+ </ul>
+
+ <p>
+ <b>Related statements:</b> <xref href="impala_create_view.xml#create_view"/>,
+ <xref href="impala_alter_view.xml#alter_view"/>, <xref href="impala_drop_view.xml#drop_view"/>
+ </p>
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_with.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_with.xml b/docs/topics/impala_with.xml
new file mode 100644
index 0000000..8d1001c
--- /dev/null
+++ b/docs/topics/impala_with.xml
@@ -0,0 +1,64 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept rev="1.1" id="with">
+
+ <title>WITH Clause</title>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Querying"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ A clause that can be added before a <codeph>SELECT</codeph> statement, to define aliases for complicated
+ expressions that are referenced multiple times within the body of the <codeph>SELECT</codeph>. Similar to
+ <codeph>CREATE VIEW</codeph>, except that the table and column names defined in the <codeph>WITH</codeph>
+ clause do not persist after the query finishes, and do not conflict with names used in actual tables or
+ views. Also known as <q>subquery factoring</q>.
+ </p>
+
+ <p>
+ You can rewrite a query using subqueries to work the same as with the <codeph>WITH</codeph> clause. The
+ purposes of the <codeph>WITH</codeph> clause are:
+ </p>
+
+ <ul>
+ <li>
+ Convenience and ease of maintenance from less repetition within the body of the query. Typically used with
+ queries involving <codeph>UNION</codeph>, joins, or aggregation functions where the similar complicated
+ expressions are referenced multiple times.
+ </li>
+
+ <li>
+ SQL code that is easier to read and understand by abstracting the most complex part of the query into a
+ separate block.
+ </li>
+
+ <li>
+ Improved compatibility with SQL from other database systems that support the same clause (primarily Oracle
+ Database).
+ <note>
+ <p>
+ The Impala <codeph>WITH</codeph> clause does not support recursive queries in the
+ <codeph>WITH</codeph> clause, which are supported in some other database systems.
+ </p>
+ </note>
+ </li>
+ </ul>
+
+ <p conref="../shared/impala_common.xml#common/sql1999"/>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+<codeblock>-- Define 2 subqueries that can be referenced from the body of a longer query.
+with t1 as (select 1), t2 as (select 2) insert into tab select * from t1 union all select * from t2;
+
+-- Define one subquery at the outer level, and another at the inner level as part of the
+-- initial stage of the UNION ALL query.
+with t1 as (select 1) (with t2 as (select 2) select * from t2) union all select * from t1;</codeblock>
+ </conbody>
+</concept>
[18/22] incubator-impala git commit: First try at porting over the
source files necessary for the Impala SQL Reference.
Posted by jr...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_array.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_array.xml b/docs/topics/impala_array.xml
new file mode 100644
index 0000000..1e60795
--- /dev/null
+++ b/docs/topics/impala_array.xml
@@ -0,0 +1,266 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="array">
+
+ <title>ARRAY Complex Type (CDH 5.5 or higher only)</title>
+
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="Impala Data Types"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ A complex data type that can represent an arbitrary number of ordered elements.
+ The elements can be scalars or another complex type (<codeph>ARRAY</codeph>,
+ <codeph>STRUCT</codeph>, or <codeph>MAP</codeph>).
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<!-- To do: make sure there is sufficient syntax info under the SELECT statement to understand how to query all the complex types. -->
+
+<codeblock><varname>column_name</varname> ARRAY &lt; <varname>type</varname> &gt;
+
+type ::= <varname>primitive_type</varname> | <varname>complex_type</varname>
+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/complex_types_combo"/>
+
+ <p>
+ The elements of the array have no names. You refer to the value of the array item using the
+ <codeph>ITEM</codeph> pseudocolumn, or its position in the array with the <codeph>POS</codeph>
+ pseudocolumn. See <xref href="impala_complex_types.xml#item"/> for information about
+ these pseudocolumns.
+ </p>
+
+<!-- Array is a frequently used idiom; don't recommend MAP right up front, since that is more rarely used. STRUCT has all different considerations.
+ <p>
+ If it would be logical to have a fixed number of elements and give each one a name, consider using a
+ <codeph>MAP</codeph> (when all elements are of the same type) or a <codeph>STRUCT</codeph> (if different
+ elements have different types) instead of an <codeph>ARRAY</codeph>.
+ </p>
+-->
+
+ <p>
+ Each row can have a different number of elements (including none) in the array for that row.
+ </p>
+
+<!-- Since you don't use numeric indexes, this assertion and advice doesn't make sense.
+ <p>
+ If you attempt to refer to a non-existent array element, the result is <codeph>NULL</codeph>. Therefore,
+ when using operations such as addition or string concatenation involving array elements, you might use
+ conditional functions to substitute default values such as 0 or <codeph>""</codeph> in the place of missing
+ array elements.
+ </p>
+-->
+
+ <p>
+ When an array contains items of scalar types, you can use aggregation functions on the array elements without using join notation. For
+ example, you can find the <codeph>COUNT()</codeph>, <codeph>AVG()</codeph>, <codeph>SUM()</codeph>, and so on of numeric array
+ elements, or the <codeph>MAX()</codeph> and <codeph>MIN()</codeph> of any scalar array elements by referring to
+ <codeph><varname>table_name</varname>.<varname>array_column</varname></codeph> in the <codeph>FROM</codeph> clause of the query. When
+ you need to cross-reference values from the array with scalar values from the same row, such as by including a <codeph>GROUP
+ BY</codeph> clause to produce a separate aggregated result for each row, then the join clause is required.
+ </p>
+
+ <p>
+ A common usage pattern with complex types is to have an array as the top-level type for the column:
+ an array of structs, an array of maps, or an array of arrays.
+ For example, you can model a denormalized table by creating a column that is an <codeph>ARRAY</codeph>
+ of <codeph>STRUCT</codeph> elements; each item in the array represents a row from a table that would
+ normally be used in a join query. This kind of data structure lets you essentially denormalize tables by
+ associating multiple rows from one table with the matching row in another table.
+ </p>
+
+ <p>
+ You typically do not create more than one top-level <codeph>ARRAY</codeph> column, because if there is
+ some relationship between the elements of multiple arrays, it is convenient to model the data as
+ an array of another complex type element (either <codeph>STRUCT</codeph> or <codeph>MAP</codeph>).
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/complex_types_describe"/>
+
+ <p conref="../shared/impala_common.xml#common/added_in_230"/>
+
+ <p conref="../shared/impala_common.xml#common/restrictions_blurb"/>
+
+ <ul conref="../shared/impala_common.xml#common/complex_types_restrictions">
+ <li/>
+ </ul>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+ <note conref="../shared/impala_common.xml#common/complex_type_schema_pointer"/>
+
+ <p>
+ The following example shows how to construct a table with various kinds of <codeph>ARRAY</codeph> columns,
+ both at the top level and nested within other complex types.
+ Whenever the <codeph>ARRAY</codeph> consists of a scalar value, such as in the <codeph>PETS</codeph>
+ column or the <codeph>CHILDREN</codeph> field, you can see that future expansion is limited.
+ For example, you could not easily evolve the schema to record the kind of pet or the child's birthday alongside the name.
+ Therefore, it is more common to use an <codeph>ARRAY</codeph> whose elements are of <codeph>STRUCT</codeph> type,
+ to associate multiple fields with each array element.
+ </p>
+
+ <note>
+ Practice the <codeph>CREATE TABLE</codeph> and query notation for complex type columns
+ using empty tables, until you can visualize a complex data structure and construct corresponding SQL statements reliably.
+ </note>
+
+<!-- To do: verify and flesh out this example. -->
+
+<codeblock><![CDATA[CREATE TABLE array_demo
+(
+ id BIGINT,
+ name STRING,
+-- An ARRAY of scalar type as a top-level column.
+ pets ARRAY <STRING>,
+
+-- An ARRAY with elements of complex type (STRUCT).
+ places_lived ARRAY < STRUCT <
+ place: STRING,
+ start_year: INT
+ >>,
+
+-- An ARRAY as a field (CHILDREN) within a STRUCT.
+-- (The STRUCT is inside another ARRAY, because it is rare
+-- for a STRUCT to be a top-level column.)
+ marriages ARRAY < STRUCT <
+ spouse: STRING,
+ children: ARRAY <STRING>
+ >>,
+
+-- An ARRAY as the value part of a MAP.
+-- The first MAP field (the key) would be a value such as
+-- 'Parent' or 'Grandparent', and the corresponding array would
+-- represent 2 parents, 4 grandparents, and so on.
+ ancestors MAP < STRING, ARRAY <STRING> >
+)
+STORED AS PARQUET;
+]]>
+</codeblock>
+
+ <p>
+ The following example shows how to examine the structure of a table containing one or more <codeph>ARRAY</codeph> columns by using the
+ <codeph>DESCRIBE</codeph> statement. You can visualize each <codeph>ARRAY</codeph> as its own two-column table, with columns
+ <codeph>ITEM</codeph> and <codeph>POS</codeph>.
+ </p>
+
+<!-- To do: extend the examples to include MARRIAGES and ANCESTORS columns, or get rid of those columns. -->
+
+<codeblock><![CDATA[DESCRIBE array_demo;
++--------------+---------------------------+
+| name | type |
++--------------+---------------------------+
+| id | bigint |
+| name | string |
+| pets | array<string> |
+| places_lived | array<struct< |
+| | place:string, |
+| | start_year:int |
+| | >> |
+| marriages | array<struct< |
+| | spouse:string, |
+| | children:array<string> |
+| | >> |
+| ancestors | map<string,array<string>> |
++--------------+---------------------------+
+
+DESCRIBE array_demo.pets;
++------+--------+
+| name | type |
++------+--------+
+| item | string |
+| pos | bigint |
++------+--------+
+
+DESCRIBE array_demo.marriages;
++------+--------------------------+
+| name | type |
++------+--------------------------+
+| item | struct< |
+| | spouse:string, |
+| | children:array<string> |
+| | > |
+| pos | bigint |
++------+--------------------------+
+
+DESCRIBE array_demo.places_lived;
++------+------------------+
+| name | type |
++------+------------------+
+| item | struct< |
+| | place:string, |
+| | start_year:int |
+| | > |
+| pos | bigint |
++------+------------------+
+
+DESCRIBE array_demo.ancestors;
++-------+---------------+
+| name | type |
++-------+---------------+
+| key | string |
+| value | array<string> |
++-------+---------------+
+]]>
+</codeblock>
+
+ <p>
+ The following example shows queries involving <codeph>ARRAY</codeph> columns containing elements of scalar or complex types. You
+ <q>unpack</q> each <codeph>ARRAY</codeph> column by referring to it in a join query, as if it were a separate table with
+ <codeph>ITEM</codeph> and <codeph>POS</codeph> columns. If the array element is a scalar type, you refer to its value using the
+ <codeph>ITEM</codeph> pseudocolumn. If the array element is a <codeph>STRUCT</codeph>, you refer to the <codeph>STRUCT</codeph> fields
+ using dot notation and the field names. If the array element is another <codeph>ARRAY</codeph> or a <codeph>MAP</codeph>, you use
+ another level of join to unpack the nested collection elements.
+ </p>
+
+<!-- To do: have some sample output to show for these queries. -->
+
+<codeblock><![CDATA[-- Array of scalar values.
+-- Each array element represents a single string, plus we know its position in the array.
+SELECT id, name, pets.pos, pets.item FROM array_demo, array_demo.pets;
+
+-- Array of structs.
+-- Now each array element has named fields, possibly of different types.
+-- You can consider an ARRAY of STRUCT to represent a table inside another table.
+SELECT id, name, places_lived.pos, places_lived.item.place, places_lived.item.start_year
+FROM array_demo, array_demo.places_lived;
+
+-- The .ITEM name is optional for array elements that are structs.
+-- The following query is equivalent to the previous one, with .ITEM
+-- removed from the column references.
+SELECT id, name, places_lived.pos, places_lived.place, places_lived.start_year
+ FROM array_demo, array_demo.places_lived;
+
+-- To filter specific items from the array, do comparisons against the .POS or .ITEM
+-- pseudocolumns, or names of struct fields, in the WHERE clause.
+SELECT id, name, pets.item FROM array_demo, array_demo.pets
+ WHERE pets.pos in (0, 1, 3);
+
+SELECT id, name, pets.item FROM array_demo, array_demo.pets
+ WHERE pets.item LIKE 'Mr. %';
+
+SELECT id, name, places_lived.pos, places_lived.place, places_lived.start_year
+ FROM array_demo, array_demo.places_lived
+WHERE places_lived.place like '%California%';
+]]>
+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ <xref href="impala_complex_types.xml#complex_types"/>,
+<!-- <xref href="impala_array.xml#array"/>, -->
+ <xref href="impala_struct.xml#struct"/>, <xref href="impala_map.xml#map"/>
+ </p>
+
+ </conbody>
+
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_avg.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_avg.xml b/docs/topics/impala_avg.xml
new file mode 100644
index 0000000..26f5450
--- /dev/null
+++ b/docs/topics/impala_avg.xml
@@ -0,0 +1,223 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="avg">
+
+ <title>AVG Function</title>
+ <titlealts><navtitle>AVG</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Impala Functions"/>
+ <data name="Category" value="Analytic Functions"/>
+ <data name="Category" value="Aggregate Functions"/>
+ <data name="Category" value="Querying"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">avg() function</indexterm>
+ An aggregate function that returns the average value from a set of numbers or <codeph>TIMESTAMP</codeph> values.
+ Its single argument can be a numeric column, or the numeric result of a function or expression applied to the
+ column value. Rows with a <codeph>NULL</codeph> value for the specified column are ignored. If the table is empty,
+ or all the values supplied to <codeph>AVG</codeph> are <codeph>NULL</codeph>, <codeph>AVG</codeph> returns
+ <codeph>NULL</codeph>.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock>AVG([DISTINCT | ALL] <varname>expression</varname>) [OVER (<varname>analytic_clause</varname>)]
+</codeblock>
+
+ <p>
+ When the query contains a <codeph>GROUP BY</codeph> clause, returns one value for each combination of
+ grouping values.
+ </p>
+
+ <p>
+ <b>Return type:</b> <codeph>DOUBLE</codeph> for numeric values; <codeph>TIMESTAMP</codeph> for
+ <codeph>TIMESTAMP</codeph> values
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/complex_types_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/complex_types_aggregation_explanation"/>
+
+ <p conref="../shared/impala_common.xml#common/complex_types_aggregation_example"/>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+<codeblock>-- Average all the non-NULL values in a column.
+insert overwrite avg_t values (2),(4),(6),(null),(null);
+-- The average of the above values is 4: (2+4+6) / 3. The 2 NULL values are ignored.
+select avg(x) from avg_t;
+-- Average only certain values from the column.
+select avg(x) from t1 where month = 'January' and year = '2013';
+-- Apply a calculation to the value of the column before averaging.
+select avg(x/3) from t1;
+-- Apply a function to the value of the column before averaging.
+-- Here we are substituting a value of 0 for all NULLs in the column,
+-- so that those rows do factor into the return value.
+select avg(isnull(x,0)) from t1;
+-- Apply some number-returning function to a string column and average the results.
+-- If column s contains any NULLs, length(s) also returns NULL and those rows are ignored.
+select avg(length(s)) from t1;
+-- Can also be used in combination with DISTINCT and/or GROUP BY.
+-- Return more than one result.
+select month, year, avg(page_visits) from web_stats group by month, year;
+-- Filter the input to eliminate duplicates before performing the calculation.
+select avg(distinct x) from t1;
+-- Filter the output after performing the calculation.
+select avg(x) from t1 group by y having avg(x) between 1 and 20;
+</codeblock>
+
+ <p rev="2.0.0">
+ The following examples show how to use <codeph>AVG()</codeph> in an analytic context. They use a table
+ containing integers from 1 to 10. Notice how the <codeph>AVG()</codeph> is reported for each input value, as
+ opposed to the <codeph>GROUP BY</codeph> clause which condenses the result set.
+<codeblock>select x, property, avg(x) over (partition by property) as avg from int_t where property in ('odd','even');
++----+----------+-----+
+| x | property | avg |
++----+----------+-----+
+| 2 | even | 6 |
+| 4 | even | 6 |
+| 6 | even | 6 |
+| 8 | even | 6 |
+| 10 | even | 6 |
+| 1 | odd | 5 |
+| 3 | odd | 5 |
+| 5 | odd | 5 |
+| 7 | odd | 5 |
+| 9 | odd | 5 |
++----+----------+-----+
+</codeblock>
+
+Adding an <codeph>ORDER BY</codeph> clause lets you experiment with results that are cumulative or apply to a moving
+set of rows (the <q>window</q>). The following examples use <codeph>AVG()</codeph> in an analytic context
+(that is, with an <codeph>OVER()</codeph> clause) to produce a running average of all the even values,
+then a running average of all the odd values. The basic <codeph>ORDER BY x</codeph> clause implicitly
+activates a window clause of <codeph>RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW</codeph>,
+which is effectively the same as <codeph>ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW</codeph>,
+therefore all of these examples produce the same results:
+<codeblock>select x, property,
+ avg(x) over (partition by property <b>order by x</b>) as 'cumulative average'
+ from int_t where property in ('odd','even');
++----+----------+--------------------+
+| x | property | cumulative average |
++----+----------+--------------------+
+| 2 | even | 2 |
+| 4 | even | 3 |
+| 6 | even | 4 |
+| 8 | even | 5 |
+| 10 | even | 6 |
+| 1 | odd | 1 |
+| 3 | odd | 2 |
+| 5 | odd | 3 |
+| 7 | odd | 4 |
+| 9 | odd | 5 |
++----+----------+--------------------+
+
+select x, property,
+ avg(x) over
+ (
+ partition by property
+ <b>order by x</b>
+ <b>range between unbounded preceding and current row</b>
+ ) as 'cumulative average'
+from int_t where property in ('odd','even');
++----+----------+--------------------+
+| x | property | cumulative average |
++----+----------+--------------------+
+| 2 | even | 2 |
+| 4 | even | 3 |
+| 6 | even | 4 |
+| 8 | even | 5 |
+| 10 | even | 6 |
+| 1 | odd | 1 |
+| 3 | odd | 2 |
+| 5 | odd | 3 |
+| 7 | odd | 4 |
+| 9 | odd | 5 |
++----+----------+--------------------+
+
+select x, property,
+ avg(x) over
+ (
+ partition by property
+ <b>order by x</b>
+ <b>rows between unbounded preceding and current row</b>
+ ) as 'cumulative average'
+ from int_t where property in ('odd','even');
++----+----------+--------------------+
+| x | property | cumulative average |
++----+----------+--------------------+
+| 2 | even | 2 |
+| 4 | even | 3 |
+| 6 | even | 4 |
+| 8 | even | 5 |
+| 10 | even | 6 |
+| 1 | odd | 1 |
+| 3 | odd | 2 |
+| 5 | odd | 3 |
+| 7 | odd | 4 |
+| 9 | odd | 5 |
++----+----------+--------------------+
+</codeblock>
+
+The following examples show how to construct a moving window, with a running average taking into account 1 row before
+and 1 row after the current row, within the same partition (all the even values or all the odd values).
+Because of a restriction in the Impala <codeph>RANGE</codeph> syntax, this type of
+moving window is possible with the <codeph>ROWS BETWEEN</codeph> clause but not the <codeph>RANGE BETWEEN</codeph>
+clause:
+<codeblock>select x, property,
+ avg(x) over
+ (
+ partition by property
+ <b>order by x</b>
+ <b>rows between 1 preceding and 1 following</b>
+ ) as 'moving average'
+ from int_t where property in ('odd','even');
++----+----------+----------------+
+| x | property | moving average |
++----+----------+----------------+
+| 2 | even | 3 |
+| 4 | even | 4 |
+| 6 | even | 6 |
+| 8 | even | 8 |
+| 10 | even | 9 |
+| 1 | odd | 2 |
+| 3 | odd | 3 |
+| 5 | odd | 5 |
+| 7 | odd | 7 |
+| 9 | odd | 8 |
++----+----------+----------------+
+
+-- Doesn't work because of syntax restriction on RANGE clause.
+select x, property,
+ avg(x) over
+ (
+ partition by property
+ <b>order by x</b>
+ <b>range between 1 preceding and 1 following</b>
+ ) as 'moving average'
+from int_t where property in ('odd','even');
+ERROR: AnalysisException: RANGE is only supported with both the lower and upper bounds UNBOUNDED or one UNBOUNDED and the other CURRENT ROW.
+</codeblock>
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/restrictions_blurb"/>
+
+<!-- This conref appears under SUM(), AVG(), FLOAT, and DOUBLE topics. -->
+
+ <p conref="../shared/impala_common.xml#common/sum_double"/>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ <xref href="impala_analytic_functions.xml#analytic_functions"/>, <xref href="impala_max.xml#max"/>,
+ <xref href="impala_min.xml#min"/>
+ </p>
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_batch_size.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_batch_size.xml b/docs/topics/impala_batch_size.xml
new file mode 100644
index 0000000..13a4b18
--- /dev/null
+++ b/docs/topics/impala_batch_size.xml
@@ -0,0 +1,33 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="batch_size">
+
+ <title>BATCH_SIZE Query Option</title>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="Impala Query Options"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">BATCH_SIZE query option</indexterm>
+ Number of rows evaluated at a time by SQL operators. If this option is unspecified, or set to 0, a predefined
+ default size is used. Using a large number improves responsiveness, especially for scan operations, at the cost of a higher memory footprint.
+ </p>
+
+ <p>
+ This option is primarily for Cloudera testing, or for use under the direction of Cloudera Support.
+ </p>
+
+ <p>
+ <b>Type:</b> numeric
+ </p>
+
+ <p>
+ <b>Default:</b> 0 (meaning the predefined default of 1024)
+ </p>
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_bigint.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_bigint.xml b/docs/topics/impala_bigint.xml
new file mode 100644
index 0000000..8f31bc6
--- /dev/null
+++ b/docs/topics/impala_bigint.xml
@@ -0,0 +1,100 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="bigint">
+
+ <title>BIGINT Data Type</title>
+ <titlealts><navtitle>BIGINT</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="Impala Data Types"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Data Analysts"/>
+ <data name="Category" value="Developers"/>
+ <data name="Category" value="Schemas"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ An 8-byte integer data type used in <codeph>CREATE TABLE</codeph> and <codeph>ALTER TABLE</codeph>
+ statements.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+ <p>
+ In the column definition of a <codeph>CREATE TABLE</codeph> statement:
+ </p>
+
+<codeblock><varname>column_name</varname> BIGINT</codeblock>
+
+ <p>
+ <b>Range:</b> -9223372036854775808 .. 9223372036854775807. There is no <codeph>UNSIGNED</codeph> subtype.
+ </p>
+
+ <p>
+ <b>Conversions:</b> Impala automatically converts to a floating-point type (<codeph>FLOAT</codeph> or
+ <codeph>DOUBLE</codeph>). Use <codeph>CAST()</codeph> to convert to <codeph>TINYINT</codeph>,
+ <codeph>SMALLINT</codeph>, <codeph>INT</codeph>, <codeph>STRING</codeph>, or <codeph>TIMESTAMP</codeph>.
+ <ph conref="../shared/impala_common.xml#common/cast_int_to_timestamp"/>
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+<codeblock>CREATE TABLE t1 (x BIGINT);
+SELECT CAST(1000 AS BIGINT);
+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+
+ <p>
+ <codeph>BIGINT</codeph> is a convenient type to use for column declarations because you can use any kind of
+ integer values in <codeph>INSERT</codeph> statements and they are promoted to <codeph>BIGINT</codeph> where
+ necessary. However, <codeph>BIGINT</codeph> also requires the most bytes of any integer type on disk and in
+ memory, meaning your queries are not as efficient and scalable as possible if you overuse this type.
+ Therefore, prefer to use the smallest integer type with sufficient range to hold all input values, and
+ <codeph>CAST()</codeph> when necessary to the appropriate type.
+ </p>
+
+ <p>
+ For a convenient and automated way to check the bounds of the <codeph>BIGINT</codeph> type, call the
+ functions <codeph>MIN_BIGINT()</codeph> and <codeph>MAX_BIGINT()</codeph>.
+ </p>
+
+ <p>
+ If an integer value is too large to be represented as a <codeph>BIGINT</codeph>, use a
+ <codeph>DECIMAL</codeph> instead with sufficient digits of precision.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/null_bad_numeric_cast"/>
+
+ <p conref="../shared/impala_common.xml#common/partitioning_good"/>
+
+ <p conref="../shared/impala_common.xml#common/hbase_ok"/>
+
+<!-- <p conref="/Content/impala_common_xi44078.xml#common/parquet_blurb"/> -->
+
+ <p conref="../shared/impala_common.xml#common/text_bulky"/>
+
+<!-- <p conref="/Content/impala_common_xi44078.xml#common/compatibility_blurb"/> -->
+
+ <p conref="../shared/impala_common.xml#common/internals_8_bytes"/>
+
+ <p conref="../shared/impala_common.xml#common/added_forever"/>
+
+ <p conref="../shared/impala_common.xml#common/column_stats_constant"/>
+
+<!-- <p conref="/Content/impala_common_xi44078.xml#common/restrictions_blurb"/> -->
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ <xref href="impala_literals.xml#numeric_literals"/>, <xref href="impala_tinyint.xml#tinyint"/>,
+ <xref href="impala_smallint.xml#smallint"/>, <xref href="impala_int.xml#int"/>,
+ <xref href="impala_bigint.xml#bigint"/>, <xref href="impala_decimal.xml#decimal"/>,
+ <xref href="impala_math_functions.xml#math_functions"/>
+ </p>
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_bit_functions.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_bit_functions.xml b/docs/topics/impala_bit_functions.xml
new file mode 100644
index 0000000..77c7e5d
--- /dev/null
+++ b/docs/topics/impala_bit_functions.xml
@@ -0,0 +1,798 @@
+<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="bit_functions" rev="2.3.0">
+
+ <title>Impala Bit Functions</title>
+ <titlealts><navtitle>Bit Functions</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="Impala Functions"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Data Analysts"/>
+ <data name="Category" value="Developers"/>
+ <data name="Category" value="Querying"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ Bit manipulation functions perform bitwise operations involved in scientific processing or computer science algorithms.
+ For example, these functions include setting, clearing, or testing bits within an integer value, or changing the
+ positions of bits with or without wraparound.
+ </p>
+
+ <p>
+ If a function takes two integer arguments that are required to be of the same type, the smaller argument is promoted
+ to the type of the larger one if required. For example, <codeph>BITAND(1,4096)</codeph> treats both arguments as
+ <codeph>SMALLINT</codeph>, because 1 can be represented as a <codeph>TINYINT</codeph> but 4096 requires a <codeph>SMALLINT</codeph>.
+ </p>
+
+ <p>
+ Remember that all Impala integer values are signed. Therefore, when dealing with binary values where the most significant
+ bit is 1, the specified or returned values might be negative when represented in base 10.
+ </p>
+
+ <p>
+ Whenever any argument is <codeph>NULL</codeph> (whether the input value, the bit position, or the number of shift or rotate positions),
+ the return value from any of these functions is also <codeph>NULL</codeph>.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ The bit functions operate on all the integral data types: <xref href="impala_int.xml#int"/>,
+ <xref href="impala_bigint.xml#bigint"/>, <xref href="impala_smallint.xml#smallint"/>, and
+ <xref href="impala_tinyint.xml#tinyint"/>.
+ </p>
+
+ <p>
+ <b>Function reference:</b>
+ </p>
+
+ <p>
+ Impala supports the following bit functions:
+ </p>
+
+<!--
+bitand
+bitnot
+bitor
+bitxor
+countset
+getbit
+rotateleft
+rotateright
+setbit
+shiftleft
+shiftright
+-->
+
+<!-- Include this conref for all the bit functions, all newly added in Impala 2.3.0.
+ <p conref="../shared/impala_common.xml#common/added_in_230"/>
+-->
+
+ <dl>
+
+ <dlentry id="bitand">
+
+ <dt>
+ <codeph>bitand(integer_type a, same_type b)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">bitand() function</indexterm>
+ <b>Purpose:</b> Returns an integer value representing the bits that are set to 1 in both of the arguments.
+ If the arguments are of different sizes, the smaller is promoted to the type of the larger.
+ <p>
+ <b>Usage notes:</b> The <codeph>bitand()</codeph> function is equivalent to the <codeph>&</codeph> binary operator.
+ </p>
+ <p conref="../shared/impala_common.xml#common/return_type_same"/>
+ <p conref="../shared/impala_common.xml#common/added_in_230"/>
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+ <p>
+ The following examples show the results of ANDing integer values.
+ 255 contains all 1 bits in its lowermost 8 bits.
+ 32767 contains all 1 bits in its lowermost 15 bits.
+ <!--
+ Negative numbers have a 1 in the sign bit and the value is the
+ <xref href="https://en.wikipedia.org/wiki/Two%27s_complement" scope="external" format="html">two's complement</xref>
+ of the positive equivalent.
+ -->
+ You can use the <codeph>bin()</codeph> function to check the binary representation of any
+ integer value, although the result is always represented as a 64-bit value.
+ If necessary, the smaller argument is promoted to the
+ type of the larger one.
+ </p>
+<codeblock>select bitand(255, 32767); /* 0000000011111111 & 0111111111111111 */
++--------------------+
+| bitand(255, 32767) |
++--------------------+
+| 255 |
++--------------------+
+
+select bitand(32767, 1); /* 0111111111111111 & 0000000000000001 */
++------------------+
+| bitand(32767, 1) |
++------------------+
+| 1 |
++------------------+
+
+select bitand(32, 16); /* 00100000 & 00010000 */
++----------------+
+| bitand(32, 16) |
++----------------+
+| 0 |
++----------------+
+
+select bitand(12,5); /* 00001100 & 00000101 */
++---------------+
+| bitand(12, 5) |
++---------------+
+| 4 |
++---------------+
+
+select bitand(-1,15); /* 11111111 & 00001111 */
++----------------+
+| bitand(-1, 15) |
++----------------+
+| 15 |
++----------------+
+</codeblock>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="bitnot">
+
+ <dt>
+ <codeph>bitnot(integer_type a)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">bitnot() function</indexterm>
+ <b>Purpose:</b> Inverts all the bits of the input argument.
+ <p>
+ <b>Usage notes:</b> The <codeph>bitnot()</codeph> function is equivalent to the <codeph>~</codeph> unary operator.
+ </p>
+ <p conref="../shared/impala_common.xml#common/return_type_same"/>
+ <p conref="../shared/impala_common.xml#common/added_in_230"/>
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+ <p>
+ These examples illustrate what happens when you flip all the bits of an integer value.
+ The sign always changes. The absolute values of the original value and the
+ inverted result always differ by one.
+ <!--
+ because negative values are represented as the
+ <xref href="https://en.wikipedia.org/wiki/Two%27s_complement" scope="external" format="html">two's complement</xref>
+ of the corresponding positive value.
+ -->
+ </p>
+<codeblock>select bitnot(127); /* 01111111 -> 10000000 */
++-------------+
+| bitnot(127) |
++-------------+
+| -128 |
++-------------+
+
+select bitnot(16); /* 00010000 -> 11101111 */
++------------+
+| bitnot(16) |
++------------+
+| -17 |
++------------+
+
+select bitnot(0); /* 00000000 -> 11111111 */
++-----------+
+| bitnot(0) |
++-----------+
+| -1 |
++-----------+
+
+select bitnot(-128); /* 10000000 -> 01111111 */
++--------------+
+| bitnot(-128) |
++--------------+
+| 127 |
++--------------+
+</codeblock>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="bitor">
+
+ <dt>
+ <codeph>bitor(integer_type a, same_type b)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">bitor() function</indexterm>
+ <b>Purpose:</b> Returns an integer value representing the bits that are set to 1 in either of the arguments.
+ If the arguments are of different sizes, the smaller is promoted to the type of the larger.
+ <p>
+ <b>Usage notes:</b> The <codeph>bitor()</codeph> function is equivalent to the <codeph>|</codeph> binary operator.
+ </p>
+ <p conref="../shared/impala_common.xml#common/return_type_same"/>
+ <p conref="../shared/impala_common.xml#common/added_in_230"/>
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+ <p>
+ The following examples show the results of ORing integer values.
+ </p>
+<codeblock>select bitor(1,4); /* 00000001 | 00000100 */
++-------------+
+| bitor(1, 4) |
++-------------+
+| 5 |
++-------------+
+
+select bitor(16,48); /* 00010000 | 00110000 */
++---------------+
+| bitor(16, 48) |
++---------------+
+| 48 |
++---------------+
+
+select bitor(0,7); /* 00000000 | 00000111 */
++-------------+
+| bitor(0, 7) |
++-------------+
+| 7 |
++-------------+
+</codeblock>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="bitxor">
+
+ <dt>
+ <codeph>bitxor(integer_type a, same_type b)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">bitxor() function</indexterm>
+ <b>Purpose:</b> Returns an integer value representing the bits that are set to 1 in one but not both of the arguments.
+ If the arguments are of different sizes, the smaller is promoted to the type of the larger.
+ <p>
+ <b>Usage notes:</b> The <codeph>bitxor()</codeph> function is equivalent to the <codeph>^</codeph> binary operator.
+ </p>
+ <p conref="../shared/impala_common.xml#common/return_type_same"/>
+ <p conref="../shared/impala_common.xml#common/added_in_230"/>
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+ <p>
+ The following examples show the results of XORing integer values.
+ XORing a non-zero value with zero returns the non-zero value.
+ XORing two identical values returns zero, because all the 1 bits from the first argument are also 1 bits in the second argument.
+ XORing different non-zero values turns off some bits and leaves others turned on, based on whether the same bit is set in both arguments.
+ </p>
+<codeblock>select bitxor(0,15); /* 00000000 ^ 00001111 */
++---------------+
+| bitxor(0, 15) |
++---------------+
+| 15 |
++---------------+
+
+select bitxor(7,7); /* 00000111 ^ 00000111 */
++--------------+
+| bitxor(7, 7) |
++--------------+
+| 0 |
++--------------+
+
+select bitxor(8,4); /* 00001000 ^ 00000100 */
++--------------+
+| bitxor(8, 4) |
++--------------+
+| 12 |
++--------------+
+
+select bitxor(3,7); /* 00000011 ^ 00000111 */
++--------------+
+| bitxor(3, 7) |
++--------------+
+| 4 |
++--------------+
+</codeblock>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="countset">
+
+ <dt>
+ <codeph>countset(integer_type a [, int zero_or_one])</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">countset() function</indexterm>
+ <b>Purpose:</b> By default, returns the number of 1 bits in the specified integer value.
+ If the optional second argument is set to zero, it returns the number of 0 bits instead.
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+ <p>
+ In discussions of information theory, this operation is referred to as the
+ <q><xref href="https://en.wikipedia.org/wiki/Hamming_weight" scope="external" format="html">population count</xref></q>
+ or <q>popcount</q>.
+ </p>
+ <p conref="../shared/impala_common.xml#common/return_type_same"/>
+ <p conref="../shared/impala_common.xml#common/added_in_230"/>
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+ <p>
+ The following examples show how to count the number of 1 bits in an integer value.
+ </p>
+<codeblock>select countset(1); /* 00000001 */
++-------------+
+| countset(1) |
++-------------+
+| 1 |
++-------------+
+
+select countset(3); /* 00000011 */
++-------------+
+| countset(3) |
++-------------+
+| 2 |
++-------------+
+
+select countset(16); /* 00010000 */
++--------------+
+| countset(16) |
++--------------+
+| 1 |
++--------------+
+
+select countset(17); /* 00010001 */
++--------------+
+| countset(17) |
++--------------+
+| 2 |
++--------------+
+
+select countset(7,1); /* 00000111 = 3 1 bits; the function counts 1 bits by default */
++----------------+
+| countset(7, 1) |
++----------------+
+| 3 |
++----------------+
+
+select countset(7,0); /* 00000111 = 5 0 bits; second argument can only be 0 or 1 */
++----------------+
+| countset(7, 0) |
++----------------+
+| 5 |
++----------------+
+</codeblock>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="getbit">
+
+ <dt>
+ <codeph>getbit(integer_type a, int position)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">getbit() function</indexterm>
+ <b>Purpose:</b> Returns a 0 or 1 representing the bit at a
+ specified position. The positions are numbered right to left, starting at zero.
+ The position argument cannot be negative.
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+ <p>
+ When you use a literal input value, it is treated as an 8-bit, 16-bit,
+ and so on value, the smallest type that is appropriate.
+ The type of the input value limits the range of the positions.
+ Cast the input value to the appropriate type if you need to
+ ensure it is treated as a 64-bit, 32-bit, and so on value.
+ </p>
+ <p conref="../shared/impala_common.xml#common/return_type_same"/>
+ <p conref="../shared/impala_common.xml#common/added_in_230"/>
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+ <p>
+ The following examples show how to test a specific bit within an integer value.
+ </p>
+<codeblock>select getbit(1,0); /* 00000001 */
++--------------+
+| getbit(1, 0) |
++--------------+
+| 1 |
++--------------+
+
+select getbit(16,1); /* 00010000 */
++---------------+
+| getbit(16, 1) |
++---------------+
+| 0 |
++---------------+
+
+select getbit(16,4); /* 00010000 */
++---------------+
+| getbit(16, 4) |
++---------------+
+| 1 |
++---------------+
+
+select getbit(16,5); /* 00010000 */
++---------------+
+| getbit(16, 5) |
++---------------+
+| 0 |
++---------------+
+
+select getbit(-1,3); /* 11111111 */
++---------------+
+| getbit(-1, 3) |
++---------------+
+| 1 |
++---------------+
+
+select getbit(-1,25); /* 11111111 */
+ERROR: Invalid bit position: 25
+
+select getbit(cast(-1 as int),25); /* 11111111111111111111111111111111 */
++-----------------------------+
+| getbit(cast(-1 as int), 25) |
++-----------------------------+
+| 1 |
++-----------------------------+
+</codeblock>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="rotateleft">
+
+ <dt>
+ <codeph>rotateleft(integer_type a, int positions)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">rotateleft() function</indexterm>
+ <b>Purpose:</b> Rotates an integer value left by a specified number of bits.
+ As the most significant bit is taken out of the original value,
+ if it is a 1 bit, it is <q>rotated</q> back to the least significant bit.
+ Therefore, the final value has the same number of 1 bits as the original value,
+ just in different positions.
+ In computer science terms, this operation is a
+ <q><xref href="https://en.wikipedia.org/wiki/Circular_shift" scope="external" format="html">circular shift</xref></q>.
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+ <p>
+ Specifying a second argument of zero leaves the original value unchanged.
+ Rotating a -1 value by any number of positions still returns -1,
+ because the original value has all 1 bits and all the 1 bits are
+ preserved during rotation.
+ Similarly, rotating a 0 value by any number of positions still returns 0.
+ Rotating a value by the same number of bits as in the value returns the same value.
+ Because this is a circular operation, the number of positions is not limited
+ to the number of bits in the input value.
+ For example, rotating an 8-bit value by 1, 9, 17, and so on positions returns an
+ identical result in each case.
+ </p>
+ <p conref="../shared/impala_common.xml#common/return_type_same"/>
+ <p conref="../shared/impala_common.xml#common/added_in_230"/>
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+<codeblock>select rotateleft(1,4); /* 00000001 -> 00010000 */
++------------------+
+| rotateleft(1, 4) |
++------------------+
+| 16 |
++------------------+
+
+select rotateleft(-1,155); /* 11111111 -> 11111111 */
++---------------------+
+| rotateleft(-1, 155) |
++---------------------+
+| -1 |
++---------------------+
+
+select rotateleft(-128,1); /* 10000000 -> 00000001 */
++---------------------+
+| rotateleft(-128, 1) |
++---------------------+
+| 1 |
++---------------------+
+
+select rotateleft(-127,3); /* 10000001 -> 00001100 */
++---------------------+
+| rotateleft(-127, 3) |
++---------------------+
+| 12 |
++---------------------+
+
+</codeblock>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="rotateright">
+
+ <dt>
+ <codeph>rotateright(integer_type a, int positions)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">rotateright() function</indexterm>
+ <b>Purpose:</b> Rotates an integer value right by a specified number of bits.
+ As the least significant bit is taken out of the original value,
+ if it is a 1 bit, it is <q>rotated</q> back to the most significant bit.
+ Therefore, the final value has the same number of 1 bits as the original value,
+ just in different positions.
+ In computer science terms, this operation is a
+ <q><xref href="https://en.wikipedia.org/wiki/Circular_shift" scope="external" format="html">circular shift</xref></q>.
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+ <p>
+ Specifying a second argument of zero leaves the original value unchanged.
+ Rotating a -1 value by any number of positions still returns -1,
+ because the original value has all 1 bits and all the 1 bits are
+ preserved during rotation.
+ Similarly, rotating a 0 value by any number of positions still returns 0.
+ Rotating a value by the same number of bits as in the value returns the same value.
+ Because this is a circular operation, the number of positions is not limited
+ to the number of bits in the input value.
+ For example, rotating an 8-bit value by 1, 9, 17, and so on positions returns an
+ identical result in each case.
+ </p>
+ <p conref="../shared/impala_common.xml#common/return_type_same"/>
+ <p conref="../shared/impala_common.xml#common/added_in_230"/>
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+<codeblock>select rotateright(16,4); /* 00010000 -> 00000001 */
++--------------------+
+| rotateright(16, 4) |
++--------------------+
+| 1 |
++--------------------+
+
+select rotateright(-1,155); /* 11111111 -> 11111111 */
++----------------------+
+| rotateright(-1, 155) |
++----------------------+
+| -1 |
++----------------------+
+
+select rotateright(-128,1); /* 10000000 -> 01000000 */
++----------------------+
+| rotateright(-128, 1) |
++----------------------+
+| 64 |
++----------------------+
+
+select rotateright(-127,3); /* 10000001 -> 00110000 */
++----------------------+
+| rotateright(-127, 3) |
++----------------------+
+| 48 |
++----------------------+
+</codeblock>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="setbit">
+
+ <dt>
+ <codeph>setbit(integer_type a, int position [, int zero_or_one])</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">setbit() function</indexterm>
+ <b>Purpose:</b> By default, changes a bit at a specified position to a 1, if it is not already.
+ If the optional third argument is set to zero, the specified bit is set to 0 instead.
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+ If the bit at the specified position was already 1 (by default)
+ or 0 (with a third argument of zero), the return value is
+ the same as the first argument.
+ The positions are numbered right to left, starting at zero.
+ (Therefore, the return value could be different from the first argument
+ even if the position argument is zero.)
+ The position argument cannot be negative.
+ <p>
+ When you use a literal input value, it is treated as an 8-bit, 16-bit,
+ and so on value, the smallest type that is appropriate.
+ The type of the input value limits the range of the positions.
+ Cast the input value to the appropriate type if you need to
+ ensure it is treated as a 64-bit, 32-bit, and so on value.
+ </p>
+ <p conref="../shared/impala_common.xml#common/return_type_same"/>
+ <p conref="../shared/impala_common.xml#common/added_in_230"/>
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+<codeblock>select setbit(0,0); /* 00000000 -> 00000001 */
++--------------+
+| setbit(0, 0) |
++--------------+
+| 1 |
++--------------+
+
+select setbit(0,3); /* 00000000 -> 00001000 */
++--------------+
+| setbit(0, 3) |
++--------------+
+| 8 |
++--------------+
+
+select setbit(7,3); /* 00000111 -> 00001111 */
++--------------+
+| setbit(7, 3) |
++--------------+
+| 15 |
++--------------+
+
+select setbit(15,3); /* 00001111 -> 00001111 */
++---------------+
+| setbit(15, 3) |
++---------------+
+| 15 |
++---------------+
+
+select setbit(0,32); /* By default, 0 is a TINYINT with only 8 bits. */
+ERROR: Invalid bit position: 32
+
+select setbit(cast(0 as bigint),32); /* For BIGINT, the position can be 0..63. */
++-------------------------------+
+| setbit(cast(0 as bigint), 32) |
++-------------------------------+
+| 4294967296 |
++-------------------------------+
+
+select setbit(7,3,1); /* 00000111 -> 00001111; setting to 1 is the default */
++-----------------+
+| setbit(7, 3, 1) |
++-----------------+
+| 15 |
++-----------------+
+
+select setbit(7,2,0); /* 00000111 -> 00000011; third argument of 0 clears instead of sets */
++-----------------+
+| setbit(7, 2, 0) |
++-----------------+
+| 3 |
++-----------------+
+</codeblock>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="shiftleft">
+
+ <dt>
+ <codeph>shiftleft(integer_type a, int positions)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">shiftleft() function</indexterm>
+ <b>Purpose:</b> Shifts an integer value left by a specified number of bits.
+ As the most significant bit is taken out of the original value,
+ it is discarded and the least significant bit becomes 0.
+ In computer science terms, this operation is a <q><xref href="https://en.wikipedia.org/wiki/Logical_shift" scope="external" format="html">logical shift</xref></q>.
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+ <p>
+ The final value has either the same number of 1 bits as the original value, or fewer.
+ Shifting an 8-bit value by 8 positions, a 16-bit value by 16 positions, and so on produces
+ a result of zero.
+ </p>
+ <p>
+ Specifying a second argument of zero leaves the original value unchanged.
+ Shifting any value by 0 returns the original value.
+ Shifting any value by 1 is the same as multiplying it by 2,
+ as long as the value is small enough; larger values eventually
+ become negative when shifted, as the sign bit is set.
+ Starting with the value 1 and shifting it left by N positions gives
+ the same result as 2 to the Nth power, or <codeph>pow(2,<varname>N</varname>)</codeph>.
+ </p>
+ <p conref="../shared/impala_common.xml#common/return_type_same"/>
+ <p conref="../shared/impala_common.xml#common/added_in_230"/>
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+<codeblock>select shiftleft(1,0); /* 00000001 -> 00000001 */
++-----------------+
+| shiftleft(1, 0) |
++-----------------+
+| 1 |
++-----------------+
+
+select shiftleft(1,3); /* 00000001 -> 00001000 */
++-----------------+
+| shiftleft(1, 3) |
++-----------------+
+| 8 |
++-----------------+
+
+select shiftleft(8,2); /* 00001000 -> 00100000 */
++-----------------+
+| shiftleft(8, 2) |
++-----------------+
+| 32 |
++-----------------+
+
+select shiftleft(127,1); /* 01111111 -> 11111110 */
++-------------------+
+| shiftleft(127, 1) |
++-------------------+
+| -2 |
++-------------------+
+
+select shiftleft(127,5); /* 01111111 -> 11100000 */
++-------------------+
+| shiftleft(127, 5) |
++-------------------+
+| -32 |
++-------------------+
+
+select shiftleft(-1,4); /* 11111111 -> 11110000 */
++------------------+
+| shiftleft(-1, 4) |
++------------------+
+| -16 |
++------------------+
+</codeblock>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="shiftright">
+
+ <dt>
+ <codeph>shiftright(integer_type a, int positions)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">shiftright() function</indexterm>
+ <b>Purpose:</b> Shifts an integer value right by a specified number of bits.
+ As the least significant bit is taken out of the original value,
+ it is discarded and the most significant bit becomes 0.
+ In computer science terms, this operation is a <q><xref href="https://en.wikipedia.org/wiki/Logical_shift" scope="external" format="html">logical shift</xref></q>.
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+ <p>
+ Therefore, the final value has either the same number of 1 bits as the original value, or fewer.
+ Shifting an 8-bit value by 8 positions, a 16-bit value by 16 positions, and so on produces
+ a result of zero.
+ </p>
+ <p>
+ Specifying a second argument of zero leaves the original value unchanged.
+ Shifting any value by 0 returns the original value.
+ Shifting any positive value right by 1 is the same as dividing it by 2.
+ Negative values become positive when shifted right.
+ </p>
+ <p conref="../shared/impala_common.xml#common/return_type_same"/>
+ <p conref="../shared/impala_common.xml#common/added_in_230"/>
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+<codeblock>select shiftright(16,0); /* 00010000 -> 00010000 */
++-------------------+
+| shiftright(16, 0) |
++-------------------+
+| 16 |
++-------------------+
+
+select shiftright(16,4); /* 00010000 -> 00000001 */
++-------------------+
+| shiftright(16, 4) |
++-------------------+
+| 1 |
++-------------------+
+
+select shiftright(16,5); /* 00010000 -> 00000000 */
++-------------------+
+| shiftright(16, 5) |
++-------------------+
+| 0 |
++-------------------+
+
+select shiftright(-1,1); /* 11111111 -> 01111111 */
++-------------------+
+| shiftright(-1, 1) |
++-------------------+
+| 127 |
++-------------------+
+
+select shiftright(-1,5); /* 11111111 -> 00000111 */
++-------------------+
+| shiftright(-1, 5) |
++-------------------+
+| 7 |
++-------------------+
+</codeblock>
+ </dd>
+
+ </dlentry>
+
+ </dl>
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_boolean.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_boolean.xml b/docs/topics/impala_boolean.xml
new file mode 100644
index 0000000..6a8e299
--- /dev/null
+++ b/docs/topics/impala_boolean.xml
@@ -0,0 +1,128 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="boolean">
+
+ <title>BOOLEAN Data Type</title>
+ <titlealts><navtitle>BOOLEAN</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="Impala Data Types"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Data Analysts"/>
+ <data name="Category" value="Developers"/>
+ <data name="Category" value="Schemas"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ A data type used in <codeph>CREATE TABLE</codeph> and <codeph>ALTER TABLE</codeph> statements, representing a
+ single true/false choice.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+ <p>
+ In the column definition of a <codeph>CREATE TABLE</codeph> statement:
+ </p>
+
+<codeblock><varname>column_name</varname> BOOLEAN</codeblock>
+
+ <p>
+ <b>Range:</b> <codeph>TRUE</codeph> or <codeph>FALSE</codeph>. Do not use quotation marks around the
+ <codeph>TRUE</codeph> and <codeph>FALSE</codeph> literal values. You can write the literal values in
+ uppercase, lowercase, or mixed case. The values queried from a table are always returned in lowercase,
+ <codeph>true</codeph> or <codeph>false</codeph>.
+ </p>
+
+ <p>
+ <b>Conversions:</b> Impala does not automatically convert any other type to <codeph>BOOLEAN</codeph>. All
+ conversions must use an explicit call to the <codeph>CAST()</codeph> function.
+ </p>
+
+ <p>
+ You can use <codeph>CAST()</codeph> to convert <codeph>TINYINT</codeph>, <codeph>SMALLINT</codeph>,
+ <codeph>INT</codeph>, <codeph>BIGINT</codeph>, <codeph>FLOAT</codeph>, or <codeph>DOUBLE</codeph>
+<!-- any integer or floating-point type to -->
+ to <codeph>BOOLEAN</codeph>: a value of 0 represents <codeph>false</codeph>, and any non-zero value is converted
+ to <codeph>true</codeph>.
+ </p>
+
+ <p rev="1.4.0">
+<!-- BOOLEAN-to-DECIMAL casting requested in IMPALA-991. As of Sept. 2014, designated "won't fix". -->
+ You can cast <codeph>DECIMAL</codeph> values to <codeph>BOOLEAN</codeph>, with the same treatment of zero and
+ non-zero values as the other numeric types. You cannot cast a <codeph>BOOLEAN</codeph> to a
+ <codeph>DECIMAL</codeph>.
+ </p>
+
+ <p>
+ You cannot cast a <codeph>STRING</codeph> value to <codeph>BOOLEAN</codeph>, although you can cast a
+ <codeph>BOOLEAN</codeph> value to <codeph>STRING</codeph>, returning <codeph>'1'</codeph> for
+ <codeph>true</codeph> values and <codeph>'0'</codeph> for <codeph>false</codeph> values.
+ </p>
+
+ <p>
+ Although you can cast a <codeph>TIMESTAMP</codeph> to a <codeph>BOOLEAN</codeph> or a
+ <codeph>BOOLEAN</codeph> to a <codeph>TIMESTAMP</codeph>, the results are unlikely to be useful. Any non-zero
+ <codeph>TIMESTAMP</codeph> (that is, any value other than <codeph>1970-01-01 00:00:00</codeph>) becomes
+ <codeph>TRUE</codeph> when converted to <codeph>BOOLEAN</codeph>, while <codeph>1970-01-01 00:00:00</codeph>
+ becomes <codeph>FALSE</codeph>. A value of <codeph>FALSE</codeph> becomes <codeph>1970-01-01
+ 00:00:00</codeph> when converted to <codeph>TIMESTAMP</codeph>, and <codeph>TRUE</codeph> becomes one second
+ past this epoch date, that is, <codeph>1970-01-01 00:00:01</codeph>.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/null_null_arguments"/>
+
+ <p conref="../shared/impala_common.xml#common/partitioning_blurb"/>
+
+ <p>
+ Do not use a <codeph>BOOLEAN</codeph> column as a partition key. Although you can create such a table,
+ subsequent operations produce errors:
+ </p>
+
+<codeblock>[localhost:21000] > create table truth_table (assertion string) partitioned by (truth boolean);
+[localhost:21000] > insert into truth_table values ('Pigs can fly',false);
+ERROR: AnalysisException: INSERT into table with BOOLEAN partition column (truth) is not supported: partitioning.truth_table
+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+<codeblock>SELECT 1 &lt; 2;
+SELECT 2 = 5;
+SELECT 100 &lt; NULL, 100 > NULL;
+CREATE TABLE assertions (claim STRING, really BOOLEAN);
+INSERT INTO assertions VALUES
+ ("1 is less than 2", 1 &lt; 2),
+ ("2 is the same as 5", 2 = 5),
+ ("Grass is green", true),
+ ("The moon is made of green cheese", false);
+SELECT claim FROM assertions WHERE really = TRUE;
+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/hbase_ok"/>
+
+ <p conref="../shared/impala_common.xml#common/parquet_ok"/>
+
+ <p conref="../shared/impala_common.xml#common/text_bulky"/>
+
+<!-- <p conref="/Content/impala_common_xi44078.xml#common/compatibility_blurb"/> -->
+
+<!-- <p conref="/Content/impala_common_xi44078.xml#common/internals_blurb"/> -->
+
+<!-- <p conref="/Content/impala_common_xi44078.xml#common/added_in_20"/> -->
+
+ <p conref="../shared/impala_common.xml#common/column_stats_constant"/>
+
+<!-- <p conref="/Content/impala_common_xi44078.xml#common/restrictions_blurb"/> -->
+
+<!-- <p conref="/Content/impala_common_xi44078.xml#common/related_info"/> -->
+
+ <p>
+ <b>Related information:</b> <xref href="impala_literals.xml#boolean_literals"/>,
+ <xref href="impala_operators.xml#operators"/>,
+ <xref href="impala_conditional_functions.xml#conditional_functions"/>
+ </p>
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_char.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_char.xml b/docs/topics/impala_char.xml
new file mode 100644
index 0000000..68cabeb
--- /dev/null
+++ b/docs/topics/impala_char.xml
@@ -0,0 +1,275 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="char" rev="2.0.0">
+
+ <title>CHAR Data Type (CDH 5.2 or higher only)</title>
+ <titlealts><navtitle>CHAR (CDH 5.2 or higher only)</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="Impala Data Types"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Data Analysts"/>
+ <data name="Category" value="Developers"/>
+ <data name="Category" value="Schemas"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">CHAR data type</indexterm>
+ A fixed-length character type, padded with trailing spaces if necessary to achieve the specified length. If
+ values are longer than the specified length, Impala truncates any trailing characters.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+ <p>
+ In the column definition of a <codeph>CREATE TABLE</codeph> statement:
+ </p>
+
+<codeblock><varname>column_name</varname> CHAR(<varname>length</varname>)</codeblock>
+
+ <p>
+ The maximum length you can specify is 255.
+ </p>
+
+ <p>
+ <b>Semantics of trailing spaces:</b>
+ </p>
+
+ <ul>
+ <li>
+ When you store a <codeph>CHAR</codeph> value shorter than the specified length in a table, queries return
+ the value padded with trailing spaces if necessary; the resulting value has the same length as specified in
+ the column definition.
+ </li>
+
+ <li>
+ If you store a <codeph>CHAR</codeph> value containing trailing spaces in a table, those trailing spaces are
+ not stored in the data file. When the value is retrieved by a query, the result could have a different
+ number of trailing spaces. That is, the value includes however many spaces are needed to pad it to the
+ specified length of the column.
+ </li>
+
+ <li>
+ If you compare two <codeph>CHAR</codeph> values that differ only in the number of trailing spaces, those
+ values are considered identical.
+ </li>
+ </ul>
+
+ <p conref="../shared/impala_common.xml#common/partitioning_bad"/>
+
+ <p conref="../shared/impala_common.xml#common/hbase_no"/>
+
+ <p conref="../shared/impala_common.xml#common/parquet_blurb"/>
+
+ <ul>
+ <li>
+ This type can be read from and written to Parquet files.
+ </li>
+
+ <li>
+ There is no requirement for a particular level of Parquet.
+ </li>
+
+ <li>
+ Parquet files generated by Impala and containing this type can be freely interchanged with other components
+ such as Hive and MapReduce.
+ </li>
+
+ <li>
+ Any trailing spaces, whether implicitly or explicitly specified, are not written to the Parquet data files.
+ </li>
+
+ <li>
+ Parquet data files might contain values that are longer than allowed by the
+ <codeph>CHAR(<varname>n</varname>)</codeph> length limit. Impala ignores any extra trailing characters when
+ it processes those values during a query.
+ </li>
+ </ul>
+
+ <p conref="../shared/impala_common.xml#common/text_blurb"/>
+
+ <p>
+ Text data files might contain values that are longer than allowed for a particular
+ <codeph>CHAR(<varname>n</varname>)</codeph> column. Any extra trailing characters are ignored when Impala
+ processes those values during a query. Text data files can also contain values that are shorter than the
+ defined length limit, and Impala pads them with trailing spaces up to the specified length. Any text data
+ files produced by Impala <codeph>INSERT</codeph> statements do not include any trailing blanks for
+ <codeph>CHAR</codeph> columns.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/compatibility_blurb"/>
+
+ <p>
+ This type is available using Impala 2.0 or higher under CDH 4, or with Impala on CDH 5.2 or higher. There are
+ no compatibility issues with other components when exchanging data files or running Impala on CDH 4.
+ </p>
+
+ <p>
+ Some other database systems make the length specification optional. For Impala, the length is required.
+ </p>
+
+<!--
+<p>
+The Impala maximum length is larger than for the <codeph>CHAR</codeph> data type in Hive.
+If a Hive query encounters a <codeph>CHAR</codeph> value longer than 255 during processing,
+it silently treats the value as length 255.
+</p>
+-->
+
+ <p conref="../shared/impala_common.xml#common/internals_max_bytes"/>
+
+ <p conref="../shared/impala_common.xml#common/added_in_20"/>
+
+ <p conref="../shared/impala_common.xml#common/column_stats_constant"/>
+
+<!-- Seems like a logical design decision but don't think it's currently implemented like this.
+<p>
+Because both the maximum and average length are always known and always the same for
+any given <codeph>CHAR(<varname>n</varname>)</codeph> column, those fields are always filled
+in for <codeph>SHOW COLUMN STATS</codeph> output, even before you run
+<codeph>COMPUTE STATS</codeph> on the table.
+</p>
+-->
+
+ <p conref="../shared/impala_common.xml#common/udf_blurb_no"/>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+ <p>
+ These examples show how trailing spaces are not considered significant when comparing or processing
+ <codeph>CHAR</codeph> values. <codeph>CAST()</codeph> truncates any longer string to fit within the defined
+ length. If a <codeph>CHAR</codeph> value is shorter than the specified length, it is padded on the right with
+ spaces until it matches the specified length. Therefore, <codeph>LENGTH()</codeph> represents the length
+ including any trailing spaces, and <codeph>CONCAT()</codeph> also treats the column value as if it has
+ trailing spaces.
+ </p>
+
+<codeblock>select cast('x' as char(4)) = cast('x ' as char(4)) as "unpadded equal to padded";
++--------------------------+
+| unpadded equal to padded |
++--------------------------+
+| true |
++--------------------------+
+
+create table char_length(c char(3));
+insert into char_length values (cast('1' as char(3))), (cast('12' as char(3))), (cast('123' as char(3))), (cast('123456' as char(3)));
+select concat("[",c,"]") as c, length(c) from char_length;
++-------+-----------+
+| c | length(c) |
++-------+-----------+
+| [1 ] | 3 |
+| [12 ] | 3 |
+| [123] | 3 |
+| [123] | 3 |
++-------+-----------+
+</codeblock>
+
+ <p>
+ This example shows a case where data values are known to have a specific length, where <codeph>CHAR</codeph>
+ is a logical data type to use.
+<!--
+Because all the <codeph>CHAR</codeph> values have a constant predictable length,
+Impala can efficiently analyze how best to use these values in join queries,
+aggregation queries, and other contexts where column length is significant.
+-->
+ </p>
+
+<codeblock>create table addresses
+ (id bigint,
+ street_name string,
+ state_abbreviation char(2),
+ country_abbreviation char(2));
+</codeblock>
+
+ <p>
+ The following example shows how values written by Impala do not physically include the trailing spaces. It
+ creates a table using text format, with <codeph>CHAR</codeph> values much shorter than the declared length,
+ and then prints the resulting data file to show that the delimited values are not separated by spaces. The
+ same behavior applies to binary-format Parquet data files.
+ </p>
+
+<codeblock>create table char_in_text (a char(20), b char(30), c char(40))
+ row format delimited fields terminated by ',';
+
+insert into char_in_text values (cast('foo' as char(20)), cast('bar' as char(30)), cast('baz' as char(40))), (cast('hello' as char(20)), cast('goodbye' as char(30)), cast('aloha' as char(40)));
+
+-- Running this Linux command inside impala-shell using the ! shortcut.
+!hdfs dfs -cat 'hdfs://127.0.0.1:8020/user/hive/warehouse/impala_doc_testing.db/char_in_text/*.*';
+foo,bar,baz
+hello,goodbye,aloha
+</codeblock>
+
+ <p>
+ The following example further illustrates the treatment of spaces. It replaces the contents of the previous
+ table with some values including leading spaces, trailing spaces, or both. Any leading spaces are preserved
+ within the data file, but trailing spaces are discarded. Then when the values are retrieved by a query, the
+ leading spaces are retrieved verbatim while any necessary trailing spaces are supplied by Impala.
+ </p>
+
+<codeblock>insert overwrite char_in_text values (cast('trailing ' as char(20)), cast(' leading and trailing ' as char(30)), cast(' leading' as char(40)));
+!hdfs dfs -cat 'hdfs://127.0.0.1:8020/user/hive/warehouse/impala_doc_testing.db/char_in_text/*.*';
+trailing, leading and trailing, leading
+
+select concat('[',a,']') as a, concat('[',b,']') as b, concat('[',c,']') as c from char_in_text;
++------------------------+----------------------------------+--------------------------------------------+
+| a | b | c |
++------------------------+----------------------------------+--------------------------------------------+
+| [trailing ] | [ leading and trailing ] | [ leading ] |
++------------------------+----------------------------------+--------------------------------------------+
+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/restrictions_blurb"/>
+
+ <p>
+ Because the blank-padding behavior requires allocating the maximum length for each value in memory, for
+ scalability reasons avoid declaring <codeph>CHAR</codeph> columns that are much longer than typical values in
+ that column.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/blobs_are_strings"/>
+
+ <p>
+ When an expression compares a <codeph>CHAR</codeph> with a <codeph>STRING</codeph> or
+ <codeph>VARCHAR</codeph>, the <codeph>CHAR</codeph> value is implicitly converted to <codeph>STRING</codeph>
+ first, with trailing spaces preserved.
+ </p>
+
+<codeblock>select cast("foo " as char(5)) = 'foo' as "char equal to string";
++----------------------+
+| char equal to string |
++----------------------+
+| false |
++----------------------+
+</codeblock>
+
+ <p>
+ This behavior differs from other popular database systems. To get the expected result of
+ <codeph>TRUE</codeph>, cast the expressions on both sides to <codeph>CHAR</codeph> values of the appropriate
+ length:
+ </p>
+
+<codeblock>select cast("foo " as char(5)) = cast('foo' as char(3)) as "char equal to string";
++----------------------+
+| char equal to string |
++----------------------+
+| true |
++----------------------+
+</codeblock>
+
+ <p>
+ This behavior is subject to change in future releases.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ <xref href="impala_string.xml#string"/>, <xref href="impala_varchar.xml#varchar"/>,
+ <xref href="impala_literals.xml#string_literals"/>,
+ <xref href="impala_string_functions.xml#string_functions"/>
+ </p>
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_comments.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_comments.xml b/docs/topics/impala_comments.xml
new file mode 100644
index 0000000..96b9479
--- /dev/null
+++ b/docs/topics/impala_comments.xml
@@ -0,0 +1,51 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="comments">
+
+ <title>Comments</title>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="SQL"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">comments (SQL)</indexterm>
+ Impala supports the familiar styles of SQL comments:
+ </p>
+
+ <ul>
+ <li>
+ All text from a <codeph>--</codeph> sequence to the end of the line is considered a comment and ignored.
+ This type of comment can occur on a single line by itself, or after all or part of a statement.
+ </li>
+
+ <li>
+ All text from a <codeph>/*</codeph> sequence to the next <codeph>*/</codeph> sequence is considered a
+ comment and ignored. This type of comment can stretch over multiple lines. This type of comment can occur
+ on one or more lines by itself, in the middle of a statement, or before or after a statement.
+ </li>
+ </ul>
+
+ <p>
+ For example:
+ </p>
+
+<codeblock>-- This line is a comment about a table.
+create table ...;
+
+/*
+This is a multi-line comment about a query.
+*/
+select ...;
+
+select * from t /* This is an embedded comment about a query. */ where ...;
+
+select * from t -- This is a trailing comment within a multi-line command.
+where ...;
+</codeblock>
+ </conbody>
+</concept>
[16/22] incubator-impala git commit: First try at porting over the
source files necessary for the Impala SQL Reference.
Posted by jr...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_compression_codec.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_compression_codec.xml b/docs/topics/impala_compression_codec.xml
new file mode 100644
index 0000000..d99ac04
--- /dev/null
+++ b/docs/topics/impala_compression_codec.xml
@@ -0,0 +1,95 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept rev="2.0.0" id="compression_codec">
+
+ <title>COMPRESSION_CODEC Query Option</title>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="Impala Query Options"/>
+ <data name="Category" value="Compression"/>
+ <data name="Category" value="File Formats"/>
+ <data name="Category" value="Parquet"/>
+ <data name="Category" value="Snappy"/>
+ <data name="Category" value="GZip"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+<!-- The initial part of this paragraph is copied straight from the #parquet_compression topic. -->
+
+<!-- Could turn into a conref. -->
+
+ <p>
+ <indexterm audience="Cloudera">COMPRESSION_CODEC query option</indexterm>
+ When Impala writes Parquet data files using the <codeph>INSERT</codeph> statement, the underlying compression
+ is controlled by the <codeph>COMPRESSION_CODEC</codeph> query option.
+ </p>
+
+ <note>
+ Prior to Impala 2.0, this option was named <codeph>PARQUET_COMPRESSION_CODEC</codeph>. In Impala 2.0 and
+ later, the <codeph>PARQUET_COMPRESSION_CODEC</codeph> name is not recognized. Use the more general name
+ <codeph>COMPRESSION_CODEC</codeph> for new code.
+ </note>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock>SET COMPRESSION_CODEC=<varname>codec_name</varname>;</codeblock>
+
+ <p>
+ The allowed values for this query option are <codeph>SNAPPY</codeph> (the default), <codeph>GZIP</codeph>,
+ and <codeph>NONE</codeph>.
+ </p>
+
+ <note>
+ A Parquet file created with <codeph>COMPRESSION_CODEC=NONE</codeph> is still typically smaller than the
+ original data, due to encoding schemes such as run-length encoding and dictionary encoding that are applied
+ separately from compression.
+ </note>
+
+ <p></p>
+
+ <p>
+ The option value is not case-sensitive.
+ </p>
+
+ <p>
+ If the option is set to an unrecognized value, all kinds of queries will fail due to the invalid option
+ setting, not just queries involving Parquet tables. (The value <codeph>BZIP2</codeph> is also recognized, but
+ is not compatible with Parquet tables.)
+ </p>
+
+ <p>
+ <b>Type:</b> <codeph>STRING</codeph>
+ </p>
+
+ <p>
+ <b>Default:</b> SNAPPY
+ </p>
+
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+<codeblock>set compression_codec=gzip;
+insert into parquet_table_highly_compressed select * from t1;
+
+set compression_codec=snappy;
+insert into parquet_table_compression_plus_fast_queries select * from t1;
+
+set compression_codec=none;
+insert into parquet_table_no_compression select * from t1;
+
+set compression_codec=foo;
+select * from t1 limit 5;
+ERROR: Invalid compression codec: foo
+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ For information about how compressing Parquet data files affects query performance, see
+ <xref href="impala_parquet.xml#parquet_compression"/>.
+ </p>
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_compute_stats.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_compute_stats.xml b/docs/topics/impala_compute_stats.xml
new file mode 100644
index 0000000..abf6645
--- /dev/null
+++ b/docs/topics/impala_compute_stats.xml
@@ -0,0 +1,418 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept rev="1.2.2" id="compute_stats">
+
+ <title>COMPUTE STATS Statement</title>
+ <titlealts><navtitle>COMPUTE STATS</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="Performance"/>
+ <data name="Category" value="Scalability"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Tables"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">COMPUTE STATS statement</indexterm>
+ Gathers information about volume and distribution of data in a table and all associated columns and
+ partitions. The information is stored in the metastore database, and used by Impala to help optimize queries.
+      For example, if Impala can determine that a table is large or small, or has many or few distinct values, it
+      can organize and parallelize the work appropriately for a join query or insert operation. For details about the
+ kinds of information gathered by this statement, see <xref href="impala_perf_stats.xml#perf_stats"/>.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock rev="2.1.0">COMPUTE STATS [<varname>db_name</varname>.]<varname>table_name</varname>
+COMPUTE INCREMENTAL STATS [<varname>db_name</varname>.]<varname>table_name</varname> [PARTITION (<varname>partition_spec</varname>)]
+
+<varname>partition_spec</varname> ::= <varname>partition_col</varname>=<varname>constant_value</varname>
+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/incremental_partition_spec"/>
+
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+
+ <p>
+ Originally, Impala relied on users to run the Hive <codeph>ANALYZE TABLE</codeph> statement, but that method
+ of gathering statistics proved unreliable and difficult to use. The Impala <codeph>COMPUTE STATS</codeph>
+ statement is built from the ground up to improve the reliability and user-friendliness of this operation.
+ <codeph>COMPUTE STATS</codeph> does not require any setup steps or special configuration. You only run a
+ single Impala <codeph>COMPUTE STATS</codeph> statement to gather both table and column statistics, rather
+ than separate Hive <codeph>ANALYZE TABLE</codeph> statements for each kind of statistics.
+ </p>
+
+ <p rev="2.1.0">
+ The <codeph>COMPUTE INCREMENTAL STATS</codeph> variation is a shortcut for partitioned tables that works on a
+ subset of partitions rather than the entire table. The incremental nature makes it suitable for large tables
+ with many partitions, where a full <codeph>COMPUTE STATS</codeph> operation takes too long to be practical
+ each time a partition is added or dropped. See <xref href="impala_perf_stats.xml#perf_stats_incremental"/>
+ for full usage details.
+ </p>
+
+ <p>
+ <codeph>COMPUTE INCREMENTAL STATS</codeph> only applies to partitioned tables. If you use the
+ <codeph>INCREMENTAL</codeph> clause for an unpartitioned table, Impala automatically uses the original
+ <codeph>COMPUTE STATS</codeph> statement. Such tables display <codeph>false</codeph> under the
+ <codeph>Incremental stats</codeph> column of the <codeph>SHOW TABLE STATS</codeph> output.
+ </p>
+
+ <note>
+ Because many of the most performance-critical and resource-intensive operations rely on table and column
+ statistics to construct accurate and efficient plans, <codeph>COMPUTE STATS</codeph> is an important step at
+ the end of your ETL process. Run <codeph>COMPUTE STATS</codeph> on all tables as your first step during
+ performance tuning for slow queries, or troubleshooting for out-of-memory conditions:
+ <ul>
+ <li>
+ Accurate statistics help Impala construct an efficient query plan for join queries, improving performance
+ and reducing memory usage.
+ </li>
+
+ <li>
+ Accurate statistics help Impala distribute the work effectively for insert operations into Parquet
+ tables, improving performance and reducing memory usage.
+ </li>
+
+ <li rev="1.3.0">
+ Accurate statistics help Impala estimate the memory required for each query, which is important when you
+ use resource management features, such as admission control and the YARN resource management framework.
+ The statistics help Impala to achieve high concurrency, full utilization of available memory, and avoid
+ contention with workloads from other Hadoop components.
+ </li>
+ </ul>
+ </note>
+
+ <p conref="../shared/impala_common.xml#common/complex_types_blurb"/>
+
+ <p rev="2.3.0">
+ Currently, the statistics created by the <codeph>COMPUTE STATS</codeph> statement do not include
+ information about complex type columns. The column stats metrics for complex columns are always shown
+ as -1. For queries involving complex type columns, Impala uses
+ heuristics to estimate the data distribution within such columns.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/hbase_blurb"/>
+
+ <p>
+ <codeph>COMPUTE STATS</codeph> works for HBase tables also. The statistics gathered for HBase tables are
+ somewhat different than for HDFS-backed tables, but that metadata is still used for optimization when HBase
+ tables are involved in join queries.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/s3_blurb"/>
+
+ <p rev="2.2.0">
+ <codeph>COMPUTE STATS</codeph> also works for tables where data resides in the Amazon Simple Storage Service (S3).
+ See <xref href="impala_s3.xml#s3"/> for details.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/performance_blurb"/>
+
+ <p>
+      The statistics collected by <codeph>COMPUTE STATS</codeph> are used to optimize join queries,
+      <codeph>INSERT</codeph> operations into Parquet tables, and other resource-intensive kinds of SQL statements.
+ See <xref href="impala_perf_stats.xml#perf_stats"/> for details.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+ <p>
+      This example shows two tables, <codeph>T1</codeph> and <codeph>T2</codeph>, with a small number of distinct
+      values linked by a parent-child relationship between <codeph>T1.ID</codeph> and <codeph>T2.PARENT</codeph>.
+      <codeph>T1</codeph> is tiny, while <codeph>T2</codeph> has approximately 100K rows. Initially, the statistics
+      include physical measurements such as the number of files, the total size, and size measurements for
+      fixed-length columns such as those with the <codeph>INT</codeph> type. Unknown values are represented by -1. After
+ running <codeph>COMPUTE STATS</codeph> for each table, much more information is available through the
+ <codeph>SHOW STATS</codeph> statements. If you were running a join query involving both of these tables, you
+ would need statistics for both tables to get the most effective optimization for the query.
+ </p>
+
+<!-- Note: chopped off any excess characters at position 87 and after,
+ to avoid weird wrapping in PDF.
+ Applies to any subsequent examples with output from SHOW ... STATS too. -->
+
+<codeblock>[localhost:21000] > show table stats t1;
+Query: show table stats t1
++-------+--------+------+--------+
+| #Rows | #Files | Size | Format |
++-------+--------+------+--------+
+| -1 | 1 | 33B | TEXT |
++-------+--------+------+--------+
+Returned 1 row(s) in 0.02s
+[localhost:21000] > show table stats t2;
+Query: show table stats t2
++-------+--------+----------+--------+
+| #Rows | #Files | Size | Format |
++-------+--------+----------+--------+
+| -1 | 28 | 960.00KB | TEXT |
++-------+--------+----------+--------+
+Returned 1 row(s) in 0.01s
+[localhost:21000] > show column stats t1;
+Query: show column stats t1
++--------+--------+------------------+--------+----------+----------+
+| Column | Type | #Distinct Values | #Nulls | Max Size | Avg Size |
++--------+--------+------------------+--------+----------+----------+
+| id | INT | -1 | -1 | 4 | 4 |
+| s | STRING | -1 | -1 | -1 | -1 |
++--------+--------+------------------+--------+----------+----------+
+Returned 2 row(s) in 1.71s
+[localhost:21000] > show column stats t2;
+Query: show column stats t2
++--------+--------+------------------+--------+----------+----------+
+| Column | Type | #Distinct Values | #Nulls | Max Size | Avg Size |
++--------+--------+------------------+--------+----------+----------+
+| parent | INT | -1 | -1 | 4 | 4 |
+| s | STRING | -1 | -1 | -1 | -1 |
++--------+--------+------------------+--------+----------+----------+
+Returned 2 row(s) in 0.01s
+[localhost:21000] > compute stats t1;
+Query: compute stats t1
++-----------------------------------------+
+| summary |
++-----------------------------------------+
+| Updated 1 partition(s) and 2 column(s). |
++-----------------------------------------+
+Returned 1 row(s) in 5.30s
+[localhost:21000] > show table stats t1;
+Query: show table stats t1
++-------+--------+------+--------+
+| #Rows | #Files | Size | Format |
++-------+--------+------+--------+
+| 3 | 1 | 33B | TEXT |
++-------+--------+------+--------+
+Returned 1 row(s) in 0.01s
+[localhost:21000] > show column stats t1;
+Query: show column stats t1
++--------+--------+------------------+--------+----------+----------+
+| Column | Type | #Distinct Values | #Nulls | Max Size | Avg Size |
++--------+--------+------------------+--------+----------+----------+
+| id | INT | 3 | -1 | 4 | 4 |
+| s | STRING | 3 | -1 | -1 | -1 |
++--------+--------+------------------+--------+----------+----------+
+Returned 2 row(s) in 0.02s
+[localhost:21000] > compute stats t2;
+Query: compute stats t2
++-----------------------------------------+
+| summary |
++-----------------------------------------+
+| Updated 1 partition(s) and 2 column(s). |
++-----------------------------------------+
+Returned 1 row(s) in 5.70s
+[localhost:21000] > show table stats t2;
+Query: show table stats t2
++-------+--------+----------+--------+
+| #Rows | #Files | Size | Format |
++-------+--------+----------+--------+
+| 98304 | 1 | 960.00KB | TEXT |
++-------+--------+----------+--------+
+Returned 1 row(s) in 0.03s
+[localhost:21000] > show column stats t2;
+Query: show column stats t2
++--------+--------+------------------+--------+----------+----------+
+| Column | Type | #Distinct Values | #Nulls | Max Size | Avg Size |
++--------+--------+------------------+--------+----------+----------+
+| parent | INT | 3 | -1 | 4 | 4 |
+| s | STRING | 6 | -1 | 14 | 9.3 |
++--------+--------+------------------+--------+----------+----------+
+Returned 2 row(s) in 0.01s</codeblock>
+
+ <p rev="2.1.0">
+ The following example shows how to use the <codeph>INCREMENTAL</codeph> clause, available in Impala 2.1.0 and
+ higher. The <codeph>COMPUTE INCREMENTAL STATS</codeph> syntax lets you collect statistics for newly added or
+ changed partitions, without rescanning the entire table.
+ </p>
+
+<codeblock>-- Initially the table has no incremental stats, as indicated
+-- by -1 under #Rows and false under Incremental stats.
+show table stats item_partitioned;
++-------------+-------+--------+----------+--------------+---------+------------------
+| i_category | #Rows | #Files | Size | Bytes Cached | Format | Incremental stats
++-------------+-------+--------+----------+--------------+---------+------------------
+| Books | -1 | 1 | 223.74KB | NOT CACHED | PARQUET | false
+| Children | -1 | 1 | 230.05KB | NOT CACHED | PARQUET | false
+| Electronics | -1 | 1 | 232.67KB | NOT CACHED | PARQUET | false
+| Home | -1 | 1 | 232.56KB | NOT CACHED | PARQUET | false
+| Jewelry | -1 | 1 | 223.72KB | NOT CACHED | PARQUET | false
+| Men | -1 | 1 | 231.25KB | NOT CACHED | PARQUET | false
+| Music | -1 | 1 | 237.90KB | NOT CACHED | PARQUET | false
+| Shoes | -1 | 1 | 234.90KB | NOT CACHED | PARQUET | false
+| Sports | -1 | 1 | 227.97KB | NOT CACHED | PARQUET | false
+| Women | -1 | 1 | 226.27KB | NOT CACHED | PARQUET | false
+| Total | -1 | 10 | 2.25MB | 0B | |
++-------------+-------+--------+----------+--------------+---------+------------------
+
+-- After the first COMPUTE INCREMENTAL STATS,
+-- all partitions have stats.
+compute incremental stats item_partitioned;
++-------------------------------------------+
+| summary |
++-------------------------------------------+
+| Updated 10 partition(s) and 21 column(s). |
++-------------------------------------------+
+show table stats item_partitioned;
++-------------+-------+--------+----------+--------------+---------+------------------
+| i_category | #Rows | #Files | Size | Bytes Cached | Format | Incremental stats
++-------------+-------+--------+----------+--------------+---------+------------------
+| Books | 1733 | 1 | 223.74KB | NOT CACHED | PARQUET | true
+| Children | 1786 | 1 | 230.05KB | NOT CACHED | PARQUET | true
+| Electronics | 1812 | 1 | 232.67KB | NOT CACHED | PARQUET | true
+| Home | 1807 | 1 | 232.56KB | NOT CACHED | PARQUET | true
+| Jewelry | 1740 | 1 | 223.72KB | NOT CACHED | PARQUET | true
+| Men | 1811 | 1 | 231.25KB | NOT CACHED | PARQUET | true
+| Music | 1860 | 1 | 237.90KB | NOT CACHED | PARQUET | true
+| Shoes | 1835 | 1 | 234.90KB | NOT CACHED | PARQUET | true
+| Sports | 1783 | 1 | 227.97KB | NOT CACHED | PARQUET | true
+| Women | 1790 | 1 | 226.27KB | NOT CACHED | PARQUET | true
+| Total | 17957 | 10 | 2.25MB | 0B | |
++-------------+-------+--------+----------+--------------+---------+------------------
+
+-- Add a new partition...
+alter table item_partitioned add partition (i_category='Camping');
+-- Add or replace files in HDFS outside of Impala,
+-- rendering the stats for a partition obsolete.
+!import_data_into_sports_partition.sh
+refresh item_partitioned;
+drop incremental stats item_partitioned partition (i_category='Sports');
+-- Now some partitions have incremental stats
+-- and some don't.
+show table stats item_partitioned;
++-------------+-------+--------+----------+--------------+---------+------------------
+| i_category | #Rows | #Files | Size | Bytes Cached | Format | Incremental stats
++-------------+-------+--------+----------+--------------+---------+------------------
+| Books | 1733 | 1 | 223.74KB | NOT CACHED | PARQUET | true
+| Camping | -1 | 1 | 408.02KB | NOT CACHED | PARQUET | false
+| Children | 1786 | 1 | 230.05KB | NOT CACHED | PARQUET | true
+| Electronics | 1812 | 1 | 232.67KB | NOT CACHED | PARQUET | true
+| Home | 1807 | 1 | 232.56KB | NOT CACHED | PARQUET | true
+| Jewelry | 1740 | 1 | 223.72KB | NOT CACHED | PARQUET | true
+| Men | 1811 | 1 | 231.25KB | NOT CACHED | PARQUET | true
+| Music | 1860 | 1 | 237.90KB | NOT CACHED | PARQUET | true
+| Shoes | 1835 | 1 | 234.90KB | NOT CACHED | PARQUET | true
+| Sports | -1 | 1 | 227.97KB | NOT CACHED | PARQUET | false
+| Women | 1790 | 1 | 226.27KB | NOT CACHED | PARQUET | true
+| Total | 17957 | 11 | 2.65MB | 0B | |
++-------------+-------+--------+----------+--------------+---------+------------------
+
+-- After another COMPUTE INCREMENTAL STATS,
+-- all partitions have incremental stats, and only the 2
+-- partitions without incremental stats were scanned.
+compute incremental stats item_partitioned;
++------------------------------------------+
+| summary |
++------------------------------------------+
+| Updated 2 partition(s) and 21 column(s). |
++------------------------------------------+
+show table stats item_partitioned;
++-------------+-------+--------+----------+--------------+---------+------------------
+| i_category | #Rows | #Files | Size | Bytes Cached | Format | Incremental stats
++-------------+-------+--------+----------+--------------+---------+------------------
+| Books | 1733 | 1 | 223.74KB | NOT CACHED | PARQUET | true
+| Camping | 5328 | 1 | 408.02KB | NOT CACHED | PARQUET | true
+| Children | 1786 | 1 | 230.05KB | NOT CACHED | PARQUET | true
+| Electronics | 1812 | 1 | 232.67KB | NOT CACHED | PARQUET | true
+| Home | 1807 | 1 | 232.56KB | NOT CACHED | PARQUET | true
+| Jewelry | 1740 | 1 | 223.72KB | NOT CACHED | PARQUET | true
+| Men | 1811 | 1 | 231.25KB | NOT CACHED | PARQUET | true
+| Music | 1860 | 1 | 237.90KB | NOT CACHED | PARQUET | true
+| Shoes | 1835 | 1 | 234.90KB | NOT CACHED | PARQUET | true
+| Sports | 1783 | 1 | 227.97KB | NOT CACHED | PARQUET | true
+| Women | 1790 | 1 | 226.27KB | NOT CACHED | PARQUET | true
+| Total | 17957 | 11 | 2.65MB | 0B | |
++-------------+-------+--------+----------+--------------+---------+------------------
+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/file_format_blurb"/>
+
+ <p>
+ The <codeph>COMPUTE STATS</codeph> statement works with tables created with any of the file formats supported
+ by Impala. See <xref href="impala_file_formats.xml#file_formats"/> for details about working with the
+ different file formats. The following considerations apply to <codeph>COMPUTE STATS</codeph> depending on the
+ file format of the table.
+ </p>
+
+ <p>
+ The <codeph>COMPUTE STATS</codeph> statement works with text tables with no restrictions. These tables can be
+ created through either Impala or Hive.
+ </p>
+
+ <p>
+ The <codeph>COMPUTE STATS</codeph> statement works with Parquet tables. These tables can be created through
+ either Impala or Hive.
+ <note conref="../shared/impala_common.xml#common/compute_stats_parquet"/>
+ </p>
+
+ <p>
+ The <codeph>COMPUTE STATS</codeph> statement works with Avro tables, as long as they are created with
+ SQL-style column names and types rather than an Avro-style schema specification. These tables are currently
+ always created through Hive rather than Impala.
+ </p>
+
+ <p>
+ The <codeph>COMPUTE STATS</codeph> statement works with RCFile tables with no restrictions. These tables can
+ be created through either Impala or Hive.
+ </p>
+
+ <p>
+ The <codeph>COMPUTE STATS</codeph> statement works with SequenceFile tables with no restrictions. These
+ tables can be created through either Impala or Hive.
+ </p>
+
+ <p>
+ The <codeph>COMPUTE STATS</codeph> statement works with partitioned tables, whether all the partitions use
+ the same file format, or some partitions are defined through <codeph>ALTER TABLE</codeph> to use different
+ file formats.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/ddl_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/cancel_blurb_maybe"/>
+
+ <p conref="../shared/impala_common.xml#common/restrictions_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/decimal_no_stats"/>
+
+ <note conref="../shared/impala_common.xml#common/compute_stats_nulls"/>
+
+ <p conref="../shared/impala_common.xml#common/internals_blurb"/>
+ <p>
+ Behind the scenes, the <codeph>COMPUTE STATS</codeph> statement
+ executes two statements: one to count the rows of each partition
+ in the table (or the entire table if unpartitioned) through the
+ <codeph>COUNT(*)</codeph> function,
+ and another to count the approximate number of distinct values
+ in each column through the <codeph>NDV()</codeph> function.
+ You might see these queries in your monitoring and diagnostic displays.
+ The same factors that affect the performance, scalability, and
+ execution of other queries (such as parallel execution, memory usage,
+ admission control, and timeouts) also apply to the queries run by the
+ <codeph>COMPUTE STATS</codeph> statement.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/permissions_blurb"/>
+ <p rev="CDH-19187">
+ The user ID that the <cmdname>impalad</cmdname> daemon runs under,
+ typically the <codeph>impala</codeph> user, must have read
+ permission for all affected files in the source directory:
+ all files in the case of an unpartitioned table or
+ a partitioned table in the case of <codeph>COMPUTE STATS</codeph>;
+ or all the files in partitions without incremental stats in
+ the case of <codeph>COMPUTE INCREMENTAL STATS</codeph>.
+ It must also have read and execute permissions for all
+ relevant directories holding the data files.
+ (Essentially, <codeph>COMPUTE STATS</codeph> requires the
+ same permissions as the underlying <codeph>SELECT</codeph> queries it runs
+ against the table.)
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ <xref href="impala_drop_stats.xml#drop_stats"/>, <xref href="impala_show.xml#show_table_stats"/>,
+ <xref href="impala_show.xml#show_column_stats"/>, <xref href="impala_perf_stats.xml#perf_stats"/>
+ </p>
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_conditional_functions.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_conditional_functions.xml b/docs/topics/impala_conditional_functions.xml
new file mode 100644
index 0000000..b922710
--- /dev/null
+++ b/docs/topics/impala_conditional_functions.xml
@@ -0,0 +1,443 @@
+<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="conditional_functions">
+
+ <title>Impala Conditional Functions</title>
+ <titlealts><navtitle>Conditional Functions</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="Impala Functions"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Data Analysts"/>
+ <data name="Category" value="Developers"/>
+ <data name="Category" value="Querying"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ Impala supports the following conditional functions for testing equality, comparison operators, and nullity:
+ </p>
+
+ <dl>
+ <dlentry id="case">
+
+ <dt>
+ <codeph>CASE a WHEN b THEN c [WHEN d THEN e]... [ELSE f] END</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">CASE expression</indexterm>
+ <b>Purpose:</b> Compares an expression to one or more possible values, and returns a corresponding result
+ when a match is found.
+ <p conref="../shared/impala_common.xml#common/return_same_type"/>
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+ <p>
+          In this form of the <codeph>CASE</codeph> expression, the initial value <codeph>A</codeph>
+          being evaluated for each row is typically a column reference, or an expression involving
+ a column. This form can only compare against a set of specified values, not ranges,
+ multi-value comparisons such as <codeph>BETWEEN</codeph> or <codeph>IN</codeph>,
+ regular expressions, or <codeph>NULL</codeph>.
+ </p>
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+ <p>
+ Although this example is split across multiple lines, you can put any or all parts of a <codeph>CASE</codeph> expression
+ on a single line, with no punctuation or other separators between the <codeph>WHEN</codeph>,
+ <codeph>ELSE</codeph>, and <codeph>END</codeph> clauses.
+ </p>
+<codeblock>select case x
+ when 1 then 'one'
+ when 2 then 'two'
+ when 0 then 'zero'
+ else 'out of range'
+ end
+ from t1;
+</codeblock>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="case2">
+
+ <dt>
+ <codeph>CASE WHEN a THEN b [WHEN c THEN d]... [ELSE e] END</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">CASE expression</indexterm>
+ <b>Purpose:</b> Tests whether any of a sequence of expressions is true, and returns a corresponding
+ result for the first true expression.
+ <p conref="../shared/impala_common.xml#common/return_same_type"/>
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+ <p>
+ <codeph>CASE</codeph> expressions without an initial test value have more flexibility.
+ For example, they can test different columns in different <codeph>WHEN</codeph> clauses,
+ or use comparison operators such as <codeph>BETWEEN</codeph>, <codeph>IN</codeph> and <codeph>IS NULL</codeph>
+ rather than comparing against discrete values.
+ </p>
+ <p>
+ <codeph>CASE</codeph> expressions are often the foundation of long queries that
+ summarize and format results for easy-to-read reports. For example, you might
+ use a <codeph>CASE</codeph> function call to turn values from a numeric column
+ into category strings corresponding to integer values, or labels such as <q>Small</q>,
+ <q>Medium</q> and <q>Large</q> based on ranges. Then subsequent parts of the
+ query might aggregate based on the transformed values, such as how many
+ values are classified as small, medium, or large. You can also use <codeph>CASE</codeph>
+ to signal problems with out-of-bounds values, <codeph>NULL</codeph> values,
+ and so on.
+ </p>
+ <p>
+ By using operators such as <codeph>OR</codeph>, <codeph>IN</codeph>,
+ <codeph>REGEXP</codeph>, and so on in <codeph>CASE</codeph> expressions,
+ you can build extensive tests and transformations into a single query.
+ Therefore, applications that construct SQL statements often rely heavily on <codeph>CASE</codeph>
+ calls in the generated SQL code.
+ </p>
+ <p>
+ Because this flexible form of the <codeph>CASE</codeph> expressions allows you to perform
+ many comparisons and call multiple functions when evaluating each row, be careful applying
+ elaborate <codeph>CASE</codeph> expressions to queries that process large amounts of data.
+ For example, when practical, evaluate and transform values through <codeph>CASE</codeph>
+ after applying operations such as aggregations that reduce the size of the result set;
+ transform numbers to strings after performing joins with the original numeric values.
+ </p>
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+ <p>
+ Although this example is split across multiple lines, you can put any or all parts of a <codeph>CASE</codeph> expression
+ on a single line, with no punctuation or other separators between the <codeph>WHEN</codeph>,
+ <codeph>ELSE</codeph>, and <codeph>END</codeph> clauses.
+ </p>
+<codeblock>select case
+ when dayname(now()) in ('Saturday','Sunday') then 'result undefined on weekends'
+ when x > y then 'x greater than y'
+ when x = y then 'x and y are equal'
+ when x is null or y is null then 'one of the columns is null'
+ else null
+ end
+ from t1;
+</codeblock>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="coalesce">
+
+ <dt>
+ <codeph>coalesce(type v1, type v2, ...)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">coalesce() function</indexterm>
+ <b>Purpose:</b> Returns the first specified argument that is not <codeph>NULL</codeph>, or
+ <codeph>NULL</codeph> if all arguments are <codeph>NULL</codeph>.
+ <p conref="../shared/impala_common.xml#common/return_same_type"/>
+ </dd>
+
+ </dlentry>
+
+ <dlentry rev="2.0.0" id="decode">
+
+ <dt>
+ <codeph>decode(type expression, type search1, type result1 [, type search2, type result2 ...] [, type
+ default] )</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">decode() function</indexterm>
+ <b>Purpose:</b> Compares an expression to one or more possible values, and returns a corresponding result
+ when a match is found.
+ <p conref="../shared/impala_common.xml#common/return_same_type"/>
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+ <p>
+ Can be used as shorthand for a <codeph>CASE</codeph> expression.
+ </p>
+ <p>
+          The original expression and the search expressions must be of the same type or convertible types. The
+          result expression can be a different type, but all result expressions must be of the same type.
+ </p>
+ <p>
+          Returns a successful match if the original expression is <codeph>NULL</codeph> and a search expression
+          is also <codeph>NULL</codeph>.
+ </p>
+ <p>
+ Returns <codeph>NULL</codeph> if the final <codeph>default</codeph> value is omitted and none of the
+ search expressions match the original expression.
+ </p>
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+ <p>
+ The following example translates numeric day values into descriptive names:
+ </p>
+<codeblock>SELECT event, decode(day_of_week, 1, "Monday", 2, "Tuesday", 3, "Wednesday",
+ 4, "Thursday", 5, "Friday", 6, "Saturday", 7, "Sunday", "Unknown day")
+ FROM calendar;
+</codeblock>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="if">
+
+ <dt>
+ <codeph>if(boolean condition, type ifTrue, type ifFalseOrNull)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">if() function</indexterm>
+ <b>Purpose:</b> Tests an expression and returns a corresponding result depending on whether the result is
+ true, false, or <codeph>NULL</codeph>.
+ <p>
+ <b>Return type:</b> Same as the <codeph>ifTrue</codeph> argument value
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry rev="1.3.0" id="ifnull">
+
+ <dt>
+ <codeph>ifnull(type a, type ifNotNull)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">isnull() function</indexterm>
+ <b>Purpose:</b> Alias for the <codeph>isnull()</codeph> function, with the same behavior. To simplify
+ porting SQL with vendor extensions to Impala.
+ <p conref="../shared/impala_common.xml#common/added_in_130"/>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="isfalse" rev="2.2.0">
+
+ <dt>
+ <codeph>isfalse(<varname>boolean</varname>)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">isfalse() function</indexterm>
+ <b>Purpose:</b> Tests if a Boolean expression is <codeph>false</codeph> or not.
+ Returns <codeph>true</codeph> if so.
+ If the argument is <codeph>NULL</codeph>, returns <codeph>false</codeph>.
+ Identical to <codeph>isnottrue()</codeph>, except it returns the opposite value for a <codeph>NULL</codeph> argument.
+ <p conref="../shared/impala_common.xml#common/return_type_boolean"/>
+ <p conref="../shared/impala_common.xml#common/added_in_220"/>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="isnotfalse" rev="2.2.0">
+
+ <dt>
+ <codeph>isnotfalse(<varname>boolean</varname>)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">isnotfalse() function</indexterm>
+ <b>Purpose:</b> Tests if a Boolean expression is not <codeph>false</codeph> (that is, either <codeph>true</codeph> or <codeph>NULL</codeph>).
+ Returns <codeph>true</codeph> if so.
+ If the argument is <codeph>NULL</codeph>, returns <codeph>true</codeph>.
+ Identical to <codeph>istrue()</codeph>, except it returns the opposite value for a <codeph>NULL</codeph> argument.
+ <p conref="../shared/impala_common.xml#common/return_type_boolean"/>
+ <p conref="../shared/impala_common.xml#common/for_compatibility_only"/>
+ <p conref="../shared/impala_common.xml#common/added_in_220"/>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="isnottrue" rev="2.2.0">
+
+ <dt>
+ <codeph>isnottrue(<varname>boolean</varname>)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">isnottrue() function</indexterm>
+ <b>Purpose:</b> Tests if a Boolean expression is not <codeph>true</codeph> (that is, either <codeph>false</codeph> or <codeph>NULL</codeph>).
+ Returns <codeph>true</codeph> if so.
+ If the argument is <codeph>NULL</codeph>, returns <codeph>true</codeph>.
+ Identical to <codeph>isfalse()</codeph>, except it returns the opposite value for a <codeph>NULL</codeph> argument.
+ <p conref="../shared/impala_common.xml#common/return_type_boolean"/>
+ <p conref="../shared/impala_common.xml#common/added_in_220"/>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="isnull">
+
+ <dt>
+ <codeph>isnull(type a, type ifNotNull)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">isnull() function</indexterm>
+ <b>Purpose:</b> Tests if an expression is <codeph>NULL</codeph>, and returns the expression result value
+ if not. If the first argument is <codeph>NULL</codeph>, returns the second argument.
+ <p>
+ <b>Compatibility notes:</b> Equivalent to the <codeph>nvl()</codeph> function from Oracle Database or
+ <codeph>ifnull()</codeph> from MySQL. The <codeph>nvl()</codeph> and <codeph>ifnull()</codeph>
+ functions are also available in Impala.
+ </p>
+ <p>
+ <b>Return type:</b> Same as the first argument value
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="istrue" rev="2.2.0">
+
+ <dt>
+ <codeph>istrue(<varname>boolean</varname>)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">istrue() function</indexterm>
+ <b>Purpose:</b> Tests if a Boolean expression is <codeph>true</codeph> or not.
+ Returns <codeph>true</codeph> if so.
+ If the argument is <codeph>NULL</codeph>, returns <codeph>false</codeph>.
+ Identical to <codeph>isnotfalse()</codeph>, except it returns the opposite value for a <codeph>NULL</codeph> argument.
+ <p conref="../shared/impala_common.xml#common/return_type_boolean"/>
+ <p conref="../shared/impala_common.xml#common/for_compatibility_only"/>
+ <p conref="../shared/impala_common.xml#common/added_in_220"/>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="notnullvalue" rev="2.2.0">
+
+ <dt>
+ <codeph>notnullvalue(<varname>expression</varname>)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">notnullvalue() function</indexterm>
+ <b>Purpose:</b> Tests if an expression (of any type) is <codeph>NULL</codeph> or not.
+ Returns <codeph>false</codeph> if so.
+ The converse of <codeph>nullvalue()</codeph>.
+ <p conref="../shared/impala_common.xml#common/return_type_boolean"/>
+ <p conref="../shared/impala_common.xml#common/for_compatibility_only"/>
+ <p conref="../shared/impala_common.xml#common/added_in_220"/>
+ </dd>
+
+ </dlentry>
+
+ <dlentry rev="1.3.0" id="nullif">
+
+ <dt>
+ <codeph>nullif(<varname>expr1</varname>,<varname>expr2</varname>)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">nullif() function</indexterm>
+ <b>Purpose:</b> Returns <codeph>NULL</codeph> if the two specified arguments are equal. If the specified
+ arguments are not equal, returns the value of <varname>expr1</varname>. The data types of the expressions
+ must be compatible, according to the conversion rules from <xref href="impala_datatypes.xml#datatypes"/>.
+ You cannot use an expression that evaluates to <codeph>NULL</codeph> for <varname>expr1</varname>; that
+ way, you can distinguish a return value of <codeph>NULL</codeph> from an argument value of
+ <codeph>NULL</codeph>, which would never match <varname>expr2</varname>.
+ <p>
+ <b>Usage notes:</b> This function is effectively shorthand for a <codeph>CASE</codeph> expression of
+ the form:
+ </p>
+<codeblock>CASE
+ WHEN <varname>expr1</varname> = <varname>expr2</varname> THEN NULL
+ ELSE <varname>expr1</varname>
+END</codeblock>
+ <p>
+ It is commonly used in division expressions, to produce a <codeph>NULL</codeph> result instead of a
+ divide-by-zero error when the divisor is equal to zero:
+ </p>
+<codeblock>select 1.0 / nullif(c1,0) as reciprocal from t1;</codeblock>
+ <p>
+ You might also use it for compatibility with other database systems that support the same
+ <codeph>NULLIF()</codeph> function.
+ </p>
+ <p conref="../shared/impala_common.xml#common/return_same_type"/>
+ <p conref="../shared/impala_common.xml#common/added_in_130"/>
+ </dd>
+
+ </dlentry>
+
+ <dlentry rev="1.3.0" id="nullifzero">
+
+ <dt>
+ <codeph>nullifzero(<varname>numeric_expr</varname>)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">nullifzero() function</indexterm>
+ <b>Purpose:</b> Returns <codeph>NULL</codeph> if the numeric expression evaluates to 0, otherwise returns
+ the result of the expression.
+ <p>
+ <b>Usage notes:</b> Used to avoid error conditions such as divide-by-zero in numeric calculations.
+ Serves as shorthand for a more elaborate <codeph>CASE</codeph> expression, to simplify porting SQL with
+ vendor extensions to Impala.
+ </p>
+ <p conref="../shared/impala_common.xml#common/return_same_type"/>
+ <p conref="../shared/impala_common.xml#common/added_in_130"/>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="nullvalue" rev="2.2.0">
+
+ <dt>
+ <codeph>nullvalue(<varname>expression</varname>)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">nullvalue() function</indexterm>
+ <b>Purpose:</b> Tests if an expression (of any type) is <codeph>NULL</codeph> or not.
+ Returns <codeph>true</codeph> if so.
+ The converse of <codeph>notnullvalue()</codeph>.
+ <p conref="../shared/impala_common.xml#common/return_type_boolean"/>
+ <p conref="../shared/impala_common.xml#common/for_compatibility_only"/>
+ <p conref="../shared/impala_common.xml#common/added_in_220"/>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="nvl" rev="1.1">
+
+ <dt>
+ <codeph>nvl(type a, type ifNotNull)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">nvl() function</indexterm>
+ <b>Purpose:</b> Alias for the <codeph>isnull()</codeph> function. Tests if an expression is
+ <codeph>NULL</codeph>, and returns the expression result value if not. If the first argument is
+ <codeph>NULL</codeph>, returns the second argument. Equivalent to the <codeph>nvl()</codeph> function
+ from Oracle Database or <codeph>ifnull()</codeph> from MySQL.
+ <p>
+ <b>Return type:</b> Same as the first argument value
+ </p>
+ <p conref="../shared/impala_common.xml#common/added_in_11"/>
+ </dd>
+
+ </dlentry>
+
+ <dlentry rev="1.3.0" id="zeroifnull">
+
+ <dt>
+ <codeph>zeroifnull(<varname>numeric_expr</varname>)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">zeroifnull() function</indexterm>
+ <b>Purpose:</b> Returns 0 if the numeric expression evaluates to <codeph>NULL</codeph>, otherwise returns
+ the result of the expression.
+ <p>
+ <b>Usage notes:</b> Used to avoid unexpected results due to unexpected propagation of
+ <codeph>NULL</codeph> values in numeric calculations. Serves as shorthand for a more elaborate
+ <codeph>CASE</codeph> expression, to simplify porting SQL with vendor extensions to Impala.
+ </p>
+ <p conref="../shared/impala_common.xml#common/return_same_type"/>
+ <p conref="../shared/impala_common.xml#common/added_in_130"/>
+ </dd>
+
+ </dlentry>
+ </dl>
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_conversion_functions.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_conversion_functions.xml b/docs/topics/impala_conversion_functions.xml
new file mode 100644
index 0000000..1050d0c
--- /dev/null
+++ b/docs/topics/impala_conversion_functions.xml
@@ -0,0 +1,758 @@
+<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="conversion_functions">
+
+ <title>Impala Type Conversion Functions</title>
+ <titlealts><navtitle>Type Conversion Functions</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="Impala Functions"/>
+ <data name="Category" value="Impala Data Types"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Data Analysts"/>
+ <data name="Category" value="Developers"/>
+ <data name="Category" value="Querying"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ Conversion functions are usually used in combination with other functions, to explicitly pass the expected
+ data types. Impala has strict rules regarding data types for function parameters. For example, Impala does
+ not automatically convert a <codeph>DOUBLE</codeph> value to <codeph>FLOAT</codeph>, a
+ <codeph>BIGINT</codeph> value to <codeph>INT</codeph>, or other conversion where precision could be lost or
+ overflow could occur. Also, for reporting or dealing with loosely defined schemas in big data contexts,
+ you might frequently need to convert values to or from the <codeph>STRING</codeph> type.
+ </p>
+
+ <note>
+ Although in CDH 5.5.0, the <codeph>SHOW FUNCTIONS</codeph> output for
+ database <codeph>_IMPALA_BUILTINS</codeph> contains some function signatures
+ matching the pattern <codeph>castto*</codeph>, these functions are not intended
+ for public use and are expected to be hidden in future.
+ </note>
+
+ <p>
+ <b>Function reference:</b>
+ </p>
+
+ <p>
+ Impala supports the following type conversion functions:
+ </p>
+
+<dl>
+
+<dlentry id="cast">
+<dt>
+<codeph>cast(<varname>expr</varname> AS <varname>type</varname>)</codeph>
+</dt>
+
+<dd>
+<indexterm audience="Cloudera">cast() function</indexterm>
+<b>Purpose:</b> Converts the value of an expression to any other type.
+If the expression value is of a type that cannot be converted to the target type, the result is <codeph>NULL</codeph>.
+<p><b>Usage notes:</b>
+Use <codeph>CAST</codeph> when passing a column value or literal to a function that
+expects a parameter with a different type.
+Frequently used in SQL operations such as <codeph>CREATE TABLE AS SELECT</codeph>
+and <codeph>INSERT ... VALUES</codeph> to ensure that values from various sources
+are of the appropriate type for the destination columns.
+Where practical, do a one-time <codeph>CAST()</codeph> operation during the ingestion process
+to make each column into the appropriate type, rather than using many <codeph>CAST()</codeph>
+operations in each query; doing type conversions for each row during each query can be expensive
+for tables with millions or billions of rows.
+</p>
+ <p conref="../shared/impala_common.xml#common/timezone_conversion_caveat"/>
+
+<p conref="../shared/impala_common.xml#common/example_blurb"/>
+<codeblock>select concat('Here are the first ',10,' results.'); -- Fails
+select concat('Here are the first ',cast(10 as string),' results.'); -- Succeeds
+</codeblock>
+<p>
+The following example starts with a text table where every column has a type of <codeph>STRING</codeph>,
+which might be how you ingest data of unknown schema until you can verify the cleanliness of the underlying values.
+Then it uses <codeph>CAST()</codeph> to create a new Parquet table with the same data, but using specific
+numeric data types for the columns with numeric data. Using numeric types of appropriate sizes can result in
+substantial space savings on disk and in memory, and performance improvements in queries,
+over using strings or larger-than-necessary numeric types.
+</p>
+<codeblock>create table t1 (name string, x string, y string, z string);
+
+create table t2 stored as parquet
+as select
+ name,
+ cast(x as bigint) x,
+ cast(y as timestamp) y,
+ cast(z as smallint) z
+from t1;
+
+describe t2;
++------+-----------+---------+
+| name | type      | comment |
++------+-----------+---------+
+| name | string    |         |
+| x    | bigint    |         |
+| y    | timestamp |         |
+| z    | smallint  |         |
++------+-----------+---------+
+</codeblock>
+<p conref="../shared/impala_common.xml#common/related_info"/>
+<p>
+<!-- TK: Can you cast to or from MAP, ARRAY, STRUCT? -->
+ For details of casts from each kind of data type, see the description of
+ the appropriate type:
+ <xref href="impala_tinyint.xml#tinyint"/>,
+ <xref href="impala_smallint.xml#smallint"/>,
+ <xref href="impala_int.xml#int"/>,
+ <xref href="impala_bigint.xml#bigint"/>,
+ <xref href="impala_float.xml#float"/>,
+ <xref href="impala_double.xml#double"/>,
+ <xref href="impala_decimal.xml#decimal"/>,
+ <xref href="impala_string.xml#string"/>,
+ <xref href="impala_char.xml#char"/>,
+ <xref href="impala_varchar.xml#varchar"/>,
+ <xref href="impala_timestamp.xml#timestamp"/>,
+ <xref href="impala_boolean.xml#boolean"/>
+</p>
+</dd>
+</dlentry>
+
+<dlentry rev="2.3.0" id="casttobigint" audience="Cloudera">
+<dt>
+<codeph>casttobigint(type value)</codeph>
+</dt>
+<dd>
+<indexterm audience="Cloudera">casttobigint() function</indexterm>
+<b>Purpose:</b> Converts the value of an expression to <codeph>BIGINT</codeph>. If the expression value is of a type that cannot be converted to the target type, the result is <codeph>NULL</codeph>.
+<p><b>Return type:</b> <codeph>bigint</codeph></p>
+<p conref="../shared/impala_common.xml#common/cast_convenience_fn_usage"/>
+<p conref="../shared/impala_common.xml#common/added_in_230"/>
+<p conref="../shared/impala_common.xml#common/example_blurb"/>
+<p conref="../shared/impala_common.xml#common/cast_convenience_fn_example"/>
+<codeblock>create table small_types (x tinyint, y smallint, z int);
+
+create table big_types as
+ select casttobigint(x) as x, casttobigint(y) as y, casttobigint(z) as z
+ from small_types;
+
+describe big_types;
++------+--------+---------+
+| name | type | comment |
++------+--------+---------+
+| x | bigint | |
+| y | bigint | |
+| z | bigint | |
++------+--------+---------+
+</codeblock>
+</dd>
+</dlentry>
+
+<dlentry rev="2.3.0" id="casttoboolean" audience="Cloudera">
+<dt>
+<codeph>casttoboolean(type value)</codeph>
+</dt>
+<dd>
+<indexterm audience="Cloudera">casttoboolean() function</indexterm>
+<b>Purpose:</b> Converts the value of an expression to <codeph>BOOLEAN</codeph>.
+Numeric values of 0 evaluate to <codeph>false</codeph>, and non-zero values evaluate to <codeph>true</codeph>.
+If the expression value is of a type that cannot be converted to the target type, the result is <codeph>NULL</codeph>.
+In particular, <codeph>STRING</codeph> values (even <codeph>'1'</codeph>, <codeph>'0'</codeph>, <codeph>'true'</codeph>
+or <codeph>'false'</codeph>) always return <codeph>NULL</codeph> when converted to <codeph>BOOLEAN</codeph>.
+<p><b>Return type:</b> <codeph>boolean</codeph></p>
+<p conref="../shared/impala_common.xml#common/cast_convenience_fn_usage"/>
+<p conref="../shared/impala_common.xml#common/added_in_230"/>
+<p conref="../shared/impala_common.xml#common/example_blurb"/>
+<p conref="../shared/impala_common.xml#common/cast_convenience_fn_example"/>
+<codeblock>select casttoboolean(0);
++------------------+
+| casttoboolean(0) |
++------------------+
+| false |
++------------------+
+
+select casttoboolean(1);
++------------------+
+| casttoboolean(1) |
++------------------+
+| true |
++------------------+
+
+select casttoboolean(99);
++-------------------+
+| casttoboolean(99) |
++-------------------+
+| true |
++-------------------+
+
+select casttoboolean(0.0);
++--------------------+
+| casttoboolean(0.0) |
++--------------------+
+| false |
++--------------------+
+
+select casttoboolean(0.5);
++--------------------+
+| casttoboolean(0.5) |
++--------------------+
+| true |
++--------------------+
+
+select casttoboolean('');
++-------------------+
+| casttoboolean('') |
++-------------------+
+| NULL |
++-------------------+
+
+select casttoboolean('yes');
++----------------------+
+| casttoboolean('yes') |
++----------------------+
+| NULL |
++----------------------+
+
+select casttoboolean('0');
++--------------------+
+| casttoboolean('0') |
++--------------------+
+| NULL |
++--------------------+
+
+select casttoboolean('true');
++-----------------------+
+| casttoboolean('true') |
++-----------------------+
+| NULL |
++-----------------------+
+
+select casttoboolean('false');
++------------------------+
+| casttoboolean('false') |
++------------------------+
+| NULL |
++------------------------+
+</codeblock>
+</dd>
+</dlentry>
+
+<dlentry rev="2.3.0" id="casttochar" audience="Cloudera">
+<dt>
+<codeph>casttochar(type value)</codeph>
+</dt>
+<dd>
+<indexterm audience="Cloudera">casttochar() function</indexterm>
+<b>Purpose:</b> Converts the value of an expression to <codeph>CHAR</codeph>. If the expression value is of a type that cannot be converted to the target type, the result is <codeph>NULL</codeph>.
+<p><b>Return type:</b> <codeph>char</codeph></p>
+<p conref="../shared/impala_common.xml#common/cast_convenience_fn_usage"/>
+<p conref="../shared/impala_common.xml#common/added_in_230"/>
+<p conref="../shared/impala_common.xml#common/example_blurb"/>
+<p conref="../shared/impala_common.xml#common/cast_convenience_fn_example"/>
+<codeblock>create table char_types as select casttochar('hello world') as c1, casttochar('xyz') as c2, casttochar('x') as c3;
++-------------------+
+| summary |
++-------------------+
+| Inserted 1 row(s) |
++-------------------+
+
+describe char_types;
++------+--------+---------+
+| name | type | comment |
++------+--------+---------+
+| c1 | string | |
+| c2 | string | |
+| c3 | string | |
++------+--------+---------+
+</codeblock>
+</dd>
+</dlentry>
+
+<dlentry rev="2.3.0" id="casttodecimal" audience="Cloudera">
+<dt>
+<codeph>casttodecimal(type value)</codeph>
+</dt>
+<dd>
+<indexterm audience="Cloudera">casttodecimal() function</indexterm>
+<b>Purpose:</b> Converts the value of an expression to <codeph>DECIMAL</codeph>. If the expression value is of a type that cannot be converted to the target type, the result is <codeph>NULL</codeph>.
+<p><b>Return type:</b> <codeph>decimal</codeph></p>
+<p conref="../shared/impala_common.xml#common/cast_convenience_fn_usage"/>
+<p conref="../shared/impala_common.xml#common/added_in_230"/>
+<p conref="../shared/impala_common.xml#common/example_blurb"/>
+<p conref="../shared/impala_common.xml#common/cast_convenience_fn_example"/>
+<codeblock>select casttodecimal(5.4);
++--------------------+
+| casttodecimal(5.4) |
++--------------------+
+| 5.4 |
++--------------------+
+</codeblock>
+</dd>
+</dlentry>
+
+<dlentry rev="2.3.0" id="casttodouble" audience="Cloudera">
+<dt>
+<codeph>casttodouble(type value)</codeph>
+</dt>
+<dd>
+<indexterm audience="Cloudera">casttodouble() function</indexterm>
+<b>Purpose:</b> Converts the value of an expression to <codeph>DOUBLE</codeph>. If the expression value is of a type that cannot be converted to the target type, the result is <codeph>NULL</codeph>.
+<p><b>Return type:</b> <codeph>double</codeph></p>
+<p conref="../shared/impala_common.xml#common/cast_convenience_fn_usage"/>
+<p conref="../shared/impala_common.xml#common/added_in_230"/>
+<p conref="../shared/impala_common.xml#common/example_blurb"/>
+<p conref="../shared/impala_common.xml#common/cast_convenience_fn_example"/>
+<codeblock>select casttodouble(5);
++-----------------+
+| casttodouble(5) |
++-----------------+
+| 5 |
++-----------------+
+
+select casttodouble('3.141');
++-----------------------+
+| casttodouble('3.141') |
++-----------------------+
+| 3.141 |
++-----------------------+
+
+select casttodouble(1e6);
++--------------------+
+| casttodouble(1e+6) |
++--------------------+
+| 1000000 |
++--------------------+
+
+select casttodouble(true);
++--------------------+
+| casttodouble(true) |
++--------------------+
+| 1 |
++--------------------+
+
+select casttodouble(now());
++---------------------+
+| casttodouble(now()) |
++---------------------+
+| 1447622306.031178 |
++---------------------+
+</codeblock>
+</dd>
+</dlentry>
+
+<dlentry rev="2.3.0" id="casttofloat" audience="Cloudera">
+<dt>
+<codeph>casttofloat(type value)</codeph>
+</dt>
+<dd>
+<indexterm audience="Cloudera">casttofloat() function</indexterm>
+<b>Purpose:</b> Converts the value of an expression to <codeph>FLOAT</codeph>. If the expression value is of a type that cannot be converted to the target type, the result is <codeph>NULL</codeph>.
+<p><b>Return type:</b> <codeph>float</codeph></p>
+<p conref="../shared/impala_common.xml#common/cast_convenience_fn_usage"/>
+<p conref="../shared/impala_common.xml#common/added_in_230"/>
+<p conref="../shared/impala_common.xml#common/example_blurb"/>
+<p conref="../shared/impala_common.xml#common/cast_convenience_fn_example"/>
+<codeblock>select casttofloat(5);
++----------------+
+| casttofloat(5) |
++----------------+
+| 5 |
++----------------+
+
+select casttofloat('3.141');
++----------------------+
+| casttofloat('3.141') |
++----------------------+
+| 3.141000032424927 |
++----------------------+
+
+select casttofloat(1e6);
++-------------------+
+| casttofloat(1e+6) |
++-------------------+
+| 1000000 |
++-------------------+
+
+select casttofloat(true);
++-------------------+
+| casttofloat(true) |
++-------------------+
+| 1 |
++-------------------+
+
+select casttofloat(now());
++--------------------+
+| casttofloat(now()) |
++--------------------+
+| 1447622400 |
++--------------------+
+</codeblock>
+</dd>
+</dlentry>
+
+<dlentry rev="2.3.0" id="casttoint" audience="Cloudera">
+<dt>
+<codeph>casttoint(type value)</codeph>
+</dt>
+<dd>
+<indexterm audience="Cloudera">casttoint() function</indexterm>
+<b>Purpose:</b> Converts the value of an expression to <codeph>INT</codeph>. If the expression value is of a type that cannot be converted to the target type, the result is <codeph>NULL</codeph>.
+<p><b>Return type:</b> <codeph>int</codeph></p>
+<p conref="../shared/impala_common.xml#common/cast_convenience_fn_usage"/>
+<p conref="../shared/impala_common.xml#common/added_in_230"/>
+<p conref="../shared/impala_common.xml#common/example_blurb"/>
+<p conref="../shared/impala_common.xml#common/cast_convenience_fn_example"/>
+<codeblock>select casttoint(5.4);
++----------------+
+| casttoint(5.4) |
++----------------+
+| 5 |
++----------------+
+
+select casttoint(true);
++-----------------+
+| casttoint(true) |
++-----------------+
+| 1 |
++-----------------+
+
+select casttoint(now());
++------------------+
+| casttoint(now()) |
++------------------+
+| 1447622487 |
++------------------+
+
+select casttoint('3.141');
++--------------------+
+| casttoint('3.141') |
++--------------------+
+| NULL |
++--------------------+
+
+select casttoint('3');
++----------------+
+| casttoint('3') |
++----------------+
+| 3 |
++----------------+
+</codeblock>
+</dd>
+</dlentry>
+
+<dlentry rev="2.3.0" id="casttosmallint" audience="Cloudera">
+<dt>
+<codeph>casttosmallint(type value)</codeph>
+</dt>
+<dd>
+<indexterm audience="Cloudera">casttosmallint() function</indexterm>
+<b>Purpose:</b> Converts the value of an expression to <codeph>SMALLINT</codeph>. If the expression value is of a type that cannot be converted to the target type, the result is <codeph>NULL</codeph>.
+<p><b>Return type:</b> <codeph>smallint</codeph></p>
+<p conref="../shared/impala_common.xml#common/cast_convenience_fn_usage"/>
+<p conref="../shared/impala_common.xml#common/added_in_230"/>
+<p conref="../shared/impala_common.xml#common/example_blurb"/>
+<p conref="../shared/impala_common.xml#common/cast_convenience_fn_example"/>
+<codeblock>create table big_types (x bigint, y int, z smallint);
+
+create table small_types as
+ select casttosmallint(x) as x, casttosmallint(y) as y, casttosmallint(z) as z
+ from big_types;
+
+describe small_types;
++------+----------+---------+
+| name | type | comment |
++------+----------+---------+
+| x | smallint | |
+| y | smallint | |
+| z | smallint | |
++------+----------+---------+
+</codeblock>
+</dd>
+</dlentry>
+
+<dlentry rev="2.3.0" id="casttostring" audience="Cloudera">
+<dt>
+<codeph>casttostring(type value)</codeph>
+</dt>
+<dd>
+<indexterm audience="Cloudera">casttostring() function</indexterm>
+<b>Purpose:</b> Converts the value of an expression to <codeph>STRING</codeph>. If the expression value is of a type that cannot be converted to the target type, the result is <codeph>NULL</codeph>.
+<p><b>Return type:</b> <codeph>string</codeph></p>
+<p conref="../shared/impala_common.xml#common/cast_convenience_fn_usage"/>
+<p conref="../shared/impala_common.xml#common/added_in_230"/>
+<p conref="../shared/impala_common.xml#common/example_blurb"/>
+<p conref="../shared/impala_common.xml#common/cast_convenience_fn_example"/>
+<codeblock>create table numeric_types (x int, y bigint, z tinyint);
+
+create table string_types as
+ select casttostring(x) as x, casttostring(y) as y, casttostring(z) as z
+ from numeric_types;
+
+describe string_types;
++------+--------+---------+
+| name | type | comment |
++------+--------+---------+
+| x | string | |
+| y | string | |
+| z | string | |
++------+--------+---------+
+</codeblock>
+</dd>
+</dlentry>
+
+<dlentry rev="2.3.0" id="casttotimestamp" audience="Cloudera">
+<dt>
+<codeph>casttotimestamp(type value)</codeph>
+</dt>
+<dd>
+<indexterm audience="Cloudera">casttotimestamp() function</indexterm>
+<b>Purpose:</b> Converts the value of an expression to <codeph>TIMESTAMP</codeph>. If the expression value is of a type that cannot be converted to the target type, the result is <codeph>NULL</codeph>.
+<p><b>Return type:</b> <codeph>timestamp</codeph></p>
+<p conref="../shared/impala_common.xml#common/cast_convenience_fn_usage"/>
+<p conref="../shared/impala_common.xml#common/added_in_230"/>
+<p conref="../shared/impala_common.xml#common/example_blurb"/>
+<p conref="../shared/impala_common.xml#common/cast_convenience_fn_example"/>
+<codeblock>select casttotimestamp(1000);
++-----------------------+
+| casttotimestamp(1000) |
++-----------------------+
+| 1970-01-01 00:16:40 |
++-----------------------+
+
+select casttotimestamp(1000.0);
++-------------------------+
+| casttotimestamp(1000.0) |
++-------------------------+
+| 1970-01-01 00:16:40 |
++-------------------------+
+
+select casttotimestamp('1000');
++-------------------------+
+| casttotimestamp('1000') |
++-------------------------+
+| NULL |
++-------------------------+
+</codeblock>
+</dd>
+</dlentry>
+
+<dlentry rev="2.3.0" id="casttotinyint" audience="Cloudera">
+<dt>
+<codeph>casttotinyint(type value)</codeph>
+</dt>
+<dd>
+<indexterm audience="Cloudera">casttotinyint() function</indexterm>
+<b>Purpose:</b> Converts the value of an expression to <codeph>TINYINT</codeph>. If the expression value is of a type that cannot be converted to the target type, the result is <codeph>NULL</codeph>.
+<p><b>Return type:</b> <codeph>tinyint</codeph></p>
+<p conref="../shared/impala_common.xml#common/cast_convenience_fn_usage"/>
+<p conref="../shared/impala_common.xml#common/added_in_230"/>
+<p conref="../shared/impala_common.xml#common/example_blurb"/>
+<p conref="../shared/impala_common.xml#common/cast_convenience_fn_example"/>
+<codeblock>create table big_types (x bigint, y int, z smallint);
+
+create table tiny_types as
+ select casttotinyint(x) as x, casttotinyint(y) as y, casttotinyint(z) as z
+ from big_types;
+
+describe tiny_types;
++------+---------+---------+
+| name | type | comment |
++------+---------+---------+
+| x | tinyint | |
+| y | tinyint | |
+| z | tinyint | |
++------+---------+---------+
+</codeblock>
+</dd>
+</dlentry>
+
+<dlentry rev="2.3.0" id="casttovarchar" audience="Cloudera">
+<dt>
+<codeph>casttovarchar(type value)</codeph>
+</dt>
+<dd>
+<indexterm audience="Cloudera">casttovarchar() function</indexterm>
+<b>Purpose:</b> Converts the value of an expression to <codeph>VARCHAR</codeph>. If the expression value is of a type that cannot be converted to the target type, the result is <codeph>NULL</codeph>.
+<p><b>Return type:</b> <codeph>varchar</codeph></p>
+<p conref="../shared/impala_common.xml#common/cast_convenience_fn_usage"/>
+<p conref="../shared/impala_common.xml#common/added_in_230"/>
+<p conref="../shared/impala_common.xml#common/example_blurb"/>
+<p conref="../shared/impala_common.xml#common/cast_convenience_fn_example"/>
+<codeblock>select casttovarchar('abcd');
++-----------------------+
+| casttovarchar('abcd') |
++-----------------------+
+| abcd |
++-----------------------+
+
+select casttovarchar(999);
++--------------------+
+| casttovarchar(999) |
++--------------------+
+| 999 |
++--------------------+
+
+select casttovarchar(999.5);
++----------------------+
+| casttovarchar(999.5) |
++----------------------+
+| 999.5 |
++----------------------+
+
+select casttovarchar(now());
++-------------------------------+
+| casttovarchar(now()) |
++-------------------------------+
+| 2015-11-15 21:26:13.528073000 |
++-------------------------------+
+
+select casttovarchar(true);
++---------------------+
+| casttovarchar(true) |
++---------------------+
+| 1 |
++---------------------+
+</codeblock>
+</dd>
+</dlentry>
+
+<dlentry rev="2.3.0" id="typeof">
+<dt>
+<codeph>typeof(type value)</codeph>
+</dt>
+<dd>
+<indexterm audience="Cloudera">typeof() function</indexterm>
+<b>Purpose:</b> Returns the name of the data type corresponding to an expression. For types with
+extra attributes, such as length for <codeph>CHAR</codeph> and <codeph>VARCHAR</codeph>,
+or precision and scale for <codeph>DECIMAL</codeph>, includes the full specification of the type.
+<!-- To do: How about for columns of complex types? Or fields within complex types? -->
+<p><b>Return type:</b> <codeph>string</codeph></p>
+<p><b>Usage notes:</b> Typically used in interactive exploration of a schema, or in application code that programmatically generates schema definitions such as <codeph>CREATE TABLE</codeph> statements.
+For example, previously, to understand the type of an expression such as
+<codeph>col1 / col2</codeph> or <codeph>concat(col1, col2, col3)</codeph>,
+you might have created a dummy table with a single row, using syntax such as <codeph>CREATE TABLE foo AS SELECT 5 / 3.0</codeph>,
+and then done a <codeph>DESCRIBE</codeph> to see the type of the resulting column.
+Or you might have done a <codeph>CREATE TABLE AS SELECT</codeph> operation to create a table and
+copy data into it, only learning the types of the columns by doing a <codeph>DESCRIBE</codeph> afterward.
+This technique is especially useful for arithmetic expressions involving <codeph>DECIMAL</codeph> types,
+because the precision and scale of the result is typically different than that of the operands.
+</p>
+<p conref="../shared/impala_common.xml#common/added_in_230"/>
+<p conref="../shared/impala_common.xml#common/example_blurb"/>
+<p>
+These examples show how to check the type of a simple literal or function value.
+Notice how adding even tiny integers together changes the data type of the result to
+avoid overflow, and how the results of arithmetic operations on <codeph>DECIMAL</codeph> values
+have specific precision and scale attributes.
+</p>
+<codeblock>select typeof(2)
++-----------+
+| typeof(2) |
++-----------+
+| TINYINT |
++-----------+
+
+select typeof(2+2)
++---------------+
+| typeof(2 + 2) |
++---------------+
+| SMALLINT |
++---------------+
+
+select typeof('xyz')
++---------------+
+| typeof('xyz') |
++---------------+
+| STRING |
++---------------+
+
+select typeof(now())
++---------------+
+| typeof(now()) |
++---------------+
+| TIMESTAMP |
++---------------+
+
+select typeof(5.3 / 2.1)
++-------------------+
+| typeof(5.3 / 2.1) |
++-------------------+
+| DECIMAL(6,4) |
++-------------------+
+
+select typeof(5.30001 / 2342.1);
++--------------------------+
+| typeof(5.30001 / 2342.1) |
++--------------------------+
+| DECIMAL(13,11) |
++--------------------------+
+
+select typeof(typeof(2+2))
++-----------------------+
+| typeof(typeof(2 + 2)) |
++-----------------------+
+| STRING |
++-----------------------+
+</codeblock>
+
+<p>
+This example shows how even if you do not have a record of the type of a column,
+for example because the type was changed by <codeph>ALTER TABLE</codeph> after the
+original <codeph>CREATE TABLE</codeph>, you can still find out the type in a
+more compact form than examining the full <codeph>DESCRIBE</codeph> output.
+Remember to use <codeph>LIMIT 1</codeph> in such cases, to avoid an identical
+result value for every row in the table.
+</p>
+<codeblock>create table typeof_example (a int, b tinyint, c smallint, d bigint);
+
+/* Empty result set if there is no data in the table. */
+select typeof(a) from typeof_example;
+
+/* OK, now we have some data but the type of column A is being changed. */
+insert into typeof_example values (1, 2, 3, 4);
+alter table typeof_example change a a bigint;
+
+/* We can always find out the current type of that column without doing a full DESCRIBE. */
+select typeof(a) from typeof_example limit 1;
++-----------+
+| typeof(a) |
++-----------+
+| BIGINT |
++-----------+
+</codeblock>
+<p>
+This example shows how you might programmatically generate a <codeph>CREATE TABLE</codeph> statement
+with the appropriate column definitions to hold the result values of arbitrary expressions.
+The <codeph>typeof()</codeph> function lets you construct a detailed <codeph>CREATE TABLE</codeph> statement
+without actually creating the table, as opposed to <codeph>CREATE TABLE AS SELECT</codeph> operations
+where you create the destination table but only learn the column data types afterward through <codeph>DESCRIBE</codeph>.
+</p>
+<codeblock>describe typeof_example;
++------+----------+---------+
+| name | type | comment |
++------+----------+---------+
+| a | bigint | |
+| b | tinyint | |
+| c | smallint | |
+| d | bigint | |
++------+----------+---------+
+
+/* An ETL or business intelligence tool might create variations on a table with different file formats,
+ different sets of columns, and so on. TYPEOF() lets an application introspect the types of the original columns. */
+select concat('create table derived_table (a ', typeof(a), ', b ', typeof(b), ', c ',
+ typeof(c), ', d ', typeof(d), ') stored as parquet;')
+ as 'create table statement'
+from typeof_example limit 1;
++-------------------------------------------------------------------------------------------+
+| create table statement |
++-------------------------------------------------------------------------------------------+
+| create table derived_table (a BIGINT, b TINYINT, c SMALLINT, d BIGINT) stored as parquet; |
++-------------------------------------------------------------------------------------------+
+</codeblock>
+</dd>
+</dlentry>
+
+</dl>
+
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_count.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_count.xml b/docs/topics/impala_count.xml
new file mode 100644
index 0000000..2f3f519
--- /dev/null
+++ b/docs/topics/impala_count.xml
@@ -0,0 +1,230 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="count">
+
+ <title>COUNT Function</title>
+ <titlealts><navtitle>COUNT</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Impala Functions"/>
+ <data name="Category" value="Analytic Functions"/>
+ <data name="Category" value="Aggregate Functions"/>
+ <data name="Category" value="Querying"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">count() function</indexterm>
+ An aggregate function that returns the number of rows, or the number of non-<codeph>NULL</codeph> rows.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock>COUNT([DISTINCT | ALL] <varname>expression</varname>) [OVER (<varname>analytic_clause</varname>)]</codeblock>
+
+ <p>
+ Depending on the argument, <codeph>COUNT()</codeph> considers rows that meet certain conditions:
+ </p>
+
+ <ul>
+ <li>
+ The notation <codeph>COUNT(*)</codeph> includes <codeph>NULL</codeph> values in the total.
+ </li>
+
+ <li>
+ The notation <codeph>COUNT(<varname>column_name</varname>)</codeph> only considers rows where the column
+ contains a non-<codeph>NULL</codeph> value.
+ </li>
+
+ <li>
+ You can also combine <codeph>COUNT</codeph> with the <codeph>DISTINCT</codeph> operator to eliminate
+ duplicates before counting, and to count the combinations of values across multiple columns.
+ </li>
+ </ul>
+
+ <p>
+ When the query contains a <codeph>GROUP BY</codeph> clause, returns one value for each combination of
+ grouping values.
+ </p>
+
+ <p>
+ <b>Return type:</b> <codeph>BIGINT</codeph>
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/complex_types_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/complex_types_aggregation_explanation"/>
+
+ <p conref="../shared/impala_common.xml#common/complex_types_aggregation_example"/>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+<codeblock>-- How many rows total are in the table, regardless of NULL values?
+select count(*) from t1;
+-- How many rows are in the table with non-NULL values for a column?
+select count(c1) from t1;
+-- Count the rows that meet certain conditions.
+-- Again, * includes NULLs, so COUNT(*) might be greater than COUNT(col).
+select count(*) from t1 where x > 10;
+select count(c1) from t1 where x > 10;
+-- Can also be used in combination with DISTINCT and/or GROUP BY.
+-- Combine COUNT and DISTINCT to find the number of unique values.
+-- Must use column names rather than * with COUNT(DISTINCT ...) syntax.
+-- Rows with NULL values are not counted.
+select count(distinct c1) from t1;
+-- Rows with a NULL value in _either_ column are not counted.
+select count(distinct c1, c2) from t1;
+-- Return more than one result.
+select month, year, count(distinct visitor_id) from web_stats group by month, year;
+</codeblock>
+
+ <p rev="2.0.0">
+ The following examples show how to use <codeph>COUNT()</codeph> in an analytic context. They use a table
+ containing integers from 1 to 10. Notice how the <codeph>COUNT()</codeph> is reported for each input value, as
+ opposed to the <codeph>GROUP BY</codeph> clause which condenses the result set.
+<codeblock>select x, property, count(x) over (partition by property) as count from int_t where property in ('odd','even');
++----+----------+-------+
+| x | property | count |
++----+----------+-------+
+| 2 | even | 5 |
+| 4 | even | 5 |
+| 6 | even | 5 |
+| 8 | even | 5 |
+| 10 | even | 5 |
+| 1 | odd | 5 |
+| 3 | odd | 5 |
+| 5 | odd | 5 |
+| 7 | odd | 5 |
+| 9 | odd | 5 |
++----+----------+-------+
+</codeblock>
+
+Adding an <codeph>ORDER BY</codeph> clause lets you experiment with results that are cumulative or apply to a moving
+set of rows (the <q>window</q>). The following examples use <codeph>COUNT()</codeph> in an analytic context
+(that is, with an <codeph>OVER()</codeph> clause) to produce a running count of all the even values,
+then a running count of all the odd values. The basic <codeph>ORDER BY x</codeph> clause implicitly
+activates a window clause of <codeph>RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW</codeph>,
+which is effectively the same as <codeph>ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW</codeph>,
+therefore all of these examples produce the same results:
+<codeblock>select x, property,
+ count(x) over (partition by property <b>order by x</b>) as 'cumulative count'
+ from int_t where property in ('odd','even');
++----+----------+------------------+
+| x | property | cumulative count |
++----+----------+------------------+
+| 2 | even | 1 |
+| 4 | even | 2 |
+| 6 | even | 3 |
+| 8 | even | 4 |
+| 10 | even | 5 |
+| 1 | odd | 1 |
+| 3 | odd | 2 |
+| 5 | odd | 3 |
+| 7 | odd | 4 |
+| 9 | odd | 5 |
++----+----------+------------------+
+
+select x, property,
+ count(x) over
+ (
+ partition by property
+ <b>order by x</b>
+ <b>range between unbounded preceding and current row</b>
+ ) as 'cumulative count'
+from int_t where property in ('odd','even');
++----+----------+------------------+
+| x | property | cumulative count |
++----+----------+------------------+
+| 2 | even | 1 |
+| 4 | even | 2 |
+| 6 | even | 3 |
+| 8 | even | 4 |
+| 10 | even | 5 |
+| 1 | odd | 1 |
+| 3 | odd | 2 |
+| 5 | odd | 3 |
+| 7 | odd | 4 |
+| 9 | odd | 5 |
++----+----------+------------------+
+
+select x, property,
+ count(x) over
+ (
+ partition by property
+ <b>order by x</b>
+ <b>rows between unbounded preceding and current row</b>
+ ) as 'cumulative count'
+ from int_t where property in ('odd','even');
++----+----------+------------------+
+| x | property | cumulative count |
++----+----------+------------------+
+| 2 | even | 1 |
+| 4 | even | 2 |
+| 6 | even | 3 |
+| 8 | even | 4 |
+| 10 | even | 5 |
+| 1 | odd | 1 |
+| 3 | odd | 2 |
+| 5 | odd | 3 |
+| 7 | odd | 4 |
+| 9 | odd | 5 |
++----+----------+------------------+
+</codeblock>
+
+The following examples show how to construct a moving window, with a running count taking into account 1 row before
+and 1 row after the current row, within the same partition (all the even values or all the odd values).
+Therefore, the count is consistently 3 for rows in the middle of the window, and 2 for
+rows near the ends of the window, where there is no preceding or no following row in the partition.
+Because of a restriction in the Impala <codeph>RANGE</codeph> syntax, this type of
+moving window is possible with the <codeph>ROWS BETWEEN</codeph> clause but not the <codeph>RANGE BETWEEN</codeph>
+clause:
+<codeblock>select x, property,
+ count(x) over
+ (
+ partition by property
+ <b>order by x</b>
+ <b>rows between 1 preceding and 1 following</b>
+ ) as 'moving total'
+ from int_t where property in ('odd','even');
++----+----------+--------------+
+| x | property | moving total |
++----+----------+--------------+
+| 2 | even | 2 |
+| 4 | even | 3 |
+| 6 | even | 3 |
+| 8 | even | 3 |
+| 10 | even | 2 |
+| 1 | odd | 2 |
+| 3 | odd | 3 |
+| 5 | odd | 3 |
+| 7 | odd | 3 |
+| 9 | odd | 2 |
++----+----------+--------------+
+
+-- Doesn't work because of syntax restriction on RANGE clause.
+select x, property,
+ count(x) over
+ (
+ partition by property
+ <b>order by x</b>
+ <b>range between 1 preceding and 1 following</b>
+ ) as 'moving total'
+from int_t where property in ('odd','even');
+ERROR: AnalysisException: RANGE is only supported with both the lower and upper bounds UNBOUNDED or one UNBOUNDED and the other CURRENT ROW.
+</codeblock>
+ </p>
+
+ <note conref="../shared/impala_common.xml#common/multiple_count_distinct"/>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ <xref href="impala_analytic_functions.xml#analytic_functions"/>
+ </p>
+
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_create_database.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_create_database.xml b/docs/topics/impala_create_database.xml
new file mode 100644
index 0000000..f4153e0
--- /dev/null
+++ b/docs/topics/impala_create_database.xml
@@ -0,0 +1,115 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="create_database">
+
+ <title>CREATE DATABASE Statement</title>
+ <titlealts><navtitle>CREATE DATABASE</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Databases"/>
+ <data name="Category" value="Schemas"/>
+ <data name="Category" value="DDL"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">CREATE DATABASE statement</indexterm>
+ Creates a new database.
+ </p>
+
+ <p>
+ In Impala, a database is both:
+ </p>
+
+ <ul>
+ <li>
+ A logical construct for grouping together related tables, views, and functions within their own namespace.
+ You might use a separate database for each application, set of related tables, or round of experimentation.
+ </li>
+
+ <li>
+ A physical construct represented by a directory tree in HDFS. Tables (internal tables), partitions, and
+ data files are all located under this directory. You can perform HDFS-level operations such as backing it up and measuring space usage,
+ or remove it with a <codeph>DROP DATABASE</codeph> statement.
+ </li>
+ </ul>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock>CREATE (DATABASE|SCHEMA) [IF NOT EXISTS] <varname>database_name</varname> [COMMENT '<varname>database_comment</varname>']
+ [LOCATION <varname>hdfs_path</varname>];</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/ddl_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+
+ <p>
+ A database is physically represented as a directory in HDFS, with a filename extension <codeph>.db</codeph>,
+ under the main Impala data directory. If the associated HDFS directory does not exist, it is created for you.
+ All databases and their associated directories are top-level objects, with no physical or logical nesting.
+ </p>
+
+ <p>
+ After creating a database, to make it the current database within an <cmdname>impala-shell</cmdname> session,
+ use the <codeph>USE</codeph> statement. You can refer to tables in the current database without prepending
+ any qualifier to their names.
+ </p>
+
+ <p>
+ When you first connect to Impala through <cmdname>impala-shell</cmdname>, the database you start in (before
+ issuing any <codeph>CREATE DATABASE</codeph> or <codeph>USE</codeph> statements) is named
+ <codeph>default</codeph>.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/builtins_db"/>
+
+ <p>
+ After creating a database, your <cmdname>impala-shell</cmdname> session or another
+ <cmdname>impala-shell</cmdname> connected to the same node can immediately access that database. To access
+ the database through the Impala daemon on a different node, issue the <codeph>INVALIDATE METADATA</codeph>
+ statement first while connected to that other node.
+ </p>
+
+ <p>
+ Setting the <codeph>LOCATION</codeph> attribute for a new database is a way to work with sets of files in an
+ HDFS directory structure outside the default Impala data directory, as opposed to setting the
+ <codeph>LOCATION</codeph> attribute for each individual table.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/sync_ddl_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/hive_blurb"/>
+
+ <p>
+ When you create a database in Impala, the database can also be used by Hive.
+ When you create a database in Hive, issue an <codeph>INVALIDATE METADATA</codeph>
+ statement in Impala to make Impala permanently aware of the new database.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/cancel_blurb_no"/>
+
+ <p conref="../shared/impala_common.xml#common/permissions_blurb"/>
+ <p rev="CDH-19187">
+ The user ID that the <cmdname>impalad</cmdname> daemon runs under,
+ typically the <codeph>impala</codeph> user, must have write
+ permission for the parent HDFS directory under which the database
+ is located.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+ <codeblock conref="../shared/impala_common.xml#common/create_drop_db_example"/>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ <xref href="impala_databases.xml#databases"/>, <xref href="impala_drop_database.xml#drop_database"/>,
+ <xref href="impala_use.xml#use"/>, <xref href="impala_show.xml#show_databases"/>,
+ <xref href="impala_tables.xml#tables"/>
+ </p>
+ </conbody>
+</concept>
[09/22] incubator-impala git commit: First try at porting over the
source files necessary for the Impala SQL Reference.
Posted by jr...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_live_summary.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_live_summary.xml b/docs/topics/impala_live_summary.xml
new file mode 100644
index 0000000..bfe71bf
--- /dev/null
+++ b/docs/topics/impala_live_summary.xml
@@ -0,0 +1,207 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept rev="2.3.0" id="live_summary">
+
+ <title>LIVE_SUMMARY Query Option</title>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="Impala Query Options"/>
+ <data name="Category" value="Querying"/>
+ <data name="Category" value="Performance"/>
+ <data name="Category" value="Reports"/>
+ <data name="Category" value="impala-shell"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">LIVE_SUMMARY query option</indexterm>
+ For queries submitted through the <cmdname>impala-shell</cmdname> command,
+ displays the same output as the <codeph>SUMMARY</codeph> command,
+ with the measurements updated in real time as the query progresses.
+ When the query finishes, the final <codeph>SUMMARY</codeph> output remains
+ visible in the <cmdname>impala-shell</cmdname> console output.
+ </p>
+
+ <p>
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/type_boolean"/>
+ <p conref="../shared/impala_common.xml#common/default_false_0"/>
+
+ <p conref="../shared/impala_common.xml#common/command_line_blurb"/>
+ <p>
+ You can enable this query option within <cmdname>impala-shell</cmdname>
+ by starting the shell with the <codeph>--live_summary</codeph>
+ command-line option.
+ You can still turn this setting off and on again within the shell through the
+ <codeph>SET</codeph> command.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+ <p>
+ The live summary output can be useful for evaluating long-running queries,
+ to evaluate which phase of execution takes up the most time, or if some hosts
+ take much longer than others for certain operations, dragging overall performance down.
+ By making the information available in real time, this feature lets you decide what
+ action to take even before you cancel a query that is taking much longer than normal.
+ </p>
+ <p>
+ For example, you might see the HDFS scan phase taking a long time, and therefore revisit
+ performance-related aspects of your schema design such as constructing a partitioned table,
+ switching to the Parquet file format, running the <codeph>COMPUTE STATS</codeph> statement
+ for the table, and so on.
+ Or you might see a wide variation between the average and maximum times for all hosts to
+ perform some phase of the query, and therefore investigate if one particular host
+ needed more memory or was experiencing a network problem.
+ </p>
+ <p conref="../shared/impala_common.xml#common/live_reporting_details"/>
+ <p>
+ For a simple and concise way of tracking the progress of an interactive query, see
+ <xref href="impala_live_progress.xml#live_progress"/>.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/restrictions_blurb"/>
+ <p conref="../shared/impala_common.xml#common/impala_shell_progress_reports_compute_stats_caveat"/>
+ <p conref="../shared/impala_common.xml#common/impala_shell_progress_reports_shell_only_caveat"/>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+ <p>
+ The following example shows a series of <codeph>LIVE_SUMMARY</codeph> reports that
+ are displayed during the course of a query, showing how the numbers increase to
+ show the progress of different phases of the distributed query. When you do the same
+ in <cmdname>impala-shell</cmdname>, only a single report is displayed at any one time,
+ with each update overwriting the previous numbers.
+ </p>
+
+<codeblock><![CDATA[[localhost:21000] > set live_summary=true;
+LIVE_SUMMARY set to true
+[localhost:21000] > select count(*) from customer t1 cross join customer t2;
++---------------------+--------+----------+----------+---------+------------+----------+---------------+-----------------------+
+| Operator | #Hosts | Avg Time | Max Time | #Rows | Est. #Rows | Peak Mem | Est. Peak Mem | Detail |
++---------------------+--------+----------+----------+---------+------------+----------+---------------+-----------------------+
+| 06:AGGREGATE | 0 | 0ns | 0ns | 0 | 1 | 0 B | -1 B | FINALIZE |
+| 05:EXCHANGE | 0 | 0ns | 0ns | 0 | 1 | 0 B | -1 B | UNPARTITIONED |
+| 03:AGGREGATE | 0 | 0ns | 0ns | 0 | 1 | 0 B | 10.00 MB | |
+| 02:NESTED LOOP JOIN | 0 | 0ns | 0ns | 0 | 22.50B | 0 B | 0 B | CROSS JOIN, BROADCAST |
+| |--04:EXCHANGE | 0 | 0ns | 0ns | 0 | 150.00K | 0 B | 0 B | BROADCAST |
+| | 01:SCAN HDFS | 1 | 503.57ms | 503.57ms | 150.00K | 150.00K | 24.09 MB | 64.00 MB | tpch.customer t2 |
+| 00:SCAN HDFS | 0 | 0ns | 0ns | 0 | 150.00K | 0 B | 64.00 MB | tpch.customer t1 |
++---------------------+--------+----------+----------+---------+------------+----------+---------------+-----------------------+
+
++---------------------+--------+----------+----------+---------+------------+----------+---------------+-----------------------+
+| Operator | #Hosts | Avg Time | Max Time | #Rows | Est. #Rows | Peak Mem | Est. Peak Mem | Detail |
++---------------------+--------+----------+----------+---------+------------+----------+---------------+-----------------------+
+| 06:AGGREGATE | 0 | 0ns | 0ns | 0 | 1 | 0 B | -1 B | FINALIZE |
+| 05:EXCHANGE | 0 | 0ns | 0ns | 0 | 1 | 0 B | -1 B | UNPARTITIONED |
+| 03:AGGREGATE | 1 | 0ns | 0ns | 0 | 1 | 20.00 KB | 10.00 MB | |
+| 02:NESTED LOOP JOIN | 1 | 17.62s | 17.62s | 81.14M | 22.50B | 3.23 MB | 0 B | CROSS JOIN, BROADCAST |
+| |--04:EXCHANGE | 1 | 26.29ms | 26.29ms | 150.00K | 150.00K | 0 B | 0 B | BROADCAST |
+| | 01:SCAN HDFS | 1 | 503.57ms | 503.57ms | 150.00K | 150.00K | 24.09 MB | 64.00 MB | tpch.customer t2 |
+| 00:SCAN HDFS | 1 | 247.53ms | 247.53ms | 1.02K | 150.00K | 24.39 MB | 64.00 MB | tpch.customer t1 |
++---------------------+--------+----------+----------+---------+------------+----------+---------------+-----------------------+
+
++---------------------+--------+----------+----------+---------+------------+----------+---------------+-----------------------+
+| Operator | #Hosts | Avg Time | Max Time | #Rows | Est. #Rows | Peak Mem | Est. Peak Mem | Detail |
++---------------------+--------+----------+----------+---------+------------+----------+---------------+-----------------------+
+| 06:AGGREGATE | 0 | 0ns | 0ns | 0 | 1 | 0 B | -1 B | FINALIZE |
+| 05:EXCHANGE | 0 | 0ns | 0ns | 0 | 1 | 0 B | -1 B | UNPARTITIONED |
+| 03:AGGREGATE | 1 | 0ns | 0ns | 0 | 1 | 20.00 KB | 10.00 MB | |
+| 02:NESTED LOOP JOIN | 1 | 61.85s | 61.85s | 283.43M | 22.50B | 3.23 MB | 0 B | CROSS JOIN, BROADCAST |
+| |--04:EXCHANGE | 1 | 26.29ms | 26.29ms | 150.00K | 150.00K | 0 B | 0 B | BROADCAST |
+| | 01:SCAN HDFS | 1 | 503.57ms | 503.57ms | 150.00K | 150.00K | 24.09 MB | 64.00 MB | tpch.customer t2 |
+| 00:SCAN HDFS | 1 | 247.59ms | 247.59ms | 2.05K | 150.00K | 24.39 MB | 64.00 MB | tpch.customer t1 |
++---------------------+--------+----------+----------+---------+------------+----------+---------------+-----------------------+
+]]>
+</codeblock>
+
+<!-- Keeping this sample output that illustrates a couple of glitches in the LIVE_SUMMARY display, hidden, to help filing JIRAs. -->
+<codeblock audience="Cloudera"><![CDATA[[
++---------------------+--------+----------+----------+---------+------------+----------+---------------+-----------------------+
+| Operator | #Hosts | Avg Time | Max Time | #Rows | Est. #Rows | Peak Mem | Est. Peak Mem | Detail |
++---------------------+--------+----------+----------+---------+------------+----------+---------------+-----------------------+
+| 06:AGGREGATE | 0 | 0ns | 0ns | 0 | 1 | 0 B | -1 B | FINALIZE |
+| 05:EXCHANGE | 0 | 0ns | 0ns | 0 | 1 | 0 B | -1 B | UNPARTITIONED |
+| 03:AGGREGATE | 1 | 0ns | 0ns | 0 | 1 | 20.00 KB | 10.00 MB | |
+| 02:NESTED LOOP JOIN | 1 | 91.34s | 91.34s | 419.48M | 22.50B | 3.23 MB | 0 B | CROSS JOIN, BROADCAST |
+| |--04:EXCHANGE | 1 | 26.29ms | 26.29ms | 150.00K | 150.00K | 0 B | 0 B | BROADCAST |
+| | 01:SCAN HDFS | 1 | 503.57ms | 503.57ms | 150.00K | 150.00K | 24.09 MB | 64.00 MB | tpch.customer t2 |
+| 00:SCAN HDFS | 1 | 247.63ms | 247.63ms | 3.07K | 150.00K | 24.39 MB | 64.00 MB | tpch.customer t1 |
++---------------------+--------+----------+----------+---------+------------+----------+---------------+-----------------------+
+
++---------------------+--------+----------+----------+---------+------------+----------+---------------+-----------------------+
+| Operator | #Hosts | Avg Time | Max Time | #Rows | Est. #Rows | Peak Mem | Est. Peak Mem | Detail |
++---------------------+--------+----------+----------+---------+------------+----------+---------------+-----------------------+
+| 06:AGGREGATE | 0 | 0ns | 0ns | 0 | 1 | 0 B | -1 B | FINALIZE |
+| 05:EXCHANGE | 0 | 0ns | 0ns | 0 | 1 | 0 B | -1 B | UNPARTITIONED |
+| 03:AGGREGATE | 1 | 0ns | 0ns | 0 | 1 | 20.00 KB | 10.00 MB | |
+| 02:NESTED LOOP JOIN | 1 | 140.49s | 140.49s | 646.82M | 22.50B | 3.23 MB | 0 B | CROSS JOIN, BROADCAST |
+| |--04:EXCHANGE | 1 | 26.29ms | 26.29ms | 150.00K | 150.00K | 0 B | 0 B | BROADCAST |
+| | 01:SCAN HDFS | 1 | 503.57ms | 503.57ms | 150.00K | 150.00K | 24.09 MB | 64.00 MB | tpch.customer t2 |
+| 00:SCAN HDFS | 1 | 247.73ms | 247.73ms | 5.12K | 150.00K | 24.39 MB | 64.00 MB | tpch.customer t1 |
++---------------------+--------+----------+----------+---------+------------+----------+---------------+-----------------------+
+
++---------------------+--------+----------+----------+---------+------------+----------+---------------+-----------------------+
+| Operator | #Hosts | Avg Time | Max Time | #Rows | Est. #Rows | Peak Mem | Est. Peak Mem | Detail |
++---------------------+--------+----------+----------+---------+------------+----------+---------------+-----------------------+
+| 06:AGGREGATE | 0 | 0ns | 0ns | 0 | 1 | 0 B | -1 B | FINALIZE |
+| 05:EXCHANGE | 0 | 0ns | 0ns | 0 | 1 | 0 B | -1 B | UNPARTITIONED |
+| 03:AGGREGATE | 1 | 0ns | 0ns | 0 | 1 | 20.00 KB | 10.00 MB | |
+| 02:NESTED LOOP JOIN | 1 | 228.96s | 228.96s | 1.06B | 22.50B | 3.23 MB | 0 B | CROSS JOIN, BROADCAST |
+| |--04:EXCHANGE | 1 | 26.29ms | 26.29ms | 150.00K | 150.00K | 0 B | 0 B | BROADCAST |
+| | 01:SCAN HDFS | 1 | 503.57ms | 503.57ms | 150.00K | 150.00K | 24.09 MB | 64.00 MB | tpch.customer t2 |
+| 00:SCAN HDFS | 1 | 247.83ms | 247.83ms | 7.17K | 150.00K | 24.39 MB | 64.00 MB | tpch.customer t1 |
++---------------------+--------+----------+----------+---------+------------+----------+---------------+-----------------------+
+
++---------------------+--------+----------+----------+---------+------------+----------+---------------+-----------------------+
+| Operator | #Hosts | Avg Time | Max Time | #Rows | Est. #Rows | Peak Mem | Est. Peak Mem | Detail |
++---------------------+--------+----------+----------+---------+------------+----------+---------------+-----------------------+
+| 06:AGGREGATE | 0 | 0ns | 0ns | 0 | 1 | 0 B | -1 B | FINALIZE |
+| 05:EXCHANGE | 0 | 0ns | 0ns | 0 | 1 | 0 B | -1 B | UNPARTITIONED |
+| 03:AGGREGATE | 1 | 0ns | 0ns | 0 | 1 | 20.00 KB | 10.00 MB | |
+| 02:NESTED LOOP JOIN | 1 | 563.11s | 563.11s | 2.59B | 22.50B | 3.23 MB | 0 B | CROSS JOIN, BROADCAST |
+| |--04:EXCHANGE | 1 | 26.29ms | 26.29ms | 150.00K | 150.00K | 0 B | 0 B | BROADCAST |
+| | 01:SCAN HDFS | 1 | 503.57ms | 503.57ms | 150.00K | 150.00K | 24.09 MB | 64.00 MB | tpch.customer t2 |
+| 00:SCAN HDFS | 1 | 248.11ms | 248.11ms | 17.41K | 150.00K | 24.39 MB | 64.00 MB | tpch.customer t1 |
++---------------------+--------+----------+----------+---------+------------+----------+---------------+-----------------------+
+
++---------------------+--------+----------+----------+---------+------------+----------+---------------+-----------------------+
+| Operator | #Hosts | Avg Time | Max Time | #Rows | Est. #Rows | Peak Mem | Est. Peak Mem | Detail |
++---------------------+--------+----------+----------+---------+------------+----------+---------------+-----------------------+
+| 06:AGGREGATE | 0 | 0ns | 0ns | 0 | 1 | 0 B | -1 B | FINALIZE |
+| 05:EXCHANGE | 0 | 0ns | 0ns | 0 | 1 | 0 B | -1 B | UNPARTITIONED |
+| 03:AGGREGATE | 1 | 0ns | 0ns | 0 | 1 | 20.00 KB | 10.00 MB | |
+| 02:NESTED LOOP JOIN | 1 | 985.71s | 985.71s | 4.54B | 22.50B | 3.23 MB | 0 B | CROSS JOIN, BROADCAST |
+| |--04:EXCHANGE | 1 | 26.29ms | 26.29ms | 150.00K | 150.00K | 0 B | 0 B | BROADCAST |
+| | 01:SCAN HDFS | 1 | 503.57ms | 503.57ms | 150.00K | 150.00K | 24.09 MB | 64.00 MB | tpch.customer t2 |
+| 00:SCAN HDFS | 1 | 248.49ms | 248.49ms | 30.72K | 150.00K | 24.39 MB | 64.00 MB | tpch.customer t1 |
++---------------------+--------+----------+----------+---------+------------+----------+---------------+-----------------------+
+
++---------------------+--------+----------+----------+---------+------------+----------+---------------+-----------------------+
+| Operator | #Hosts | Avg Time | Max Time | #Rows | Est. #Rows | Peak Mem | Est. Peak Mem | Detail |
++---------------------+--------+----------+----------+---------+------------+----------+---------------+-----------------------+
+| 06:AGGREGATE | 0 | 0ns | 0ns | 0 | 1 | 0 B | -1 B | FINALIZE |
+| 05:EXCHANGE | 0 | 0ns | 0ns | 0 | 1 | 0 B | -1 B | UNPARTITIONED |
+| 03:AGGREGATE | 1 | 0ns | 0ns | 0 | 1 | 20.00 KB | 10.00 MB | |
+| 02:NESTED LOOP JOIN | 1 | None | None | 5.42B | 22.50B | 3.23 MB | 0 B | CROSS JOIN, BROADCAST |
+| |--04:EXCHANGE | 1 | 26.29ms | 26.29ms | 150.00K | 150.00K | 0 B | 0 B | BROADCAST |
+| | 01:SCAN HDFS | 1 | 503.57ms | 503.57ms | 150.00K | 150.00K | 24.09 MB | 64.00 MB | tpch.customer t2 |
+| 00:SCAN HDFS | 1 | 248.66ms | 248.66ms | 36.86K | 150.00K | 24.39 MB | 64.00 MB | tpch.customer t1 |
++---------------------+--------+----------+----------+---------+------------+----------+---------------+-----------------------+
+
+[localhost:21000] > select count(*) from customer t1 cross join customer t2;
+Query: select count(*) from customer t1 cross join customer t2
+[####################################################################################################] 100%
++---------------------+--------+----------+----------+---------+------------+----------+---------------+-----------------------+
+| Operator | #Hosts | Avg Time | Max Time | #Rows | Est. #Rows | Peak Mem | Est. Peak Mem | Detail |
+[localhost:21000] >
+]]>
+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/live_progress_live_summary_asciinema"/>
+
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_load_data.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_load_data.xml b/docs/topics/impala_load_data.xml
new file mode 100644
index 0000000..e3517f0
--- /dev/null
+++ b/docs/topics/impala_load_data.xml
@@ -0,0 +1,237 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept rev="1.1" id="load_data">
+
+ <title>LOAD DATA Statement</title>
+ <titlealts><navtitle>LOAD DATA</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="ETL"/>
+ <data name="Category" value="Ingest"/>
+ <data name="Category" value="DML"/>
+ <data name="Category" value="Data Analysts"/>
+ <data name="Category" value="Developers"/>
+ <data name="Category" value="HDFS"/>
+ <data name="Category" value="Tables"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">LOAD DATA statement</indexterm>
+ The <codeph>LOAD DATA</codeph> statement streamlines the ETL process for an internal Impala table by moving a
+ data file or all the data files in a directory from an HDFS location into the Impala data directory for that
+ table.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock>LOAD DATA INPATH '<varname>hdfs_file_or_directory_path</varname>' [OVERWRITE] INTO TABLE <varname>tablename</varname>
+ [PARTITION (<varname>partcol1</varname>=<varname>val1</varname>, <varname>partcol2</varname>=<varname>val2</varname> ...)]</codeblock>
+
+ <p>
+ When the <codeph>LOAD DATA</codeph> statement operates on a partitioned table,
+ it always operates on one partition at a time. Specify the <codeph>PARTITION</codeph> clauses
+ and list all the partition key columns, with a constant value specified for each.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/dml_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+
+ <ul>
+ <li>
+ The loaded data files are moved, not copied, into the Impala data directory.
+ </li>
+
+ <li>
+ You can specify the HDFS path of a single file to be moved, or the HDFS path of a directory to move all the
+ files inside that directory. You cannot specify any sort of wildcard to take only some of the files from a
+ directory. When loading a directory full of data files, keep all the data files at the top level, with no
+ nested directories underneath.
+ </li>
+
+ <li>
+ Currently, the Impala <codeph>LOAD DATA</codeph> statement only imports files from HDFS, not from the local
+ filesystem. It does not support the <codeph>LOCAL</codeph> keyword of the Hive <codeph>LOAD DATA</codeph>
+ statement. You must specify a path, not an <codeph>hdfs://</codeph> URI.
+ </li>
+
+ <li>
+ In the interest of speed, only limited error checking is done. If the loaded files have the wrong file
+ format, different columns than the destination table, or other kind of mismatch, Impala does not raise any
+ error for the <codeph>LOAD DATA</codeph> statement. Querying the table afterward could produce a runtime
+ error or unexpected results. Currently, the only checking the <codeph>LOAD DATA</codeph> statement does is
+ to avoid mixing together uncompressed and LZO-compressed text files in the same table.
+ </li>
+
+ <li>
+ When you specify an HDFS directory name as the <codeph>LOAD DATA</codeph> argument, any hidden files in
+ that directory (files whose names start with a <codeph>.</codeph>) are not moved to the Impala data
+ directory.
+ </li>
+
+ <li>
+ The loaded data files retain their original names in the new location, unless a name conflicts with an
+ existing data file, in which case the name of the new file is modified slightly to be unique. (The
+ name-mangling is a slight difference from the Hive <codeph>LOAD DATA</codeph> statement, which replaces
+ identically named files.)
+ </li>
+
+ <li>
+ By providing an easy way to transport files from known locations in HDFS into the Impala data directory
+ structure, the <codeph>LOAD DATA</codeph> statement lets you avoid memorizing the locations and layout of
+ HDFS directory tree containing the Impala databases and tables. (For a quick way to check the location of
+ the data files for an Impala table, issue the statement <codeph>DESCRIBE FORMATTED
+ <varname>table_name</varname></codeph>.)
+ </li>
+
+ <li>
+ The <codeph>PARTITION</codeph> clause is especially convenient for ingesting new data for a partitioned
+ table. As you receive new data for a time period, geographic region, or other division that corresponds to
+ one or more partitioning columns, you can load that data straight into the appropriate Impala data
+ directory, which might be nested several levels down if the table is partitioned by multiple columns. When
+ the table is partitioned, you must specify constant values for all the partitioning columns.
+ </li>
+ </ul>
+
+ <p conref="../shared/impala_common.xml#common/complex_types_blurb"/>
+
+ <p rev="2.3.0">
+ Because Impala currently cannot create Parquet data files containing complex types
+ (<codeph>ARRAY</codeph>, <codeph>STRUCT</codeph>, and <codeph>MAP</codeph>), the
+ <codeph>LOAD DATA</codeph> statement is especially important when working with
+ tables containing complex type columns. You create the Parquet data files outside
+ Impala, then use either <codeph>LOAD DATA</codeph>, an external table, or HDFS-level
+ file operations followed by <codeph>REFRESH</codeph> to associate the data files with
+ the corresponding table.
+ See <xref href="impala_complex_types.xml#complex_types"/> for details about using complex types.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/sync_ddl_blurb"/>
+
+ <note conref="../shared/impala_common.xml#common/compute_stats_next"/>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+ <p>
+ First, we use a trivial Python script to write different numbers of strings (one per line) into files stored
+ in the <codeph>cloudera</codeph> HDFS user account. (Substitute the path for your own HDFS user account when
+ doing <cmdname>hdfs dfs</cmdname> operations like these.)
+ </p>
+
+<codeblock>$ random_strings.py 1000 | hdfs dfs -put - /user/cloudera/thousand_strings.txt
+$ random_strings.py 100 | hdfs dfs -put - /user/cloudera/hundred_strings.txt
+$ random_strings.py 10 | hdfs dfs -put - /user/cloudera/ten_strings.txt</codeblock>
+
+ <p>
+ Next, we create a table and load an initial set of data into it. Remember, unless you specify a
+ <codeph>STORED AS</codeph> clause, Impala tables default to <codeph>TEXTFILE</codeph> format with Ctrl-A (hex
+ 01) as the field delimiter. This example uses a single-column table, so the delimiter is not significant. For
+ large-scale ETL jobs, you would typically use binary format data files such as Parquet or Avro, and load them
+ into Impala tables that use the corresponding file format.
+ </p>
+
+<codeblock>[localhost:21000] > create table t1 (s string);
+[localhost:21000] > load data inpath '/user/cloudera/thousand_strings.txt' into table t1;
+Query finished, fetching results ...
++----------------------------------------------------------+
+| summary |
++----------------------------------------------------------+
+| Loaded 1 file(s). Total files in destination location: 1 |
++----------------------------------------------------------+
+Returned 1 row(s) in 0.61s
+[kilo2-202-961.cs1cloud.internal:21000] > select count(*) from t1;
+Query finished, fetching results ...
++------+
+| _c0 |
++------+
+| 1000 |
++------+
+Returned 1 row(s) in 0.67s
+[localhost:21000] > load data inpath '/user/cloudera/thousand_strings.txt' into table t1;
+ERROR: AnalysisException: INPATH location '/user/cloudera/thousand_strings.txt' does not exist. </codeblock>
+
+ <p>
+ As indicated by the message at the end of the previous example, the data file was moved from its original
+ location. The following example illustrates how the data file was moved into the Impala data directory for
+ the destination table, keeping its original filename:
+ </p>
+
+<codeblock>$ hdfs dfs -ls /user/hive/warehouse/load_data_testing.db/t1
+Found 1 items
+-rw-r--r-- 1 cloudera cloudera 13926 2013-06-26 15:40 /user/hive/warehouse/load_data_testing.db/t1/thousand_strings.txt</codeblock>
+
+ <p>
+ The following example demonstrates the difference between the <codeph>INTO TABLE</codeph> and
+      <codeph>OVERWRITE INTO TABLE</codeph> clauses. The table already contains 1000 rows. After issuing the
+ <codeph>LOAD DATA</codeph> statement with the <codeph>INTO TABLE</codeph> clause, the table contains 100 more
+ rows, for a total of 1100. After issuing the <codeph>LOAD DATA</codeph> statement with the <codeph>OVERWRITE
+ INTO TABLE</codeph> clause, the former contents are gone, and now the table only contains the 10 rows from
+ the just-loaded data file.
+ </p>
+
+<codeblock>[localhost:21000] > load data inpath '/user/cloudera/hundred_strings.txt' into table t1;
+Query finished, fetching results ...
++----------------------------------------------------------+
+| summary |
++----------------------------------------------------------+
+| Loaded 1 file(s). Total files in destination location: 2 |
++----------------------------------------------------------+
+Returned 1 row(s) in 0.24s
+[localhost:21000] > select count(*) from t1;
+Query finished, fetching results ...
++------+
+| _c0 |
++------+
+| 1100 |
++------+
+Returned 1 row(s) in 0.55s
+[localhost:21000] > load data inpath '/user/cloudera/ten_strings.txt' overwrite into table t1;
+Query finished, fetching results ...
++----------------------------------------------------------+
+| summary |
++----------------------------------------------------------+
+| Loaded 1 file(s). Total files in destination location: 1 |
++----------------------------------------------------------+
+Returned 1 row(s) in 0.26s
+[localhost:21000] > select count(*) from t1;
+Query finished, fetching results ...
++-----+
+| _c0 |
++-----+
+| 10 |
++-----+
+Returned 1 row(s) in 0.62s</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/s3_blurb"/>
+ <p conref="../shared/impala_common.xml#common/s3_dml"/>
+
+ <p conref="../shared/impala_common.xml#common/cancel_blurb_no"/>
+
+ <p conref="../shared/impala_common.xml#common/permissions_blurb"/>
+ <p rev="CDH-19187">
+ The user ID that the <cmdname>impalad</cmdname> daemon runs under,
+ typically the <codeph>impala</codeph> user, must have read and write
+ permissions for the files in the source directory, and write
+ permission for the destination directory.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+ <p>
+ The <codeph>LOAD DATA</codeph> statement is an alternative to the
+ <codeph>INSERT</codeph> statement. Use <codeph>LOAD DATA</codeph>
+ when you have the data files in HDFS but outside of any Impala table.
+ </p>
+ <p>
+ The <codeph>LOAD DATA</codeph> statement is also an alternative
+ to the <codeph>CREATE EXTERNAL TABLE</codeph> statement. Use
+ <codeph>LOAD DATA</codeph> when it is appropriate to move the
+ data files under Impala control rather than querying them
+ from their original location.
+ </p>
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_map.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_map.xml b/docs/topics/impala_map.xml
new file mode 100644
index 0000000..41e4754
--- /dev/null
+++ b/docs/topics/impala_map.xml
@@ -0,0 +1,264 @@
+<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+ <concept id="map">
+
+ <title>MAP Complex Type (CDH 5.5 or higher only)</title>
+
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="Impala Data Types"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ A complex data type representing an arbitrary set of key-value pairs.
+ The key part is a scalar type, while the value part can be a scalar or
+ another complex type (<codeph>ARRAY</codeph>, <codeph>STRUCT</codeph>,
+ or <codeph>MAP</codeph>).
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock><varname>column_name</varname> MAP &lt; <varname>primitive_type</varname>, <varname>type</varname> &gt;
+
+type ::= <varname>primitive_type</varname> | <varname>complex_type</varname>
+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/complex_types_combo"/>
+
+ <p>
+ The <codeph>MAP</codeph> complex data type represents a set of key-value pairs.
+ Each element of the map is indexed by a primitive type such as <codeph>BIGINT</codeph> or
+ <codeph>STRING</codeph>, letting you define sequences that are not continuous or categories with arbitrary names.
+ You might find it convenient for modelling data produced in other languages, such as a
+ Python dictionary or Java HashMap, where a single scalar value serves as the lookup key.
+ </p>
+
+ <p>
+ In a big data context, the keys in a map column might represent a numeric sequence of events during a
+ manufacturing process, or <codeph>TIMESTAMP</codeph> values corresponding to sensor observations.
+ The map itself is inherently unordered, so you choose whether to make the key values significant
+ (such as a recorded <codeph>TIMESTAMP</codeph>) or synthetic (such as a random global universal ID).
+ </p>
+
+ <note>
+ Behind the scenes, the <codeph>MAP</codeph> type is implemented in a similar way as the
+ <codeph>ARRAY</codeph> type. Impala does not enforce any uniqueness constraint on the
+ <codeph>KEY</codeph> values, and the <codeph>KEY</codeph> values are processed by
+ looping through the elements of the <codeph>MAP</codeph> rather than by a constant-time lookup.
+ Therefore, this type is primarily for ease of understanding when importing data and
+ algorithms from non-SQL contexts, rather than optimizing the performance of key lookups.
+ </note>
+
+ <p conref="../shared/impala_common.xml#common/complex_types_describe"/>
+
+ <p conref="../shared/impala_common.xml#common/added_in_230"/>
+
+ <p conref="../shared/impala_common.xml#common/restrictions_blurb"/>
+
+ <ul conref="../shared/impala_common.xml#common/complex_types_restrictions">
+ <li/>
+ </ul>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+ <note conref="../shared/impala_common.xml#common/complex_type_schema_pointer"/>
+
+ <p>
+ The following example shows a table with various kinds of <codeph>MAP</codeph> columns,
+ both at the top level and nested within other complex types.
+ Each row represents information about a specific country, with complex type fields
+ of various levels of nesting to represent different information associated
+ with the country: factual measurements such as area and population,
+ notable people in different categories, geographic features such as
+ cities, points of interest within each city, and mountains with associated facts.
+ Practice the <codeph>CREATE TABLE</codeph> and query notation for complex type columns
+ using empty tables, until you can visualize a complex data structure and construct corresponding SQL statements reliably.
+ </p>
+
+<codeblock><![CDATA[create TABLE map_demo
+(
+ country_id BIGINT,
+
+-- Numeric facts about each country, looked up by name.
+-- For example, 'Area':1000, 'Population':999999.
+-- Using a MAP instead of a STRUCT because there could be
+-- a different set of facts for each country.
+ metrics MAP <STRING, BIGINT>,
+
+-- MAP whose value part is an ARRAY.
+-- For example, the key 'Famous Politicians' could represent an array of 10 elements,
+-- while the key 'Famous Actors' could represent an array of 20 elements.
+ notables MAP <STRING, ARRAY <STRING>>,
+
+-- MAP that is a field within a STRUCT.
+-- (The STRUCT is inside another ARRAY, because it is rare
+-- for a STRUCT to be a top-level column.)
+-- For example, city #1 might have points of interest with key 'Zoo',
+-- representing an array of 3 different zoos.
+-- City #2 might have completely different kinds of points of interest.
+-- Because the set of field names is potentially large, and most entries could be blank,
+-- a MAP makes more sense than a STRUCT to represent such a sparse data structure.
+ cities ARRAY < STRUCT <
+ name: STRING,
+ points_of_interest: MAP <STRING, ARRAY <STRING>>
+ >>,
+
+-- MAP that is an element within an ARRAY. The MAP is inside a STRUCT field to associate
+-- the mountain name with all the facts about the mountain.
+-- The "key" of the map (the first STRING field) represents the name of some fact whose value
+-- can be expressed as an integer, such as 'Height', 'Year First Climbed', and so on.
+ mountains ARRAY < STRUCT < name: STRING, facts: MAP <STRING, INT > > >
+)
+STORED AS PARQUET;
+]]>
+</codeblock>
+
+<codeblock><![CDATA[DESCRIBE map_demo;
++------------+------------------------------------------------+
+| name | type |
++------------+------------------------------------------------+
+| country_id | bigint |
+| metrics | map<string,bigint> |
+| notables | map<string,array<string>> |
+| cities | array<struct< |
+| | name:string, |
+| | points_of_interest:map<string,array<string>> |
+| | >> |
+| mountains | array<struct< |
+| | name:string, |
+| | facts:map<string,int> |
+| | >> |
++------------+------------------------------------------------+
+
+DESCRIBE map_demo.metrics;
++-------+--------+
+| name | type |
++-------+--------+
+| key | string |
+| value | bigint |
++-------+--------+
+
+DESCRIBE map_demo.notables;
++-------+---------------+
+| name | type |
++-------+---------------+
+| key | string |
+| value | array<string> |
++-------+---------------+
+
+DESCRIBE map_demo.notables.value;
++------+--------+
+| name | type |
++------+--------+
+| item | string |
+| pos | bigint |
++------+--------+
+
+DESCRIBE map_demo.cities;
++------+------------------------------------------------+
+| name | type |
++------+------------------------------------------------+
+| item | struct< |
+| | name:string, |
+| | points_of_interest:map<string,array<string>> |
+| | > |
+| pos | bigint |
++------+------------------------------------------------+
+
+DESCRIBE map_demo.cities.item.points_of_interest;
++-------+---------------+
+| name | type |
++-------+---------------+
+| key | string |
+| value | array<string> |
++-------+---------------+
+
+DESCRIBE map_demo.cities.item.points_of_interest.value;
++------+--------+
+| name | type |
++------+--------+
+| item | string |
+| pos | bigint |
++------+--------+
+
+DESCRIBE map_demo.mountains;
++------+-------------------------+
+| name | type |
++------+-------------------------+
+| item | struct< |
+| | name:string, |
+| | facts:map<string,int> |
+| | > |
+| pos | bigint |
++------+-------------------------+
+
+DESCRIBE map_demo.mountains.item.facts;
++-------+--------+
+| name | type |
++-------+--------+
+| key | string |
+| value | int |
++-------+--------+
+]]>
+</codeblock>
+
+ <p>
+ The following example shows a table that uses a variety of data types for the <codeph>MAP</codeph>
+ <q>key</q> field. Typically, you use <codeph>BIGINT</codeph> or <codeph>STRING</codeph> to use
+ numeric or character-based keys without worrying about exceeding any size or length constraints.
+ </p>
+
+<codeblock><![CDATA[CREATE TABLE map_demo_obscure
+(
+ id BIGINT,
+ m1 MAP <INT, INT>,
+ m2 MAP <SMALLINT, INT>,
+ m3 MAP <TINYINT, INT>,
+ m4 MAP <TIMESTAMP, INT>,
+ m5 MAP <BOOLEAN, INT>,
+ m6 MAP <CHAR(5), INT>,
+ m7 MAP <VARCHAR(25), INT>,
+ m8 MAP <FLOAT, INT>,
+ m9 MAP <DOUBLE, INT>,
+ m10 MAP <DECIMAL(12,2), INT>
+)
+STORED AS PARQUET;
+]]>
+</codeblock>
+
+<codeblock>CREATE TABLE celebrities (name STRING, birth_year MAP &lt; STRING, SMALLINT &gt;) STORED AS PARQUET;
+-- A typical row might represent values with 2 different birth years, such as:
+-- ("Joe Movie Star", { "real": 1972, "claimed": 1977 })
+
+CREATE TABLE countries (name STRING, famous_leaders MAP &lt; INT, STRING &gt;) STORED AS PARQUET;
+-- A typical row might represent values with different leaders, with key values corresponding to their numeric sequence, such as:
+-- ("United States", { 1: "George Washington", 3: "Thomas Jefferson", 16: "Abraham Lincoln" })
+
+CREATE TABLE airlines (name STRING, special_meals MAP &lt; STRING, MAP &lt; STRING, STRING &gt; &gt;) STORED AS PARQUET;
+-- A typical row might represent values with multiple kinds of meals, each with several components:
+-- ("Elegant Airlines",
+-- {
+-- "vegetarian": { "breakfast": "pancakes", "snack": "cookies", "dinner": "rice pilaf" },
+-- "gluten free": { "breakfast": "oatmeal", "snack": "fruit", "dinner": "chicken" }
+-- } )
+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ <xref href="impala_complex_types.xml#complex_types"/>,
+ <xref href="impala_array.xml#array"/>,
+ <xref href="impala_struct.xml#struct"/>
+ <!-- <xref href="impala_map.xml#map"/> -->
+ </p>
+
+ </conbody>
+
+ </concept>
+
+
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_math_functions.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_math_functions.xml b/docs/topics/impala_math_functions.xml
new file mode 100644
index 0000000..fd16b37
--- /dev/null
+++ b/docs/topics/impala_math_functions.xml
@@ -0,0 +1,1336 @@
+<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="math_functions">
+
+ <title>Impala Mathematical Functions</title>
+ <titlealts><navtitle>Mathematical Functions</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="Impala Functions"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Data Analysts"/>
+ <data name="Category" value="Developers"/>
+ <data name="Category" value="Querying"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ Mathematical functions, or arithmetic functions, perform numeric calculations that are typically more complex
+ than basic addition, subtraction, multiplication, and division. For example, these functions include
+ trigonometric, logarithmic, and base conversion operations.
+ </p>
+
+ <note>
+ In Impala, exponentiation uses the <codeph>pow()</codeph> function rather than an exponentiation operator
+ such as <codeph>**</codeph>.
+ </note>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ The mathematical functions operate mainly on these data types: <xref href="impala_int.xml#int"/>,
+ <xref href="impala_bigint.xml#bigint"/>, <xref href="impala_smallint.xml#smallint"/>,
+ <xref href="impala_tinyint.xml#tinyint"/>, <xref href="impala_double.xml#double"/>,
+ <xref href="impala_float.xml#float"/>, and <xref href="impala_decimal.xml#decimal"/>. For the operators that
+ perform the standard operations such as addition, subtraction, multiplication, and division, see
+ <xref href="impala_operators.xml#arithmetic_operators"/>.
+ </p>
+
+ <p>
+ Functions that perform bitwise operations are explained in <xref href="impala_bit_functions.xml#bit_functions"/>.
+ </p>
+
+ <p>
+ <b>Function reference:</b>
+ </p>
+
+ <p>
+ Impala supports the following mathematical functions:
+ </p>
+
+ <dl>
+ <dlentry rev="1.4.0" id="abs">
+
+ <dt rev="2.0.1">
+ <codeph>abs(numeric_type a)</codeph>
+<!-- <codeph>abs(double a), abs(decimal(p,s) a)</codeph> -->
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">abs() function</indexterm>
+ <b>Purpose:</b> Returns the absolute value of the argument.
+ <p rev="2.0.1" conref="../shared/impala_common.xml#common/return_type_same"/>
+ <p>
+ <b>Usage notes:</b> Use this function to ensure all return values are positive. This is different than
+ the <codeph>positive()</codeph> function, which returns its argument unchanged (even if the argument
+ was negative).
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="acos">
+
+ <dt>
+ <codeph>acos(double a)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">acos() function</indexterm>
+ <b>Purpose:</b> Returns the arccosine of the argument.
+ <p>
+ <b>Return type:</b> <codeph>double</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="asin">
+
+ <dt>
+ <codeph>asin(double a)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">asin() function</indexterm>
+ <b>Purpose:</b> Returns the arcsine of the argument.
+ <p>
+ <b>Return type:</b> <codeph>double</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="atan">
+
+ <dt>
+ <codeph>atan(double a)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">atan() function</indexterm>
+ <b>Purpose:</b> Returns the arctangent of the argument.
+ <p>
+ <b>Return type:</b> <codeph>double</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="bin">
+
+ <dt>
+ <codeph>bin(bigint a)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">bin() function</indexterm>
+ <b>Purpose:</b> Returns the binary representation of an integer value, that is, a string of 0 and 1
+ digits.
+ <p>
+ <b>Return type:</b> <codeph>string</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry rev="1.4.0" id="ceil">
+
+ <dt>
+ <codeph>ceil(double a)</codeph>,
+ <codeph>ceil(decimal(p,s) a)</codeph>,
+ <codeph id="ceiling">ceiling(double a)</codeph>,
+ <codeph>ceiling(decimal(p,s) a)</codeph>,
+ <codeph id="dceil" rev="2.3.0">dceil(double a)</codeph>,
+ <codeph rev="2.3.0">dceil(decimal(p,s) a)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">ceil() function</indexterm>
+ <b>Purpose:</b> Returns the smallest integer that is greater than or equal to the argument.
+ <p>
+          <b>Return type:</b> <codeph>bigint</codeph> or <codeph>decimal(p,s)</codeph> depending on the type of the
+ input argument
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="conv">
+
+ <dt>
+ <codeph>conv(bigint num, int from_base, int to_base), conv(string num, int from_base, int
+ to_base)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">conv() function</indexterm>
+ <b>Purpose:</b> Returns a string representation of an integer value in a particular base. The input value
+ can be a string, for example to convert a hexadecimal number such as <codeph>fce2</codeph> to decimal. To
+ use the return value as a number (for example, when converting to base 10), use <codeph>CAST()</codeph>
+ to convert to the appropriate type.
+ <p>
+ <b>Return type:</b> <codeph>string</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="cos">
+
+ <dt>
+ <codeph>cos(double a)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">cos() function</indexterm>
+ <b>Purpose:</b> Returns the cosine of the argument.
+ <p>
+ <b>Return type:</b> <codeph>double</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="cot" rev="2.3.0">
+
+ <dt>
+ <codeph>cot(double a)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">cot() function</indexterm>
+ <b>Purpose:</b> Returns the cotangent of the argument.
+ <p>
+ <b>Return type:</b> <codeph>double</codeph>
+ </p>
+ <p conref="../shared/impala_common.xml#common/added_in_230"/>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="degrees">
+
+ <dt>
+ <codeph>degrees(double a)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">degrees() function</indexterm>
+ <b>Purpose:</b> Converts argument value from radians to degrees.
+ <p>
+ <b>Return type:</b> <codeph>double</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="e">
+
+ <dt>
+ <codeph>e()</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">e() function</indexterm>
+ <b>Purpose:</b> Returns the
+ <xref href="http://en.wikipedia.org/wiki/E_(mathematical_constant)" scope="external" format="html">mathematical
+ constant e</xref>.
+ <p>
+ <b>Return type:</b> <codeph>double</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="exp">
+
+ <dt>
+ <codeph>exp(double a)</codeph>,
+ <codeph rev="2.3.0" id="dexp">dexp(double a)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">exp() function</indexterm>
+ <b>Purpose:</b> Returns the
+ <xref href="http://en.wikipedia.org/wiki/E_(mathematical_constant)" scope="external" format="html">mathematical
+ constant e</xref> raised to the power of the argument.
+ <p>
+ <b>Return type:</b> <codeph>double</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry rev="2.3.0" id="factorial">
+
+ <dt>
+ <codeph>factorial(integer_type a)</codeph>
+ </dt>
+ <dd>
+ <indexterm audience="Cloudera">factorial() function</indexterm>
+ <b>Purpose:</b> Computes the <xref href="https://en.wikipedia.org/wiki/Factorial" scope="external" format="html">factorial</xref> of an integer value.
+ It works with any integer type.
+ <p conref="../shared/impala_common.xml#common/added_in_230"/>
+ <p>
+ <b>Usage notes:</b> You can use either the <codeph>factorial()</codeph> function or the <codeph>!</codeph> operator.
+ The factorial of 0 is 1. Likewise, the <codeph>factorial()</codeph> function returns 1 for any negative value.
+ The maximum positive value for the input argument is 20; a value of 21 or greater overflows the
+ range for a <codeph>BIGINT</codeph> and causes an error.
+      </p>
+      <p>
+        <b>Return type:</b> <codeph>bigint</codeph>
+      </p>
+<codeblock>select factorial(5);
++--------------+
+| factorial(5) |
++--------------+
+| 120 |
++--------------+
+
+select 5!;
++-----+
+| 5! |
++-----+
+| 120 |
++-----+
+
+select factorial(0);
++--------------+
+| factorial(0) |
++--------------+
+| 1 |
++--------------+
+
+select factorial(-100);
++-----------------+
+| factorial(-100) |
++-----------------+
+| 1 |
++-----------------+
+</codeblock>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="floor">
+
+ <dt>
+ <codeph>floor(double a)</codeph>,
+ <codeph>floor(decimal(p,s) a)</codeph>,
+ <codeph rev="2.3.0" id="dfloor">dfloor(double a)</codeph>,
+ <codeph rev="2.3.0">dfloor(decimal(p,s) a)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">floor() function</indexterm>
+ <b>Purpose:</b> Returns the largest integer that is less than or equal to the argument.
+ <p>
+ <b>Return type:</b> <codeph>bigint</codeph> or <codeph>decimal(p,s)</codeph> depending on the type of
+ the input argument
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="fmod">
+
+ <dt>
+ <codeph>fmod(double a, double b), fmod(float a, float b)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">fmod() function</indexterm>
+ <b>Purpose:</b> Returns the modulus of a floating-point number. Equivalent to the <codeph>%</codeph> arithmetic operator.
+ <p>
+ <b>Return type:</b> <codeph>float</codeph> or <codeph>double</codeph>, depending on type of arguments
+ </p>
+ <p conref="../shared/impala_common.xml#common/added_in_111"/>
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+ <p>
+ Because this function operates on <codeph>DOUBLE</codeph> or <codeph>FLOAT</codeph>
+ values, it is subject to potential rounding errors for values that cannot be
+ represented precisely. Prefer to use whole numbers, or values that you know
+ can be represented precisely by the <codeph>DOUBLE</codeph> or <codeph>FLOAT</codeph>
+ types.
+ </p>
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+ <p>
+ The following examples show equivalent operations with the <codeph>fmod()</codeph>
+ function and the <codeph>%</codeph> arithmetic operator, for values not subject
+ to any rounding error.
+ </p>
+<codeblock>select fmod(10,3);
++-------------+
+| fmod(10, 3) |
++-------------+
+| 1 |
++-------------+
+
+select fmod(5.5,2);
++--------------+
+| fmod(5.5, 2) |
++--------------+
+| 1.5 |
++--------------+
+
+select 10 % 3;
++--------+
+| 10 % 3 |
++--------+
+| 1 |
++--------+
+
+select 5.5 % 2;
++---------+
+| 5.5 % 2 |
++---------+
+| 1.5 |
++---------+
+</codeblock>
+ <p>
+ The following examples show operations with the <codeph>fmod()</codeph>
+ function for values that cannot be represented precisely by the
+ <codeph>DOUBLE</codeph> or <codeph>FLOAT</codeph> types, and thus are
+ subject to rounding error. <codeph>fmod(9.9,3.0)</codeph> returns a value
+ slightly different than the expected 0.9 because of rounding.
+ <codeph>fmod(9.9,3.3)</codeph> returns a value quite different from
+ the expected value of 0 because of rounding error during intermediate
+ calculations.
+ </p>
+<codeblock>select fmod(9.9,3.0);
++--------------------+
+| fmod(9.9, 3.0) |
++--------------------+
+| 0.8999996185302734 |
++--------------------+
+
+select fmod(9.9,3.3);
++-------------------+
+| fmod(9.9, 3.3) |
++-------------------+
+| 3.299999713897705 |
++-------------------+
+</codeblock>
+ </dd>
+
+ </dlentry>
+
+ <dlentry rev="1.2.2" id="fnv_hash">
+
+ <dt>
+ <codeph>fnv_hash(type v)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">fnv_hash() function</indexterm>
+ <b>Purpose:</b> Returns a consistent 64-bit value derived from the input argument, for convenience of
+ implementing hashing logic in an application.
+ <p>
+ <b>Return type:</b> <codeph>BIGINT</codeph>
+ </p>
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+ <p>
+ You might use the return value in an application where you perform load balancing, bucketing, or some
+ other technique to divide processing or storage.
+ </p>
+ <p>
+ Because the result can be any 64-bit value, to restrict the value to a particular range, you can use an
+ expression that includes the <codeph>ABS()</codeph> function and the <codeph>%</codeph> (modulo)
+ operator. For example, to produce a hash value in the range 0-9, you could use the expression
+ <codeph>ABS(FNV_HASH(x)) % 10</codeph>.
+ </p>
+ <p>
+ This function implements the same algorithm that Impala uses internally for hashing, on systems where
+ the CRC32 instructions are not available.
+ </p>
+ <p>
+ This function implements the
+ <xref href="http://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function" scope="external" format="html">Fowler&#8211;Noll&#8211;Vo
+ hash function</xref>, in particular the FNV-1a variation. This is not a perfect hash function: some
+ combinations of values could produce the same result value. It is not suitable for cryptographic use.
+ </p>
+ <p>
+ Similar input values of different types could produce different hash values, for example the same
+ numeric value represented as <codeph>SMALLINT</codeph> or <codeph>BIGINT</codeph>,
+ <codeph>FLOAT</codeph> or <codeph>DOUBLE</codeph>, or <codeph>DECIMAL(5,2)</codeph> or
+ <codeph>DECIMAL(20,5)</codeph>.
+ </p>
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+<codeblock>[localhost:21000] > create table h (x int, s string);
+[localhost:21000] > insert into h values (0, 'hello'), (1,'world'), (1234567890,'antidisestablishmentarianism');
+[localhost:21000] > select x, fnv_hash(x) from h;
++------------+----------------------+
+| x | fnv_hash(x) |
++------------+----------------------+
+| 0 | -2611523532599129963 |
+| 1 | 4307505193096137732 |
+| 1234567890 | 3614724209955230832 |
++------------+----------------------+
+[localhost:21000] > select s, fnv_hash(s) from h;
++------------------------------+---------------------+
+| s | fnv_hash(s) |
++------------------------------+---------------------+
+| hello | 6414202926103426347 |
+| world | 6535280128821139475 |
+| antidisestablishmentarianism | -209330013948433970 |
++------------------------------+---------------------+
+[localhost:21000] > select s, abs(fnv_hash(s)) % 10 from h;
++------------------------------+-------------------------+
+| s | abs(fnv_hash(s)) % 10.0 |
++------------------------------+-------------------------+
+| hello | 8 |
+| world | 6 |
+| antidisestablishmentarianism | 4 |
++------------------------------+-------------------------+</codeblock>
+ <p>
+ For short argument values, the high-order bits of the result have relatively low entropy:
+ </p>
+<codeblock>[localhost:21000] > create table b (x boolean);
+[localhost:21000] > insert into b values (true), (true), (false), (false);
+[localhost:21000] > select x, fnv_hash(x) from b;
++-------+---------------------+
+| x | fnv_hash(x) |
++-------+---------------------+
+| true | 2062020650953872396 |
+| true | 2062020650953872396 |
+| false | 2062021750465500607 |
+| false | 2062021750465500607 |
++-------+---------------------+</codeblock>
+ <p>
+ <b>Added in:</b> Impala 1.2.2
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry rev="1.4.0" id="greatest">
+
+ <dt>
+ <codeph>greatest(bigint a[, bigint b ...])</codeph>, <codeph>greatest(double a[, double b ...])</codeph>,
+ <codeph>greatest(decimal(p,s) a[, decimal(p,s) b ...])</codeph>, <codeph>greatest(string a[, string b
+ ...])</codeph>, <codeph>greatest(timestamp a[, timestamp b ...])</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">greatest() function</indexterm>
+ <b>Purpose:</b> Returns the largest value from a list of expressions.
+ <p conref="../shared/impala_common.xml#common/return_same_type"/>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="hex">
+
+ <dt>
+ <codeph>hex(bigint a), hex(string a)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">hex() function</indexterm>
+ <b>Purpose:</b> Returns the hexadecimal representation of an integer value, or of the characters in a
+ string.
+ <p>
+ <b>Return type:</b> <codeph>string</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry rev="1.4.0" id="is_inf">
+
+ <dt>
+ <codeph>is_inf(double a)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">is_inf() function</indexterm>
+ <b>Purpose:</b> Tests whether a value is equal to the special value <q>inf</q>, signifying infinity.
+ <p>
+ <b>Return type:</b> <codeph>boolean</codeph>
+ </p>
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+ <p>
+ Infinity and NaN can be specified in text data files as <codeph>inf</codeph> and <codeph>nan</codeph>
+ respectively, and Impala interprets them as these special values. They can also be produced by certain
+ arithmetic expressions; for example, <codeph>pow(-1, 0.5)</codeph> returns infinity and
+ <codeph>1/0</codeph> returns NaN. Or you can cast the literal values, such as <codeph>CAST('nan' AS
+ DOUBLE)</codeph> or <codeph>CAST('inf' AS DOUBLE)</codeph>.
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry rev="1.4.0" id="is_nan">
+
+ <dt>
+ <codeph>is_nan(double a)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">is_nan() function</indexterm>
+ <b>Purpose:</b> Tests whether a value is equal to the special value <q>NaN</q>, signifying <q>not a
+ number</q>.
+ <p>
+ <b>Return type:</b> <codeph>boolean</codeph>
+ </p>
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+ <p>
+ Infinity and NaN can be specified in text data files as <codeph>inf</codeph> and <codeph>nan</codeph>
+ respectively, and Impala interprets them as these special values. They can also be produced by certain
+ arithmetic expressions; for example, <codeph>pow(-1, 0.5)</codeph> returns infinity and
+ <codeph>1/0</codeph> returns NaN. Or you can cast the literal values, such as <codeph>CAST('nan' AS
+ DOUBLE)</codeph> or <codeph>CAST('inf' AS DOUBLE)</codeph>.
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry rev="1.4.0" id="least">
+
+ <dt>
+ <codeph>least(bigint a[, bigint b ...])</codeph>, <codeph>least(double a[, double b ...])</codeph>,
+ <codeph>least(decimal(p,s) a[, decimal(p,s) b ...])</codeph>, <codeph>least(string a[, string b
+ ...])</codeph>, <codeph>least(timestamp a[, timestamp b ...])</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">least() function</indexterm>
+ <b>Purpose:</b> Returns the smallest value from a list of expressions.
+ <p conref="../shared/impala_common.xml#common/return_same_type"/>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="ln">
+
+ <dt>
+ <codeph>ln(double a)</codeph>,
+ <codeph rev="2.3.0" id="dlog1">dlog1(double a)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">ln() function</indexterm>
+ <indexterm audience="Cloudera">dlog1() function</indexterm>
+ <b>Purpose:</b> Returns the
+ <xref href="https://en.wikipedia.org/wiki/Natural_logarithm" scope="external" format="html">natural
+ logarithm</xref> of the argument.
+ <p>
+ <b>Return type:</b> <codeph>double</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="log">
+
+ <dt>
+ <codeph>log(double base, double a)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">log() function</indexterm>
+ <b>Purpose:</b> Returns the logarithm of the second argument to the specified base.
+ <p>
+ <b>Return type:</b> <codeph>double</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="log10">
+
+ <dt>
+ <codeph>log10(double a)</codeph>,
+ <codeph rev="2.3.0" id="dlog10">dlog10(double a)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">log10() function</indexterm>
+ <indexterm audience="Cloudera">dlog10() function</indexterm>
+ <b>Purpose:</b> Returns the logarithm of the argument to the base 10.
+ <p>
+ <b>Return type:</b> <codeph>double</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="log2">
+
+ <dt>
+ <codeph>log2(double a)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">log2() function</indexterm>
+ <b>Purpose:</b> Returns the logarithm of the argument to the base 2.
+ <p>
+ <b>Return type:</b> <codeph>double</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry rev="1.4.0" id="max_int">
+
+ <dt>
+ <codeph>max_int(), <ph id="max_tinyint">max_tinyint()</ph>, <ph id="max_smallint">max_smallint()</ph>,
+ <ph id="max_bigint">max_bigint()</ph></codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">max_int() function</indexterm>
+ <indexterm audience="Cloudera">max_tinyint() function</indexterm>
+ <indexterm audience="Cloudera">max_smallint() function</indexterm>
+ <indexterm audience="Cloudera">max_bigint() function</indexterm>
+ <b>Purpose:</b> Returns the largest value of the associated integral type.
+ <p>
+ <b>Return type:</b> The same as the integral type being checked.
+ </p>
+ <p>
+<!-- Repeated usage text between max_ and min_ functions, could turn into a conref. -->
+ <b>Usage notes:</b> Use the corresponding <codeph>min_</codeph> and <codeph>max_</codeph> functions to
+ check if all values in a column are within the allowed range, before copying data or altering column
+ definitions. If not, switch to the next higher integral type or to a <codeph>DECIMAL</codeph> with
+ sufficient precision.
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry rev="1.4.0" id="min_int">
+
+ <dt>
+ <codeph>min_int(), <ph id="min_tinyint">min_tinyint()</ph>, <ph id="min_smallint">min_smallint()</ph>,
+ <ph id="min_bigint">min_bigint()</ph></codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">min_int() function</indexterm>
+ <indexterm audience="Cloudera">min_tinyint() function</indexterm>
+ <indexterm audience="Cloudera">min_smallint() function</indexterm>
+ <indexterm audience="Cloudera">min_bigint() function</indexterm>
+ <b>Purpose:</b> Returns the smallest value of the associated integral type (a negative number).
+ <p>
+ <b>Return type:</b> The same as the integral type being checked.
+ </p>
+ <p>
+ <b>Usage notes:</b> Use the corresponding <codeph>min_</codeph> and <codeph>max_</codeph> functions to
+ check if all values in a column are within the allowed range, before copying data or altering column
+ definitions. If not, switch to the next higher integral type or to a <codeph>DECIMAL</codeph> with
+ sufficient precision.
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="mod" rev="2.2.0">
+
+ <dt>
+ <codeph>mod(<varname>numeric_type</varname> a, <varname>same_type</varname> b)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">mod() function</indexterm>
+ <b>Purpose:</b> Returns the modulus of a number. Equivalent to the <codeph>%</codeph> arithmetic operator.
+ Works with any size integer type, any size floating-point type, and <codeph>DECIMAL</codeph>
+ with any precision and scale.
+ <p conref="../shared/impala_common.xml#common/return_type_same"/>
+ <p conref="../shared/impala_common.xml#common/added_in_220"/>
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+ <p>
+ Because this function works with <codeph>DECIMAL</codeph> values, prefer it over <codeph>fmod()</codeph>
+ when working with fractional values. It is not subject to the rounding errors that make
+ <codeph>fmod()</codeph> problematic with floating-point numbers.
+ The <codeph>%</codeph> arithmetic operator now uses the <codeph>mod()</codeph> function
+ in cases where its arguments can be interpreted as <codeph>DECIMAL</codeph> values,
+ increasing the accuracy of that operator.
+ </p>
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+ <p>
+ The following examples show how the <codeph>mod()</codeph> function works for
+ whole numbers and fractional values, and how the <codeph>%</codeph> operator
+ works the same way. In the case of <codeph>mod(9.9,3)</codeph>,
+ the type conversion for the second argument results in the first argument
+ being interpreted as <codeph>DOUBLE</codeph>, so to produce an accurate
+ <codeph>DECIMAL</codeph> result requires casting the second argument
+ or writing it as a <codeph>DECIMAL</codeph> literal, 3.0.
+ </p>
+<codeblock>select mod(10,3);
++-------------+
+| mod(10, 3)  |
++-------------+
+| 1 |
++-------------+
+
+select mod(5.5,2);
++--------------+
+| mod(5.5, 2)  |
++--------------+
+| 1.5 |
++--------------+
+
+select 10 % 3;
++--------+
+| 10 % 3 |
++--------+
+| 1 |
++--------+
+
+select 5.5 % 2;
++---------+
+| 5.5 % 2 |
++---------+
+| 1.5 |
++---------+
+
+select mod(9.9,3.3);
++---------------+
+| mod(9.9, 3.3) |
++---------------+
+| 0.0 |
++---------------+
+
+select mod(9.9,3);
++--------------------+
+| mod(9.9, 3) |
++--------------------+
+| 0.8999996185302734 |
++--------------------+
+
+select mod(9.9, cast(3 as decimal(2,1)));
++-----------------------------------+
+| mod(9.9, cast(3 as decimal(2,1))) |
++-----------------------------------+
+| 0.9 |
++-----------------------------------+
+
+select mod(9.9,3.0);
++---------------+
+| mod(9.9, 3.0) |
++---------------+
+| 0.9 |
++---------------+
+</codeblock>
+ </dd>
+
+ </dlentry>
+
+ <dlentry rev="1.4.0" id="negative">
+
+ <dt rev="2.0.1">
+ <codeph>negative(numeric_type a)</codeph>
+<!-- <codeph>negative(int a), negative(double a), negative(decimal(p,s) a)</codeph> -->
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">negative() function</indexterm>
+ <b>Purpose:</b> Returns the argument with the sign reversed; returns a positive value if the argument was
+ already negative.
+ <p rev="2.0.1" conref="../shared/impala_common.xml#common/return_type_same"/>
+<!--
+ <p>
+ <b>Return type:</b> <codeph>int</codeph>, <codeph>double</codeph>,
+ or <codeph>decimal(p,s)</codeph> depending on type of argument
+ </p>
+ -->
+ <p>
+ <b>Usage notes:</b> Use <codeph>-abs(a)</codeph> instead if you need to ensure all return values are
+ negative.
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="pi">
+
+ <dt>
+ <codeph>pi()</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">pi() function</indexterm>
+ <b>Purpose:</b> Returns the constant pi.
+ <p>
+ <b>Return type:</b> <codeph>double</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="pmod">
+
+ <dt>
+ <codeph>pmod(bigint a, bigint b), pmod(double a, double b)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">pmod() function</indexterm>
+ <b>Purpose:</b> Returns the positive modulus of a number.
+ Primarily for <xref href="https://issues.apache.org/jira/browse/HIVE-656" scope="external" format="html">HiveQL compatibility</xref>.
+ <p>
+ <b>Return type:</b> <codeph>bigint</codeph> or <codeph>double</codeph>, depending on type of arguments
+ </p>
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+ <p>
+ The following examples show how the <codeph>fmod()</codeph> function sometimes returns a negative value
+ depending on the sign of its arguments, and the <codeph>pmod()</codeph> function returns the same value
+ as <codeph>fmod()</codeph>, but sometimes with the sign flipped.
+ </p>
+<codeblock>select fmod(-5,2);
++-------------+
+| fmod(-5, 2) |
++-------------+
+| -1 |
++-------------+
+
+select pmod(-5,2);
++-------------+
+| pmod(-5, 2) |
++-------------+
+| 1 |
++-------------+
+
+select fmod(-5,-2);
++--------------+
+| fmod(-5, -2) |
++--------------+
+| -1 |
++--------------+
+
+select pmod(-5,-2);
++--------------+
+| pmod(-5, -2) |
++--------------+
+| -1 |
++--------------+
+
+select fmod(5,-2);
++-------------+
+| fmod(5, -2) |
++-------------+
+| 1 |
++-------------+
+
+select pmod(5,-2);
++-------------+
+| pmod(5, -2) |
++-------------+
+| -1 |
++-------------+
+</codeblock>
+ </dd>
+
+ </dlentry>
+
+ <dlentry rev="1.4.0" id="positive">
+
+ <dt rev="2.0.1">
+ <codeph>positive(numeric_type a)</codeph>
+<!-- <codeph>positive(int a), positive(double a), positive(decimal(p,s) a</codeph> -->
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">positive() function</indexterm>
+ <b>Purpose:</b> Returns the original argument unchanged (even if the argument is negative).
+ <p rev="2.0.1" conref="../shared/impala_common.xml#common/return_type_same"/>
+<!--
+ <p>
+ <b>Return type:</b> <codeph>int</codeph>, <codeph>double</codeph>,
+ or <codeph>decimal(p,s)</codeph> depending on type of argument
+ </p>
+ -->
+ <p>
+ <b>Usage notes:</b> Use <codeph>abs()</codeph> instead if you need to ensure all return values are
+ positive.
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="pow">
+
+ <dt>
+ <codeph>pow(double a, double p)</codeph>,
+ <codeph id="power">power(double a, double p)</codeph>,
+ <codeph rev="2.3.0" id="dpow">dpow(double a, double p)</codeph>,
+ <codeph rev="2.3.0" id="fpow">fpow(double a, double p)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">pow() function</indexterm>
+ <indexterm audience="Cloudera">power() function</indexterm>
+ <indexterm audience="Cloudera">dpow() function</indexterm>
+ <indexterm audience="Cloudera">fpow() function</indexterm>
+ <b>Purpose:</b> Returns the first argument raised to the power of the second argument.
+ <p>
+ <b>Return type:</b> <codeph>double</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry rev="1.4.0" id="precision">
+
+ <dt>
+ <codeph>precision(<varname>numeric_expression</varname>)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">precision() function</indexterm>
+ <b>Purpose:</b> Computes the precision (number of decimal digits) needed to represent the type of the
+ argument expression as a <codeph>DECIMAL</codeph> value.
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+ <p>
+ Typically used in combination with the <codeph>scale()</codeph> function, to determine the appropriate
+ <codeph>DECIMAL(<varname>precision</varname>,<varname>scale</varname>)</codeph> type to declare in a
+ <codeph>CREATE TABLE</codeph> statement or <codeph>CAST()</codeph> function.
+ </p>
+ <p>
+ <b>Return type:</b> <codeph>int</codeph>
+ </p>
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+ <p conref="../shared/impala_common.xml#common/precision_scale_example"/>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="quotient">
+
+ <dt>
+ <codeph>quotient(int numerator, int denominator)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">quotient() function</indexterm>
+ <b>Purpose:</b> Returns the first argument divided by the second argument, discarding any fractional
+ part. Avoids promoting arguments to <codeph>DOUBLE</codeph> as happens with the <codeph>/</codeph> SQL
+ operator.
+ <p>
+ <b>Return type:</b> <codeph>int</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="radians">
+
+ <dt>
+ <codeph>radians(double a)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">radians() function</indexterm>
+ <b>Purpose:</b> Converts argument value from degrees to radians.
+ <p>
+ <b>Return type:</b> <codeph>double</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="rand">
+
+ <dt>
+ <codeph>rand()</codeph>, <codeph>rand(int seed)</codeph>,
+ <codeph rev="2.3.0" id="random">random()</codeph>,
+ <codeph rev="2.3.0">random(int seed)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">rand() function</indexterm>
+ <b>Purpose:</b> Returns a random value between 0 and 1. After <codeph>rand()</codeph> is called with a
+ seed argument, it produces a consistent random sequence based on the seed value.
+ <p>
+ <b>Return type:</b> <codeph>double</codeph>
+ </p>
+ <p>
+ <b>Usage notes:</b> Currently, the random sequence is reset after each query, and multiple calls to
+ <codeph>rand()</codeph> within the same query return the same value each time. To produce a different
+ sequence of random numbers for each query, pass a unique seed value to each call to
+ <codeph>rand()</codeph>. For example, <codeph>select rand(unix_timestamp()) from ...</codeph>
+ </p>
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+ <p>
+ The following examples show how <codeph>rand()</codeph> can produce sequences of varying predictability,
+ so that you can reproduce query results involving random values or generate unique sequences of random
+ values for each query.
+ When <codeph>rand()</codeph> is called with no argument, it generates the same sequence of values each time,
+ regardless of the ordering of the result set.
+ When <codeph>rand()</codeph> is called with a constant integer, it generates a different sequence of values,
+ but still always the same sequence for the same seed value.
+ If you pass in a seed value that changes, such as the return value of the expression <codeph>unix_timestamp(now())</codeph>,
+ each query will use a different sequence of random values, potentially more useful in probability calculations although
+ more difficult to reproduce at a later time. Therefore, the final two examples with an unpredictable seed value
+ also include the seed in the result set, to make it possible to reproduce the same random sequence later.
+ </p>
+<codeblock>select x, rand() from three_rows;
++---+-----------------------+
+| x | rand() |
++---+-----------------------+
+| 1 | 0.0004714746030380365 |
+| 2 | 0.5895895192351144 |
+| 3 | 0.4431900859080209 |
++---+-----------------------+
+
+select x, rand() from three_rows order by x desc;
++---+-----------------------+
+| x | rand() |
++---+-----------------------+
+| 3 | 0.0004714746030380365 |
+| 2 | 0.5895895192351144 |
+| 1 | 0.4431900859080209 |
++---+-----------------------+
+
+select x, rand(1234) from three_rows order by x;
++---+----------------------+
+| x | rand(1234) |
++---+----------------------+
+| 1 | 0.7377511392057646 |
+| 2 | 0.009428468537250751 |
+| 3 | 0.208117277924026 |
++---+----------------------+
+
+select x, rand(1234) from three_rows order by x desc;
++---+----------------------+
+| x | rand(1234) |
++---+----------------------+
+| 3 | 0.7377511392057646 |
+| 2 | 0.009428468537250751 |
+| 1 | 0.208117277924026 |
++---+----------------------+
+
+select x, unix_timestamp(now()), rand(unix_timestamp(now()))
+ from three_rows order by x;
++---+-----------------------+-----------------------------+
+| x | unix_timestamp(now()) | rand(unix_timestamp(now())) |
++---+-----------------------+-----------------------------+
+| 1 | 1440777752 | 0.002051228658320023 |
+| 2 | 1440777752 | 0.5098743483004506 |
+| 3 | 1440777752 | 0.9517714925817081 |
++---+-----------------------+-----------------------------+
+
+select x, unix_timestamp(now()), rand(unix_timestamp(now()))
+ from three_rows order by x desc;
++---+-----------------------+-----------------------------+
+| x | unix_timestamp(now()) | rand(unix_timestamp(now())) |
++---+-----------------------+-----------------------------+
+| 3 | 1440777761 | 0.9985985015512437 |
+| 2 | 1440777761 | 0.3251255333074953 |
+| 1 | 1440777761 | 0.02422675025846192 |
++---+-----------------------+-----------------------------+
+</codeblock>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="round">
+
+ <dt>
+ <codeph>round(double a)</codeph>,
+ <codeph>round(double a, int d)</codeph>,
+ <codeph rev="1.4.0">round(decimal a, int_type d)</codeph>,
+ <codeph rev="2.3.0" id="dround">dround(double a)</codeph>,
+ <codeph rev="2.3.0">dround(double a, int d)</codeph>,
+ <codeph rev="2.3.0">dround(decimal(p,s) a, int_type d)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">round() function</indexterm>
+ <indexterm audience="Cloudera">dround() function</indexterm>
+ <b>Purpose:</b> Rounds a floating-point value. By default (with a single argument), rounds to the nearest
+ integer. Values ending in .5 are rounded up for positive numbers, down for negative numbers (that is,
+ away from zero). The optional second argument specifies how many digits to leave after the decimal point;
+ values greater than zero produce a floating-point return value rounded to the requested number of digits
+ to the right of the decimal point.
+ <p rev="1.4.0">
+ <b>Return type:</b> <codeph>bigint</codeph> for single <codeph>double</codeph> argument.
+ <codeph>double</codeph> for two-argument signature when second argument greater than zero.
+ For <codeph>DECIMAL</codeph> values, the smallest
+ <codeph>DECIMAL(<varname>p</varname>,<varname>s</varname>)</codeph> type with appropriate precision and
+ scale.
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry rev="1.4.0" id="scale">
+
+ <dt>
+ <codeph>scale(<varname>numeric_expression</varname>)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">scale() function</indexterm>
+ <b>Purpose:</b> Computes the scale (number of decimal digits to the right of the decimal point) needed to
+ represent the type of the argument expression as a <codeph>DECIMAL</codeph> value.
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+ <p>
+ Typically used in combination with the <codeph>precision()</codeph> function, to determine the
+ appropriate <codeph>DECIMAL(<varname>precision</varname>,<varname>scale</varname>)</codeph> type to
+ declare in a <codeph>CREATE TABLE</codeph> statement or <codeph>CAST()</codeph> function.
+ </p>
+ <p>
+ <b>Return type:</b> <codeph>int</codeph>
+ </p>
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+ <p conref="../shared/impala_common.xml#common/precision_scale_example"/>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="sign">
+
+ <dt>
+ <codeph>sign(double a)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">sign() function</indexterm>
+ <b>Purpose:</b> Returns -1, 0, or 1 to indicate the signedness of the argument value.
+ <p>
+ <b>Return type:</b> <codeph>int</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="sin">
+
+ <dt>
+ <codeph>sin(double a)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">sin() function</indexterm>
+ <b>Purpose:</b> Returns the sine of the argument.
+ <p>
+ <b>Return type:</b> <codeph>double</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="sqrt">
+
+ <dt>
+ <codeph>sqrt(double a)</codeph>,
+ <codeph rev="2.3.0" id="dsqrt">dsqrt(double a)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">sqrt() function</indexterm>
+ <indexterm audience="Cloudera">dsqrt() function</indexterm>
+ <b>Purpose:</b> Returns the square root of the argument.
+ <p>
+ <b>Return type:</b> <codeph>double</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="tan">
+
+ <dt>
+ <codeph>tan(double a)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">tan() function</indexterm>
+ <b>Purpose:</b> Returns the tangent of the argument.
+ <p>
+ <b>Return type:</b> <codeph>double</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry rev="2.3.0" id="truncate">
+
+ <dt>
+ <codeph>truncate(double_or_decimal a[, digits_to_leave])</codeph>,
+ <ph id="dtrunc"><codeph>dtrunc(double_or_decimal a[, digits_to_leave])</codeph></ph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">truncate() function</indexterm>
+ <indexterm audience="Cloudera">dtrunc() function</indexterm>
+ <b>Purpose:</b> Removes some or all fractional digits from a numeric value.
+ With no second argument, removes all fractional digits, leaving an integer value.
+ The optional second argument specifies the number of fractional digits to include
+ in the return value, and only applies when the argument type is <codeph>DECIMAL</codeph>.
+ <codeph>truncate()</codeph> and <codeph>dtrunc()</codeph> are aliases for the same function.
+ <p>
+ <b>Return type:</b> <codeph>decimal</codeph> for <codeph>DECIMAL</codeph> arguments;
+ <codeph>bigint</codeph> for <codeph>DOUBLE</codeph> arguments
+ </p>
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+<codeblock>select truncate(3.45)
++----------------+
+| truncate(3.45) |
++----------------+
+| 3 |
++----------------+
+
+select truncate(-3.45)
++-----------------+
+| truncate(-3.45) |
++-----------------+
+| -3 |
++-----------------+
+
+select truncate(3.456,1)
++--------------------+
+| truncate(3.456, 1) |
++--------------------+
+| 3.4 |
++--------------------+
+
+select dtrunc(3.456,1)
++------------------+
+| dtrunc(3.456, 1) |
++------------------+
+| 3.4 |
++------------------+
+
+select truncate(3.456,2)
++--------------------+
+| truncate(3.456, 2) |
++--------------------+
+| 3.45 |
++--------------------+
+
+select truncate(3.456,7)
++--------------------+
+| truncate(3.456, 7) |
++--------------------+
+| 3.4560000 |
++--------------------+
+</codeblock>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="unhex">
+
+ <dt>
+ <codeph>unhex(string a)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">unhex() function</indexterm>
+ <b>Purpose:</b> Returns a string of characters with ASCII values corresponding to pairs of hexadecimal
+ digits in the argument.
+ <p>
+ <b>Return type:</b> <codeph>string</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+ </dl>
+ </conbody>
+</concept>
[10/22] incubator-impala git commit: First try at porting over the
source files necessary for the Impala SQL Reference.
Posted by jr...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_invalidate_metadata.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_invalidate_metadata.xml b/docs/topics/impala_invalidate_metadata.xml
new file mode 100644
index 0000000..96fca7d
--- /dev/null
+++ b/docs/topics/impala_invalidate_metadata.xml
@@ -0,0 +1,236 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept rev="1.1" id="invalidate_metadata">
+
+ <title>INVALIDATE METADATA Statement</title>
+ <titlealts><navtitle>INVALIDATE METADATA</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="DDL"/>
+ <data name="Category" value="Metastore"/>
+ <data name="Category" value="Schemas"/>
+ <data name="Category" value="Tables"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">INVALIDATE METADATA statement</indexterm>
+ Marks the metadata for one or all tables as stale. Required after a table is created through the Hive shell,
+ before the table is available for Impala queries. The next time the current Impala node performs a query
+ against a table whose metadata is invalidated, Impala reloads the associated metadata before the query
+ proceeds. This is a relatively expensive operation compared to the incremental metadata update done by the
+ <codeph>REFRESH</codeph> statement, so in the common scenario of adding new data files to an existing table,
+ prefer <codeph>REFRESH</codeph> rather than <codeph>INVALIDATE METADATA</codeph>. If you are not familiar
+ with the way Impala uses metadata and how it shares the same metastore database as Hive, see
+ <xref href="impala_hadoop.xml#intro_metastore"/> for background information.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock>INVALIDATE METADATA [[<varname>db_name</varname>.]<varname>table_name</varname>]</codeblock>
+
+ <p>
+ By default, the cached metadata for all tables is flushed. If you specify a table name, only the metadata for
+ that one table is flushed. Even for a single table, <codeph>INVALIDATE METADATA</codeph> is more expensive
+ than <codeph>REFRESH</codeph>, so prefer <codeph>REFRESH</codeph> in the common case where you add new data
+ files for an existing table.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/internals_blurb"/>
+
+ <p>
+ To accurately respond to queries, Impala must have current metadata about those databases and tables that
+ clients query directly. Therefore, if some other entity modifies information used by Impala in the metastore
+ that Impala and Hive share, the information cached by Impala must be updated. However, this does not mean
+ that all metadata updates require an Impala update.
+ </p>
+
+ <note>
+ <p conref="../shared/impala_common.xml#common/catalog_server_124"/>
+ <p rev="1.2">
+ In Impala 1.2 and higher, a dedicated daemon (<cmdname>catalogd</cmdname>) broadcasts DDL changes made
+ through Impala to all Impala nodes. Formerly, after you created a database or table while connected to one
+ Impala node, you needed to issue an <codeph>INVALIDATE METADATA</codeph> statement on another Impala node
+ before accessing the new database or table from the other node. Now, newly created or altered objects are
+ picked up automatically by all Impala nodes. You must still use the <codeph>INVALIDATE METADATA</codeph>
+ technique after creating or altering objects through Hive. See
+ <xref href="impala_components.xml#intro_catalogd"/> for more information on the catalog service.
+ </p>
+ <p>
+ The <codeph>INVALIDATE METADATA</codeph> statement is new in Impala 1.1 and higher, and takes over some of
+ the use cases of the Impala 1.0 <codeph>REFRESH</codeph> statement. Because <codeph>REFRESH</codeph> now
+ requires a table name parameter, to flush the metadata for all tables at once, use the <codeph>INVALIDATE
+ METADATA</codeph> statement.
+ </p>
+ <draft-comment translate="no"> Almost-identical wording here, under INVALIDATE METADATA, and in Release Notes :: New Features. Makes sense to conref. </draft-comment>
+ <p>
+ Because <codeph>REFRESH <varname>table_name</varname></codeph> only works for tables that the current
+ Impala node is already aware of, when you create a new table in the Hive shell, you must enter
+ <codeph>INVALIDATE METADATA</codeph> with no table parameter before you can see the new table in
+ <cmdname>impala-shell</cmdname>. Once the table is known by the Impala node, you can issue <codeph>REFRESH
+ <varname>table_name</varname></codeph> after you add data files for that table.
+ </p>
+ </note>
+
+ <p conref="../shared/impala_common.xml#common/refresh_vs_invalidate"/>
+
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+
+ <p>
+ A metadata update for an <codeph>impalad</codeph> instance <b>is</b> required if:
+ </p>
+
+ <ul>
+ <li>
+ A metadata change occurs.
+ </li>
+
+ <li>
+ <b>and</b> the change is made from another <codeph>impalad</codeph> instance in your cluster, or through
+ Hive.
+ </li>
+
+ <li>
+ <b>and</b> the change is made to a database to which clients such as the Impala shell or ODBC directly
+ connect.
+ </li>
+ </ul>
+
+ <p>
+ A metadata update for an Impala node is <b>not</b> required when you issue queries from the same Impala node
+ where you ran <codeph>ALTER TABLE</codeph>, <codeph>INSERT</codeph>, or other table-modifying statement.
+ </p>
+
+ <p>
+ Database and table metadata is typically modified by:
+ </p>
+
+ <ul>
+ <li>
+ Hive - via <codeph>ALTER</codeph>, <codeph>CREATE</codeph>, <codeph>DROP</codeph> or
+ <codeph>INSERT</codeph> operations.
+ </li>
+
+ <li>
+ Impalad - via <codeph>CREATE TABLE</codeph>, <codeph>ALTER TABLE</codeph>, and <codeph>INSERT</codeph>
+ operations.
+ </li>
+ </ul>
+
+ <p>
+ <codeph>INVALIDATE METADATA</codeph> causes the metadata for that table to be marked as stale, and reloaded
+ the next time the table is referenced. For a huge table, that process could take a noticeable amount of time;
+ thus you might prefer to use <codeph>REFRESH</codeph> where practical, to avoid an unpredictable delay later,
+ for example if the next reference to the table is during a benchmark test.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+ <p>
+ The following example shows how you might use the <codeph>INVALIDATE METADATA</codeph> statement after
+ creating new tables (such as SequenceFile or HBase tables) through the Hive shell. Before the
+ <codeph>INVALIDATE METADATA</codeph> statement was issued, Impala would give a <q>table not found</q> error
+ if you tried to refer to those table names. The <codeph>DESCRIBE</codeph> statements cause the latest
+ metadata to be immediately loaded for the tables, avoiding a delay the next time those tables are queried.
+ </p>
+
+<codeblock>[impalad-host:21000] > invalidate metadata;
+[impalad-host:21000] > describe t1;
+...
+[impalad-host:21000] > describe t2;
+... </codeblock>
+
+ <p>
+ For more examples of using <codeph>REFRESH</codeph> and <codeph>INVALIDATE METADATA</codeph> with a
+ combination of Impala and Hive operations, see <xref href="impala_tutorial.xml#tutorial_impala_hive"/>.
+ </p>
+
+ <p>
+ If you need to ensure that the metadata is up-to-date when you start an <cmdname>impala-shell</cmdname>
+ session, run <cmdname>impala-shell</cmdname> with the <codeph>-r</codeph> or
+ <codeph>--refresh_after_connect</codeph> command-line option. Because this operation adds a delay to the next
+ query against each table, potentially expensive for large tables with many partitions, try to avoid using
+ this option for day-to-day operations in a production environment.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/permissions_blurb"/>
+ <p rev="CDH-19187">
+ The user ID that the <cmdname>impalad</cmdname> daemon runs under,
+ typically the <codeph>impala</codeph> user, must have execute
+ permissions for all the relevant directories holding table data.
+ (A table could have data spread across multiple directories,
+ or in unexpected paths, if it uses partitioning or
+ specifies a <codeph>LOCATION</codeph> attribute for
+ individual partitions or the entire table.)
+ Issues with permissions might not cause an immediate error for this statement,
+ but subsequent statements such as <codeph>SELECT</codeph>
+ or <codeph>SHOW TABLE STATS</codeph> could fail.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/hdfs_blurb"/>
+
+ <p>
+ By default, the <codeph>INVALIDATE METADATA</codeph> command checks HDFS permissions of the underlying data
+ files and directories, caching this information so that a statement can be cancelled immediately if for
+ example the <codeph>impala</codeph> user does not have permission to write to the data directory for the
+ table. (This checking does not apply if you have set the <cmdname>catalogd</cmdname> configuration option
+ <codeph>--load_catalog_in_background=false</codeph>.) Impala reports any lack of write permissions as an
+ <codeph>INFO</codeph> message in the log file, in case that represents an oversight. If you change HDFS
+ permissions to make data readable or writeable by the Impala user, issue another <codeph>INVALIDATE
+ METADATA</codeph> to make Impala aware of the change.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+ <p rev="1.2.4">
+ This example illustrates creating a new database and new table in Hive, then doing an <codeph>INVALIDATE
+ METADATA</codeph> statement in Impala using the fully qualified table name, after which both the new table
+ and the new database are visible to Impala. The ability to specify <codeph>INVALIDATE METADATA
+ <varname>table_name</varname></codeph> for a table created in Hive is a new capability in Impala 1.2.4. In
+ earlier releases, that statement would have returned an error indicating an unknown table, requiring you to
+ do <codeph>INVALIDATE METADATA</codeph> with no table name, a more expensive operation that reloaded metadata
+ for all tables and databases.
+ </p>
+
+<codeblock rev="1.2.4">$ hive
+hive> create database new_db_from_hive;
+OK
+Time taken: 4.118 seconds
+hive> create table new_db_from_hive.new_table_from_hive (x int);
+OK
+Time taken: 0.618 seconds
+hive> quit;
+$ impala-shell
+[localhost:21000] > show databases like 'new*';
+[localhost:21000] > refresh new_db_from_hive.new_table_from_hive;
+ERROR: AnalysisException: Database does not exist: new_db_from_hive
+[localhost:21000] > invalidate metadata new_db_from_hive.new_table_from_hive;
+[localhost:21000] > show databases like 'new*';
++--------------------+
+| name |
++--------------------+
+| new_db_from_hive |
++--------------------+
+[localhost:21000] > show tables in new_db_from_hive;
++---------------------+
+| name |
++---------------------+
+| new_table_from_hive |
++---------------------+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/s3_blurb"/>
+ <p conref="../shared/impala_common.xml#common/s3_metadata"/>
+
+ <p conref="../shared/impala_common.xml#common/cancel_blurb_no"/>
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+ <p>
+ <xref href="impala_hadoop.xml#intro_metastore"/>,
+ <xref href="impala_refresh.xml#refresh"/>
+ </p>
+
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_joins.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_joins.xml b/docs/topics/impala_joins.xml
new file mode 100644
index 0000000..011a488
--- /dev/null
+++ b/docs/topics/impala_joins.xml
@@ -0,0 +1,520 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="joins">
+
+ <title>Joins in Impala SELECT Statements</title>
+ <titlealts><navtitle>Joins</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="Data Analysts"/>
+ <data name="Category" value="Developers"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Querying"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">joins</indexterm>
+ A join query is a <codeph>SELECT</codeph> statement that combines data from two or more tables,
+ and returns a result set containing items from some or all of those tables. It is a way to
+ cross-reference and correlate related data that is organized into multiple tables, typically
+ using identifiers that are repeated in each of the joined tables.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/join_types"/>
+
+<codeblock>SELECT <varname>select_list</varname> FROM
+ <varname>table_or_subquery1</varname> [INNER] JOIN <varname>table_or_subquery2</varname> |
+ <varname>table_or_subquery1</varname> {LEFT [OUTER] | RIGHT [OUTER] | FULL [OUTER]} JOIN <varname>table_or_subquery2</varname> |
+ <varname>table_or_subquery1</varname> {LEFT | RIGHT} SEMI JOIN <varname>table_or_subquery2</varname> |
+ <ph rev="2.0.0"><varname>table_or_subquery1</varname> {LEFT | RIGHT} ANTI JOIN <varname>table_or_subquery2</varname> |</ph>
+ [ ON <varname>col1</varname> = <varname>col2</varname> [AND <varname>col3</varname> = <varname>col4</varname> ...] |
+ USING (<varname>col1</varname> [, <varname>col2</varname> ...]) ]
+ [<varname>other_join_clause</varname> ...]
+[ WHERE <varname>where_clauses</varname> ]
+
+SELECT <varname>select_list</varname> FROM
+ <varname>table_or_subquery1</varname>, <varname>table_or_subquery2</varname> [, <varname>table_or_subquery3</varname> ...]
+ [<varname>other_join_clause</varname> ...]
+WHERE
+ <varname>col1</varname> = <varname>col2</varname> [AND <varname>col3</varname> = <varname>col4</varname> ...]
+
+SELECT <varname>select_list</varname> FROM
+ <varname>table_or_subquery1</varname> CROSS JOIN <varname>table_or_subquery2</varname>
+ [<varname>other_join_clause</varname> ...]
+[ WHERE <varname>where_clauses</varname> ]</codeblock>
+
+ <p>
+ <b>SQL-92 and SQL-89 Joins:</b>
+ </p>
+
+ <p>
+ Queries with the explicit <codeph>JOIN</codeph> keywords are known as SQL-92 style joins, referring to the
+ level of the SQL standard where they were introduced. The corresponding <codeph>ON</codeph> or
+ <codeph>USING</codeph> clauses clearly show which columns are used as the join keys in each case:
+ </p>
+
+<codeblock>SELECT t1.c1, t2.c2 FROM <b>t1 JOIN t2</b>
+ <b>ON t1.id = t2.id and t1.type_flag = t2.type_flag</b>
+ WHERE t1.c1 > 100;
+
+SELECT t1.c1, t2.c2 FROM <b>t1 JOIN t2</b>
+ <b>USING (id, type_flag)</b>
+ WHERE t1.c1 > 100;</codeblock>
+
+ <p>
+ The <codeph>ON</codeph> clause is a general way to compare columns across the two tables, even if the column
+ names are different. The <codeph>USING</codeph> clause is a shorthand notation for specifying the join
+ columns, when the column names are the same in both tables. You can code equivalent <codeph>WHERE</codeph>
+ clauses that compare the columns, instead of <codeph>ON</codeph> or <codeph>USING</codeph> clauses, but that
+ practice is not recommended because mixing the join comparisons with other filtering clauses is typically
+ less readable and harder to maintain.
+ </p>
+
+ <p>
+ Queries with a comma-separated list of tables and subqueries are known as SQL-89 style joins. In these
+ queries, the equality comparisons between columns of the joined tables go in the <codeph>WHERE</codeph>
+ clause alongside other kinds of comparisons. This syntax is easy to learn, but it is also easy to
+ accidentally remove a <codeph>WHERE</codeph> clause needed for the join to work correctly.
+ </p>
+
+<codeblock>SELECT t1.c1, t2.c2 FROM <b>t1, t2</b>
+ WHERE
+ <b>t1.id = t2.id AND t1.type_flag = t2.type_flag</b>
+ AND t1.c1 > 100;</codeblock>
+
+ <p>
+ <b>Self-joins:</b>
+ </p>
+
+ <p>
+ Impala can do self-joins, for example to join on two different columns in the same table to represent
+ parent-child relationships or other tree-structured data. There is no explicit syntax for this; just use the
+ same table name for both the left-hand and right-hand table, and assign different table aliases to use when
+ referring to the fully qualified column names:
+ </p>
+
+<codeblock>-- Combine fields from both parent and child rows.
+SELECT lhs.id, rhs.parent, lhs.c1, rhs.c2 FROM tree_data lhs, tree_data rhs WHERE lhs.id = rhs.parent;</codeblock>
+
+ <p>
+ <b>Cartesian joins:</b>
+ </p>
+
+ <p>
+ To avoid producing huge result sets by mistake, Impala does not allow Cartesian joins of the form:
+<codeblock>SELECT ... FROM t1 JOIN t2;
+SELECT ... FROM t1, t2;</codeblock>
+ If you intend to join the tables based on common values, add <codeph>ON</codeph> or <codeph>WHERE</codeph>
+ clauses to compare columns across the tables. If you truly intend to do a Cartesian join, use the
+ <codeph>CROSS JOIN</codeph> keyword as the join operator. The <codeph>CROSS JOIN</codeph> form does not use
+ any <codeph>ON</codeph> clause, because it produces a result set with all combinations of rows from the
+ left-hand and right-hand tables. The result set can still be filtered by subsequent <codeph>WHERE</codeph>
+ clauses. For example:
+ </p>
+
+<codeblock>SELECT ... FROM t1 CROSS JOIN t2;
+SELECT ... FROM t1 CROSS JOIN t2 WHERE <varname>tests_on_non_join_columns</varname>;</codeblock>
+
+ <p>
+ <b>Inner and outer joins:</b>
+ </p>
+
+ <p>
+ An inner join is the most common and familiar type: rows in the result set contain the requested columns from
+ the appropriate tables, for all combinations of rows where the join columns of the tables have identical
+ values. If a column with the same name occurs in both tables, use a fully qualified name or a column alias to
+ refer to the column in the select list or other clauses. Impala performs inner joins by default for both
+ SQL-89 and SQL-92 join syntax:
+ </p>
+
+<codeblock>-- The following 3 forms are all equivalent.
+SELECT t1.id, c1, c2 FROM t1, t2 WHERE t1.id = t2.id;
+SELECT t1.id, c1, c2 FROM t1 JOIN t2 ON t1.id = t2.id;
+SELECT t1.id, c1, c2 FROM t1 INNER JOIN t2 ON t1.id = t2.id;</codeblock>
+
+ <p>
+ An outer join retrieves all rows from the left-hand table, or the right-hand table, or both; wherever there
+ is no matching data in the table on the other side of the join, the corresponding columns in the result set
+ are set to <codeph>NULL</codeph>. To perform an outer join, include the <codeph>OUTER</codeph> keyword in the
+ join operator, along with either <codeph>LEFT</codeph>, <codeph>RIGHT</codeph>, or <codeph>FULL</codeph>:
+ </p>
+
+<codeblock>SELECT * FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.id;
+SELECT * FROM t1 RIGHT OUTER JOIN t2 ON t1.id = t2.id;
+SELECT * FROM t1 FULL OUTER JOIN t2 ON t1.id = t2.id;</codeblock>
+
+ <p>
+ For outer joins, Impala requires SQL-92 syntax; that is, the <codeph>JOIN</codeph> keyword instead of
+ comma-separated table names. Impala does not support vendor extensions such as <codeph>(+)</codeph> or
+ <codeph>*=</codeph> notation for doing outer joins with SQL-89 query syntax.
+ </p>
+
+ <p>
+ <b>Equijoins and Non-Equijoins:</b>
+ </p>
+
+ <p>
+ By default, Impala requires an equality comparison between the left-hand and right-hand tables, either
+ through <codeph>ON</codeph>, <codeph>USING</codeph>, or <codeph>WHERE</codeph> clauses. These types of
+ queries are classified broadly as equijoins. Inner, outer, full, and semi joins can all be equijoins based on
+ the presence of equality tests between columns in the left-hand and right-hand tables.
+ </p>
+
+ <p>
+ In Impala 1.2.2 and higher, non-equijoin queries are also possible, with comparisons such as
+ <codeph>!=</codeph> or <codeph>&lt;</codeph> between the join columns. These kinds of queries require care to
+ avoid producing huge result sets that could exceed resource limits. Once you have planned a non-equijoin
+ query that produces a result set of acceptable size, you can code the query using the <codeph>CROSS
+ JOIN</codeph> operator, and add the extra comparisons in the <codeph>WHERE</codeph> clause:
+ </p>
+
+<codeblock>SELECT * FROM t1 CROSS JOIN t2 WHERE t1.total > t2.maximum_price;</codeblock>
+
+ <p rev="2.3.0">
+ In CDH 5.5 / Impala 2.3 and higher, additional non-equijoin queries are possible due to the addition
+ of nested loop joins. These queries typically involve <codeph>SEMI JOIN</codeph>,
+ <codeph>ANTI JOIN</codeph>, or <codeph>FULL OUTER JOIN</codeph> clauses.
+ Impala sometimes also uses nested loop joins internally when evaluating <codeph>OUTER JOIN</codeph>
+ queries involving complex type columns.
+ Query phases involving nested loop joins do not use the spill-to-disk mechanism if they
+ exceed the memory limit. Impala decides internally when to use each join mechanism; you cannot
+ specify any query hint to choose between the nested loop join or the original hash join algorithm.
+ </p>
+
+<codeblock rev="2.3.0">SELECT * FROM t1 LEFT OUTER JOIN t2 ON t1.int_col &lt; t2.int_col;</codeblock>
+
+ <p>
+ <b>Semi-joins:</b>
+ </p>
+
+ <p>
+ Semi-joins are a relatively rarely used variation. With the left semi-join, only data from the left-hand
+ table is returned, for rows where there is matching data in the right-hand table, based on comparisons
+ between join columns in <codeph>ON</codeph> or <codeph>WHERE</codeph> clauses. Only one instance of each row
+ from the left-hand table is returned, regardless of how many matching rows exist in the right-hand table.
+ <ph rev="2.0.0">A right semi-join (available in Impala 2.0 and higher) reverses the comparison and returns
+ data from the right-hand table.</ph>
+ </p>
+
+<codeblock>SELECT t1.c1, t1.c2 FROM t1 LEFT SEMI JOIN t2 ON t1.id = t2.id;</codeblock>
+
+ <p>
+ <b>Natural joins (not supported):</b>
+ </p>
+
+ <p>
+ Impala does not support the <codeph>NATURAL JOIN</codeph> operator, again to avoid inconsistent or huge
+ result sets. Natural joins do away with the <codeph>ON</codeph> and <codeph>USING</codeph> clauses, and
+ instead automatically join on all columns with the same names in the left-hand and right-hand tables. This
+ kind of query is not recommended for rapidly evolving data structures such as are typically used in Hadoop.
+ Thus, Impala does not support the <codeph>NATURAL JOIN</codeph> syntax, which can produce different query
+ results as columns are added to or removed from tables.
+ </p>
+
+ <p>
+ If you do have any queries that use <codeph>NATURAL JOIN</codeph>, make sure to rewrite them with explicit
+ <codeph>USING</codeph> clauses, because Impala could interpret the <codeph>NATURAL</codeph> keyword as a
+ table alias:
+ </p>
+
+<codeblock>-- 'NATURAL' is interpreted as an alias for 't1' and Impala attempts an inner join,
+-- resulting in an error because inner joins require explicit comparisons between columns.
+SELECT t1.c1, t2.c2 FROM t1 NATURAL JOIN t2;
+ERROR: NotImplementedException: Join with 't2' requires at least one conjunctive equality predicate.
+ To perform a Cartesian product between two tables, use a CROSS JOIN.
+
+-- If you expect the tables to have identically named columns with matching values,
+-- list the corresponding column names in a USING clause.
+SELECT t1.c1, t2.c2 FROM t1 JOIN t2 USING (id, type_flag, name, address);</codeblock>
+
+ <p rev="2.0.0">
+ <b>Anti-joins (Impala 2.0 / CDH 5.2 and higher only):</b>
+ </p>
+
+ <p rev="2.0.0">
+ Impala supports the <codeph>LEFT ANTI JOIN</codeph> and <codeph>RIGHT ANTI JOIN</codeph> clauses in Impala
+ 2.0 and higher on CDH 4, or CDH 5.2 and higher on CDH 5. The <codeph>LEFT</codeph> or <codeph>RIGHT</codeph>
+ keyword is required for this kind of join. For <codeph>LEFT ANTI JOIN</codeph>, this clause returns those
+ values from the left-hand table that have no matching value in the right-hand table. <codeph>RIGHT ANTI
+ JOIN</codeph> reverses the comparison and returns values from the right-hand table. You can express this
+ negative relationship either through the <codeph>ANTI JOIN</codeph> clause or through a <codeph>NOT
+ EXISTS</codeph> operator with a subquery.
+ </p>
+
+<!-- Restriction lifted in Impala 2.0.
+<p>
+Impala does not support <codeph>WHERE</codeph> clauses
+such as <codeph>IN (<varname>subquery</varname>)</codeph>,
+<codeph>NOT IN (<varname>subquery</varname>)</codeph>,
+<codeph>EXISTS (<varname>subquery</varname>)</codeph>,
+and <codeph>NOT EXISTS (<varname>subquery</varname>)</codeph>.
+Therefore from a practical standpoint, you cannot
+express an anti-join condition, where values from one table
+are returned only if no matching values are present in another table.
+</p>
+-->
+
+ <p conref="../shared/impala_common.xml#common/complex_types_blurb"/>
+
+<!-- To do: reuse some complex types examples with joins here or under Examples farther down. -->
+
+ <p rev="2.3.0">
+ When referring to a column with a complex type (<codeph>STRUCT</codeph>, <codeph>ARRAY</codeph>, or <codeph>MAP</codeph>)
+ in a query, you use join notation to <q>unpack</q> the scalar fields of the struct, the elements of the array, or
+ the key-value pairs of the map. (The join notation is not required for aggregation operations, such as
+ <codeph>COUNT()</codeph> or <codeph>SUM()</codeph> for array elements.) Because Impala recognizes which complex type elements are associated with which row
+ of the result set, you use the same syntax as for a cross or Cartesian join, without an explicit join condition.
+ See <xref href="impala_complex_types.xml#complex_types"/> for details about Impala support for complex types.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+
+ <p>
+ You typically use join queries in situations like these:
+ </p>
+
+ <ul>
+ <li>
+ When related data arrives from different sources, with each data set physically residing in a separate
+ table. For example, you might have address data from business records that you cross-check against phone
+ listings or census data.
+ <note>
+ Impala can join tables of different file formats, including Impala-managed tables and HBase tables. For
+ example, you might keep small dimension tables in HBase, for convenience of single-row lookups and
+ updates, and for the larger fact tables use Parquet or other binary file format optimized for scan
+ operations. Then, you can issue a join query to cross-reference the fact tables with the dimension
+ tables.
+ </note>
+ </li>
+
+ <li>
+ When data is normalized, a technique for reducing data duplication by dividing it across multiple tables.
+ This kind of organization is often found in data that comes from traditional relational database systems.
+ For example, instead of repeating some long string such as a customer name in multiple tables, each table
+ might contain a numeric customer ID. Queries that need to display the customer name could <q>join</q> the
+ table that specifies which customer ID corresponds to which name.
+ </li>
+
+ <li>
+ When certain columns are rarely needed for queries, so they are moved into separate tables to reduce
+ overhead for common queries. For example, a <codeph>biography</codeph> field might be rarely needed in
+ queries on employee data. Putting that field in a separate table reduces the amount of I/O for common
+ queries on employee addresses or phone numbers. Queries that do need the <codeph>biography</codeph> column
+ can retrieve it by performing a join with that separate table.
+ </li>
+
+ <li>
+ In CDH 5.5 / Impala 2.3 or higher, when referring to complex type columns in queries.
+ See <xref href="impala_complex_types.xml#complex_types"/> for details.
+ </li>
+ </ul>
+
+ <p>
+ When comparing columns with the same names in <codeph>ON</codeph> or <codeph>WHERE</codeph> clauses, use the
+ fully qualified names such as <codeph><varname>db_name</varname>.<varname>table_name</varname></codeph>, or
+ assign table aliases, column aliases, or both to make the code more compact and understandable:
+ </p>
+
+<codeblock>select t1.c1 as first_id, t2.c2 as second_id from
+ t1 join t2 on first_id = second_id;
+
+select fact.custno, dimension.custno from
+ customer_data as fact join customer_address as dimension
+ using (custno)</codeblock>
+
+ <note>
+ <p>
+ Performance for join queries is a crucial aspect for Impala, because complex join queries are
+ resource-intensive operations. An efficient join query produces much less network traffic and CPU overhead
+ than an inefficient one. For best results:
+ </p>
+ <ul>
+ <li rev="1.2">
+ Make sure that both <xref href="impala_perf_stats.xml#perf_stats">table and column statistics</xref> are
+ available for all the tables involved in a join query, and especially for the columns referenced in any
+ join conditions. Impala uses the statistics to automatically deduce an efficient join order.
+ Use <xref href="impala_show.xml#show"><codeph>SHOW TABLE STATS <varname>table_name</varname></codeph> and
+ <codeph>SHOW COLUMN STATS <varname>table_name</varname></codeph></xref> to check if statistics are
+ already present. Issue the <codeph>COMPUTE STATS <varname>table_name</varname></codeph> for a nonpartitioned table,
+ or (in Impala 2.1.0 and higher) <codeph>COMPUTE INCREMENTAL STATS <varname>table_name</varname></codeph>
+ for a partitioned table, to collect the initial statistics at both the table and column levels, and to keep the
+ statistics up to date after any substantial <codeph>INSERT</codeph> or <codeph>LOAD DATA</codeph> operations.
+ </li>
+
+ <li rev="1.2">
+ If table or column statistics are not available, join the largest table first. You can check the
+ existence of statistics with the <codeph>SHOW TABLE STATS <varname>table_name</varname></codeph> and
+ <codeph>SHOW COLUMN STATS <varname>table_name</varname></codeph> statements.
+ </li>
+
+ <li rev="1.2.2">
+ If table or column statistics are not available, join subsequent tables according to which table has the
+ most selective filter, based on overall size and <codeph>WHERE</codeph> clauses. Joining the table with
+ the most selective filter results in the fewest number of rows being returned.
+ </li>
+ </ul>
+ <p>
+ For more information and examples of performance for join queries, see
+ <xref href="impala_perf_joins.xml#perf_joins"/>.
+ </p>
+ </note>
+
+ <p>
+ To control the result set from a join query, include the names of corresponding column names in both tables
+ in an <codeph>ON</codeph> or <codeph>USING</codeph> clause, or by coding equality comparisons for those
+ columns in the <codeph>WHERE</codeph> clause.
+ </p>
+
+<codeblock>[localhost:21000] > select c_last_name, ca_city from customer join customer_address where c_customer_sk = ca_address_sk;
++-------------+-----------------+
+| c_last_name | ca_city |
++-------------+-----------------+
+| Lewis | Fairfield |
+| Moses | Fairview |
+| Hamilton | Pleasant Valley |
+| White | Oak Ridge |
+| Moran | Glendale |
+...
+| Richards | Lakewood |
+| Day | Lebanon |
+| Painter | Oak Hill |
+| Bentley | Greenfield |
+| Jones | Stringtown |
++-------------+-----------------+
+Returned 50000 row(s) in 9.82s</codeblock>
+
+ <p>
+ One potential downside of joins is the possibility of excess resource usage in poorly constructed queries.
+ Impala imposes restrictions on join queries to guard against such issues. To minimize the chance of runaway
+ queries on large data sets, Impala requires every join query to contain at least one equality predicate
+ between the columns of the various tables. For example, if <codeph>T1</codeph> contains 1000 rows and
+ <codeph>T2</codeph> contains 1,000,000 rows, a query <codeph>SELECT <varname>columns</varname> FROM t1 JOIN
+ t2</codeph> could return up to 1 billion rows (1000 * 1,000,000); Impala requires that the query include a
+ clause such as <codeph>ON t1.c1 = t2.c2</codeph> or <codeph>WHERE t1.c1 = t2.c2</codeph>.
+ </p>
+
+ <p>
+ Because even with equality clauses, the result set can still be large, as we saw in the previous example, you
+ might use a <codeph>LIMIT</codeph> clause to return a subset of the results:
+ </p>
+
+<codeblock>[localhost:21000] > select c_last_name, ca_city from customer, customer_address where c_customer_sk = ca_address_sk limit 10;
++-------------+-----------------+
+| c_last_name | ca_city |
++-------------+-----------------+
+| Lewis | Fairfield |
+| Moses | Fairview |
+| Hamilton | Pleasant Valley |
+| White | Oak Ridge |
+| Moran | Glendale |
+| Sharp | Lakeview |
+| Wiles | Farmington |
+| Shipman | Union |
+| Gilbert | New Hope |
+| Brunson | Martinsville |
++-------------+-----------------+
+Returned 10 row(s) in 0.63s</codeblock>
+
+ <p>
+ Or you might use additional comparison operators or aggregation functions to condense a large result set into
+ a smaller set of values:
+ </p>
+
+<codeblock>[localhost:21000] > -- Find the names of customers who live in one particular town.
+[localhost:21000] > select distinct c_last_name from customer, customer_address where
+ c_customer_sk = ca_address_sk
+ and ca_city = "Green Acres";
++---------------+
+| c_last_name |
++---------------+
+| Hensley |
+| Pearson |
+| Mayer |
+| Montgomery |
+| Ricks |
+...
+| Barrett |
+| Price |
+| Hill |
+| Hansen |
+| Meeks |
++---------------+
+Returned 332 row(s) in 0.97s
+
+[localhost:21000] > -- See how many different customers in this town have names starting with "A".
+[localhost:21000] > select count(distinct c_last_name) from customer, customer_address where
+ c_customer_sk = ca_address_sk
+ and ca_city = "Green Acres"
+ and substr(c_last_name,1,1) = "A";
++-----------------------------+
+| count(distinct c_last_name) |
++-----------------------------+
+| 12 |
++-----------------------------+
+Returned 1 row(s) in 1.00s</codeblock>
+
+ <p>
+ Because a join query can involve reading large amounts of data from disk, sending large amounts of data
+ across the network, and loading large amounts of data into memory to do the comparisons and filtering, you
+ might do benchmarking, performance analysis, and query tuning to find the most efficient join queries for
+ your data set, hardware capacity, network configuration, and cluster workload.
+ </p>
+
+ <p>
+ The two categories of joins in Impala are known as <b>partitioned joins</b> and <b>broadcast joins</b>. If
+ inaccurate table or column statistics, or some quirk of the data distribution, causes Impala to choose the
+ wrong mechanism for a particular join, consider using query hints as a temporary workaround. For details, see
+ <xref href="impala_hints.xml#hints"/>.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+ <p>
+ The following examples refer to these simple tables containing small sets of integers:
+<codeblock>[localhost:21000] > create table t1 (x int);
+[localhost:21000] > insert into t1 values (1), (2), (3), (4), (5), (6);
+
+[localhost:21000] > create table t2 (y int);
+[localhost:21000] > insert into t2 values (2), (4), (6);
+
+[localhost:21000] > create table t3 (z int);
+[localhost:21000] > insert into t3 values (1), (3), (5);
+</codeblock>
+ </p>
+
+<!-- To do: fill in examples for other join types. -->
+
+ <p>
+ The following example demonstrates an anti-join, returning the values from <codeph>T1</codeph> that do not
+ exist in <codeph>T2</codeph> (in this case, the odd numbers 1, 3, and 5):
+ </p>
+
+<codeblock>[localhost:21000] > select x from t1 left anti join t2 on (t1.x = t2.y);
++---+
+| x |
++---+
+| 1 |
+| 3 |
+| 5 |
++---+
+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ See these tutorials for examples of different kinds of joins:
+ </p>
+
+ <ul>
+ <li>
+ <xref href="impala_tutorial.xml#tut_cross_join"/>
+ </li>
+ </ul>
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_langref.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_langref.xml b/docs/topics/impala_langref.xml
new file mode 100644
index 0000000..aaa76aa
--- /dev/null
+++ b/docs/topics/impala_langref.xml
@@ -0,0 +1,179 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="langref">
+
+ <title><ph audience="PDF">Impala SQL Language Reference</ph><ph audience="HTML">Overview of Impala SQL</ph></title>
+
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Data Analysts"/>
+ <data name="Category" value="Developers"/>
+ <data name="Category" value="impala-shell"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ Impala uses SQL as its query language. Impala interprets SQL statements and performs the
+ full end-to-end processing for each statement. (As opposed to acting as a translation
+ layer for some other Hadoop subsystem.)
+ </p>
+
+ <p>
+ Impala implements many familiar statements, such as <codeph>CREATE TABLE</codeph>,
+ <codeph>INSERT</codeph>, and <codeph>SELECT</codeph>. Currently, the DML statements
+ <codeph>UPDATE</codeph> and <codeph>DELETE</codeph> are not available in the production
+ level of Impala, because big data analytics with Hadoop and HDFS typically involves
+ unchanging data. <codeph>UPDATE</codeph> and <codeph>DELETE</codeph> <i>are</i> available
+ in beta form in the version of Impala used with the Kudu storage layer. For full details
+ about Impala SQL syntax and semantics, see
+ <xref href="impala_langref_sql.xml#langref_sql"/>.
+ </p>
+
+ <p>
+ Queries include clauses such as <codeph>WHERE</codeph>, <codeph>GROUP BY</codeph>,
+ <codeph>ORDER BY</codeph>, and <codeph>JOIN</codeph>. For information about query syntax,
+ see <xref href="impala_select.xml#select"/>.
+ </p>
+
+ <p>
+ Queries can also include function calls, to scalar functions such as
+ <codeph>sin()</codeph> and <codeph>substr()</codeph>, aggregate functions such as
+ <codeph>count()</codeph> and <codeph>avg()</codeph>, and analytic functions such as
+ <codeph>lag()</codeph> and <codeph>rank()</codeph>. For a list of the built-in functions
+ available in Impala queries, see <xref href="impala_functions.xml#builtins"/>.
+ </p>
+
+ <p outputclass="toc"/>
+
+ </conbody>
+
+ <concept id="langref_performance">
+
+ <title>Performance Features</title>
+
+ <conbody>
+
+ <p>
+ The main performance-related SQL features for Impala are:
+ </p>
+
+ <ul>
+ <li>
+ <p>
+ The <codeph>COMPUTE STATS</codeph> statement, and the underlying table statistics
+ and column statistics used in query planning. The statistics are used to estimate
+ the number of rows and size of the result set for queries, subqueries, and the
+ different <q>sides</q> of a join query.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ The output of the <codeph>EXPLAIN</codeph> statement. It outlines the ways in which
+ the query is parallelized, and how much I/O, memory, and so on the query expects to
+ use. You can control the level of detail in the output through a query option.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ Partitioning for tables. By organizing the data for efficient access along one or
+ more dimensions, this technique lets queries read only the relevant data.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ Query hints, especially for join queries. Impala selects from different join
+ algorithms based on the relative sizes of the result sets for each side of the join.
+ In cases where you know the most effective technique for a particular query, you can
+ override the estimates that Impala uses to make that choice, and select the join
+ technique directly.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ Query options. These options control settings that can influence the performance of
+ individual queries when you know the special considerations based on your workload,
+ hardware configuration, or data distribution.
+ </p>
+ </li>
+ </ul>
+
+ <p>
+ Because analytic queries against high volumes of data tend to require full scans against
+ large portions of data from each table, Impala does not include index-related SQL
+ statements such as <codeph>CREATE INDEX</codeph>. The <codeph>COMPUTE STATS</codeph>
+ statement serves the purpose of analyzing the distribution of data within each column and the
+ overall table. Partitioning optimizes the physical layout of the data for queries that
+ filter on one or more crucial columns.
+ </p>
+
+ </conbody>
+
+ </concept>
+
+ <concept id="hive_interoperability">
+
+ <title>Sharing Tables, Data, and Queries Between Impala and Hive</title>
+
+ <conbody>
+
+ <p>
+ To protect user investment in skills development and query design, Impala provides a
+ high degree of compatibility with the Hive Query Language (HiveQL):
+ </p>
+
+ <ul>
+ <li>
+ Because Impala uses the same metadata store as Hive to record information about table
+ structure and properties, Impala can access tables defined through the native Impala
+ <codeph>CREATE TABLE</codeph> command, or tables created using the Hive data
+ definition language (DDL).
+ </li>
+
+ <li>
+ Impala supports data manipulation (DML) statements similar to the DML component of
+ HiveQL.
+ </li>
+
+ <li>
+ Impala provides many <xref href="impala_functions.xml#builtins">built-in
+ functions</xref> with the same names and parameter types as their HiveQL equivalents.
+ </li>
+ </ul>
+
+ <p>
+ Impala supports most of the same
+ <xref href="impala_langref_sql.xml#langref_sql">statements and clauses</xref> as HiveQL,
+ including, but not limited to, <codeph>JOIN</codeph>, <codeph>AGGREGATE</codeph>,
+ <codeph>DISTINCT</codeph>, <codeph>UNION ALL</codeph>, <codeph>ORDER BY</codeph>,
+ <codeph>LIMIT</codeph>, and (uncorrelated) subquery in the <codeph>FROM</codeph> clause.
+ Impala also supports <codeph>INSERT INTO</codeph> and <codeph>INSERT OVERWRITE</codeph>.
+ </p>
+
+ <p>
+ Impala supports data types with the same names and semantics as the equivalent Hive data
+ types: <codeph>STRING</codeph>, <codeph>TINYINT</codeph>, <codeph>SMALLINT</codeph>,
+ <codeph>INT</codeph>, <codeph>BIGINT</codeph>, <codeph>FLOAT</codeph>,
+ <codeph>DOUBLE</codeph>, <codeph>BOOLEAN</codeph>, and
+ <codeph>TIMESTAMP</codeph>. CDH 5.5 / Impala 2.3 and higher also include the complex
+ types <codeph>ARRAY</codeph>, <codeph>STRUCT</codeph>, and <codeph>MAP</codeph>.
+ </p>
+
+ <p>
+ Most HiveQL <codeph>SELECT</codeph> and <codeph>INSERT</codeph> statements run
+ unmodified with Impala. For information about Hive syntax not available in Impala, see
+ <xref href="impala_langref_unsupported.xml#langref_hiveql_delta"/>.
+ </p>
+
+ </conbody>
+
+ </concept>
+
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_langref_sql.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_langref_sql.xml b/docs/topics/impala_langref_sql.xml
new file mode 100644
index 0000000..d759e76
--- /dev/null
+++ b/docs/topics/impala_langref_sql.xml
@@ -0,0 +1,35 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="langref_sql">
+
+ <title>Impala SQL Statements</title>
+ <titlealts><navtitle>SQL Statements</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Developers"/>
+ <data name="Category" value="Data Analysts"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ The Impala SQL dialect supports a range of standard elements, plus some extensions for Big Data use cases
+ related to data loading and data warehousing.
+ </p>
+
+ <note>
+ <p>
+ In the <cmdname>impala-shell</cmdname> interpreter, a semicolon at the end of each statement is required.
+ Since the semicolon is not actually part of the SQL syntax, we do not include it in the syntax definition
+ of each statement, but we do show it in examples intended to be run in <cmdname>impala-shell</cmdname>.
+ </p>
+ </note>
+
+ <p audience="PDF" outputclass="toc all">
+ The following sections show the major SQL statements that you work with in Impala:
+ </p>
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_langref_unsupported.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_langref_unsupported.xml b/docs/topics/impala_langref_unsupported.xml
new file mode 100644
index 0000000..f2b0560
--- /dev/null
+++ b/docs/topics/impala_langref_unsupported.xml
@@ -0,0 +1,296 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="langref_hiveql_delta">
+
+ <title>SQL Differences Between Impala and Hive</title>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Hive"/>
+ <data name="Category" value="Porting"/>
+ <data name="Category" value="Data Analysts"/>
+ <data name="Category" value="Developers"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">Hive</indexterm>
+ <indexterm audience="Cloudera">HiveQL</indexterm>
+ Impala's SQL syntax follows the SQL-92 standard, and includes many industry extensions in areas such as
+ built-in functions. See <xref href="impala_porting.xml#porting"/> for a general discussion of adapting SQL
+ code from a variety of database systems to Impala.
+ </p>
+
+ <p>
+ Because Impala and Hive share the same metastore database and their tables are often used interchangeably,
+ the following section covers differences between Impala and Hive in detail.
+ </p>
+
+ <p outputclass="toc inpage"/>
+ </conbody>
+
+ <concept id="langref_hiveql_unsupported">
+
+ <title>HiveQL Features not Available in Impala</title>
+
+ <conbody>
+
+ <p>
+ The current release of Impala does not support the following SQL features that you might be familiar with
+ from HiveQL:
+ </p>
+
+ <draft-comment translate="no">
+Yeesh, too many separate lists of unsupported Hive syntax.
+Here, the FAQ, and in some of the intro topics.
+Some discussion in IMP-1061 about how best to reorg.
+Lots of opportunities for conrefs.
+</draft-comment>
+
+ <ul>
+<!-- Now supported in CDH 5.5 / Impala 2.3 and higher. Find places on this page (like already done under lateral views) to note the new data type support.
+ <li>
+ Non-scalar data types such as maps, arrays, structs.
+ </li>
+-->
+
+ <li rev="1.2">
+ Extensibility mechanisms such as <codeph>TRANSFORM</codeph>, custom file formats, or custom SerDes.
+ </li>
+
+ <li>
+ XML and JSON functions.
+ </li>
+
+ <li>
+ Certain aggregate functions from HiveQL: <codeph>covar_pop</codeph>, <codeph>covar_samp</codeph>,
+ <codeph>corr</codeph>, <codeph>percentile</codeph>, <codeph>percentile_approx</codeph>,
+ <codeph>histogram_numeric</codeph>, <codeph>collect_set</codeph>; Impala supports the set of aggregate
+ functions listed in <xref href="impala_aggregate_functions.xml#aggregate_functions"/> and analytic
+ functions listed in <xref href="impala_analytic_functions.xml#analytic_functions"/>.
+ </li>
+
+ <li>
+ Sampling.
+ </li>
+
+ <li>
+ Lateral views. In CDH 5.5 / Impala 2.3 and higher, Impala supports queries on complex types
+ (<codeph>STRUCT</codeph>, <codeph>ARRAY</codeph>, or <codeph>MAP</codeph>), using join notation
+ rather than the <codeph>EXPLODE()</codeph> keyword.
+ See <xref href="impala_complex_types.xml#complex_types"/> for details about Impala support for complex types.
+ </li>
+
+ <li>
+ Multiple <codeph>DISTINCT</codeph> clauses per query, although Impala includes some workarounds for this
+ limitation.
+ <note conref="../shared/impala_common.xml#common/multiple_count_distinct"/>
+ </li>
+ </ul>
+
+ <p>
+ User-defined functions (UDFs) are supported starting in Impala 1.2. See <xref href="impala_udf.xml#udfs"/>
+ for full details on Impala UDFs.
+ <ul>
+ <li>
+ Impala supports high-performance UDFs written in C++, as well as reusing some Java-based Hive UDFs.
+ </li>
+
+ <li>
+ Impala supports scalar UDFs and user-defined aggregate functions (UDAFs). Impala does not currently
+ support user-defined table generating functions (UDTFs).
+ </li>
+
+ <li>
+ Only Impala-supported column types are supported in Java-based UDFs.
+ </li>
+ </ul>
+ </p>
+
+ <p>
+ Impala does not currently support these HiveQL statements:
+ </p>
+
+ <ul>
+ <li>
+ <codeph>ANALYZE TABLE</codeph> (the Impala equivalent is <codeph>COMPUTE STATS</codeph>)
+ </li>
+
+ <li>
+ <codeph>DESCRIBE COLUMN</codeph>
+ </li>
+
+ <li>
+ <codeph>DESCRIBE DATABASE</codeph>
+ </li>
+
+ <li>
+ <codeph>EXPORT TABLE</codeph>
+ </li>
+
+ <li>
+ <codeph>IMPORT TABLE</codeph>
+ </li>
+
+ <li>
+ <codeph>SHOW TABLE EXTENDED</codeph>
+ </li>
+
+ <li>
+ <codeph>SHOW INDEXES</codeph>
+ </li>
+
+ <li>
+ <codeph>SHOW COLUMNS</codeph>
+ </li>
+ </ul>
+ </conbody>
+ </concept>
+
+ <concept id="langref_hiveql_semantics">
+
+ <title>Semantic Differences Between Impala and HiveQL Features</title>
+
+ <conbody>
+
+ <p>
+ This section covers instances where Impala and Hive have similar functionality, sometimes including the
+ same syntax, but there are differences in the runtime semantics of those features.
+ </p>
+
+ <p>
+ <b>Security:</b>
+ </p>
+
+ <p>
+ Impala utilizes the <xref href="http://sentry.incubator.apache.org/" scope="external" format="html">Apache
+ Sentry (incubating)</xref> authorization framework, which provides fine-grained role-based access control
+ to protect data against unauthorized access or tampering.
+ </p>
+
+ <p>
+ The Hive component included in CDH 5.1 and higher now includes Sentry-enabled <codeph>GRANT</codeph>,
+ <codeph>REVOKE</codeph>, and <codeph>CREATE/DROP ROLE</codeph> statements. Earlier Hive releases had a
+ privilege system with <codeph>GRANT</codeph> and <codeph>REVOKE</codeph> statements that were primarily
+ intended to prevent accidental deletion of data, rather than a security mechanism to protect against
+ malicious users.
+ </p>
+
+ <p>
+ Impala can make use of privileges set up through Hive <codeph>GRANT</codeph> and <codeph>REVOKE</codeph> statements.
+ Impala has its own <codeph>GRANT</codeph> and <codeph>REVOKE</codeph> statements in Impala 2.0 and higher.
+ See <xref href="impala_authorization.xml#authorization"/> for the details of authorization in Impala, including
+ how to switch from the original policy file-based privilege model to the Sentry service using privileges
+ stored in the metastore database.
+ </p>
+
+ <p>
+ <b>SQL statements and clauses:</b>
+ </p>
+
+ <p>
+ The semantics of Impala SQL statements vary from HiveQL in some cases where they use similar SQL
+ statement and clause names:
+ </p>
+
+ <ul>
+ <li>
+ Impala uses different syntax and names for query hints, <codeph>[SHUFFLE]</codeph> and
+ <codeph>[NOSHUFFLE]</codeph> rather than <codeph>MapJoin</codeph> or <codeph>StreamJoin</codeph>. See
+ <xref href="impala_joins.xml#joins"/> for the Impala details.
+ </li>
+
+ <li>
+ Impala does not expose MapReduce-specific features of <codeph>SORT BY</codeph>, <codeph>DISTRIBUTE
+ BY</codeph>, or <codeph>CLUSTER BY</codeph>.
+ </li>
+
+ <li>
+ Impala does not require queries to include a <codeph>FROM</codeph> clause.
+ </li>
+ </ul>
+
+ <p>
+ <b>Data types:</b>
+ </p>
+
+ <ul>
+ <li>
+ Impala supports a limited set of implicit casts. This can help avoid undesired results from unexpected
+ casting behavior.
+ <ul>
+ <li>
+ Impala does not implicitly cast between string and numeric or Boolean types. Always use
+ <codeph>CAST()</codeph> for these conversions.
+ </li>
+
+ <li>
+ Impala does perform implicit casts among the numeric types, when going from a smaller or less precise
+ type to a larger or more precise one. For example, Impala will implicitly convert a
+ <codeph>SMALLINT</codeph> to a <codeph>BIGINT</codeph> or <codeph>FLOAT</codeph>, but to convert from
+ <codeph>DOUBLE</codeph> to <codeph>FLOAT</codeph> or <codeph>INT</codeph> to <codeph>TINYINT</codeph>
+ requires a call to <codeph>CAST()</codeph> in the query.
+ </li>
+
+ <li>
+ Impala does perform implicit casts from string to timestamp. Impala has a restricted set of literal
+ formats for the <codeph>TIMESTAMP</codeph> data type and the <codeph>from_unixtime()</codeph> format
+ string; see <xref href="impala_timestamp.xml#timestamp"/> for details.
+ </li>
+ </ul>
+ <p>
+ See <xref href="impala_datatypes.xml#datatypes"/> for full details on implicit and explicit casting for
+ all types, and <xref href="impala_conversion_functions.xml#conversion_functions"/> for details about
+ the <codeph>CAST()</codeph> function.
+ </p>
+ </li>
+
+ <li>
+ Impala does not store or interpret timestamps using the local timezone, to avoid undesired results from
+ unexpected time zone issues. Timestamps are stored and interpreted relative to UTC. This difference can
+ produce different results for some calls to similarly named date/time functions between Impala and Hive.
+ See <xref href="impala_datetime_functions.xml#datetime_functions"/> for details about the Impala
+ functions. See <xref href="impala_timestamp.xml#timestamp"/> for a discussion of how Impala handles
+ time zones, and configuration options you can use to make Impala match the Hive behavior more closely
+ when dealing with Parquet-encoded <codeph>TIMESTAMP</codeph> data or when converting between
+ the local time zone and UTC.
+ </li>
+
+ <li>
+ The Impala <codeph>TIMESTAMP</codeph> type can represent dates ranging from 1400-01-01 to 9999-12-31.
+ This is different from the Hive date range, which is 0000-01-01 to 9999-12-31.
+ </li>
+
+ <li>
+ Impala does not return column overflows as <codeph>NULL</codeph>, so that customers can distinguish
+ between <codeph>NULL</codeph> data and overflow conditions similar to how they do so with traditional
+ database systems. Impala returns the largest or smallest value in the range for the type. For example,
+ valid values for a <codeph>tinyint</codeph> range from -128 to 127. In Impala, a <codeph>tinyint</codeph>
+ with a value of -200 returns -128 rather than <codeph>NULL</codeph>. A <codeph>tinyint</codeph> with a
+ value of 200 returns 127.
+ </li>
+ </ul>
+
+ <p>
+ <b>Miscellaneous features:</b>
+ </p>
+
+ <ul>
+ <li>
+ Impala does not provide virtual columns.
+ </li>
+
+ <li>
+ Impala does not expose locking.
+ </li>
+
+ <li>
+ Impala does not expose some configuration properties.
+ </li>
+ </ul>
+ </conbody>
+ </concept>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_limit.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_limit.xml b/docs/topics/impala_limit.xml
new file mode 100644
index 0000000..c186cd4
--- /dev/null
+++ b/docs/topics/impala_limit.xml
@@ -0,0 +1,149 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="limit">
+
+ <title>LIMIT Clause</title>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Querying"/>
+ <data name="Category" value="Reports"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ The <codeph>LIMIT</codeph> clause in a <codeph>SELECT</codeph> query sets a maximum number of rows for the
+ result set. Pre-selecting the maximum size of the result set helps Impala to optimize memory usage while
+ processing a distributed query.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock>LIMIT <varname>constant_integer_expression</varname></codeblock>
+
+ <p>
+ The argument to the <codeph>LIMIT</codeph> clause must evaluate to a constant value. It can be a numeric
+ literal, or another kind of numeric expression involving operators, casts, and function return values. You
+ cannot refer to a column or use a subquery.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+
+ <p>
+ This clause is useful in contexts such as:
+ </p>
+
+ <ul>
+ <li>
+ To return exactly N items from a top-N query, such as the 10 highest-rated items in a shopping category or
+ the 50 hostnames that refer the most traffic to a web site.
+ </li>
+
+ <li>
+ To demonstrate some sample values from a table or a particular query. (To display some arbitrary items, use
+ a query with no <codeph>ORDER BY</codeph> clause. An <codeph>ORDER BY</codeph> clause causes additional
+ memory and/or disk usage during the query.)
+ </li>
+
+ <li>
+ To keep queries from returning huge result sets by accident if a table is larger than expected, or a
+ <codeph>WHERE</codeph> clause matches more rows than expected.
+ </li>
+ </ul>
+
+ <p rev="1.2.1">
+ Originally, the value for the <codeph>LIMIT</codeph> clause had to be a numeric literal. In Impala 1.2.1 and
+ higher, it can be a numeric expression.
+ </p>
+
+ <p rev="obwl" conref="../shared/impala_common.xml#common/order_by_limit"/>
+
+ <p>
+ See <xref href="impala_order_by.xml#order_by"/> for details.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/limit_and_offset"/>
+
+ <p conref="../shared/impala_common.xml#common/restrictions_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/subquery_no_limit"/>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+ <p>
+ The following example shows how the <codeph>LIMIT</codeph> clause caps the size of the result set, with the
+ limit being applied after any other clauses such as <codeph>WHERE</codeph>.
+ </p>
+
+<codeblock>[localhost:21000] > create database limits;
+[localhost:21000] > use limits;
+[localhost:21000] > create table numbers (x int);
+[localhost:21000] > insert into numbers values (1), (3), (4), (5), (2);
+Inserted 5 rows in 1.34s
+[localhost:21000] > select x from numbers limit 100;
++---+
+| x |
++---+
+| 1 |
+| 3 |
+| 4 |
+| 5 |
+| 2 |
++---+
+Returned 5 row(s) in 0.26s
+[localhost:21000] > select x from numbers limit 3;
++---+
+| x |
++---+
+| 1 |
+| 3 |
+| 4 |
++---+
+Returned 3 row(s) in 0.27s
+[localhost:21000] > select x from numbers where x > 2 limit 2;
++---+
+| x |
++---+
+| 3 |
+| 4 |
++---+
+Returned 2 row(s) in 0.27s</codeblock>
+
+ <p>
+ For top-N and bottom-N queries, you use the <codeph>ORDER BY</codeph> and <codeph>LIMIT</codeph> clauses
+ together:
+ </p>
+
+<codeblock rev="obwl">[localhost:21000] > select x as "Top 3" from numbers order by x desc limit 3;
++-------+
+| top 3 |
++-------+
+| 5 |
+| 4 |
+| 3 |
++-------+
+[localhost:21000] > select x as "Bottom 3" from numbers order by x limit 3;
++----------+
+| bottom 3 |
++----------+
+| 1 |
+| 2 |
+| 3 |
++----------+
+</codeblock>
+
+ <p>
+ You can use constant values besides integer literals as the <codeph>LIMIT</codeph> argument:
+ </p>
+
+<codeblock>-- Other expressions that yield constant integer values work too.
+SELECT x FROM t1 LIMIT 1e6; -- Limit is one million.
+SELECT x FROM t1 LIMIT length('hello world'); -- Limit is 11.
+SELECT x FROM t1 LIMIT 2+2; -- Limit is 4.
+SELECT x FROM t1 LIMIT cast(truncate(9.9) AS INT); -- Limit is 9.
+</codeblock>
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_literals.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_literals.xml b/docs/topics/impala_literals.xml
new file mode 100644
index 0000000..3c53796
--- /dev/null
+++ b/docs/topics/impala_literals.xml
@@ -0,0 +1,384 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="literals">
+
+ <title>Literals</title>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="Impala Data Types"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Data Analysts"/>
+ <data name="Category" value="Developers"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">literals</indexterm>
+ Each of the Impala data types has corresponding notation for literal values of that type. You specify literal
+ values in SQL statements, such as in the <codeph>SELECT</codeph> list or <codeph>WHERE</codeph> clause of a
+ query, or as an argument to a function call. See <xref href="impala_datatypes.xml#datatypes"/> for a complete
+ list of types, ranges, and conversion rules.
+ </p>
+
+ <p outputclass="toc inpage"/>
+ </conbody>
+
+ <concept id="numeric_literals">
+
+ <title>Numeric Literals</title>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">numeric literals</indexterm>
+ To write literals for the integer types (<codeph>TINYINT</codeph>, <codeph>SMALLINT</codeph>,
+ <codeph>INT</codeph>, and <codeph>BIGINT</codeph>), use a sequence of digits with optional leading zeros.
+ </p>
+
+ <p rev="1.4.0">
+ To write literals for the floating-point types (<codeph rev="1.4.0">DECIMAL</codeph>,
+ <codeph>FLOAT</codeph>, and <codeph>DOUBLE</codeph>), use a sequence of digits with an optional decimal
+ point (<codeph>.</codeph> character). To preserve accuracy during arithmetic expressions, Impala interprets
+ floating-point literals as the <codeph>DECIMAL</codeph> type with the smallest appropriate precision and
+ scale, until required by the context to convert the result to <codeph>FLOAT</codeph> or
+ <codeph>DOUBLE</codeph>.
+ </p>
+
+ <p>
+ Integer values are promoted to floating-point when necessary, based on the context.
+ </p>
+
+ <p>
+ You can also use exponential notation by including an <codeph>e</codeph> character. For example,
+ <codeph>1e6</codeph> is 1 times 10 to the power of 6 (1 million). A number in exponential notation is
+ always interpreted as floating-point.
+ </p>
+
+ <p rev="tk">
+ When Impala encounters a numeric literal, it considers the type to be the <q>smallest</q> that can
+ accurately represent the value. The type is promoted to larger or more accurate types if necessary, based
+ on subsequent parts of an expression.
+ </p>
+ <p>
+ For example, you can see by the types Impala defines for the following table columns
+ how it interprets the corresponding numeric literals:
+ </p>
+<codeblock>[localhost:21000] > create table ten as select 10 as x;
++-------------------+
+| summary |
++-------------------+
+| Inserted 1 row(s) |
++-------------------+
+[localhost:21000] > desc ten;
++------+---------+---------+
+| name | type | comment |
++------+---------+---------+
+| x | tinyint | |
++------+---------+---------+
+
+[localhost:21000] > create table four_k as select 4096 as x;
++-------------------+
+| summary |
++-------------------+
+| Inserted 1 row(s) |
++-------------------+
+[localhost:21000] > desc four_k;
++------+----------+---------+
+| name | type | comment |
++------+----------+---------+
+| x | smallint | |
++------+----------+---------+
+
+[localhost:21000] > create table one_point_five as select 1.5 as x;
++-------------------+
+| summary |
++-------------------+
+| Inserted 1 row(s) |
++-------------------+
+[localhost:21000] > desc one_point_five;
++------+--------------+---------+
+| name | type | comment |
++------+--------------+---------+
+| x | decimal(2,1) | |
++------+--------------+---------+
+
+[localhost:21000] > create table one_point_three_three_three as select 1.333 as x;
++-------------------+
+| summary |
++-------------------+
+| Inserted 1 row(s) |
++-------------------+
+[localhost:21000] > desc one_point_three_three_three;
++------+--------------+---------+
+| name | type | comment |
++------+--------------+---------+
+| x | decimal(4,3) | |
++------+--------------+---------+
+</codeblock>
+ </conbody>
+ </concept>
+
+ <concept id="string_literals">
+
+ <title>String Literals</title>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">string literals</indexterm>
+ String literals are quoted using either single or double quotation marks. You can use either kind of quotes
+ for string literals, even both kinds for different literals within the same statement.
+ </p>
+
+ <p rev="2.0.0">
+ Quoted literals are considered to be of type <codeph>STRING</codeph>. To use quoted literals in contexts
+ requiring a <codeph>CHAR</codeph> or <codeph>VARCHAR</codeph> value, <codeph>CAST()</codeph> the literal to
+ a <codeph>CHAR</codeph> or <codeph>VARCHAR</codeph> of the appropriate length.
+ </p>
+
+ <p>
+ <b>Escaping special characters:</b>
+ </p>
+
+ <p>
+ To encode special characters within a string literal, precede them with the backslash (<codeph>\</codeph>)
+ escape character:
+ </p>
+
+ <ul>
+ <li>
+ <codeph>\t</codeph> represents a tab.
+ </li>
+
+ <li>
+ <codeph>\n</codeph> represents a newline or linefeed. This might cause extra line breaks in
+ <cmdname>impala-shell</cmdname> output.
+ </li>
+
+ <li>
+ <codeph>\r</codeph> represents a carriage return. This might cause unusual formatting (making it appear
+ that some content is overwritten) in <cmdname>impala-shell</cmdname> output.
+ </li>
+
+ <li>
+ <codeph>\b</codeph> represents a backspace. This might cause unusual formatting (making it appear that
+ some content is overwritten) in <cmdname>impala-shell</cmdname> output.
+ </li>
+
+ <li>
+ <codeph>\0</codeph> represents an ASCII <codeph>nul</codeph> character (not the same as a SQL
+ <codeph>NULL</codeph>). This might not be visible in <cmdname>impala-shell</cmdname> output.
+ </li>
+
+ <li>
+ <codeph>\Z</codeph> represents a DOS end-of-file character. This might not be visible in
+ <cmdname>impala-shell</cmdname> output.
+ </li>
+
+ <li>
+ <codeph>\%</codeph> and <codeph>\_</codeph> can be used to escape wildcard characters within the string
+ passed to the <codeph>LIKE</codeph> operator.
+ </li>
+
+ <li>
+ <codeph>\</codeph> followed by 3 octal digits represents the ASCII code of a single character; for
+ example, <codeph>\101</codeph> is ASCII 65, the character <codeph>A</codeph>.
+ </li>
+
+ <li>
+ Use two consecutive backslashes (<codeph>\\</codeph>) to prevent the backslash from being interpreted as
+ an escape character.
+ </li>
+
+ <li>
+ Use the backslash to escape single or double quotation mark characters within a string literal, if the
+ literal is enclosed by the same type of quotation mark.
+ </li>
+
+ <li>
+ If the character following the <codeph>\</codeph> does not represent the start of a recognized escape
+ sequence, the character is passed through unchanged.
+ </li>
+ </ul>
+
+ <p>
+ <b>Quotes within quotes:</b>
+ </p>
+
+ <p>
+ To include a single quotation character within a string value, enclose the literal with either single or
+ double quotation marks, and optionally escape the single quote as a <codeph>\'</codeph> sequence. Earlier
+ releases required escaping a single quote inside double quotes. Continue using escape sequences in this
+ case if you also need to run your SQL code on older versions of Impala.
+ </p>
+
+ <p>
+ To include a double quotation character within a string value, enclose the literal with single quotation
+ marks; no escaping is necessary in this case. Or, enclose the literal with double quotation marks and
+ escape the double quote as a <codeph>\"</codeph> sequence.
+ </p>
+
+<codeblock>[localhost:21000] > select "What\'s happening?" as single_within_double,
+ > 'I\'m not sure.' as single_within_single,
+ > "Homer wrote \"The Iliad\"." as double_within_double,
+ > 'Homer also wrote "The Odyssey".' as double_within_single;
++----------------------+----------------------+--------------------------+---------------------------------+
+| single_within_double | single_within_single | double_within_double | double_within_single |
++----------------------+----------------------+--------------------------+---------------------------------+
+| What's happening? | I'm not sure. | Homer wrote "The Iliad". | Homer also wrote "The Odyssey". |
++----------------------+----------------------+--------------------------+---------------------------------+
+</codeblock>
+
+ <p>
+ <b>Field terminator character in CREATE TABLE:</b>
+ </p>
+
+ <note conref="../shared/impala_common.xml#common/thorn"/>
+
+ <p>
+ <b>impala-shell considerations:</b>
+ </p>
+
+ <p>
+ When dealing with output that includes non-ASCII or non-printable characters such as linefeeds and
+ backspaces, use the <cmdname>impala-shell</cmdname> options to save to a file, turn off pretty printing, or
+ both rather than relying on how the output appears visually. See
+ <xref href="impala_shell_options.xml#shell_options"/> for a list of <cmdname>impala-shell</cmdname>
+ options.
+ </p>
+ </conbody>
+ </concept>
+
+ <concept id="boolean_literals">
+
+ <title>Boolean Literals</title>
+
+ <conbody>
+
+ <p>
+ For <codeph>BOOLEAN</codeph> values, the literals are <codeph>TRUE</codeph> and <codeph>FALSE</codeph>,
+ with no quotation marks and case-insensitive.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+<codeblock>select true;
+select * from t1 where assertion = false;
+select case bool_col when true then 'yes' when false then 'no' else 'null' end from t1;</codeblock>
+ </conbody>
+ </concept>
+
+ <concept id="timestamp_literals">
+
+ <title>Timestamp Literals</title>
+
+ <conbody>
+
+ <p conref="../shared/impala_common.xml#common/timestamp_conversions"/>
+
+ <p>
+ You can also use <codeph>INTERVAL</codeph> expressions to add or subtract from timestamp literal values,
+ such as <codeph>'1966-07-30' + INTERVAL 5 YEARS + INTERVAL 3 DAYS</codeph>. See
+ <xref href="impala_timestamp.xml#timestamp"/> for details.
+ </p>
+
+ <p>
+ Depending on your data pipeline, you might receive date and time data as text, in notation that does not
+ exactly match the format for Impala <codeph>TIMESTAMP</codeph> literals.
+ See <xref href="impala_datetime_functions.xml#datetime_functions"/> for functions that can convert
+ between a variety of string literals (including different field order, separators, and timezone notation)
+ and equivalent <codeph>TIMESTAMP</codeph> or numeric values.
+ </p>
+ </conbody>
+ </concept>
+
+ <concept id="null">
+
+ <title>NULL</title>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">NULL</indexterm>
+ The notion of <codeph>NULL</codeph> values is familiar from all kinds of database systems, but each SQL
+ dialect can have its own behavior and restrictions on <codeph>NULL</codeph> values. For Big Data
+ processing, the precise semantics of <codeph>NULL</codeph> values are significant: any misunderstanding
+ could lead to inaccurate results or misformatted data that could be time-consuming to correct for large
+ data sets.
+ </p>
+
+ <ul>
+ <li>
+ <codeph>NULL</codeph> is a different value than an empty string. The empty string is represented by a
+ string literal with nothing inside, <codeph>""</codeph> or <codeph>''</codeph>.
+ </li>
+
+ <li>
+ In a delimited text file, the <codeph>NULL</codeph> value is represented by the special token
+ <codeph>\N</codeph>.
+ </li>
+
+ <li>
+ When Impala inserts data into a partitioned table, and the value of one of the partitioning columns is
+ <codeph>NULL</codeph> or the empty string, the data is placed in a special partition that holds only
+ these two kinds of values. When these values are returned in a query, the result is <codeph>NULL</codeph>
+ whether the value was originally <codeph>NULL</codeph> or an empty string. This behavior is compatible
+ with the way Hive treats <codeph>NULL</codeph> values in partitioned tables. Hive does not allow empty
+ strings as partition keys, and it returns a string value such as
+ <codeph>__HIVE_DEFAULT_PARTITION__</codeph> instead of <codeph>NULL</codeph> when such values are
+ returned from a query. For example:
+<codeblock>create table t1 (i int) partitioned by (x int, y string);
+-- Select an INT column from another table, with all rows going into a special HDFS subdirectory
+-- named __HIVE_DEFAULT_PARTITION__. Depending on whether one or both of the partitioning keys
+-- are null, this special directory name occurs at different levels of the physical data directory
+-- for the table.
+insert into t1 partition(x=NULL, y=NULL) select c1 from some_other_table;
+insert into t1 partition(x, y=NULL) select c1, c2 from some_other_table;
+insert into t1 partition(x=NULL, y) select c1, c3 from some_other_table;</codeblock>
+ </li>
+
+ <li>
+ There is no <codeph>NOT NULL</codeph> clause when defining a column to prevent <codeph>NULL</codeph>
+ values in that column.
+ </li>
+
+ <li>
+ There is no <codeph>DEFAULT</codeph> clause to specify a non-<codeph>NULL</codeph> default value.
+ </li>
+
+ <li>
+ If an <codeph>INSERT</codeph> operation mentions some columns but not others, the unmentioned columns
+ contain <codeph>NULL</codeph> for all inserted rows.
+ </li>
+
+ <li rev="1.2.1">
+ <p conref="../shared/impala_common.xml#common/null_sorting_change"/>
+ <note>
+ <draft-comment translate="no"> Probably a bunch of similar view-related restrictions like this that should be collected, reused, or cross-referenced under the Views topic. </draft-comment>
+ Because the <codeph>NULLS FIRST</codeph> and <codeph>NULLS LAST</codeph> keywords are not currently
+ available in Hive queries, any views you create using those keywords will not be available through
+ Hive.
+ </note>
+ </li>
+
+ <li>
+ In all other contexts besides sorting with <codeph>ORDER BY</codeph>, comparing a <codeph>NULL</codeph>
+ to anything else returns <codeph>NULL</codeph>, making the comparison meaningless. For example,
+ <codeph>10 &gt; NULL</codeph> produces <codeph>NULL</codeph>, <codeph>10 &lt; NULL</codeph> also produces
+ <codeph>NULL</codeph>, <codeph>5 BETWEEN 1 AND NULL</codeph> produces <codeph>NULL</codeph>, and so on.
+ </li>
+ </ul>
+
+ <p>
+ Several built-in functions serve as shorthand for evaluating expressions and returning
+ <codeph>NULL</codeph>, 0, or some other substitution value depending on the expression result:
+ <codeph>ifnull()</codeph>, <codeph>isnull()</codeph>, <codeph>nvl()</codeph>, <codeph>nullif()</codeph>,
+ <codeph>nullifzero()</codeph>, and <codeph>zeroifnull()</codeph>. See
+ <xref href="impala_conditional_functions.xml#conditional_functions"/> for details.
+ </p>
+ </conbody>
+ </concept>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_live_progress.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_live_progress.xml b/docs/topics/impala_live_progress.xml
new file mode 100644
index 0000000..f58cdcb
--- /dev/null
+++ b/docs/topics/impala_live_progress.xml
@@ -0,0 +1,81 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept rev="2.3.0" id="live_progress">
+
+ <title>LIVE_PROGRESS Query Option</title>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="Impala Query Options"/>
+ <data name="Category" value="Querying"/>
+ <data name="Category" value="Performance"/>
+ <data name="Category" value="Reports"/>
+ <data name="Category" value="impala-shell"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">LIVE_PROGRESS query option</indexterm>
+ For queries submitted through the <cmdname>impala-shell</cmdname> command,
+ displays an interactive progress bar showing roughly what percentage of
+ processing has been completed. When the query finishes, the progress bar is erased
+ from the <cmdname>impala-shell</cmdname> console output.
+ </p>
+
+ <p>
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/type_boolean"/>
+ <p conref="../shared/impala_common.xml#common/default_false_0"/>
+
+ <p conref="../shared/impala_common.xml#common/command_line_blurb"/>
+ <p>
+ You can enable this query option within <cmdname>impala-shell</cmdname>
+ by starting the shell with the <codeph>--live_progress</codeph>
+ command-line option.
+ You can still turn this setting off and on again within the shell through the
+ <codeph>SET</codeph> command.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+ <p conref="../shared/impala_common.xml#common/live_reporting_details"/>
+ <p>
+ For a more detailed way of tracking the progress of an interactive query through
+ all phases of processing, see <xref href="impala_live_summary.xml#live_summary"/>.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/restrictions_blurb"/>
+ <p>
+ Because the percentage complete figure is calculated using the number of
+ issued and completed <q>scan ranges</q>, which occur while reading the table
+ data, the progress bar might reach 100% before the query is entirely finished.
+ For example, the query might do work to perform aggregations after all the
+ table data has been read. If many of your queries fall into this category,
+ consider using the <codeph>LIVE_SUMMARY</codeph> option instead for
+ more granular progress reporting.
+ </p>
+ <p conref="../shared/impala_common.xml#common/impala_shell_progress_reports_compute_stats_caveat"/>
+ <p conref="../shared/impala_common.xml#common/impala_shell_progress_reports_shell_only_caveat"/>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+<codeblock><![CDATA[[localhost:21000] > set live_progress=true;
+LIVE_PROGRESS set to true
+[localhost:21000] > select count(*) from customer;
++----------+
+| count(*) |
++----------+
+| 150000 |
++----------+
+[localhost:21000] > select count(*) from customer t1 cross join customer t2;
+[################################################## ] 50%
+[####################################################################################################] 100%
+
+]]>
+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/live_progress_live_summary_asciinema"/>
+
+ </conbody>
+</concept>
[21/22] incubator-impala git commit: First try at porting over the
source files necessary for the Impala SQL Reference.
Posted by jr...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/shared/impala_common.xml
----------------------------------------------------------------------
diff --git a/docs/shared/impala_common.xml b/docs/shared/impala_common.xml
new file mode 100644
index 0000000..37ebc34
--- /dev/null
+++ b/docs/shared/impala_common.xml
@@ -0,0 +1,2477 @@
+<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept xmlns:ditaarch="http://dita.oasis-open.org/architecture/2005/" id="common" ditaarch:DITAArchVersion="1.2" domains="(topic concept) (topic hi-d) (topic ut-d) (topic indexing-d) (topic hazard-d) (topic abbrev-d) (topic pr-d) (topic sw-d) (topic ui-d) " xml:lang="en-US">
+
+ <title>Reusable Text, Paragraphs, List Items, and Other Elements for Impala</title>
+
+ <conbody>
+
+ <p>
+ All the elements in this file with IDs are intended to be conref'ed elsewhere. Practically all of the
+ conref'ed elements for the Impala docs are in this file, to avoid questions of when it's safe to remove or
+ move something in any of the 'main' files, and avoid having to change and conref references as a result.
+ </p>
+
+ <p>
+ This file defines some dummy subheadings as section elements, just for self-documentation. Using sections
+ instead of nested concepts lets all the conref links point to a very simple name pattern,
+ '#common/id_within_the_file', rather than a 3-part reference with an intervening, variable concept ID.
+ </p>
+
+ <section id="sentry">
+
+ <title>Sentry-Related Content</title>
+
+ <p>
+ Material related to Sentry security, intended to be reused between Hive and Impala. Complicated by the fact
+ that most of it will probably be multi-paragraph or involve subheads, might need to be represented as
+ nested topics at the end of this file.
+ </p>
+
+ <note id="authentication_vs_authorization">
+ Regardless of the authentication mechanism used, Impala always creates HDFS directories and data files
+ owned by the same user (typically <codeph>impala</codeph>). To implement user-level access to different
+ databases, tables, columns, partitions, and so on, use the Sentry authorization feature, as explained in
+ <xref href="../topics/impala_authorization.xml#authorization"/>.
+ </note>
+
+<!-- Contrived nesting needed to allow <ph> with ID to be reused inside the <title> of a conref. -->
+
+ <p>
+ <b><ph id="title_sentry_debug">Debugging Failed Sentry Authorization Requests</ph></b>
+ </p>
+
+ <p id="sentry_debug">
+ Sentry logs all facts that lead up to authorization decisions at the debug level. If you do not understand
+ why Sentry is denying access, the best way to debug is to temporarily turn on debug logging:
+ <ul>
+ <li>
+ In Cloudera Manager, add <codeph>log4j.logger.org.apache.sentry=DEBUG</codeph> to the logging settings
+ for your service through the corresponding <uicontrol>Logging Safety Valve</uicontrol> field for the
+ Impala, Hive Server 2, or Solr Server services.
+ </li>
+
+ <li>
+ On systems not managed by Cloudera Manager, add <codeph>log4j.logger.org.apache.sentry=DEBUG</codeph>
+ to the <filepath>log4j.properties</filepath> file on each host in the cluster, in the appropriate
+ configuration directory for each service.
+ </li>
+ </ul>
+ Specifically, look for exceptions and messages such as:
+<codeblock xml:space="preserve">FilePermission server..., RequestPermission server...., result [true|false]</codeblock>
+ which indicate each evaluation Sentry makes. The <codeph>FilePermission</codeph> is from the policy file,
+ while <codeph>RequestPermission</codeph> is the privilege required for the query. A
+ <codeph>RequestPermission</codeph> will iterate over all appropriate <codeph>FilePermission</codeph>
+ settings until a match is found. If no matching privilege is found, Sentry returns <codeph>false</codeph>
+ indicating <q>Access Denied</q>.
+<!--
+[1]
+Impala: Impala Daemon -> Advanced -> Impala Daemon Logging Safety Valve
+Hive: Hive Server 2 -> Advanced -> HiveServer2 Logging Safety Valve
+Search: Solr Server -> Advanced -> Solr Server Logging Safety Valve
+-->
+ </p>
+
+ </section>
+
+ <section id="cm">
+
+ <title>Cloudera Manager Terminology</title>
+
+ <p>
+ Especially during the transition from CM 4 to CM 5, we'll use some stock phraseology to talk about fields
+ and such.
+ </p>
+
+ <p>
+ <ph id="safety_valve"> In Cloudera Manager 4, these fields are labelled <uicontrol>Safety
+ Valve</uicontrol>; in Cloudera Manager 5, they are called <uicontrol>Advanced Configuration
+ Snippet</uicontrol>. </ph>
+ </p>
+
+ </section>
+
+ <section id="citi">
+
+ <title>Items from the Citibank Escalation Spreadsheet</title>
+
+ <p>
+ Paragraphs with IDs are intended to be reused both in the FAQ and the User's Guide. They refer to feature
+ requests or misunderstandings encountered by Citibank, captured in the escalation spreadsheet here:
+ <xref href="https://docs.google.com/a/cloudera.com/spreadsheet/ccc?key=0AplfwQJKyyTWdFdhY0E5WHVwNXZSTG9sMEZwQy1QZ1E&amp;usp=drive_web#gid=0" scope="external" format="html"/>.
+ </p>
+
+ <p id="string_concatenation">
+ With Impala, you use the built-in <codeph>CONCAT()</codeph> function to concatenate two, three, or more
+ strings:
+<codeblock xml:space="preserve">select concat('some prefix: ', col1) from t1;
+select concat('abc','mno','xyz');</codeblock>
+ Impala does not currently support operators for string concatenation, such as <codeph>||</codeph> as seen
+ in some other database systems.
+ </p>
+
+ <p id="column_aliases">
+ You can specify column aliases with or without the <codeph>AS</codeph> keyword, and with no quotation
+ marks, single quotation marks, or double quotation marks. Some kind of quotation marks are required if the
+ column alias contains any spaces or other problematic characters. The alias text is displayed in the
+ <cmdname>impala-shell</cmdname> output as all-lowercase. For example:
+<codeblock xml:space="preserve">[localhost:21000] > select c1 First_Column from t;
+[localhost:21000] > select c1 as First_Column from t;
++--------------+
+| first_column |
++--------------+
+...
+
+[localhost:21000] > select c1 'First Column' from t;
+[localhost:21000] > select c1 as 'First Column' from t;
++--------------+
+| first column |
++--------------+
+...
+
+[localhost:21000] > select c1 "First Column" from t;
+[localhost:21000] > select c1 as "First Column" from t;
++--------------+
+| first column |
++--------------+
+...</codeblock>
+ </p>
+
+ <p id="temp_tables">
+ Currently, Impala does not support temporary tables. Some other database systems have a class of
+ <q>lightweight</q> tables that are held only in memory and/or that are only accessible by one connection
+ and disappear when the session ends. In Impala, creating new databases is a relatively lightweight
+ operation, so as an alternative, you could create a database with a unique name and use <codeph>CREATE
+ TABLE LIKE</codeph>, <codeph>CREATE TABLE AS SELECT</codeph>, and <codeph>INSERT</codeph> statements to
+ create a table in that database to hold the result set of a query, to use in subsequent queries. When
+ finished, issue a <codeph>DROP TABLE</codeph> statement followed by <codeph>DROP DATABASE</codeph>.
+ </p>
+
+ </section>
+
+ <section id="standards">
+
+ <title>Blurbs About Standards Compliance</title>
+
+ <p>
+ The following blurbs simplify the process of flagging which SQL standard various features were first
+ introduced in. The wording and the tagging can be modified by editing one central instance of each blurb.
+ Not extensively used yet, just here and there in the SQL Language Reference section.
+ </p>
+
+ <p id="sql1986">
+<!-- No Wikipedia page for SQL-1986, so no link. -->
+ <b>Standards compliance:</b> Introduced in SQL-1986.
+ </p>
+
+ <p id="sql1989">
+<!-- No Wikipedia page for SQL-1989, so no link. -->
+ <b>Standards compliance:</b> Introduced in SQL-1989.
+ </p>
+
+ <p id="sql1992">
+ <b>Standards compliance:</b> Introduced in
+ <xref href="http://en.wikipedia.org/wiki/SQL-92" scope="external" format="html">SQL-1992</xref>.
+ </p>
+
+ <p id="sql1999">
+ <b>Standards compliance:</b> Introduced in
+ <xref href="http://en.wikipedia.org/wiki/SQL:1999" scope="external" format="html">SQL:1999</xref>.
+ </p>
+
+ <p id="sql2003">
+ <b>Standards compliance:</b> Introduced in
+ <xref href="http://en.wikipedia.org/wiki/SQL:2003" scope="external" format="html">SQL:2003</xref>.
+ </p>
+
+ <p id="sql2008">
+ <b>Standards compliance:</b> Introduced in
+ <xref href="http://en.wikipedia.org/wiki/SQL:2008" scope="external" format="html">SQL:2008</xref>.
+ </p>
+
+ <p id="sql2011">
+ <b>Standards compliance:</b> Introduced in
+ <xref href="http://en.wikipedia.org/wiki/SQL:2011" scope="external" format="html">SQL:2011</xref>.
+ </p>
+
+ <p id="hiveql">
+ <b>Standards compliance:</b> Extension first introduced in HiveQL.
+ </p>
+
+ <p id="impalaql">
+ <b>Standards compliance:</b> Extension first introduced in Impala.
+ </p>
+
+ </section>
+
+ <section id="refresh_invalidate">
+
+ <title>Background Info for REFRESH, INVALIDATE METADATA, and General Metadata Discussion</title>
+
+ <p id="refresh_vs_invalidate">
+ <codeph>INVALIDATE METADATA</codeph> and <codeph>REFRESH</codeph> are counterparts: <codeph>INVALIDATE
+ METADATA</codeph> waits to reload the metadata when needed for a subsequent query, but reloads all the
+ metadata for the table, which can be an expensive operation, especially for large tables with many
+ partitions. <codeph>REFRESH</codeph> reloads the metadata immediately, but only loads the block location
+ data for newly added data files, making it a less expensive operation overall. If data was altered in some
+ more extensive way, such as being reorganized by the HDFS balancer, use <codeph>INVALIDATE
+ METADATA</codeph> to avoid a performance penalty from reduced local reads. If you used Impala version 1.0,
+ the <codeph>INVALIDATE METADATA</codeph> statement works just like the Impala 1.0 <codeph>REFRESH</codeph>
+ statement did, while the Impala 1.1 <codeph>REFRESH</codeph> is optimized for the common use case of adding
+ new data files to an existing table, thus the table name argument is now required.
+ </p>
+
+ </section>
+
+ <section id="kudu">
+
+ <title>Kudu Snippets</title>
+
+ <p>
+ If any advice, background info, or warnings are needed in multiple
+ places for interaction of Impala with Kudu, put them under here.
+ </p>
+
+ </section>
+
+ <section id="sql_ref">
+
+ <title>SQL Language Reference Snippets</title>
+
+ <p>
+ These reusable chunks were taken from conrefs originally in <filepath>ciiu_langref_sql.xml</filepath>. Or
+ they are primarily used in new SQL syntax topics underneath that parent topic.
+ </p>
+
+ <p id="live_reporting_details">
+ The output from this query option is printed to standard error. The output is only displayed in interactive mode,
+ that is, not when the <codeph>-q</codeph> or <codeph>-f</codeph> options are used.
+ </p>
+
+ <p id="live_progress_live_summary_asciinema">
+ To see how the <codeph>LIVE_PROGRESS</codeph> and <codeph>LIVE_SUMMARY</codeph> query options
+ work in real time, see <xref href="https://asciinema.org/a/1rv7qippo0fe7h5k1b6k4nexk" scope="external" format="html">this animated demo</xref>.
+ </p>
+
+ <p rev="2.3.0" id="impala_shell_progress_reports_compute_stats_caveat">
+ The <codeph>LIVE_PROGRESS</codeph> and <codeph>LIVE_SUMMARY</codeph> query options
+ currently do not produce any output during <codeph>COMPUTE STATS</codeph> operations.
+ </p>
+
+<!-- This is a shorter version of the similar 'caveat' text. This shorter one can be reused more easily in various places. -->
+ <p rev="2.3.0" id="impala_shell_progress_reports_shell_only_blurb">
+ The <codeph>LIVE_PROGRESS</codeph> and <codeph>LIVE_SUMMARY</codeph> query options only apply
+ inside the <cmdname>impala-shell</cmdname> interpreter. You cannot use them with the
+ <codeph>SET</codeph> statement from a JDBC or ODBC application.
+ </p>
+
+ <p id="impala_shell_progress_reports_shell_only_caveat">
+ Because the <codeph>LIVE_PROGRESS</codeph> and <codeph>LIVE_SUMMARY</codeph> query options
+ are available only within the <cmdname>impala-shell</cmdname> interpreter:
+ <ul>
+ <li>
+ <p>
+ You cannot change these query options through the SQL <codeph>SET</codeph>
+ statement using the JDBC or ODBC interfaces. The <codeph>SET</codeph>
+ command in <cmdname>impala-shell</cmdname> recognizes these names as
+ shell-only options.
+ </p>
+ </li>
+ <li>
+ <p>
+ Be careful when using <cmdname>impala-shell</cmdname> on a pre-CDH 5.5
+ system to connect to Impala running on a CDH 5.5 or higher system.
+ The older <cmdname>impala-shell</cmdname> does not recognize these
+ query option names. Upgrade <cmdname>impala-shell</cmdname> on the
+ systems where you intend to use these query options.
+ </p>
+ </li>
+ <li>
+ <p>
+ Likewise, the <cmdname>impala-shell</cmdname> command relies on
+ some information only available in Impala 2.3 / CDH 5.5 and higher
+ to prepare live progress reports and query summaries. The
+ <codeph>LIVE_PROGRESS</codeph> and <codeph>LIVE_SUMMARY</codeph>
+ query options have no effect when <cmdname>impala-shell</cmdname> connects
+ to a cluster running an older version of Impala.
+ </p>
+ </li>
+ </ul>
+ </p>
+
+<!-- Same example used in both CREATE DATABASE and DROP DATABASE. -->
+<codeblock id="create_drop_db_example">create database first_db;
+use first_db;
+create table t1 (x int);
+
+create database second_db;
+use second_db;
+-- Each database has its own namespace for tables.
+-- You can reuse the same table names in each database.
+create table t1 (s string);
+
+create database temp;
+
+-- You can either USE a database after creating it,
+-- or qualify all references to the table name with the name of the database.
+-- Here, tables T2 and T3 are both created in the TEMP database.
+
+create table temp.t2 (x int, y int);
+use temp;
+create table t3 (s string);
+
+-- You cannot drop a database while it is selected by the USE statement.
+drop database temp;
+<i>ERROR: AnalysisException: Cannot drop current default database: temp</i>
+
+-- The always-available database 'default' is a convenient one to USE
+-- before dropping a database you created.
+use default;
+
+-- Before dropping a database, first drop all the tables inside it,
+<ph rev="2.3.0">-- or in CDH 5.5 and higher use the CASCADE clause.</ph>
+drop database temp;
+ERROR: ImpalaRuntimeException: Error making 'dropDatabase' RPC to Hive Metastore:
+CAUSED BY: InvalidOperationException: Database temp is not empty
+show tables in temp;
++------+
+| name |
++------+
+| t3 |
++------+
+
+<ph rev="2.3.0">-- CDH 5.5 and higher:</ph>
+<ph rev="2.3.0">drop database temp cascade;</ph>
+
+-- CDH 5.4 and lower:
+drop table temp.t3;
+drop database temp;
+</codeblock>
+
+ <p id="cast_convenience_fn_example">
+ This example shows how to use the <codeph>castto*()</codeph> functions as an equivalent
+ to <codeph>CAST(<varname>value</varname> AS <varname>type</varname>)</codeph> expressions.
+ </p>
+
+ <p id="cast_convenience_fn_usage"><b>Usage notes:</b>
+ A convenience function to skip the SQL <codeph>CAST(<varname>value</varname> AS <varname>type</varname>)</codeph> syntax,
+ for example when programmatically generating SQL statements where a regular function call might be easier to construct.
+ </p>
+
+ <p rev="2.2.0" id="timezone_conversion_caveat">
+ The way this function deals with time zones when converting to or from <codeph>TIMESTAMP</codeph>
+ values is affected by the <codeph>-use_local_tz_for_unix_timestamp_conversions</codeph> startup flag for the
+ <cmdname>impalad</cmdname> daemon. See <xref href="../topics/impala_timestamp.xml#timestamp"/> for details about
+ how Impala handles time zone considerations for the <codeph>TIMESTAMP</codeph> data type.
+ </p>
+
+ <note rev="2.2.0" id="s3_caveat" type="important">
+ <p>
+ Impala query support for Amazon S3 is included in CDH 5.4.0, but is not currently supported or recommended for production use.
+ If you're interested in this feature, try it out in a test environment until we address the issues and limitations needed for production-readiness.
+ </p>
+ </note>
+
+ <p rev="2.2.0" id="s3_dml">
+ Currently, Impala cannot insert or load data into a table or partition that resides in the Amazon
+ Simple Storage Service (S3).
+ Bring data into S3 using the normal S3 transfer mechanisms, then use Impala to query the S3 data.
+ See <xref href="../topics/impala_s3.xml#s3"/> for details about using Impala with S3.
+ </p>
+
+ <p rev="2.2.0" id="s3_metadata">
+ The <codeph>REFRESH</codeph> and <codeph>INVALIDATE METADATA</codeph> statements also cache metadata
+ for tables where the data resides in the Amazon Simple Storage Service (S3).
+ In particular, issue a <codeph>REFRESH</codeph> for a table after adding or removing files
+ in the associated S3 data directory.
+ See <xref href="../topics/impala_s3.xml#s3"/> for details about working with S3 tables.
+ </p>
+
+ <p id="y2k38" rev="2.2.0">
+ In Impala 2.2.0 and higher, built-in functions that accept or return integers representing <codeph>TIMESTAMP</codeph> values
+ use the <codeph>BIGINT</codeph> type for parameters and return values, rather than <codeph>INT</codeph>.
+ This change lets the date and time functions avoid an overflow error that would otherwise occur
+ on January 19th, 2038 (known as the
+ <xref href="http://en.wikipedia.org/wiki/Year_2038_problem" scope="external" format="html"><q>Year 2038 problem</q> or <q>Y2K38 problem</q></xref>).
+ This change affects the <codeph>from_unixtime()</codeph> and <codeph>unix_timestamp()</codeph> functions.
+ You might need to change application code that interacts with these functions, change the types of
+ columns that store the return values, or add <codeph>CAST()</codeph> calls to SQL statements that
+ call these functions.
+ </p>
+
+ <p id="timestamp_conversions">
+ Impala automatically converts <codeph>STRING</codeph> literals of the correct format into
+ <codeph>TIMESTAMP</codeph> values. Timestamp values are accepted in the format
+ <codeph>"yyyy-MM-dd HH:mm:ss.SSSSSS"</codeph>, and can consist of just the date, or just the time, with or
+ without the fractional second portion. For example, you can specify <codeph>TIMESTAMP</codeph> values such as
+ <codeph>'1966-07-30'</codeph>, <codeph>'08:30:00'</codeph>, or <codeph>'1985-09-25 17:45:30.005'</codeph>.
+ <ph conref="../shared/impala_common.xml#common/cast_int_to_timestamp"/>
+ </p>
+
+
+ <p>
+ <ph id="cast_int_to_timestamp">Casting an integer or floating-point value <codeph>N</codeph> to
+ <codeph>TIMESTAMP</codeph> produces a value that is <codeph>N</codeph> seconds past the start of the epoch
+ date (January 1, 1970). By default, the result value represents a date and time in the UTC time zone.
+ If the setting <codeph>-use_local_tz_for_unix_timestamp_conversions=true</codeph> is in effect,
+ the resulting <codeph>TIMESTAMP</codeph> represents a date and time in the local time zone.</ph>
+ </p>
+
+ <p id="redaction_yes" rev="2.2.0">
+ If these statements in your environment contain sensitive literal values such as credit card numbers or tax
+ identifiers, Impala can redact this sensitive information when displaying the statements in log files and
+ other administrative contexts. See
+ <xref audience="integrated" href="../topics/sg_redaction.xml#log_redact"/><xref audience="standalone" href="http://www.cloudera.com/content/cloudera/en/documentation/core/latest/topics/sg_redaction.html" scope="external" format="html"/>
+ for details.
+ </p>
+
+ <p id="incremental_partition_spec">
+ The <codeph>PARTITION</codeph> clause is only allowed in combination with the <codeph>INCREMENTAL</codeph>
+ clause. It is optional for <codeph>COMPUTE INCREMENTAL STATS</codeph>, and required for <codeph>DROP
+ INCREMENTAL STATS</codeph>. Whenever you specify partitions through the <codeph>PARTITION
+ (<varname>partition_spec</varname>)</codeph> clause in a <codeph>COMPUTE INCREMENTAL STATS</codeph> or
+ <codeph>DROP INCREMENTAL STATS</codeph> statement, you must include all the partitioning columns in the
+ specification, and specify constant values for all the partition key columns.
+ </p>
+
+ <p id="udf_persistence_restriction">
+ Currently, Impala UDFs and UDAs are not persisted in the metastore database. Information
+ about these functions is held in the memory of the <cmdname>catalogd</cmdname> daemon. You must reload them
+ by running the <codeph>CREATE FUNCTION</codeph> statements again each time you restart the
+ <cmdname>catalogd</cmdname> daemon.
+ </p>
+
+ <note id="add_partition_set_location">
+ If you are creating a partition for the first time and specifying its location, for maximum efficiency, use
+ a single <codeph>ALTER TABLE</codeph> statement including both the <codeph>ADD PARTITION</codeph> and
+ <codeph>LOCATION</codeph> clauses, rather than separate statements with <codeph>ADD PARTITION</codeph> and
+ <codeph>SET LOCATION</codeph> clauses.
+ </note>
+
+ <p id="insert_hidden_work_directory">
+ The <codeph>INSERT</codeph> statement has always left behind a hidden work directory inside the data
+ directory of the table. Formerly, this hidden work directory was named
+ <filepath>.impala_insert_staging</filepath> . In Impala 2.0.1 and later, this directory name is changed to
+ <filepath>_impala_insert_staging</filepath> . (While HDFS tools are expected to treat names beginning
+ either with underscore and dot as hidden, in practice names beginning with an underscore are more widely
+ supported.) If you have any scripts, cleanup jobs, and so on that rely on the name of this work directory,
+ adjust them to use the new name.
+ </p>
+
+ <p id="check_internal_external_table">
+ To see whether a table is internal or external, and its associated HDFS location, issue the statement
+ <codeph>DESCRIBE FORMATTED <varname>table_name</varname></codeph>. The <codeph>Table Type</codeph> field
+ displays <codeph>MANAGED_TABLE</codeph> for internal tables and <codeph>EXTERNAL_TABLE</codeph> for
+ external tables. The <codeph>Location</codeph> field displays the path of the table directory as an HDFS
+ URI.
+ </p>
+
+ <p id="switch_internal_external_table">
+ You can switch a table from internal to external, or from external to internal, by using the <codeph>ALTER
+ TABLE</codeph> statement:
+<codeblock xml:space="preserve">
+-- Switch a table from internal to external.
+ALTER TABLE <varname>table_name</varname> SET TBLPROPERTIES('EXTERNAL'='TRUE');
+
+-- Switch a table from external to internal.
+ALTER TABLE <varname>table_name</varname> SET TBLPROPERTIES('EXTERNAL'='FALSE');
+</codeblock>
+ </p>
+
+<!-- The data to show sensible output from these queries is in the TPC-DS schema 'CUSTOMER' table.
+ If you want to show real output, add a LIMIT 5 or similar clause to each query to avoid
+ too-long output. -->
+
+<codeblock id="regexp_rlike_examples" xml:space="preserve">-- Find all customers whose first name starts with 'J', followed by 0 or more of any character.
+select c_first_name, c_last_name from customer where c_first_name regexp '^J.*';
+select c_first_name, c_last_name from customer where c_first_name rlike '^J.*';
+
+-- Find 'Macdonald', where the first 'a' is optional and the 'D' can be upper- or lowercase.
+-- The ^...$ are required, to match the start and end of the value.
+select c_first_name, c_last_name from customer where c_last_name regexp '^Ma?c[Dd]onald$';
+select c_first_name, c_last_name from customer where c_last_name rlike '^Ma?c[Dd]onald$';
+
+-- Match multiple character sequences, either 'Mac' or 'Mc'.
+select c_first_name, c_last_name from customer where c_last_name regexp '^(Mac|Mc)donald$';
+select c_first_name, c_last_name from customer where c_last_name rlike '^(Mac|Mc)donald$';
+
+-- Find names starting with 'S', then one or more vowels, then 'r', then any other characters.
+-- Matches 'Searcy', 'Sorenson', 'Sauer'.
+select c_first_name, c_last_name from customer where c_last_name regexp '^S[aeiou]+r.*$';
+select c_first_name, c_last_name from customer where c_last_name rlike '^S[aeiou]+r.*$';
+
+-- Find names that end with 2 or more vowels: letters from the set a,e,i,o,u.
+select c_first_name, c_last_name from customer where c_last_name regexp '.*[aeiou]{2,}$';
+select c_first_name, c_last_name from customer where c_last_name rlike '.*[aeiou]{2,}$';
+
+-- You can use letter ranges in the [] blocks, for example to find names starting with A, B, or C.
+select c_first_name, c_last_name from customer where c_last_name regexp '^[A-C].*';
+select c_first_name, c_last_name from customer where c_last_name rlike '^[A-C].*';
+
+-- If you are not sure about case, leading/trailing spaces, and so on, you can process the
+-- column using string functions first.
+select c_first_name, c_last_name from customer where lower(trim(c_last_name)) regexp '^de.*';
+select c_first_name, c_last_name from customer where lower(trim(c_last_name)) rlike '^de.*';
+</codeblock>
+
+ <p id="show_security">
+ When authorization is enabled, the output of the <codeph>SHOW</codeph> statement is limited to those
+ objects for which you have some privilege. There might be other databases, tables, and so on, but their
+ names are concealed. If you believe an object exists but you cannot see it in the <codeph>SHOW</codeph>
+ output, check with the system administrator if you need to be granted a new privilege for that object. See
+ <xref href="../topics/impala_authorization.xml#authorization"/> for how to set up authorization and add
+ privileges for specific kinds of objects.
+ </p>
+
+ <p rev="2.0.0" id="user_kerberized">
+ In Impala 2.0 and later, <codeph>user()</codeph> returns the full Kerberos principal string, such as
+ <codeph>user@example.com</codeph>, in a Kerberized environment.
+ </p>
+
+ <ul>
+ <li id="grant_revoke_single">
+ Currently, each Impala <codeph>GRANT</codeph> or <codeph>REVOKE</codeph> statement can only grant or
+ revoke a single privilege to or from a single role.
+ </li>
+ </ul>
+
+ <p id="blobs_are_strings">
+ All data in <codeph>CHAR</codeph> and <codeph>VARCHAR</codeph> columns must be in a character encoding that
+ is compatible with UTF-8. If you have binary data from another database system (that is, a BLOB type), use
+ a <codeph>STRING</codeph> column to hold it.
+ </p>
+
+<!-- The codeblock is nested inside this paragraph, so the intro text
+ and the code get conref'ed as a unit. -->
+
+ <p id="create_drop_view_examples">
+ The following example creates a series of views and then drops them. These examples illustrate how views
+ are associated with a particular database, and both the view definitions and the view names for
+ <codeph>CREATE VIEW</codeph> and <codeph>DROP VIEW</codeph> can refer to a view in the current database or
+ a fully qualified view name.
+<codeblock xml:space="preserve">
+-- Create and drop a view in the current database.
+CREATE VIEW few_rows_from_t1 AS SELECT * FROM t1 LIMIT 10;
+DROP VIEW few_rows_from_t1;
+
+-- Create and drop a view referencing a table in a different database.
+CREATE VIEW table_from_other_db AS SELECT x FROM db1.foo WHERE x IS NOT NULL;
+DROP VIEW table_from_other_db;
+
+USE db1;
+-- Create a view in a different database.
+CREATE VIEW db2.v1 AS SELECT * FROM db2.foo;
+-- Switch into the other database and drop the view.
+USE db2;
+DROP VIEW v1;
+
+USE db1;
+-- Create a view in a different database.
+CREATE VIEW db2.v1 AS SELECT * FROM db2.foo;
+-- Drop a view in the other database.
+DROP VIEW db2.v1;
+</codeblock>
+ </p>
+
+ <p id="char_varchar_cast_from_string">
+ For <codeph>INSERT</codeph> operations into <codeph>CHAR</codeph> or <codeph>VARCHAR</codeph> columns, you
+ must cast all <codeph>STRING</codeph> literals or expressions returning <codeph>STRING</codeph> to a
+ <codeph>CHAR</codeph> or <codeph>VARCHAR</codeph> type with the appropriate length.
+ </p>
+
+ <p rev="2.0.0" id="subquery_no_limit">
+ Correlated subqueries used in <codeph>EXISTS</codeph> and <codeph>IN</codeph> operators cannot include a
+ <codeph>LIMIT</codeph> clause.
+ </p>
+
+ <p id="avro_no_timestamp">
+ Currently, Avro tables cannot contain <codeph>TIMESTAMP</codeph> columns. If you need to store date and
+ time values in Avro tables, as a workaround you can use a <codeph>STRING</codeph> representation of the
+ values, convert the values to <codeph>BIGINT</codeph> with the <codeph>UNIX_TIMESTAMP()</codeph> function,
+ or create separate numeric columns for individual date and time fields using the <codeph>EXTRACT()</codeph>
+ function.
+ </p>
+
+ <p id="zero_length_strings">
+ <b>Zero-length strings:</b> For purposes of clauses such as <codeph>DISTINCT</codeph> and <codeph>GROUP
+ BY</codeph>, Impala considers zero-length strings (<codeph>""</codeph>), <codeph>NULL</codeph>, and space
+ to all be different values.
+ </p>
+
+ <p id="order_by_scratch_dir">
+ By default, intermediate files used during large sort, join, aggregation, or analytic function operations
+ are stored in the directory <filepath>/tmp/impala-scratch</filepath> . These files are removed when the
+ operation finishes. (Multiple concurrent queries can perform operations that use the <q>spill to disk</q>
+ technique, without any name conflicts for these temporary files.) You can specify a different location by
+ starting the <cmdname>impalad</cmdname> daemon with the
+ <codeph>--scratch_dirs="<varname>path_to_directory</varname>"</codeph> configuration option or the
+ equivalent configuration option in the Cloudera Manager user interface. You can specify a single directory,
+ or a comma-separated list of directories. The scratch directories must be on the local filesystem, not in
+ HDFS. You might specify different directory paths for different hosts, depending on the capacity and speed
+ of the available storage devices. In CDH 5.5 / Impala 2.3 or higher, Impala successfully starts (with a warning
+ written to the log) if it cannot create or read and write files in one of the scratch directories.
+ If there is less than 1 GB free on the filesystem where that directory resides, Impala still runs, but writes a
+ warning message to its log. If Impala encounters an error reading or writing files in a scratch directory during
+ a query, Impala logs the error and the query fails.
+ </p>
+
+ <p id="order_by_view_restriction">
+ An <codeph>ORDER BY</codeph> clause without an additional <codeph>LIMIT</codeph> clause is ignored in any
+ view definition. If you need to sort the entire result set from a view, use an <codeph>ORDER BY</codeph>
+ clause in the <codeph>SELECT</codeph> statement that queries the view. You can still make a simple <q>top
+ 10</q> report by combining the <codeph>ORDER BY</codeph> and <codeph>LIMIT</codeph> clauses in the same
+ view definition:
+<codeblock xml:space="preserve">[localhost:21000] > create table unsorted (x bigint);
+[localhost:21000] > insert into unsorted values (1), (9), (3), (7), (5), (8), (4), (6), (2);
+[localhost:21000] > create view sorted_view as select x from unsorted order by x;
+[localhost:21000] > select x from sorted_view; -- ORDER BY clause in view has no effect.
++---+
+| x |
++---+
+| 1 |
+| 9 |
+| 3 |
+| 7 |
+| 5 |
+| 8 |
+| 4 |
+| 6 |
+| 2 |
++---+
+[localhost:21000] > select x from sorted_view order by x; -- View query requires ORDER BY at outermost level.
++---+
+| x |
++---+
+| 1 |
+| 2 |
+| 3 |
+| 4 |
+| 5 |
+| 6 |
+| 7 |
+| 8 |
+| 9 |
++---+
+[localhost:21000] > create view top_3_view as select x from unsorted order by x limit 3;
+[localhost:21000] > select x from top_3_view; -- ORDER BY and LIMIT together in view definition are preserved.
++---+
+| x |
++---+
+| 1 |
+| 2 |
+| 3 |
++---+
+</codeblock>
+ </p>
+
+ <p id="precision_scale_example">
+ The following examples demonstrate how to check the precision and scale of numeric literals or other
+ numeric expressions. Impala represents numeric literals in the smallest appropriate type. 5 is a
+ <codeph>TINYINT</codeph> value, which ranges from -128 to 127, therefore 3 decimal digits are needed to
+ represent the entire range, and because it is an integer value there are no fractional digits. 1.333 is
+ interpreted as a <codeph>DECIMAL</codeph> value, with 4 digits total and 3 digits after the decimal point.
+<codeblock xml:space="preserve">[localhost:21000] > select precision(5), scale(5);
++--------------+----------+
+| precision(5) | scale(5) |
++--------------+----------+
+| 3 | 0 |
++--------------+----------+
+[localhost:21000] > select precision(1.333), scale(1.333);
++------------------+--------------+
+| precision(1.333) | scale(1.333) |
++------------------+--------------+
+| 4 | 3 |
++------------------+--------------+
+[localhost:21000] > with t1 as
+ ( select cast(12.34 as decimal(20,2)) x union select cast(1 as decimal(8,6)) x )
+ select precision(x), scale(x) from t1 limit 1;
++--------------+----------+
+| precision(x) | scale(x) |
++--------------+----------+
+| 24 | 6 |
++--------------+----------+
+</codeblock>
+ </p>
+
+<!-- These 'type_' entries are for query options, where the type doesn't match up exactly with an Impala data type. -->
+
+ <p id="type_boolean">
+ <b>Type:</b> Boolean; recognized values are 1 and 0, or <codeph>true</codeph> and <codeph>false</codeph>;
+ any other value interpreted as <codeph>false</codeph>
+ </p>
+
+ <p id="type_string">
+ <b>Type:</b> string
+ </p>
+
+ <p id="default_false">
+ <b>Default:</b> <codeph>false</codeph>
+ </p>
+
+ <p id="default_false_0">
+ <b>Default:</b> <codeph>false</codeph> (shown as 0 in output of <codeph>SET</codeph> statement)
+ </p>
+
+ <p id="odd_return_type_string">
+ Currently, the return value is always a <codeph>STRING</codeph>. The return type is subject to change in
+ future releases. Always use <codeph>CAST()</codeph> to convert the result to whichever data type is
+ appropriate for your computations.
+ </p>
+
+ <p rev="2.0.0" id="former_odd_return_type_string">
+ <b>Return type:</b> <codeph>DOUBLE</codeph> in Impala 2.0 and higher; <codeph>STRING</codeph> in earlier
+ releases
+ </p>
+
+ <p id="for_compatibility_only">
+ <b>Usage notes:</b> Primarily for compatibility with code containing industry extensions to SQL.
+ </p>
+
+ <p id="return_type_boolean">
+ <b>Return type:</b> <codeph>BOOLEAN</codeph>
+ </p>
+
+ <p id="return_type_double">
+ <b>Return type:</b> <codeph>DOUBLE</codeph>
+ </p>
+
+ <p id="return_type_same">
+ <b>Return type:</b> Same as the input value
+ </p>
+
+ <p id="return_type_same_except_string">
+ <b>Return type:</b> Same as the input value, except for <codeph>CHAR</codeph> and <codeph>VARCHAR</codeph>
+ arguments which produce a <codeph>STRING</codeph> result
+ </p>
+
+ <p id="builtins_db">
+ Impala includes another predefined database, <codeph>_impala_builtins</codeph>, that serves as the location
+ for the <xref href="../topics/impala_functions.xml#builtins">built-in functions</xref>. To see the built-in
+ functions, use a statement like the following:
+<codeblock xml:space="preserve">show functions in _impala_builtins;
+show functions in _impala_builtins like '*<varname>substring</varname>*';
+</codeblock>
+ </p>
+
+ <p id="sum_double">
+ Due to the way arithmetic on <codeph>FLOAT</codeph> and <codeph>DOUBLE</codeph> columns uses
+ high-performance hardware instructions, and distributed queries can perform these operations in different
+ order for each query, results can vary slightly for aggregate function calls such as <codeph>SUM()</codeph>
+ and <codeph>AVG()</codeph> for <codeph>FLOAT</codeph> and <codeph>DOUBLE</codeph> columns, particularly on
+ large data sets where millions or billions of values are summed or averaged. For perfect consistency and
+ repeatability, use the <codeph>DECIMAL</codeph> data type for such operations instead of
+ <codeph>FLOAT</codeph> or <codeph>DOUBLE</codeph>.
+ </p>
+
+ <p id="float_double_decimal_caveat">
+ The inability to exactly represent certain floating-point values means that
+ <codeph>DECIMAL</codeph> is sometimes a better choice than <codeph>DOUBLE</codeph>
+ or <codeph>FLOAT</codeph> when precision is critical, particularly when
+ transferring data from other database systems that use different representations
+ or file formats.
+ </p>
+
+ <p rev="1.4.0" id="decimal_no_stats">
+ Currently, the <codeph>COMPUTE STATS</codeph> statement under CDH 4 does not store any statistics for
+ <codeph>DECIMAL</codeph> columns. When Impala runs under CDH 5, which has better support for
+ <codeph>DECIMAL</codeph> in the metastore database, <codeph>COMPUTE STATS</codeph> does collect statistics
+ for <codeph>DECIMAL</codeph> columns and Impala uses the statistics to optimize query performance.
+ </p>
+
+ <p id="datetime_function_chaining">
+ <codeph>unix_timestamp()</codeph> and <codeph>from_unixtime()</codeph> are often used in combination to
+ convert a <codeph>TIMESTAMP</codeph> value into a particular string format. For example:
+<codeblock xml:space="preserve">select from_unixtime(unix_timestamp(now() + interval 3 days), 'yyyy/MM/dd HH:mm');
+</codeblock>
+ </p>
+
+ <p rev="1.4.0 obwl" id="insert_sort_blurb">
+ <b>Sorting considerations:</b> Although you can specify an <codeph>ORDER BY</codeph> clause in an
+ <codeph>INSERT ... SELECT</codeph> statement, any <codeph>ORDER BY</codeph> clause is ignored and the
+ results are not necessarily sorted. An <codeph>INSERT ... SELECT</codeph> operation potentially creates
+ many different data files, prepared on different data nodes, and therefore the notion of the data being
+ stored in sorted order is impractical.
+ </p>
+
+ <p rev="1.4.0" id="create_table_like_view">
+ Prior to Impala 1.4.0, it was not possible to use the <codeph>CREATE TABLE LIKE
+ <varname>view_name</varname></codeph> syntax. In Impala 1.4.0 and higher, you can create a table with the
+ same column definitions as a view using the <codeph>CREATE TABLE LIKE</codeph> technique. Although
+ <codeph>CREATE TABLE LIKE</codeph> normally inherits the file format of the original table, a view has no
+ underlying file format, so <codeph>CREATE TABLE LIKE <varname>view_name</varname></codeph> produces a text
+ table by default. To specify a different file format, include a <codeph>STORED AS
+ <varname>file_format</varname></codeph> clause at the end of the <codeph>CREATE TABLE LIKE</codeph>
+ statement.
+ </p>
+
+ <note rev="1.4.0" id="compute_stats_nulls">
+ Prior to Impala 1.4.0, <codeph>COMPUTE STATS</codeph> counted the number of <codeph>NULL</codeph> values in
+ each column and recorded that figure in the metastore database. Because Impala does not currently make use
+ of the <codeph>NULL</codeph> count during query planning, Impala 1.4.0 and higher speeds up the
+ <codeph>COMPUTE STATS</codeph> statement by skipping this <codeph>NULL</codeph> counting.
+ </note>
+
+ <p rev="1.3.1" id="regexp_matching">
+ In Impala 1.3.1 and higher, the <codeph>REGEXP</codeph> and <codeph>RLIKE</codeph> operators now match a
+ regular expression string that occurs anywhere inside the target string, the same as if the regular
+ expression was enclosed on each side by <codeph>.*</codeph>. See
+ <xref href="../topics/impala_operators.xml#regexp"/> for examples. Previously, these operators only
+ succeeded when the regular expression matched the entire target string. This change improves compatibility
+ with the regular expression support for popular database systems. There is no change to the behavior of the
+ <codeph>regexp_extract()</codeph> and <codeph>regexp_replace()</codeph> built-in functions.
+ </p>
+
+ <p rev="1.3.1" id="insert_inherit_permissions">
+ By default, if an <codeph>INSERT</codeph> statement creates any new subdirectories underneath a partitioned
+ table, those subdirectories are assigned default HDFS permissions for the <codeph>impala</codeph> user. To
+ make each subdirectory have the same permissions as its parent directory in HDFS, specify the
+ <codeph>--insert_inherit_permissions</codeph> startup option for the <cmdname>impalad</cmdname> daemon.
+ </p>
+
+ <note id="multiple_count_distinct">
+ <p>
+ By default, Impala only allows a single <codeph>COUNT(DISTINCT <varname>columns</varname>)</codeph>
+ expression in each query.
+ </p>
+ <p>
+ If you do not need precise accuracy, you can produce an estimate of the distinct values for a column by
+ specifying <codeph>NDV(<varname>column</varname>)</codeph>; a query can contain multiple instances of
+ <codeph>NDV(<varname>column</varname>)</codeph>. To make Impala automatically rewrite
+ <codeph>COUNT(DISTINCT)</codeph> expressions to <codeph>NDV()</codeph>, enable the
+ <codeph>APPX_COUNT_DISTINCT</codeph> query option.
+ </p>
+ <p>
+ To produce the same result as multiple <codeph>COUNT(DISTINCT)</codeph> expressions, you can use the
+ following technique for queries involving a single table:
+ </p>
+<codeblock xml:space="preserve">select v1.c1 result1, v2.c1 result2 from
+ (select count(distinct col1) as c1 from t1) v1
+ cross join
+ (select count(distinct col2) as c1 from t1) v2;
+</codeblock>
+ <p>
+ Because <codeph>CROSS JOIN</codeph> is an expensive operation, prefer to use the <codeph>NDV()</codeph>
+ technique wherever practical.
+ </p>
+ </note>
+
+ <p>
+ <ph id="union_all_vs_union">Prefer <codeph>UNION ALL</codeph> over <codeph>UNION</codeph> when you know the
+ data sets are disjoint or duplicate values are not a problem; <codeph>UNION ALL</codeph> is more efficient
+ because it avoids materializing and sorting the entire result set to eliminate duplicate values.</ph>
+ </p>
+
+ <note id="thorn">
+ The <codeph>CREATE TABLE</codeph> clauses <codeph>FIELDS TERMINATED BY</codeph>, <codeph>ESCAPED
+ BY</codeph>, and <codeph>LINES TERMINATED BY</codeph> have special rules for the string literal used for
+ their argument, because they all require a single character. You can use a regular character surrounded by
+ single or double quotation marks, an octal sequence such as <codeph>'\054'</codeph> (representing a comma),
+ or an integer in the range '-127'..'128' (with quotation marks but no backslash), which is interpreted as a
+ single-byte ASCII character. Negative values are subtracted from 256; for example, <codeph>FIELDS
+ TERMINATED BY '-2'</codeph> sets the field delimiter to ASCII code 254, the <q>Icelandic Thorn</q>
+ character used as a delimiter by some data formats.
+ </note>
+
+ <p id="command_line_blurb">
+ <b>Command-line equivalent:</b>
+ </p>
+
+ <p rev="2.3.0" id="complex_types_blurb">
+ <b>Complex type considerations:</b>
+ </p>
+
+ <p id="complex_types_combo">
+ Because complex types are often used in combination,
+ for example an <codeph>ARRAY</codeph> of <codeph>STRUCT</codeph>
+ elements, if you are unfamiliar with the Impala complex types,
+ start with <xref href="../topics/impala_complex_types.xml#complex_types"/> for
+ background information and usage examples.
+ </p>
+
+ <ul id="complex_types_restrictions">
+ <li>
+ Columns with this data type can only be used in tables or partitions with the Parquet file format.
+ </li>
+ <li>
+ Columns with this data type cannot be used as partition key columns in a partitioned table.
+ </li>
+ <li>
+ The <codeph>COMPUTE STATS</codeph> statement does not produce any statistics for columns of this data type.
+ </li>
+ <li>
+ See <xref href="../topics/impala_complex_types.xml#complex_types_limits"/> for a full list of limitations
+ and associated guidelines about complex type columns.
+ </li>
+ </ul>
+
+ <p rev="2.3.0" id="complex_types_partitioning">
+ Partitioned tables can contain complex type columns.
+ All the partition key columns must be scalar types.
+ </p>
+
+ <p rev="2.3.0" id="complex_types_describe">
+ You can pass a multi-part qualified name to <codeph>DESCRIBE</codeph>
+ to specify an <codeph>ARRAY</codeph>, <codeph>STRUCT</codeph>, or <codeph>MAP</codeph>
+ column and visualize its structure as if it were a table.
+ For example, if table <codeph>T1</codeph> contains an <codeph>ARRAY</codeph> column
+ <codeph>A1</codeph>, you could issue the statement <codeph>DESCRIBE t1.a1</codeph>.
+ If table <codeph>T1</codeph> contained a <codeph>STRUCT</codeph> column <codeph>S1</codeph>,
+ and a field <codeph>F1</codeph> within the <codeph>STRUCT</codeph> was a <codeph>MAP</codeph>,
+ you could issue the statement <codeph>DESCRIBE t1.s1.f1</codeph>.
+ An <codeph>ARRAY</codeph> is shown as a two-column table, with
+ <codeph>ITEM</codeph> and <codeph>POS</codeph> columns.
+ A <codeph>STRUCT</codeph> is shown as a table with each field
+ representing a column in the table.
+ A <codeph>MAP</codeph> is shown as a two-column table, with
+ <codeph>KEY</codeph> and <codeph>VALUE</codeph> columns.
+ </p>
+
+ <note id="complex_type_schema_pointer">
+ Many of the complex type examples refer to tables
+ such as <codeph>CUSTOMER</codeph> and <codeph>REGION</codeph>
+ adapted from the tables used in the TPC-H benchmark.
+ See <xref href="../topics/impala_complex_types.xml#complex_sample_schema"/>
+ for the table definitions.
+ </note>
+
+ <p rev="2.3.0" id="complex_types_unsupported_filetype">
+ <b>Complex type considerations:</b>
+ Although you can create tables in this file format using
+ the complex types (<codeph>ARRAY</codeph>, <codeph>STRUCT</codeph>,
+ and <codeph>MAP</codeph>) available in CDH 5.5 / Impala 2.3 and higher,
+ currently, Impala can query these types only in Parquet tables.
+ </p>
+
+ <p rev="2.3.0" id="complex_types_caveat_no_operator">
+ You cannot refer to a column with a complex data type (<codeph>ARRAY</codeph>, <codeph>STRUCT</codeph>, or <codeph>MAP</codeph>)
+ directly in an operator. You can apply operators only to scalar values that make up a complex type
+ (the fields of a <codeph>STRUCT</codeph>, the items of an <codeph>ARRAY</codeph>,
+ or the key or value portion of a <codeph>MAP</codeph>) as part of a join query that refers to
+ the scalar value using the appropriate dot notation or <codeph>ITEM</codeph>, <codeph>KEY</codeph>, or <codeph>VALUE</codeph>
+ pseudocolumn names.
+ </p>
+
+ <p rev="2.3.0" id="udfs_no_complex_types">
+ Currently, Impala UDFs cannot accept arguments or return values of the Impala complex types
+ (<codeph>STRUCT</codeph>, <codeph>ARRAY</codeph>, or <codeph>MAP</codeph>).
+ </p>
+
+ <p rev="2.3.0" id="complex_types_read_only">
+ Impala currently cannot write new data files containing complex type columns.
+ Therefore, although the <codeph>SELECT</codeph> statement works for queries
+ involving complex type columns, you cannot use a statement form that writes
+ data to complex type columns, such as <codeph>CREATE TABLE AS SELECT</codeph> or <codeph>INSERT ... SELECT</codeph>.
+ To create data files containing complex type data, use the Hive <codeph>INSERT</codeph> statement, or another
+ ETL mechanism such as MapReduce jobs, Spark jobs, Pig, and so on.
+ </p>
+
+ <p rev="2.3.0" id="complex_types_views">
+ For tables containing complex type columns (<codeph>ARRAY</codeph>,
+ <codeph>STRUCT</codeph>, or <codeph>MAP</codeph>), you typically use
+ join queries to refer to the complex values. You can use views to
+ hide the join notation, making such tables seem like traditional denormalized
+ tables, and making those tables queryable by business intelligence tools
+ that do not have built-in support for those complex types.
+ See <xref href="../topics/impala_complex_types.xml#complex_types_views"/> for details.
+ </p>
+
+ <p rev="2.3.0" id="complex_types_views_caveat">
+ Because you cannot directly issue <codeph>SELECT <varname>col_name</varname></codeph>
+ against a column of complex type, you cannot use a view or a <codeph>WITH</codeph>
+ clause to <q>rename</q> a column by selecting it with a column alias.
+ </p>
+
+ <p rev="2.3.0" id="jdbc_odbc_complex_types">
+ The Impala complex types (<codeph>STRUCT</codeph>, <codeph>ARRAY</codeph>, or <codeph>MAP</codeph>)
+ are available in CDH 5.5 / Impala 2.3 and higher.
+ To use these types with JDBC requires version 2.5.28 or higher of the Cloudera JDBC Connector for Impala.
+ To use these types with ODBC requires version 2.5.30 or higher of the Cloudera ODBC Connector for Impala.
+ Consider upgrading all JDBC and ODBC drivers at the same time you upgrade to CDH 5.5 or higher.
+ </p>
+
+ <p rev="2.3.0" id="jdbc_odbc_complex_types_views">
+ Although the result sets from queries involving complex types consist of all scalar values,
+ the queries involve join notation and column references that might not be understood by
+ a particular JDBC or ODBC connector. Consider defining a view that represents the
+ flattened version of a table containing complex type columns, and pointing the JDBC
+ or ODBC application at the view.
+ See <xref href="../topics/impala_complex_types.xml#complex_types"/> for details.
+ </p>
+
+ <p rev="2.3.0" id="complex_types_aggregation_explanation">
+ To access a column with a complex type (<codeph>ARRAY</codeph>, <codeph>STRUCT</codeph>, or <codeph>MAP</codeph>)
+ in an aggregation function, you unpack the individual elements using join notation in the query,
+ and then apply the function to the final scalar item, field, key, or value at the bottom of any nested type hierarchy in the column.
+ See <xref href="../topics/impala_complex_types.xml#complex_types"/> for details about using complex types in Impala.
+ </p>
+
+<p rev="2.3.0" id="complex_types_aggregation_example">
+The following example demonstrates calls to several aggregation functions
+using values from a column containing nested complex types
+(an <codeph>ARRAY</codeph> of <codeph>STRUCT</codeph> items).
+The array is unpacked inside the query using join notation.
+The array elements are referenced using the <codeph>ITEM</codeph>
+pseudocolumn, and the structure fields inside the array elements
+are referenced using dot notation.
+Numeric values such as <codeph>SUM()</codeph> and <codeph>AVG()</codeph>
+are computed using the numeric <codeph>R_NATIONKEY</codeph> field, and
+the general-purpose <codeph>MAX()</codeph> and <codeph>MIN()</codeph>
+values are computed from the string <codeph>N_NAME</codeph> field.
+<codeblock>describe region;
++-------------+-------------------------+---------+
+| name | type | comment |
++-------------+-------------------------+---------+
+| r_regionkey | smallint | |
+| r_name | string | |
+| r_comment | string | |
+| r_nations | array&lt;struct&lt; | |
+| | n_nationkey:smallint, | |
+| | n_name:string, | |
+| | n_comment:string | |
+| | >> | |
++-------------+-------------------------+---------+
+
+select r_name, r_nations.item.n_nationkey
+ from region, region.r_nations as r_nations
+order by r_name, r_nations.item.n_nationkey;
++-------------+------------------+
+| r_name | item.n_nationkey |
++-------------+------------------+
+| AFRICA | 0 |
+| AFRICA | 5 |
+| AFRICA | 14 |
+| AFRICA | 15 |
+| AFRICA | 16 |
+| AMERICA | 1 |
+| AMERICA | 2 |
+| AMERICA | 3 |
+| AMERICA | 17 |
+| AMERICA | 24 |
+| ASIA | 8 |
+| ASIA | 9 |
+| ASIA | 12 |
+| ASIA | 18 |
+| ASIA | 21 |
+| EUROPE | 6 |
+| EUROPE | 7 |
+| EUROPE | 19 |
+| EUROPE | 22 |
+| EUROPE | 23 |
+| MIDDLE EAST | 4 |
+| MIDDLE EAST | 10 |
+| MIDDLE EAST | 11 |
+| MIDDLE EAST | 13 |
+| MIDDLE EAST | 20 |
++-------------+------------------+
+
+select
+ r_name,
+ count(r_nations.item.n_nationkey) as count,
+ sum(r_nations.item.n_nationkey) as sum,
+ avg(r_nations.item.n_nationkey) as average,
+ min(r_nations.item.n_name) as minimum,
+ max(r_nations.item.n_name) as maximum,
+ ndv(r_nations.item.n_nationkey) as distinct_values
+from
+ region, region.r_nations as r_nations
+group by r_name
+order by r_name;
++-------------+-------+-----+---------+-----------+----------------+-----------------+
+| r_name | count | sum | average | minimum | maximum | distinct_values |
++-------------+-------+-----+---------+-----------+----------------+-----------------+
+| AFRICA | 5 | 50 | 10 | ALGERIA | MOZAMBIQUE | 5 |
+| AMERICA | 5 | 47 | 9.4 | ARGENTINA | UNITED STATES | 5 |
+| ASIA | 5 | 68 | 13.6 | CHINA | VIETNAM | 5 |
+| EUROPE | 5 | 77 | 15.4 | FRANCE | UNITED KINGDOM | 5 |
+| MIDDLE EAST | 5 | 58 | 11.6 | EGYPT | SAUDI ARABIA | 5 |
++-------------+-------+-----+---------+-----------+----------------+-----------------+
+</codeblock>
+</p>
+
+ <p id="hive_blurb">
+ <b>Hive considerations:</b>
+ </p>
+
+ <p rev="CDH-19187" id="permissions_blurb">
+ <b>HDFS permissions:</b>
+ </p>
+
+ <p rev="CDH-19187" id="permissions_blurb_no">
+ <b>HDFS permissions:</b> This statement does not touch any HDFS files or directories,
+ therefore no HDFS permissions are required.
+ </p>
+
+ <p id="security_blurb">
+ <b>Security considerations:</b>
+ </p>
+
+ <p id="performance_blurb">
+ <b>Performance considerations:</b>
+ </p>
+
+ <p id="conversion_blurb">
+ <b>Casting and conversions:</b>
+ </p>
+
+ <p id="related_info">
+ <b>Related information:</b>
+ </p>
+
+ <p id="related_tasks">
+ <b>Related tasks:</b>
+ </p>
+
+ <p id="related_options">
+ <b>Related startup options:</b>
+ </p>
+
+ <p id="restrictions_blurb">
+ <b>Restrictions:</b>
+ </p>
+
+ <p rev="2.0.0" id="restrictions_sliding_window">
+ <b>Restrictions:</b> In Impala 2.0 and higher, this function can be used as an analytic function, but with restrictions on any window clause.
+ For <codeph>MAX()</codeph> and <codeph>MIN()</codeph>, the window clause is only allowed if the start
+ bound is <codeph>UNBOUNDED PRECEDING</codeph>.
+ </p>
+
+<!-- This blurb has been superseded by analytic_not_allowed_caveat. Consider removing it if it turns out never to be needed. -->
+ <p rev="2.0.0" id="restrictions_non_analytic">
+ <b>Restrictions:</b> This function cannot be used as an analytic function; it does not currently support
+ the <codeph>OVER()</codeph> clause.
+ </p>
+
+ <p id="compatibility_blurb">
+ <b>Compatibility:</b>
+ </p>
+
+ <p id="null_blurb">
+ <b>NULL considerations:</b>
+ </p>
+
+ <p id="udf_blurb">
+ <b>UDF considerations:</b>
+ </p>
+
+ <p id="udf_blurb_no">
+ <b>UDF considerations:</b> This type cannot be used for the argument or return type of a user-defined
+ function (UDF) or user-defined aggregate function (UDA).
+ </p>
+
+ <p id="view_blurb">
+ <b>Considerations for views:</b>
+ </p>
+
+ <p id="null_bad_numeric_cast">
+ <b>NULL considerations:</b> Casting any non-numeric value to this type produces a <codeph>NULL</codeph>
+ value.
+ </p>
+
+ <p id="null_bad_timestamp_cast">
+ <b>NULL considerations:</b> Casting any unrecognized <codeph>STRING</codeph> value to this type produces a
+ <codeph>NULL</codeph> value.
+ </p>
+
+ <p id="null_null_arguments">
+ <b>NULL considerations:</b> An expression of this type produces a <codeph>NULL</codeph> value if any
+ argument of the expression is <codeph>NULL</codeph>.
+ </p>
+
+ <p id="privileges_blurb">
+ <b>Required privileges:</b>
+ </p>
+
+ <p id="parquet_blurb">
+ <b>Parquet considerations:</b>
+ </p>
+
+ <p id="parquet_tools_blurb">
+ To examine the internal structure and data of Parquet files, you can use the
+ <cmdname>parquet-tools</cmdname> command that comes with CDH. Make sure this
+ command is in your <codeph>$PATH</codeph>. (Typically, it is symlinked from
+ <filepath>/usr/bin</filepath>; sometimes, depending on your installation setup, you
+ might need to locate it under a CDH-specific <codeph>bin</codeph> directory.)
+ The arguments to this command let you perform operations such as:
+ <ul>
+ <li>
+ <codeph>cat</codeph>: Print a file's contents to standard output. In CDH 5.5 and higher, you can use
+ the <codeph>-j</codeph> option to output JSON.
+ </li>
+ <li>
+ <codeph>head</codeph>: Print the first few records of a file to standard output.
+ </li>
+ <li>
+ <codeph>schema</codeph>: Print the Parquet schema for the file.
+ </li>
+ <li>
+ <codeph>meta</codeph>: Print the file footer metadata, including key-value properties (like Avro schema), compression ratios,
+ encodings, compression used, and row group information.
+ </li>
+ <li>
+ <codeph>dump</codeph>: Print all data and metadata.
+ </li>
+ </ul>
+ Use <codeph>parquet-tools -h</codeph> to see usage information for all the arguments.
+ Here are some examples showing <cmdname>parquet-tools</cmdname> usage:
+
+<codeblock><![CDATA[
+$ # Be careful doing this for a big file! Use parquet-tools head to be safe.
+$ parquet-tools cat sample.parq
+year = 1992
+month = 1
+day = 2
+dayofweek = 4
+dep_time = 748
+crs_dep_time = 750
+arr_time = 851
+crs_arr_time = 846
+carrier = US
+flight_num = 53
+actual_elapsed_time = 63
+crs_elapsed_time = 56
+arrdelay = 5
+depdelay = -2
+origin = CMH
+dest = IND
+distince = 182
+cancelled = 0
+diverted = 0
+
+year = 1992
+month = 1
+day = 3
+...
+]]>
+</codeblock>
+
+<codeblock><![CDATA[
+$ parquet-tools head -n 2 sample.parq
+year = 1992
+month = 1
+day = 2
+dayofweek = 4
+dep_time = 748
+crs_dep_time = 750
+arr_time = 851
+crs_arr_time = 846
+carrier = US
+flight_num = 53
+actual_elapsed_time = 63
+crs_elapsed_time = 56
+arrdelay = 5
+depdelay = -2
+origin = CMH
+dest = IND
+distince = 182
+cancelled = 0
+diverted = 0
+
+year = 1992
+month = 1
+day = 3
+...
+]]>
+</codeblock>
+
+<codeblock><![CDATA[
+$ parquet-tools schema sample.parq
+message schema {
+ optional int32 year;
+ optional int32 month;
+ optional int32 day;
+ optional int32 dayofweek;
+ optional int32 dep_time;
+ optional int32 crs_dep_time;
+ optional int32 arr_time;
+ optional int32 crs_arr_time;
+ optional binary carrier;
+ optional int32 flight_num;
+...
+]]>
+</codeblock>
+
+<codeblock><![CDATA[
+$ parquet-tools meta sample.parq
+creator: impala version 2.2.0-cdh5.4.3 (build 517bb0f71cd604a00369254ac6d88394df83e0f6)
+
+file schema: schema
+-------------------------------------------------------------------
+year: OPTIONAL INT32 R:0 D:1
+month: OPTIONAL INT32 R:0 D:1
+day: OPTIONAL INT32 R:0 D:1
+dayofweek: OPTIONAL INT32 R:0 D:1
+dep_time: OPTIONAL INT32 R:0 D:1
+crs_dep_time: OPTIONAL INT32 R:0 D:1
+arr_time: OPTIONAL INT32 R:0 D:1
+crs_arr_time: OPTIONAL INT32 R:0 D:1
+carrier: OPTIONAL BINARY R:0 D:1
+flight_num: OPTIONAL INT32 R:0 D:1
+...
+
+row group 1: RC:20636601 TS:265103674
+-------------------------------------------------------------------
+year: INT32 SNAPPY DO:4 FPO:35 SZ:10103/49723/4.92 VC:20636601 ENC:PLAIN_DICTIONARY,RLE,PLAIN
+month: INT32 SNAPPY DO:10147 FPO:10210 SZ:11380/35732/3.14 VC:20636601 ENC:PLAIN_DICTIONARY,RLE,PLAIN
+day: INT32 SNAPPY DO:21572 FPO:21714 SZ:3071658/9868452/3.21 VC:20636601 ENC:PLAIN_DICTIONARY,RLE,PLAIN
+dayofweek: INT32 SNAPPY DO:3093276 FPO:3093319 SZ:2274375/5941876/2.61 VC:20636601 ENC:PLAIN_DICTIONARY,RLE,PLAIN
+dep_time: INT32 SNAPPY DO:5367705 FPO:5373967 SZ:28281281/28573175/1.01 VC:20636601 ENC:PLAIN_DICTIONARY,RLE,PLAIN
+crs_dep_time: INT32 SNAPPY DO:33649039 FPO:33654262 SZ:10220839/11574964/1.13 VC:20636601 ENC:PLAIN_DICTIONARY,RLE,PLAIN
+arr_time: INT32 SNAPPY DO:43869935 FPO:43876489 SZ:28562410/28797767/1.01 VC:20636601 ENC:PLAIN_DICTIONARY,RLE,PLAIN
+crs_arr_time: INT32 SNAPPY DO:72432398 FPO:72438151 SZ:10908972/12164626/1.12 VC:20636601 ENC:PLAIN_DICTIONARY,RLE,PLAIN
+carrier: BINARY SNAPPY DO:83341427 FPO:83341558 SZ:114916/128611/1.12 VC:20636601 ENC:PLAIN_DICTIONARY,RLE,PLAIN
+flight_num: INT32 SNAPPY DO:83456393 FPO:83488603 SZ:10216514/11474301/1.12 VC:20636601 ENC:PLAIN_DICTIONARY,RLE,PLAIN
+...
+]]>
+</codeblock>
+ </p>
+
+ <p id="parquet_ok">
+ <b>Parquet considerations:</b> This type is fully compatible with Parquet tables.
+ </p>
+
+ <p id="analytic_not_allowed_caveat">
+ This function cannot be used in an analytic context. That is, the <codeph>OVER()</codeph> clause is not allowed at all with this function.
+ </p>
+
+ <p id="impala_parquet_encodings_caveat">
+ Impala can query Parquet files that use the <codeph>PLAIN</codeph>, <codeph>PLAIN_DICTIONARY</codeph>,
+ <codeph>BIT_PACKED</codeph>, and <codeph>RLE</codeph> encodings.
+ Currently, Impala does not support <codeph>RLE_DICTIONARY</codeph> encoding.
+ When creating files outside of Impala for use by Impala, make sure to use one of the supported encodings.
+ In particular, for MapReduce jobs, <codeph>parquet.writer.version</codeph> must not be defined
+ (especially as <codeph>PARQUET_2_0</codeph>) in the configuration of Parquet MR jobs.
+ Use the default version (or format). The default format, 1.0, includes some enhancements that are compatible with older versions.
+ Data using the 2.0 format might not be consumable by Impala, due to use of the <codeph>RLE_DICTIONARY</codeph> encoding.
+ </p>
+
+ <note id="restrictions_nonimpala_parquet">
+ <p>
+ Currently, Impala always decodes the column data in Parquet files based on the ordinal position of the
+ columns, not by looking up the position of each column based on its name. Parquet files produced outside
+ of Impala must write column data in the same order as the columns are declared in the Impala table. Any
+ optional columns that are omitted from the data files must be the rightmost columns in the Impala table
+ definition.
+ </p>
+
+ <p>
+ If you created compressed Parquet files through some tool other than Impala, make sure that any
+ compression codecs are supported in Parquet by Impala. For example, Impala does not currently support LZO
+ compression in Parquet files. Also double-check that you used any recommended compatibility settings in
+ the other tool, such as <codeph>spark.sql.parquet.binaryAsString</codeph> when writing Parquet files
+ through Spark.
+ </p>
+ </note>
+
+ <p id="text_blurb">
+ <b>Text table considerations:</b>
+ </p>
+
+ <p id="text_bulky">
+ <b>Text table considerations:</b> Values of this type are potentially larger in text tables than in tables
+ using Parquet or other binary formats.
+ </p>
+
+ <p id="schema_evolution_blurb">
+ <b>Schema evolution considerations:</b>
+ </p>
+
+ <p id="column_stats_blurb">
+ <b>Column statistics considerations:</b>
+ </p>
+
+ <p id="column_stats_constant">
+ <b>Column statistics considerations:</b> Because this type has a fixed size, the maximum and average size
+ fields are always filled in for column statistics, even before you run the <codeph>COMPUTE STATS</codeph>
+ statement.
+ </p>
+
+ <p id="column_stats_variable">
+ <b>Column statistics considerations:</b> Because the values of this type have variable size, none of the
+ column statistics fields are filled in until you run the <codeph>COMPUTE STATS</codeph> statement.
+ </p>
+
+ <p id="usage_notes_blurb">
+ <b>Usage notes:</b>
+ </p>
+
+ <p id="example_blurb">
+ <b>Examples:</b>
+ </p>
+
+ <p id="result_set_blurb">
+ <b>Result set:</b>
+ </p>
+
+ <p id="jdbc_blurb">
+ <b>JDBC and ODBC considerations:</b>
+ </p>
+
+ <p id="cancel_blurb_no">
+ <b>Cancellation:</b> Cannot be cancelled.
+ </p>
+
+ <p id="cancel_blurb_yes">
+ <b>Cancellation:</b> Can be cancelled. To cancel this statement, use Ctrl-C from the
+ <cmdname>impala-shell</cmdname> interpreter, the <uicontrol>Cancel</uicontrol> button from the
+ <uicontrol>Watch</uicontrol> page in Hue, <uicontrol>Actions > Cancel</uicontrol> from the
+ <uicontrol>Queries</uicontrol> list in Cloudera Manager, or <uicontrol>Cancel</uicontrol> from the list of
+ in-flight queries (for a particular node) on the <uicontrol>Queries</uicontrol> tab in the Impala web UI
+ (port 25000).
+ </p>
+
+ <p id="cancel_blurb_maybe">
+ <b>Cancellation:</b> Certain multi-stage statements (<codeph>CREATE TABLE AS SELECT</codeph> and
+ <codeph>COMPUTE STATS</codeph>) can be cancelled during some stages, when running <codeph>INSERT</codeph>
+ or <codeph>SELECT</codeph> operations internally. To cancel this statement, use Ctrl-C from the
+ <cmdname>impala-shell</cmdname> interpreter, the <uicontrol>Cancel</uicontrol> button from the
+ <uicontrol>Watch</uicontrol> page in Hue, <uicontrol>Actions > Cancel</uicontrol> from the
+ <uicontrol>Queries</uicontrol> list in Cloudera Manager, or <uicontrol>Cancel</uicontrol> from the list of
+ in-flight queries (for a particular node) on the <uicontrol>Queries</uicontrol> tab in the Impala web UI
+ (port 25000).
+ </p>
+
+ <p id="partitioning_blurb">
+ <b>Partitioning:</b>
+ </p>
+
+ <p id="partitioning_good">
+ <b>Partitioning:</b> Prefer to use this type for a partition key column. Impala can process the numeric
+ type more efficiently than a <codeph>STRING</codeph> representation of the value.
+ </p>
+
+ <p id="partitioning_bad">
+ <b>Partitioning:</b> This type can be used for partition key columns. Because of the efficiency advantage
+ of numeric values over character-based values, if the partition key is a string representation of a number,
+ prefer to use an integer type with sufficient range (<codeph>INT</codeph>, <codeph>BIGINT</codeph>, and so
+ on) where practical.
+ </p>
+
+ <p id="partitioning_silly">
+ <b>Partitioning:</b> Because this type has so few distinct values, it is typically not a sensible choice
+ for a partition key column.
+ </p>
+
+ <p id="partitioning_imprecise">
+ <b>Partitioning:</b> Because fractional values of this type are not always represented precisely, when this
+ type is used for a partition key column, the underlying HDFS directories might not be named exactly as you
+ expect. Prefer to partition on a <codeph>DECIMAL</codeph> column instead.
+ </p>
+
+ <p id="partitioning_worrisome">
+ <b>Partitioning:</b> Because this type potentially has so many distinct values, it is often not a sensible
+ choice for a partition key column. For example, events 1 millisecond apart would be stored in different
+ partitions. Consider using the <codeph>TRUNC()</codeph> function to condense the number of distinct values,
+ and partition on a new column with the truncated values.
+ </p>
+
+ <p id="hdfs_blurb">
+ <b>HDFS considerations:</b>
+ </p>
+
+ <p id="file_format_blurb">
+ <b>File format considerations:</b>
+ </p>
+
+ <p id="s3_blurb" rev="2.2.0">
+ <b>Amazon S3 considerations:</b>
+ </p>
+
+ <p id="isilon_blurb" rev="5.4.3">
+ <b>Isilon considerations:</b>
+ </p>
+ <p id="isilon_block_size_caveat" rev="5.4.3">
+ Because the EMC Isilon storage devices use a global value for the block size
+ rather than a configurable value for each file, the <codeph>PARQUET_FILE_SIZE</codeph>
+ query option has no effect when Impala inserts data into a table or partition
+ residing on Isilon storage. Use the <codeph>isi</codeph> command to set the
+ default block size globally on the Isilon device. For example, to set the
+ Isilon default block size to 256 MB, the recommended size for Parquet
+ data files for Impala, issue the following command:
+<codeblock>isi hdfs settings modify --default-block-size=256MB</codeblock>
+ </p>
+
+ <p id="hbase_blurb">
+ <b>HBase considerations:</b>
+ </p>
+
+ <p id="hbase_ok">
+ <b>HBase considerations:</b> This data type is fully compatible with HBase tables.
+ </p>
+
+ <p id="hbase_no">
+ <b>HBase considerations:</b> This data type cannot be used with HBase tables.
+ </p>
+
+ <p id="internals_blurb">
+ <b>Internal details:</b>
+ </p>
+
+ <p id="internals_1_bytes">
+ <b>Internal details:</b> Represented in memory as a 1-byte value.
+ </p>
+
+ <p id="internals_2_bytes">
+ <b>Internal details:</b> Represented in memory as a 2-byte value.
+ </p>
+
+ <p id="internals_4_bytes">
+ <b>Internal details:</b> Represented in memory as a 4-byte value.
+ </p>
+
+ <p id="internals_8_bytes">
+ <b>Internal details:</b> Represented in memory as an 8-byte value.
+ </p>
+
+ <p id="internals_16_bytes">
+ <b>Internal details:</b> Represented in memory as a 16-byte value.
+ </p>
+
+ <p id="internals_max_bytes">
+ <b>Internal details:</b> Represented in memory as a byte array with the same size as the length
+ specification. Values that are shorter than the specified length are padded on the right with trailing
+ spaces.
+ </p>
+
+ <p id="internals_min_bytes">
+ <b>Internal details:</b> Represented in memory as a byte array with the minimum size needed to represent
+ each value.
+ </p>
+
+ <p rev="2.3.0" id="added_in_230">
+ <b>Added in:</b> CDH 5.5.0 (Impala 2.3.0)
+ </p>
+
+ <p rev="2.0.0" id="added_in_20">
+ <b>Added in:</b> CDH 5.2.0 (Impala 2.0.0)
+ </p>
+
+ <p rev="2.0.0" id="enhanced_in_20">
+ <b>Added in:</b> Available in earlier Impala releases, but new capabilities were added in
+ CDH 5.2.0 / Impala 2.0.0
+ </p>
+
+ <p id="added_forever">
+ <b>Added in:</b> Available in all versions of Impala.
+ </p>
+
+ <p id="added_in_140">
+ <b>Added in:</b> Impala 1.4.0
+ </p>
+
+ <p id="added_in_130">
+ <b>Added in:</b> Impala 1.3.0
+ </p>
+
+ <p id="added_in_11">
+ <b>Added in:</b> Impala 1.1
+ </p>
+
+ <p id="added_in_111">
+ <b>Added in:</b> Impala 1.1.1
+ </p>
+
+ <p id="added_in_210">
+ <b>Added in:</b> CDH 5.3.0 (Impala 2.1.0)
+ </p>
+
+ <p id="added_in_220">
+ <b>Added in:</b> CDH 5.4.0 (Impala 2.2.0)
+ </p>
+
+ <p id="syntax_blurb">
+ <b>Syntax:</b>
+ </p>
+
+ <p id="disk_space_blurb">
+ For other tips about managing and reclaiming Impala disk space, see
+ <xref href="../topics/impala_disk_space.xml#disk_space"/>.
+ </p>
+
+ <p id="join_types">
+ Impala supports a wide variety of <codeph>JOIN</codeph> clauses. Left, right, semi, full, and outer joins
+ are supported in all Impala versions. The <codeph>CROSS JOIN</codeph> operator is available in Impala 1.2.2
+ and higher. During performance tuning, you can override the reordering of join clauses that Impala does
+ internally by including the keyword <codeph>STRAIGHT_JOIN</codeph> immediately after the
+ <codeph>SELECT</codeph> keyword.
+ </p>
+
+ <p id="catalog_server_124">
+ In Impala 1.2.4 and higher, you can specify a table name with <codeph>INVALIDATE METADATA</codeph> after
+ the table is created in Hive, allowing you to make individual tables visible to Impala without doing a full
+ reload of the catalog metadata. Impala 1.2.4 also includes other changes to make the metadata broadcast
+ mechanism faster and more responsive, especially during Impala startup. See
+ <xref href="../topics/impala_new_features.xml#new_features_124"/> for details.
+ </p>
+
+ <p id="explain_interpret">
+ Read the <codeph>EXPLAIN</codeph> plan from bottom to top:
+ <ul>
+ <li>
+ The last part of the plan shows the low-level details such as the expected amount of data that will be
+ read, where you can judge the effectiveness of your partitioning strategy and estimate how long it will
+ take to scan a table based on total data size and the size of the cluster.
+ </li>
+
+ <li>
+ As you work your way up, next you see the operations that will be parallelized and performed on each
+ Impala node.
+ </li>
+
+ <li>
+ At the higher levels, you see how data flows when intermediate result sets are combined and transmitted
+ from one node to another.
+ </li>
+
+ <li>
+ See <xref href="../topics/impala_explain_level.xml#explain_level"/> for details about the
+ <codeph>EXPLAIN_LEVEL</codeph> query option, which lets you customize how much detail to show in the
+ <codeph>EXPLAIN</codeph> plan depending on whether you are doing high-level or low-level tuning,
+ dealing with logical or physical aspects of the query.
+ </li>
+ </ul>
+ </p>
+
+<!-- This sequence of paragraph + codeblock + paragraph is typically referenced in sequence wherever it's reused. -->
+
+ <p id="aggr1">
+ Aggregate functions are a special category with different rules. These functions calculate a return value
+ across all the items in a result set, so they require a <codeph>FROM</codeph> clause in the query:
+ </p>
+
+<codeblock id="aggr2" xml:space="preserve">select count(product_id) from product_catalog;
+select max(height), avg(height) from census_data where age > 20;
+</codeblock>
+
+ <p id="aggr3">
+ Aggregate functions also ignore <codeph>NULL</codeph> values rather than returning a <codeph>NULL</codeph>
+ result. For example, if some rows have <codeph>NULL</codeph> for a particular column, those rows are
+ ignored when computing the <codeph>AVG()</codeph> for that column. Likewise, specifying
+ <codeph>COUNT(<varname>col_name</varname>)</codeph> in a query counts only those rows where
+ <varname>col_name</varname> contains a non-<codeph>NULL</codeph> value.
+ </p>
+
+ <p>
+ <ph id="aliases_vs_identifiers"> Aliases follow the same rules as identifiers when it comes to case
+ insensitivity. Aliases can be longer than identifiers (up to the maximum length of a Java string) and can
+ include additional characters such as spaces and dashes when they are quoted using backtick characters.
+ </ph>
+ </p>
+
+ <p id="views_vs_identifiers">
+ Another way to define different names for the same tables or columns is to create views. See
+ <xref href="../topics/impala_views.xml#views"/> for details.
+ </p>
+
+ <p id="insert_hints" rev="1.2.2">
+ When inserting into partitioned tables, especially using the Parquet file format, you can include a hint in
+ the <codeph>INSERT</codeph> statement to fine-tune the overall performance of the operation and its
+ resource usage:
+ <ul>
+ <li>
+ These hints are available in Impala 1.2.2 and higher.
+ </li>
+
+ <li>
+ You would only use these hints if an <codeph>INSERT</codeph> into a partitioned Parquet table was
+ failing due to capacity limits, or if such an <codeph>INSERT</codeph> was succeeding but with
+ less-than-optimal performance.
+ </li>
+
+ <li>
+ To use these hints, put the hint keyword <codeph>[SHUFFLE]</codeph> or <codeph>[NOSHUFFLE]</codeph>
+ (including the square brackets) after the <codeph>PARTITION</codeph> clause, immediately before the
+ <codeph>SELECT</codeph> keyword.
+ </li>
+
+ <li>
+ <codeph>[SHUFFLE]</codeph> selects an execution plan that minimizes the number of files being written
+ simultaneously to HDFS, and the number of memory buffers holding data for individual partitions. Thus
+ it reduces overall resource usage for the <codeph>INSERT</codeph> operation, allowing some
+ <codeph>INSERT</codeph> operations to succeed that otherwise would fail. It does involve some data
+ transfer between the nodes so that the data files for a particular partition are all constructed on the
+ same node.
+ </li>
+
+ <li>
+ <codeph>[NOSHUFFLE]</codeph> selects an execution plan that might be faster overall, but might also
+ produce a larger number of small data files or exceed capacity limits, causing the
+ <codeph>INSERT</codeph> operation to fail. Use <codeph>[SHUFFLE]</codeph> in cases where an
+ <codeph>INSERT</codeph> statement fails or runs inefficiently due to all nodes attempting to construct
+ data for all partitions.
+ </li>
+
+ <li>
+ Impala automatically uses the <codeph>[SHUFFLE]</codeph> method if any partition key column in the
+ source table, mentioned in the <codeph>INSERT ... SELECT</codeph> query, does not have column
+ statistics. In this case, only the <codeph>[NOSHUFFLE]</codeph> hint would have any effect.
+ </li>
+
+ <li>
+ If column statistics are available for all partition key columns in the source table mentioned in the
+ <codeph>INSERT ... SELECT</codeph> query, Impala chooses whether to use the <codeph>[SHUFFLE]</codeph>
+ or <codeph>[NOSHUFFLE]</codeph> technique based on the estimated number of distinct values in those
+ columns and the number of nodes involved in the <codeph>INSERT</codeph> operation. In this case, you
+ might need the <codeph>[SHUFFLE]</codeph> or the <codeph>[NOSHUFFLE]</codeph> hint to override the
+ execution plan selected by Impala.
+ </li>
+ </ul>
+ </p>
+
+ <p id="insert_parquet_blocksize">
+ Any <codeph>INSERT</codeph> statement for a Parquet table requires enough free space in the HDFS filesystem
+ to write one block. Because Parquet data files use a block size of 1 GB by default, an
+ <codeph>INSERT</codeph> might fail (even for a very small amount of data) if your HDFS is running low on
+ space.
+ </p>
+
+ <note id="compute_stats_next" type="important">
+ After adding or replacing data in a table used in performance-critical queries, issue a <codeph>COMPUTE
+ STATS</codeph> statement to make sure all statistics are up-to-date. Consider updating statistics for a
+ table after any <codeph>INSERT</codeph>, <codeph>LOAD DATA</codeph>, or <codeph>CREATE TABLE AS
+ SELECT</codeph> statement in Impala, or after loading data through Hive and doing a <codeph>REFRESH
+ <varname>table_name</varname></codeph> in Impala. This technique is especially important for tables that
+ are very large, used in join queries, or both.
+ </note>
+
+ <p id="concat_blurb">
+ <b>Usage notes:</b> <codeph>concat()</codeph> and <codeph>concat_ws()</codeph> are appropriate for
+ concatenating the values of multiple columns within the same row, while <codeph>group_concat()</codeph>
+ joins together values from different rows.
+ </p>
+
+ <p id="null_sorting_change">
+ In Impala 1.2.1 and higher, all <codeph>NULL</codeph> values come at the end of the result set for
+ <codeph>ORDER BY ... ASC</codeph> queries, and at the beginning of the result set for <codeph>ORDER BY ...
+ DESC</codeph> queries. In effect, <codeph>NULL</codeph> is considered greater than all other values for
+ sorting purposes. The original Impala behavior always put <codeph>NULL</codeph> values at the end, even for
+ <codeph>ORDER BY ... DESC</codeph> queries. The new behavior in Impala 1.2.1 makes Impala more compatible
+ with other popular database systems. In Impala 1.2.1 and higher, you can override or specify the sorting
+ behavior for <codeph>NULL</codeph> by adding the clause <codeph>NULLS FIRST</codeph> or <codeph>NULLS
+ LAST</codeph> at the end of the <codeph>ORDER BY</codeph> clause.
+ </p>
+
+ <p id="return_same_type">
+ <b>Return type:</b> same as the initial argument value, except that integer values are promoted to
+ <codeph>BIGINT</codeph> and floating-point values are promoted to <codeph>DOUBLE</codeph>; use
+ <codeph>CAST()</codeph> when inserting into a smaller numeric column
+ </p>
+
+ <p id="ddl_blurb">
+ <b>Statement type:</b> DDL
+ </p>
+
+ <p id="dml_blurb">
+ <b>Statement type:</b> DML (but still affected by
+ <xref href="../topics/impala_sync_ddl.xml#sync_ddl">SYNC_DDL</xref> query option)
+ </p>
+
+ <p rev="1.2" id="sync_ddl_blurb">
+ If you connect to different Impala nodes within an <cmdname>impala-shell</cmdname> session for
+ load-balancing purposes, you can enable the <codeph>SYNC_DDL</codeph> query option to make each DDL
+ statement wait before returning, until the new or changed metadata has been received by all the Impala
+ nodes. See <xref href="../topics/impala_sync_ddl.xml#sync_ddl"/> for details.
+ </p>
+
+<!-- Boost no longer used in Impala 2.0 and later, so this conref is no longer referenced anywhere. -->
+
+ <p id="regexp_boost">
+ The Impala regular expression syntax conforms to the POSIX Extended Regular Expression syntax used by the
+ Boost library. For details, see
+ <xref href="http://www.boost.org/doc/libs/1_46_0/libs/regex/doc/html/boost_regex/syntax/basic_extended.html" scope="external" format="html">the
+ Boost documentation</xref>. It has most idioms familiar from regular expressions in Perl, Python, and so
+ on. It does not support <codeph>.*?</codeph> for non-greedy matches.
+ </p>
+
+ <p rev="2.0.0" id="regexp_re2">
+ In Impala 2.0 and later, the Impala regular expression syntax conforms to the POSIX Extended Regular
+ Expression syntax used by the Google RE2 library. For details, see
+ <xref href="https://code.google.com/p/re2/" scope="external" format="html">the RE2 documentation</xref>. It
+ has most idioms familiar from regular expressions in Perl, Python, and so on, including
+ <codeph>.*?</codeph> for non-greedy matches.
+ </p>
+
+ <p rev="2.0.0" id="regexp_re2_warning">
+ In Impala 2.0 and later, a change in the underlying regular expression library could cause changes in the
+ way regular expressions are interpreted by this function. Test any queries that use regular expressions and
+ adjust the expression patterns if necessary. See
+ <xref href="../topics/impala_incompatible_changes.xml#incompatible_changes_200"/> for details.
+ </p>
+
+ <p id="regexp_escapes">
+ Because the <cmdname>impala-shell</cmdname> interpreter uses the <codeph>\</codeph> character for escaping,
+ use <codeph>\\</codeph> to represent the regular expression escape character in any regular expressions
+ that you submit through <cmdname>impala-shell</cmdname>. You might prefer to use the equivalent character
+ class names, such as <codeph>[[:digit:]]</codeph> instead of <codeph>\d</codeph> which you would have to
+ escape
<TRUNCATED>
[20/22] incubator-impala git commit: First try at porting over the
source files necessary for the Impala SQL Reference.
Posted by jr...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_abort_on_default_limit_exceeded.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_abort_on_default_limit_exceeded.xml b/docs/topics/impala_abort_on_default_limit_exceeded.xml
new file mode 100644
index 0000000..c58be63
--- /dev/null
+++ b/docs/topics/impala_abort_on_default_limit_exceeded.xml
@@ -0,0 +1,20 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept rev="obwl" id="abort_on_default_limit_exceeded">
+
+ <title>ABORT_ON_DEFAULT_LIMIT_EXCEEDED Query Option</title>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="Impala Query Options"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p conref="../shared/impala_common.xml#common/obwl_query_options"/>
+
+ <p conref="../shared/impala_common.xml#common/type_boolean"/>
+ <p conref="../shared/impala_common.xml#common/default_false_0"/>
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_abort_on_error.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_abort_on_error.xml b/docs/topics/impala_abort_on_error.xml
new file mode 100644
index 0000000..1926333
--- /dev/null
+++ b/docs/topics/impala_abort_on_error.xml
@@ -0,0 +1,40 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="abort_on_error">
+
+ <title>ABORT_ON_ERROR Query Option</title>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="Impala Query Options"/>
+ <data name="Category" value="Troubleshooting"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">ABORT_ON_ERROR query option</indexterm>
+ When this option is enabled, Impala cancels a query immediately when any of the nodes encounters an error,
+ rather than continuing and possibly returning incomplete results. This option is disabled by default, to help
+ gather maximum diagnostic information when an error occurs, for example, whether the same problem occurred on
+ all nodes or only a single node. Currently, the errors that Impala can skip over involve data corruption,
+ such as a column that contains a string value when expected to contain an integer value.
+ </p>
+
+ <p>
+ To control how much logging Impala does for non-fatal errors when <codeph>ABORT_ON_ERROR</codeph> is turned
+ off, use the <codeph>MAX_ERRORS</codeph> option.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/type_boolean"/>
+ <p conref="../shared/impala_common.xml#common/default_false_0"/>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+ <p>
+ <xref href="impala_max_errors.xml#max_errors"/>,
+ <xref href="impala_logging.xml#logging"/>
+ </p>
+
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_aggregate_functions.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_aggregate_functions.xml b/docs/topics/impala_aggregate_functions.xml
new file mode 100644
index 0000000..5095266
--- /dev/null
+++ b/docs/topics/impala_aggregate_functions.xml
@@ -0,0 +1,33 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="aggregate_functions">
+
+ <title>Impala Aggregate Functions</title>
+ <titlealts><navtitle>Aggregate Functions</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="Impala Functions"/>
+ <data name="Category" value="Aggregate Functions"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Data Analysts"/>
+ <data name="Category" value="Developers"/>
+ <data name="Category" value="Querying"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p conref="../shared/impala_common.xml#common/aggr1"/>
+
+<codeblock conref="../shared/impala_common.xml#common/aggr2"/>
+
+ <p conref="../shared/impala_common.xml#common/aggr3"/>
+
+ <p>
+ <indexterm audience="Cloudera">aggregate functions</indexterm>
+ </p>
+
+ <p outputclass="toc"/>
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_aliases.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_aliases.xml b/docs/topics/impala_aliases.xml
new file mode 100644
index 0000000..66a16fe
--- /dev/null
+++ b/docs/topics/impala_aliases.xml
@@ -0,0 +1,71 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="aliases">
+
+ <title>Overview of Impala Aliases</title>
+ <titlealts><navtitle>Aliases</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Data Analysts"/>
+ <data name="Category" value="Developers"/>
+ <data name="Category" value="Querying"/>
+ <data name="Category" value="Tables"/>
+ <data name="Category" value="Schemas"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ When you write the names of tables, columns, or column expressions in a query, you can assign an alias at the
+ same time. Then you can specify the alias rather than the original name when making other references to the
+ table or column in the same statement. You typically specify aliases that are shorter or easier to remember
+ (or both) than the original names. The aliases are printed in the query header, making them useful for
+ self-documenting output.
+ </p>
+
+ <p>
+ To set up an alias, add the <codeph>AS <varname>alias</varname></codeph> clause immediately after any table,
+ column, or expression name in the <codeph>SELECT</codeph> list or <codeph>FROM</codeph> list of a query. The
+ <codeph>AS</codeph> keyword is optional; you can also specify the alias immediately after the original name.
+ </p>
+
+ <p>
+ To use an alias name that matches one of the Impala reserved keywords (listed in
+ <xref href="impala_reserved_words.xml#reserved_words"/>), surround the identifier with either single or
+ double quotation marks, or <codeph>``</codeph> characters (backticks).
+ </p>
+
+<codeblock>select c1 as name, c2 as address, c3 as phone from table_with_terse_columns;
+select sum(ss_xyz_dollars_net) as total_sales from table_with_cryptic_columns;
+select one.name, two.address, three.phone from
+ census one, building_directory two, phonebook three
+ where one.id = two.id and two.id = three.id;</codeblock>
+
+ <p>
+ <ph conref="../shared/impala_common.xml#common/aliases_vs_identifiers"/>
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/complex_types_blurb"/>
+
+ <p rev="2.3.0">
+ Queries involving the complex types (<codeph>ARRAY</codeph>,
+ <codeph>STRUCT</codeph>, and <codeph>MAP</codeph>) typically make
+ extensive use of table aliases. These queries involve join clauses
+ where the complex type column is treated as a joined table.
+ To construct two-part or three-part qualified names for the
+ complex column elements in the <codeph>FROM</codeph> list,
+ sometimes it is syntactically required to construct a table
+ alias for the complex column where it is referenced in the join clause.
+ See <xref href="impala_complex_types.xml#complex_types"/> for details and examples.
+ </p>
+
+ <p>
+ <b>Alternatives:</b>
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/views_vs_identifiers"/>
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_allow_unsupported_formats.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_allow_unsupported_formats.xml b/docs/topics/impala_allow_unsupported_formats.xml
new file mode 100644
index 0000000..824daa4
--- /dev/null
+++ b/docs/topics/impala_allow_unsupported_formats.xml
@@ -0,0 +1,29 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="allow_unsupported_formats">
+
+ <title>ALLOW_UNSUPPORTED_FORMATS Query Option</title>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="Impala Query Options"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+<!--
+The original brief explanation with not enough detail comes from the comments at:
+ http://github.sf.cloudera.com/CDH/Impala/raw/master/common/thrift/ImpalaService.thrift
+Removing that wording from here after discussions with dev team. Just recording the URL for posterity.
+-->
+
+ <p>
+ An obsolete query option from early work on support for file formats. Do not use. Might be removed in the
+ future.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/type_boolean"/>
+ <p conref="../shared/impala_common.xml#common/default_false_0"/>
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_alter_table.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_alter_table.xml b/docs/topics/impala_alter_table.xml
new file mode 100644
index 0000000..800261a
--- /dev/null
+++ b/docs/topics/impala_alter_table.xml
@@ -0,0 +1,411 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="alter_table">
+
+ <title>ALTER TABLE Statement</title>
+ <titlealts><navtitle>ALTER TABLE</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="Impala Data Types"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="DDL"/>
+ <data name="Category" value="HDFS Caching"/>
+ <data name="Category" value="Tables"/>
+ <data name="Category" value="Schemas"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">ALTER TABLE statement</indexterm>
+ The <codeph>ALTER TABLE</codeph> statement changes the structure or properties of an existing table. In
+ Impala, this is a logical operation that updates the table metadata in the metastore database that Impala
+ shares with Hive; <codeph>ALTER TABLE</codeph> does not actually rewrite, move, and so on the actual data
+ files. Thus, you might need to perform corresponding physical filesystem operations, such as moving data
+ files to a different HDFS directory, rewriting the data files to include extra fields, or converting them to
+ a different file format.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock>ALTER TABLE [<varname>old_db_name</varname>.]<varname>old_table_name</varname> RENAME TO [<varname>new_db_name</varname>.]<varname>new_table_name</varname>
+
+ALTER TABLE <varname>name</varname> ADD COLUMNS (<varname>col_spec</varname>[, <varname>col_spec</varname> ...])
+ALTER TABLE <varname>name</varname> DROP [COLUMN] <varname>column_name</varname>
+ALTER TABLE <varname>name</varname> CHANGE <varname>column_name</varname> <varname>new_name</varname> <varname>new_type</varname>
+ALTER TABLE <varname>name</varname> REPLACE COLUMNS (<varname>col_spec</varname>[, <varname>col_spec</varname> ...])
+
+ALTER TABLE <varname>name</varname> { ADD | DROP } PARTITION (<varname>partition_spec</varname>) <ph rev="2.3.0">[PURGE]</ph>
+
+ALTER TABLE <varname>name</varname> [PARTITION (<varname>partition_spec</varname>)]
+ SET { FILEFORMAT <varname>file_format</varname>
+ | LOCATION '<varname>hdfs_path_of_directory</varname>'
+ | TBLPROPERTIES (<varname>table_properties</varname>)
+ | SERDEPROPERTIES (<varname>serde_properties</varname>) }
+
+<ph rev="1.4.0">ALTER TABLE <varname>name</varname> [PARTITION (<varname>partition_spec</varname>)] SET { CACHED IN '<varname>pool_name</varname>' <ph rev="2.2.0">[WITH REPLICATION = <varname>integer</varname>]</ph> | UNCACHED }</ph>
+
+<varname>new_name</varname> ::= [<varname>new_database</varname>.]<varname>new_table_name</varname>
+
+<varname>col_spec</varname> ::= <varname>col_name</varname> <varname>type_name</varname>
+
+<varname>partition_spec</varname> ::= <varname>partition_col</varname>=<varname>constant_value</varname>
+
+<varname>table_properties</varname> ::= '<varname>name</varname>'='<varname>value</varname>'[, '<varname>name</varname>'='<varname>value</varname>' ...]
+
+<varname>serde_properties</varname> ::= '<varname>name</varname>'='<varname>value</varname>'[, '<varname>name</varname>'='<varname>value</varname>' ...]
+
+<varname>file_format</varname> ::= { PARQUET | TEXTFILE | RCFILE | SEQUENCEFILE | AVRO }
+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/ddl_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/complex_types_blurb"/>
+
+ <p rev="2.3.0">
+ In CDH 5.5 / Impala 2.3 and higher, the <codeph>ALTER TABLE</codeph> statement can
+ change the metadata for tables containing complex types (<codeph>ARRAY</codeph>,
+ <codeph>STRUCT</codeph>, and <codeph>MAP</codeph>).
+ For example, you can use an <codeph>ADD COLUMNS</codeph>, <codeph>DROP COLUMN</codeph>, or <codeph>CHANGE</codeph>
+ clause to modify the table layout for complex type columns.
+ Although Impala queries only work for complex type columns in Parquet tables, the complex type support in the
+ <codeph>ALTER TABLE</codeph> statement applies to all file formats.
+ For example, you can use Impala to update metadata for a staging table in a non-Parquet file format where the
+ data is populated by Hive. Or you can use <codeph>ALTER TABLE SET FILEFORMAT</codeph> to change the format
+ of an existing table to Parquet so that Impala can query it. (Remember that changing the file format for a table does
+ not convert the data files within the table; you must prepare any Parquet data files containing complex types
+ outside Impala, and bring them into the table using <codeph>LOAD DATA</codeph> or updating the table's
+ <codeph>LOCATION</codeph> property.)
+ See <xref href="impala_complex_types.xml#complex_types"/> for details about using complex types.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+
+ <p>
+ Whenever you specify partitions in an <codeph>ALTER TABLE</codeph> statement, through the <codeph>PARTITION
+ (<varname>partition_spec</varname>)</codeph> clause, you must include all the partitioning columns in the
+ specification.
+ </p>
+
+ <p>
+ Most of the <codeph>ALTER TABLE</codeph> operations work the same for internal tables (managed by Impala) as
+ for external tables (with data files located in arbitrary locations). The exception is renaming a table; for
+ an external table, the underlying data directory is not renamed or moved.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/s3_blurb"/>
+ <p rev="2.2.0">
+ You can specify an <codeph>s3a://</codeph> prefix in the <codeph>LOCATION</codeph> attribute of a table or partition
+ to make Impala query data from the Amazon S3 filesystem.
+ See <xref href="impala_s3.xml#s3"/> for details.
+ </p>
+
+ <p rev="1.4.0">
+ <b>HDFS caching (CACHED IN clause):</b>
+ </p>
+
+ <p rev="1.4.0">
+ If you specify the <codeph>CACHED IN</codeph> clause, any existing or future data files in the table
+ directory or the partition subdirectories are designated to be loaded into memory with the HDFS caching
+ mechanism. See <xref href="impala_perf_hdfs_caching.xml#hdfs_caching"/> for details about using the HDFS
+ caching feature.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/impala_cache_replication_factor"/>
+
+ <p conref="../shared/impala_common.xml#common/sync_ddl_blurb"/>
+
+ <p>
+ The following sections show examples of the use cases for various <codeph>ALTER TABLE</codeph> clauses.
+ </p>
+
+ <p>
+ <b>To rename a table (RENAME TO clause):</b>
+ </p>
+
+<!-- Beefing up the syntax in its original location up to, don't need to repeat it here.
+<codeblock>ALTER TABLE <varname>old_name</varname> RENAME TO <varname>new_name</varname>;</codeblock>
+-->
+
+ <p>
+ The <codeph>RENAME TO</codeph> clause lets you change the name of an existing table, and optionally which
+ database it is located in.
+ </p>
+
+ <p>
+ For internal tables, this operation physically renames the directory within HDFS that contains the data files;
+ the original directory name no longer exists. By qualifying the table names with database names, you can use
+ this technique to move an internal table (and its associated data directory) from one database to another.
+ For example:
+ </p>
+
+<codeblock>create database d1;
+create database d2;
+create database d3;
+use d1;
+create table mobile (x int);
+use d2;
+-- Move table from another database to the current one.
+alter table d1.mobile rename to mobile;
+use d1;
+-- Move table from one database to another.
+alter table d2.mobile rename to d3.mobile;</codeblock>
+
+ <p>
+ For external tables, the underlying HDFS data directory is not renamed or moved; only the table metadata
+ in the metastore database is updated to reflect the new name.
+ </p>
+
+ <p>
+ <b>To change the physical location where Impala looks for data files associated with a table or
+ partition:</b>
+ </p>
+
+<codeblock>ALTER TABLE <varname>table_name</varname> [PARTITION (<varname>partition_spec</varname>)] SET LOCATION '<varname>hdfs_path_of_directory</varname>';</codeblock>
+
+ <p>
+ The path you specify is the full HDFS path where the data files reside, or will be created. Impala does not
+ create any additional subdirectory named after the table. Impala does not move any data files to this new
+ location or change any data files that might already exist in that directory.
+ </p>
+
+ <p>
+ To set the location for a single partition, include the <codeph>PARTITION</codeph> clause. Specify all the
+ same partitioning columns for the table, with a constant value for each, to precisely identify the single
+ partition affected by the statement:
+ </p>
+
+<codeblock>create table p1 (s string) partitioned by (month int, day int);
+-- Each ADD PARTITION clause creates a subdirectory in HDFS.
+alter table p1 add partition (month=1, day=1);
+alter table p1 add partition (month=1, day=2);
+alter table p1 add partition (month=2, day=1);
+alter table p1 add partition (month=2, day=2);
+-- Redirect queries, INSERT, and LOAD DATA for one partition
+-- to a specific different directory.
+alter table p1 partition (month=1, day=1) set location '/usr/external_data/new_years_day';
+</codeblock>
+
+ <note conref="../shared/impala_common.xml#common/add_partition_set_location"/>
+
+ <p rev="1.2">
+ <b>To change the key-value pairs of the TBLPROPERTIES and SERDEPROPERTIES fields:</b>
+ </p>
+
+<codeblock>ALTER TABLE <varname>table_name</varname> SET TBLPROPERTIES ('<varname>key1</varname>'='<varname>value1</varname>', '<varname>key2</varname>'='<varname>value2</varname>'[, ...]);
+ALTER TABLE <varname>table_name</varname> SET SERDEPROPERTIES ('<varname>key1</varname>'='<varname>value1</varname>', '<varname>key2</varname>'='<varname>value2</varname>'[, ...]);</codeblock>
+
+ <p>
+ The <codeph>TBLPROPERTIES</codeph> clause is primarily a way to associate arbitrary user-specified data items
+ with a particular table.
+ </p>
+
+ <p>
+ The <codeph>SERDEPROPERTIES</codeph> clause sets up metadata defining how tables are read or written, needed
+ in some cases by Hive but not used extensively by Impala. You would use this clause primarily to change the
+ delimiter in an existing text table or partition, by setting the <codeph>'serialization.format'</codeph> and
+ <codeph>'field.delim'</codeph> property values to the new delimiter character:
+ </p>
+
+<codeblock>-- This table begins life as pipe-separated text format.
+create table change_to_csv (s1 string, s2 string) row format delimited fields terminated by '|';
+-- Then we change it to a CSV table.
+alter table change_to_csv set SERDEPROPERTIES ('serialization.format'=',', 'field.delim'=',');
+insert overwrite change_to_csv values ('stop','go'), ('yes','no');
+!hdfs dfs -cat 'hdfs://<varname>hostname</varname>:8020/<varname>data_directory</varname>/<varname>dbname</varname>.db/change_to_csv/<varname>data_file</varname>';
+stop,go
+yes,no</codeblock>
+
+ <p>
+ Use the <codeph>DESCRIBE FORMATTED</codeph> statement to see the current values of these properties for an
+ existing table. See <xref href="impala_create_table.xml#create_table"/> for more details about these clauses.
+ See <xref href="impala_perf_stats.xml#perf_stats_manual"/> for an example of using table properties to
+ fine-tune the performance-related table statistics.
+ </p>
+
+ <p>
+ <b>To reorganize columns for a table:</b>
+ </p>
+
+<codeblock>ALTER TABLE <varname>table_name</varname> ADD COLUMNS (<varname>column_defs</varname>);
+ALTER TABLE <varname>table_name</varname> REPLACE COLUMNS (<varname>column_defs</varname>);
+ALTER TABLE <varname>table_name</varname> CHANGE <varname>column_name</varname> <varname>new_name</varname> <varname>new_type</varname>;
+ALTER TABLE <varname>table_name</varname> DROP <varname>column_name</varname>;</codeblock>
+
+ <p>
+ The <varname>column_spec</varname> is the same as in the <codeph>CREATE TABLE</codeph> statement: the column
+ name, then its data type, then an optional comment. You can add multiple columns at a time. The parentheses
+ are required whether you add a single column or multiple columns. When you replace columns, all the original
+ column definitions are discarded. You might use this technique if you receive a new set of data files with
+ different data types or columns in a different order. (The data files are retained, so if the new columns are
+ incompatible with the old ones, use <codeph>INSERT OVERWRITE</codeph> or <codeph>LOAD DATA OVERWRITE</codeph>
+ to replace all the data before issuing any further queries.)
+ </p>
+
+ <p>
+ You might use the <codeph>CHANGE</codeph> clause to rename a single column, or to treat an existing column as
+ a different type than before, such as to switch between treating a column as <codeph>STRING</codeph> and
+ <codeph>TIMESTAMP</codeph>, or between <codeph>INT</codeph> and <codeph>BIGINT</codeph>. You can only drop a
+ single column at a time; to drop multiple columns, issue multiple <codeph>ALTER TABLE</codeph> statements, or
+ define the new set of columns with a single <codeph>ALTER TABLE ... REPLACE COLUMNS</codeph> statement.
+ </p>
+
+ <p>
+ <b>To change the file format that Impala expects data to be in, for a table or partition:</b>
+ </p>
+
+ <p>
+ Use an <codeph>ALTER TABLE ... SET FILEFORMAT</codeph> clause. You can include an optional <codeph>PARTITION
+ (<varname>col1</varname>=<varname>val1</varname>, <varname>col2</varname>=<varname>val2</varname>,
+ ...)</codeph> clause so that the file format is changed for a specific partition rather than the entire table.
+ </p>
+
+ <p>
+ Because this operation only changes the table metadata, you must do any conversion of existing data using
+ regular Hadoop techniques outside of Impala. Any new data created by the Impala <codeph>INSERT</codeph>
+ statement will be in the new format. You cannot specify the delimiter for Text files; the data files must be
+ comma-delimited.
+<!-- Although Impala can read Avro tables
+ created through Hive, you cannot specify the Avro file format in an Impala
+ <codeph>ALTER TABLE</codeph> statement. -->
+ </p>
+
+ <p>
+ To set the file format for a single partition, include the <codeph>PARTITION</codeph> clause. Specify all the
+ same partitioning columns for the table, with a constant value for each, to precisely identify the single
+ partition affected by the statement:
+ </p>
+
+<codeblock>create table p1 (s string) partitioned by (month int, day int);
+-- Each ADD PARTITION clause creates a subdirectory in HDFS.
+alter table p1 add partition (month=1, day=1);
+alter table p1 add partition (month=1, day=2);
+alter table p1 add partition (month=2, day=1);
+alter table p1 add partition (month=2, day=2);
+-- Queries and INSERT statements will read and write files
+-- in this format for this specific partition.
+alter table p1 partition (month=2, day=2) set fileformat parquet;
+</codeblock>
+
+ <p>
+ <b>To add or drop partitions for a table</b>, the table must already be partitioned (that is, created with a
+ <codeph>PARTITIONED BY</codeph> clause). The partition is a physical directory in HDFS, with a name that
+ encodes a particular column value (the <b>partition key</b>). The Impala <codeph>INSERT</codeph> statement
+ already creates the partition if necessary, so the <codeph>ALTER TABLE ... ADD PARTITION</codeph> is
+ primarily useful for importing data by moving or copying existing data files into the HDFS directory
+ corresponding to a partition. (You can use the <codeph>LOAD DATA</codeph> statement to move files into the
+ partition directory, or <codeph>ALTER TABLE ... PARTITION (...) SET LOCATION</codeph> to point a partition at
+ a directory that already contains data files.)
+ </p>
+
+ <p>
+ The <codeph>DROP PARTITION</codeph> clause is used to remove the HDFS directory and associated data files for
+ a particular set of partition key values; for example, if you always analyze the last 3 months worth of data,
+ at the beginning of each month you might drop the oldest partition that is no longer needed. Removing
+ partitions reduces the amount of metadata associated with the table and the complexity of calculating the
+ optimal query plan, which can simplify and speed up queries on partitioned tables, particularly join queries.
+ Here is an example showing the <codeph>ADD PARTITION</codeph> and <codeph>DROP PARTITION</codeph> clauses.
+ </p>
+
+ <p rev="2.3.0">
+ The optional <codeph>PURGE</codeph> keyword, available in CDH 5.5 / Impala 2.3 and higher,
+ is used with the <codeph>DROP PARTITION</codeph> clause to remove associated HDFS data files
+ immediately rather than going through the HDFS trashcan mechanism.
+ Use this keyword when dropping a partition if it is
+ crucial to remove the data as quickly as possible to free up space, or if there is a problem with
+ the trashcan, such as the trashcan not being configured or being in a different HDFS encryption zone
+ than the data files.
+ </p>
+
+ <draft-comment translate="no">
+ Make example more general by partitioning by year/month/day.
+ Then could show inserting into fixed year, variable month and day;
+ dropping particular year/month/day partition.
+ </draft-comment>
+
+<codeblock>-- Create an empty table and define the partitioning scheme.
+create table part_t (x int) partitioned by (month int);
+-- Create an empty partition into which you could copy data files from some other source.
+alter table part_t add partition (month=1);
+-- After changing the underlying data, issue a REFRESH statement to make the data visible in Impala.
+refresh part_t;
+-- Later, do the same for the next month.
+alter table part_t add partition (month=2);
+
+-- Now you no longer need the older data.
+alter table part_t drop partition (month=1);
+-- If the table was partitioned by month and year, you would issue a statement like:
+-- alter table part_t drop partition (year=2003,month=1);
+-- which would require 12 ALTER TABLE statements to remove a year's worth of data.
+
+-- If the data files for subsequent months were in a different file format,
+-- you could set a different file format for the new partition as you create it.
+alter table part_t add partition (month=3) set fileformat parquet;
+</codeblock>
+
+ <p>
+ The value specified for a partition key can be an arbitrary constant expression, without any references to
+ columns. For example:
+ </p>
+
+<codeblock>alter table time_data add partition (month=concat('Decem','ber'));
+alter table sales_data add partition (zipcode = cast(9021 * 10 as string));</codeblock>
+
+ <note>
+ <p>
+ An alternative way to reorganize a table and its associated data files is to use <codeph>CREATE
+ TABLE</codeph> to create a variation of the original table, then use <codeph>INSERT</codeph> to copy the
+ transformed or reordered data to the new table. The advantage of <codeph>ALTER TABLE</codeph> is that it
+ avoids making a duplicate copy of the data files, allowing you to reorganize huge volumes of data in a
+ space-efficient way using familiar Hadoop techniques.
+ </p>
+ </note>
+
+ <p>
+ <b>To switch a table between internal and external:</b>
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/switch_internal_external_table"/>
+
+ <p conref="../shared/impala_common.xml#common/cancel_blurb_no"/>
+
+ <p conref="../shared/impala_common.xml#common/permissions_blurb"/>
+ <p rev="CDH-19187">
+ Most <codeph>ALTER TABLE</codeph> clauses do not actually
+ read or write any HDFS files, and so do not depend on
+ specific HDFS permissions. For example, the <codeph>SET FILEFORMAT</codeph>
+ clause does not actually check the file format of existing data files or
+ convert them to the new format, and the <codeph>SET LOCATION</codeph> clause
+ does not require any special permissions on the new location.
+ (Any permission-related failures would come later, when you
+ actually query or insert into the table.)
+ </p>
+<!-- Haven't rigorously tested all the assertions in the following paragraph. -->
+<!-- Most testing so far has been around RENAME TO clause. -->
+ <p>
+ In general, <codeph>ALTER TABLE</codeph> clauses that do touch
+ HDFS files and directories require the same HDFS permissions
+ as corresponding <codeph>CREATE</codeph>, <codeph>INSERT</codeph>,
+ or <codeph>SELECT</codeph> statements.
+ The permissions allow
+ the user ID that the <cmdname>impalad</cmdname> daemon runs under,
+ typically the <codeph>impala</codeph> user, to read or write
+ files or directories, or (in the case of the execute bit) descend into a directory.
+ The <codeph>RENAME TO</codeph> clause requires read, write, and execute permission in the
+ source and destination database directories and in the table data directory,
+ and read and write permission for the data files within the table.
+ The <codeph>ADD PARTITION</codeph> and <codeph>DROP PARTITION</codeph> clauses
+ require write and execute permissions for the associated partition directory.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ <xref href="impala_tables.xml#tables"/>,
+ <xref href="impala_create_table.xml#create_table"/>, <xref href="impala_drop_table.xml#drop_table"/>,
+ <xref href="impala_partitioning.xml#partitioning"/>, <xref href="impala_tables.xml#internal_tables"/>,
+ <xref href="impala_tables.xml#external_tables"/>
+ </p>
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_alter_view.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_alter_view.xml b/docs/topics/impala_alter_view.xml
new file mode 100644
index 0000000..0d83032
--- /dev/null
+++ b/docs/topics/impala_alter_view.xml
@@ -0,0 +1,73 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept rev="1.1" id="alter_view">
+
+ <title>ALTER VIEW Statement</title>
+ <titlealts><navtitle>ALTER VIEW</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="DDL"/>
+ <data name="Category" value="Tables"/>
+ <data name="Category" value="Schemas"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">ALTER VIEW statement</indexterm>
+ Changes the query associated with a view, or the associated database and/or name of the view.
+ </p>
+
+ <p>
+ Because a view is purely a logical construct (an alias for a query) with no physical data behind it,
+ <codeph>ALTER VIEW</codeph> only involves changes to metadata in the metastore database, not any data files
+ in HDFS.
+ </p>
+
+<!-- View _permissions_ don't rely on underlying table. -->
+
+<!-- Could use views to grant access only to certain columns. -->
+
+<!-- Treated like a table for authorization. -->
+
+<!-- ALTER VIEW that queries another view - possibly a runtime error. -->
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock>ALTER VIEW [<varname>database_name</varname>.]<varname>view_name</varname> AS <varname>select_statement</varname>
+ALTER VIEW [<varname>database_name</varname>.]<varname>view_name</varname> RENAME TO [<varname>database_name</varname>.]<varname>view_name</varname></codeblock>
+
+ <p conref="../shared/impala_common.xml#common/ddl_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/sync_ddl_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/security_blurb"/>
+ <p conref="../shared/impala_common.xml#common/redaction_yes"/>
+
+ <p conref="../shared/impala_common.xml#common/cancel_blurb_no"/>
+
+ <p conref="../shared/impala_common.xml#common/permissions_blurb_no"/>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+<codeblock>create table t1 (x int, y int, s string);
+create table t2 like t1;
+create view v1 as select * from t1;
+alter view v1 as select * from t2;
+alter view v1 as select x, upper(s) s from t2;</codeblock>
+
+<!-- Repeat the same blurb + example to see the definition of a view, as in CREATE VIEW. -->
+
+ <p conref="../shared/impala_common.xml#common/describe_formatted_view"/>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ <xref href="impala_views.xml#views"/>, <xref href="impala_create_view.xml#create_view"/>,
+ <xref href="impala_drop_view.xml#drop_view"/>
+ </p>
+ </conbody>
+</concept>
[07/22] incubator-impala git commit: First try at porting over the
source files necessary for the Impala SQL Reference.
Posted by jr...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_operators.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_operators.xml b/docs/topics/impala_operators.xml
new file mode 100644
index 0000000..da3dab3
--- /dev/null
+++ b/docs/topics/impala_operators.xml
@@ -0,0 +1,1262 @@
+<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="operators">
+
+ <title>SQL Operators</title>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Data Analysts"/>
+ <data name="Category" value="Developers"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">operators</indexterm>
+ SQL operators are a class of comparison functions that are widely used within the <codeph>WHERE</codeph>
+ clauses of <codeph>SELECT</codeph> statements.
+ </p>
+
+ <p outputclass="toc inpage"/>
+ </conbody>
+
+ <concept rev="1.4.0" id="arithmetic_operators">
+
+ <title>Arithmetic Operators</title>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">arithmetic operators</indexterm>
+ The arithmetic operators use expressions with a left-hand argument, the operator, and then (in most cases)
+ a right-hand argument.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock><varname>left_hand_arg</varname> <varname>binary_operator</varname> <varname>right_hand_arg</varname>
+<varname>unary_operator</varname> <varname>single_arg</varname>
+</codeblock>
+
+ <ul>
+ <li>
+ <codeph>+</codeph> and <codeph>-</codeph>: Can be used either as unary or binary operators.
+ <ul>
+ <li>
+ <p>
+ With unary notation, such as <codeph>+5</codeph>, <codeph>-2.5</codeph>, or
+ <codeph>-<varname>col_name</varname></codeph>, they multiply their single numeric argument by
+ <codeph>+1</codeph> or <codeph>-1</codeph>. Therefore, unary <codeph>+</codeph> returns its
+ argument unchanged, while unary <codeph>-</codeph> flips the sign of its argument. Although you can
+ double up these operators in expressions such as <codeph>++5</codeph> (always positive) or
+ <codeph>-+2</codeph> or <codeph>+-2</codeph> (both always negative), you cannot double the unary
+ minus operator because <codeph>--</codeph> is interpreted as the start of a comment. (You can use a
+ double unary minus operator if you separate the <codeph>-</codeph> characters, for example with a
+ space or parentheses.)
+ </p>
+ </li>
+
+ <li>
+ <p>
+ With binary notation, such as <codeph>2+2</codeph>, <codeph>5-2.5</codeph>, or
+ <codeph><varname>col1</varname> + <varname>col2</varname></codeph>, they add or subtract
+ respectively the right-hand argument to (or from) the left-hand argument. Both arguments must be of
+ numeric types.
+ </p>
+ </li>
+ </ul>
+ </li>
+
+ <li>
+ <p>
+ <codeph>*</codeph> and <codeph>/</codeph>: Multiplication and division respectively. Both arguments
+ must be of numeric types.
+ </p>
+ <p>
+ When multiplying, the shorter argument is promoted if necessary (such as <codeph>SMALLINT</codeph> to
+ <codeph>INT</codeph> or <codeph>BIGINT</codeph>, or <codeph>FLOAT</codeph> to <codeph>DOUBLE</codeph>),
+ and then the result is promoted again to the next larger type. Thus, multiplying a
+ <codeph>TINYINT</codeph> and an <codeph>INT</codeph> produces a <codeph>BIGINT</codeph> result.
+ Multiplying a <codeph>FLOAT</codeph> and a <codeph>FLOAT</codeph> produces a <codeph>DOUBLE</codeph>
+ result. Multiplying a <codeph>FLOAT</codeph> and a <codeph>DOUBLE</codeph> or a <codeph>DOUBLE</codeph>
+ and a <codeph>DOUBLE</codeph> produces a <codeph>DECIMAL(38,17)</codeph>, because
+ <codeph>DECIMAL</codeph> values can represent much larger and more precise values than
+ <codeph>DOUBLE</codeph>.
+ </p>
+ <p>
+ When dividing, Impala always treats the arguments and result as <codeph>DOUBLE</codeph> values to avoid
+ losing precision. If you need to insert the results of a division operation into a
+ <codeph>FLOAT</codeph> column, use the <codeph>CAST()</codeph> function to convert the result to the
+ correct type.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ <codeph>%</codeph>: Modulo operator. Returns the remainder of the left-hand argument divided by the
+ right-hand argument. Both arguments must be of one of the integer types.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ <codeph>&</codeph>, <codeph>|</codeph>, <codeph>~</codeph>, and <codeph>^</codeph>: Bitwise operators that return the
+ logical AND, logical OR, <codeph>NOT</codeph>, or logical XOR (exclusive OR) of their argument values. Both arguments must be
+ of one of the integer types. If the arguments are of different type, the argument with the smaller type
+ is implicitly extended to match the argument with the larger type.
+ </p>
+ </li>
+ </ul>
+
+ <p>
+ You can chain a sequence of arithmetic expressions, optionally grouping them with parentheses.
+ </p>
+
+ <p>
+ The arithmetic operators generally do not have equivalent calling conventions using functional notation.
+ For example, prior to Impala 2.2.0 / CDH 5.4.0, there is no <codeph>MOD()</codeph> function equivalent to the <codeph>%</codeph> modulo
+ operator. Conversely, there are some arithmetic functions that do not have a corresponding operator. For
+ example, for exponentiation you use the <codeph>POW()</codeph> function, but there is no
+ <codeph>**</codeph> exponentiation operator. See <xref href="impala_math_functions.xml#math_functions"/>
+ for the arithmetic functions you can use.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/complex_types_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/complex_types_aggregation_explanation"/>
+
+ <p conref="../shared/impala_common.xml#common/complex_types_aggregation_example"/>
+
+ <p conref="../shared/impala_common.xml#common/complex_types_caveat_no_operator"/>
+
+ <p rev="2.3.0">
+ The following example shows how to do an arithmetic operation using a numeric field of a <codeph>STRUCT</codeph> type
+ that is an item within an <codeph>ARRAY</codeph> column. Once the scalar numeric value <codeph>R_NATIONKEY</codeph>
+ is extracted, it can be used in an arithmetic expression, such as multiplying by 10:
+ </p>
+
+<codeblock rev="2.3.0">
+-- The SMALLINT is a field within an array of structs.
+describe region;
++-------------+-------------------------+---------+
+| name | type | comment |
++-------------+-------------------------+---------+
+| r_regionkey | smallint | |
+| r_name | string | |
+| r_comment | string | |
+| r_nations | array<struct< | |
+| | n_nationkey:smallint, | |
+| | n_name:string, | |
+| | n_comment:string | |
+| | >> | |
++-------------+-------------------------+---------+
+
+-- When we refer to the scalar value using dot notation,
+-- we can use arithmetic and comparison operators on it
+-- like any other number.
+select r_name, nation.item.n_name, nation.item.n_nationkey * 10
+ from region, region.r_nations as nation
+where nation.item.n_nationkey < 5;
++-------------+-------------+------------------------------+
+| r_name | item.n_name | nation.item.n_nationkey * 10 |
++-------------+-------------+------------------------------+
+| AMERICA | CANADA | 30 |
+| AMERICA | BRAZIL | 20 |
+| AMERICA | ARGENTINA | 10 |
+| MIDDLE EAST | EGYPT | 40 |
+| AFRICA | ALGERIA | 0 |
++-------------+-------------+------------------------------+
+</codeblock>
+
+ </conbody>
+ </concept>
+
+ <concept id="between">
+
+ <title>BETWEEN Operator</title>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">BETWEEN operator</indexterm>
+ In a <codeph>WHERE</codeph> clause, compares an expression to both a lower and upper bound. The comparison
+ is successful if the expression is greater than or equal to the lower bound, and less than or equal to the
+ upper bound. If the bound values are switched, so the lower bound is greater than the upper bound, the
+ comparison does not match any values.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock><varname>expression</varname> BETWEEN <varname>lower_bound</varname> AND <varname>upper_bound</varname></codeblock>
+
+ <p>
+ <b>Data types:</b> Typically used with numeric data types. Works with any data type, although not very
+ practical for <codeph>BOOLEAN</codeph> values. (<codeph>BETWEEN false AND true</codeph> will match all
+ <codeph>BOOLEAN</codeph> values.) Use <codeph>CAST()</codeph> if necessary to ensure the lower and upper
+ bound values are compatible types. Call string or date/time functions if necessary to extract or transform
+ the relevant portion to compare, especially if the value can be transformed into a number.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+
+ <p>
+ Be careful when using short string operands. A longer string that starts with the upper bound value will
+ not be included, because it is considered greater than the upper bound. For example, <codeph>BETWEEN 'A'
+ and 'M'</codeph> would not match the string value <codeph>'Midway'</codeph>. Use functions such as
+ <codeph>upper()</codeph>, <codeph>lower()</codeph>, <codeph>substr()</codeph>, <codeph>trim()</codeph>, and
+ so on if necessary to ensure the comparison works as expected.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/complex_types_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/complex_types_caveat_no_operator"/>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+<codeblock>-- Retrieve data for January through June, inclusive.
+select c1 from t1 where month <b>between 1 and 6</b>;
+
+-- Retrieve data for names beginning with 'A' through 'M' inclusive.
+-- Only test the first letter to ensure all the values starting with 'M' are matched.
+-- Do a case-insensitive comparison to match names with various capitalization conventions.
+select last_name from customers where upper(substr(last_name,1,1)) <b>between 'A' and 'M'</b>;
+
+-- Retrieve data for only the first week of each month.
+select count(distinct visitor_id) from web_traffic where dayofmonth(when_viewed) <b>between 1 and 7</b>;</codeblock>
+
+ <p rev="2.3.0">
+ The following example shows how to do a <codeph>BETWEEN</codeph> comparison using a numeric field of a <codeph>STRUCT</codeph> type
+ that is an item within an <codeph>ARRAY</codeph> column. Once the scalar numeric value <codeph>R_NATIONKEY</codeph>
+ is extracted, it can be used in a comparison operator:
+ </p>
+
+<codeblock rev="2.3.0">
+-- The SMALLINT is a field within an array of structs.
+describe region;
++-------------+-------------------------+---------+
+| name | type | comment |
++-------------+-------------------------+---------+
+| r_regionkey | smallint | |
+| r_name | string | |
+| r_comment | string | |
+| r_nations | array<struct< | |
+| | n_nationkey:smallint, | |
+| | n_name:string, | |
+| | n_comment:string | |
+| | >> | |
++-------------+-------------------------+---------+
+
+-- When we refer to the scalar value using dot notation,
+-- we can use arithmetic and comparison operators on it
+-- like any other number.
+select r_name, nation.item.n_name, nation.item.n_nationkey
+from region, region.r_nations as nation
+where nation.item.n_nationkey between 3 and 5;
++-------------+-------------+------------------+
+| r_name | item.n_name | item.n_nationkey |
++-------------+-------------+------------------+
+| AMERICA | CANADA | 3 |
+| MIDDLE EAST | EGYPT | 4 |
+| AFRICA | ETHIOPIA | 5 |
++-------------+-------------+------------------+
+</codeblock>
+
+ </conbody>
+ </concept>
+
+ <concept id="comparison_operators">
+
+ <title>Comparison Operators</title>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">comparison operators</indexterm>
+ Impala supports the familiar comparison operators for checking equality and sort order for the column data
+ types:
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock><varname>left_hand_expression</varname> <varname>comparison_operator</varname> <varname>right_hand_expression</varname></codeblock>
+
+ <ul>
+ <li>
+ <codeph>=</codeph>, <codeph>!=</codeph>, <codeph><></codeph>: apply to all types.
+ </li>
+
+ <li>
+ <codeph><</codeph>, <codeph><=</codeph>, <codeph>></codeph>, <codeph>>=</codeph>: apply to
+ all types; for <codeph>BOOLEAN</codeph>, <codeph>TRUE</codeph> is considered greater than
+ <codeph>FALSE</codeph>.
+ </li>
+ </ul>
+
+ <p>
+ <b>Alternatives:</b>
+ </p>
+
+ <p>
+ The <codeph>IN</codeph> and <codeph>BETWEEN</codeph> operators provide shorthand notation for expressing
+ combinations of equality, less than, and greater than comparisons with a single operator.
+ </p>
+
+ <p>
+ Because comparing any value to <codeph>NULL</codeph> produces <codeph>NULL</codeph> rather than
+ <codeph>TRUE</codeph> or <codeph>FALSE</codeph>, use the <codeph>IS NULL</codeph> and <codeph>IS NOT
+ NULL</codeph> operators to check if a value is <codeph>NULL</codeph> or not.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/complex_types_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/complex_types_caveat_no_operator"/>
+
+ <p rev="2.3.0">
+ The following example shows how to do an arithmetic operation using a numeric field of a <codeph>STRUCT</codeph> type
+ that is an item within an <codeph>ARRAY</codeph> column. Once the scalar numeric value <codeph>R_NATIONKEY</codeph>
+ is extracted, it can be used with a comparison operator such as <codeph><</codeph>:
+ </p>
+
+<codeblock rev="2.3.0">
+-- The SMALLINT is a field within an array of structs.
+describe region;
++-------------+-------------------------+---------+
+| name | type | comment |
++-------------+-------------------------+---------+
+| r_regionkey | smallint | |
+| r_name | string | |
+| r_comment | string | |
+| r_nations | array<struct< | |
+| | n_nationkey:smallint, | |
+| | n_name:string, | |
+| | n_comment:string | |
+| | >> | |
++-------------+-------------------------+---------+
+
+-- When we refer to the scalar value using dot notation,
+-- we can use arithmetic and comparison operators on it
+-- like any other number.
+select r_name, nation.item.n_name, nation.item.n_nationkey
+from region, region.r_nations as nation
+where nation.item.n_nationkey < 5;
++-------------+-------------+------------------+
+| r_name | item.n_name | item.n_nationkey |
++-------------+-------------+------------------+
+| AMERICA | CANADA | 3 |
+| AMERICA | BRAZIL | 2 |
+| AMERICA | ARGENTINA | 1 |
+| MIDDLE EAST | EGYPT | 4 |
+| AFRICA | ALGERIA | 0 |
++-------------+-------------+------------------+
+</codeblock>
+
+ </conbody>
+ </concept>
+
+ <concept audience="Cloudera" rev="2.1.0" id="except">
+
+ <title>EXCEPT Operator</title>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">EXCEPT operator</indexterm>
+ </p>
+ </conbody>
+ </concept>
+
+ <concept rev="2.0.0" id="exists">
+
+ <title>EXISTS Operator</title>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">EXISTS operator</indexterm>
+ <indexterm audience="Cloudera">NOT EXISTS operator</indexterm>
+ The <codeph>EXISTS</codeph> operator tests whether a subquery returns any results.
+ You typically use it to find values from one table that have corresponding values in another table.
+ </p>
+
+ <p>
+ The converse, <codeph>NOT EXISTS</codeph>, helps to find all the values from one table that do not have any
+ corresponding values in another table.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock>EXISTS (<varname>subquery</varname>)
+NOT EXISTS (<varname>subquery</varname>)
+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+
+ <p>
+ The subquery can refer to a different table than the outer query block, or the same table. For example, you
+ might use <codeph>EXISTS</codeph> or <codeph>NOT EXISTS</codeph> to check the existence of parent/child
+ relationships between two columns of the same table.
+ </p>
+
+ <p>
+ You can also use operators and function calls within the subquery to test for kinds of relationships
+ other than strict equality. For example, you might use a call to <codeph>COUNT()</codeph> in the subquery
+ to check whether the number of matching values is higher or lower than some limit. You might call a UDF in
+ the subquery to check whether values in one table match a hashed representation of those same values in a
+ different table.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/null_blurb"/>
+
+ <p>
+ If the subquery returns any value at all (even <codeph>NULL</codeph>), <codeph>EXISTS</codeph> returns
+ <codeph>TRUE</codeph> and <codeph>NOT EXISTS</codeph> returns <codeph>FALSE</codeph>.
+ </p>
+
+ <p>
+ The following example shows how even when the subquery returns only <codeph>NULL</codeph> values,
+ <codeph>EXISTS</codeph> still returns <codeph>TRUE</codeph> and thus matches all the rows from the table in
+ the outer query block.
+ </p>
+
+<codeblock>[localhost:21000] > create table all_nulls (x int);
+[localhost:21000] > insert into all_nulls values (null), (null), (null);
+[localhost:21000] > select y from t2 where exists (select x from all_nulls);
++---+
+| y |
++---+
+| 2 |
+| 4 |
+| 6 |
++---+
+</codeblock>
+
+ <p>
+ However, if the table in the subquery is empty and so the subquery returns an empty result set,
+ <codeph>EXISTS</codeph> returns <codeph>FALSE</codeph>:
+ </p>
+
+<codeblock>[localhost:21000] > create table empty (x int);
+[localhost:21000] > select y from t2 where exists (select x from empty);
+[localhost:21000] >
+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/added_in_20"/>
+
+ <p conref="../shared/impala_common.xml#common/restrictions_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/subquery_no_limit"/>
+
+ <p>
+ The <codeph>NOT EXISTS</codeph> operator requires a correlated subquery.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/complex_types_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/complex_types_caveat_no_operator"/>
+
+<!-- To do: construct an EXISTS / NOT EXISTS example for complex types. -->
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+ <p>
+<!-- Maybe turn this into a conref if the same set of tables gets used for subqueries, EXISTS, other places. -->
+<!-- Yes, the material was reused under Subqueries for anti-joins. -->
+ The following examples refer to these simple tables containing small sets of integers or strings:
+<codeblock>[localhost:21000] > create table t1 (x int);
+[localhost:21000] > insert into t1 values (1), (2), (3), (4), (5), (6);
+
+[localhost:21000] > create table t2 (y int);
+[localhost:21000] > insert into t2 values (2), (4), (6);
+
+[localhost:21000] > create table t3 (z int);
+[localhost:21000] > insert into t3 values (1), (3), (5);
+
+[localhost:21000] > create table month_names (m string);
+[localhost:21000] > insert into month_names values
+ > ('January'), ('February'), ('March'),
+ > ('April'), ('May'), ('June'), ('July'),
+ > ('August'), ('September'), ('October'),
+ > ('November'), ('December');
+</codeblock>
+ </p>
+
+ <p>
+ The following example shows a correlated subquery that finds all the values in one table that exist in
+ another table. For each value <codeph>X</codeph> from <codeph>T1</codeph>, the query checks if the
+ <codeph>Y</codeph> column of <codeph>T2</codeph> contains an identical value, and the
+ <codeph>EXISTS</codeph> operator returns <codeph>TRUE</codeph> or <codeph>FALSE</codeph> as appropriate in
+ each case.
+ </p>
+
+<codeblock>[localhost:21000] > select x from t1 where exists (select y from t2 where t1.x = y);
++---+
+| x |
++---+
+| 2 |
+| 4 |
+| 6 |
++---+
+</codeblock>
+
+ <p>
+ An uncorrelated query is less interesting in this case. Because the subquery always returns
+ <codeph>TRUE</codeph>, all rows from <codeph>T1</codeph> are returned. If the table contents were changed
+ so that the subquery did not match any rows, none of the rows from <codeph>T1</codeph> would be returned.
+ </p>
+
+<codeblock>[localhost:21000] > select x from t1 where exists (select y from t2 where y > 5);
++---+
+| x |
++---+
+| 1 |
+| 2 |
+| 3 |
+| 4 |
+| 5 |
+| 6 |
++---+
+</codeblock>
+
+ <p>
+ The following example shows how an uncorrelated subquery can test for the existence of some condition
+ within a table. By using <codeph>LIMIT 1</codeph> or an aggregate function, the query returns a single
+ result or no result based on whether the subquery matches any rows. Here, we know that <codeph>T1</codeph>
+ and <codeph>T2</codeph> contain some even numbers, but <codeph>T3</codeph> does not.
+ </p>
+
+<codeblock>[localhost:21000] > select "contains an even number" from t1 where exists (select x from t1 where x % 2 = 0) limit 1;
++---------------------------+
+| 'contains an even number' |
++---------------------------+
+| contains an even number |
++---------------------------+
+[localhost:21000] > select "contains an even number" as assertion from t1 where exists (select x from t1 where x % 2 = 0) limit 1;
++-------------------------+
+| assertion |
++-------------------------+
+| contains an even number |
++-------------------------+
+[localhost:21000] > select "contains an even number" as assertion from t2 where exists (select x from t2 where y % 2 = 0) limit 1;
+ERROR: AnalysisException: couldn't resolve column reference: 'x'
+[localhost:21000] > select "contains an even number" as assertion from t2 where exists (select y from t2 where y % 2 = 0) limit 1;
++-------------------------+
+| assertion |
++-------------------------+
+| contains an even number |
++-------------------------+
+[localhost:21000] > select "contains an even number" as assertion from t3 where exists (select z from t3 where z % 2 = 0) limit 1;
+[localhost:21000] >
+</codeblock>
+
+ <p>
+ The following example finds numbers in one table that are 1 greater than numbers from another table. The
+ <codeph>EXISTS</codeph> notation is simpler than an equivalent <codeph>CROSS JOIN</codeph> between the
+ tables. (The example then also illustrates how the same test could be performed using an
+ <codeph>IN</codeph> operator.)
+ </p>
+
+<codeblock>[localhost:21000] > select x from t1 where exists (select y from t2 where x = y + 1);
++---+
+| x |
++---+
+| 3 |
+| 5 |
++---+
+[localhost:21000] > select x from t1 where x in (select y + 1 from t2);
++---+
+| x |
++---+
+| 3 |
+| 5 |
++---+
+</codeblock>
+
+ <p>
+ The following example finds values from one table that do not exist in another table.
+ </p>
+
+<codeblock>[localhost:21000] > select x from t1 where not exists (select y from t2 where x = y);
++---+
+| x |
++---+
+| 1 |
+| 3 |
+| 5 |
++---+
+</codeblock>
+
+ <p>
+ The following example uses the <codeph>NOT EXISTS</codeph> operator to find all the leaf nodes in
+ tree-structured data. This simplified <q>tree of life</q> has multiple levels (class, order, family, and so
+ on), with each item pointing upward through a <codeph>PARENT</codeph> pointer. The example runs an outer
+ query and a subquery on the same table, returning only those items whose <codeph>ID</codeph> value is
+ <i>not</i> referenced by the <codeph>PARENT</codeph> of any other item.
+ </p>
+
+<codeblock>[localhost:21000] > create table tree (id int, parent int, name string);
+[localhost:21000] > insert overwrite tree values
+ > (0, null, "animals"),
+ > (1, 0, "placentals"),
+ > (2, 0, "marsupials"),
+ > (3, 1, "bats"),
+ > (4, 1, "cats"),
+ > (5, 2, "kangaroos"),
+ > (6, 4, "lions"),
+ > (7, 4, "tigers"),
+ > (8, 5, "red kangaroo"),
+ > (9, 2, "wallabies");
+[localhost:21000] > select name as "leaf node" from tree one
+ > where not exists (select parent from tree two where one.id = two.parent);
++--------------+
+| leaf node |
++--------------+
+| bats |
+| lions |
+| tigers |
+| red kangaroo |
+| wallabies |
++--------------+
+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ <xref href="impala_subqueries.xml#subqueries"/>
+ </p>
+ </conbody>
+ </concept>
+
+ <concept id="in">
+
+ <title>IN Operator</title>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">IN operator</indexterm>
+ <indexterm audience="Cloudera">NOT IN operator</indexterm>
+ The <codeph>IN</codeph> operator compares an argument value to a set of values, and returns
+ <codeph>TRUE</codeph> if the argument matches any value in the set. The <codeph>NOT IN</codeph> operator
+ reverses the comparison, and checks if the argument value is not part of a set of values.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock rev="2.0.0"><varname>expression</varname> IN (<varname>expression</varname> [, <varname>expression</varname>])
+<varname>expression</varname> IN (<varname>subquery</varname>)
+
+<varname>expression</varname> NOT IN (<varname>expression</varname> [, <varname>expression</varname>])
+<varname>expression</varname> NOT IN (<varname>subquery</varname>)
+</codeblock>
+
+ <p>
+ The left-hand expression and the set of comparison values must be of compatible types.
+ </p>
+
+ <p>
+ The left-hand expression must consist only of a single value, not a tuple. Although the left-hand
+ expression is typically a column name, it could also be some other value. For example, the
+ <codeph>WHERE</codeph> clauses <codeph>WHERE id IN (5)</codeph> and <codeph>WHERE 5 IN (id)</codeph>
+ produce the same results.
+ </p>
+
+ <p>
+ The set of values to check against can be specified as constants, function calls, column names, or other
+ expressions in the query text. When the values are listed explicitly, the maximum number of expressions is
+ 10,000.
+ </p>
+
+ <p rev="2.0.0">
+ In Impala 2.0 and higher, the set of values can also be generated by a subquery. <codeph>IN</codeph> can
+ evaluate an unlimited number of results using a subquery.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+
+ <p>
+ Any expression using the <codeph>IN</codeph> operator could be rewritten as a series of equality tests
+ connected with <codeph>OR</codeph>, but the <codeph>IN</codeph> syntax is often clearer, more concise, and
+ easier for Impala to optimize. For example, with partitioned tables, queries frequently use
+ <codeph>IN</codeph> clauses to filter data by comparing the partition key columns to specific values.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/null_blurb"/>
+
+ <p>
+ If there really is a matching non-null value, <codeph>IN</codeph> returns <codeph>TRUE</codeph>:
+ </p>
+
+<codeblock>[localhost:21000] > select 1 in (1,null,2,3);
++----------------------+
+| 1 in (1, null, 2, 3) |
++----------------------+
+| true |
++----------------------+
+[localhost:21000] > select 1 not in (1,null,2,3);
++--------------------------+
+| 1 not in (1, null, 2, 3) |
++--------------------------+
+| false |
++--------------------------+
+</codeblock>
+
+ <p>
+ If the searched value is not found in the comparison values, and the comparison values include
+ <codeph>NULL</codeph>, the result is <codeph>NULL</codeph>:
+ </p>
+
+<codeblock>[localhost:21000] > select 5 in (1,null,2,3);
++----------------------+
+| 5 in (1, null, 2, 3) |
++----------------------+
+| NULL |
++----------------------+
+[localhost:21000] > select 5 not in (1,null,2,3);
++--------------------------+
+| 5 not in (1, null, 2, 3) |
++--------------------------+
+| NULL |
++--------------------------+
+[localhost:21000] > select 1 in (null);
++-------------+
+| 1 in (null) |
++-------------+
+| NULL |
++-------------+
+[localhost:21000] > select 1 not in (null);
++-----------------+
+| 1 not in (null) |
++-----------------+
+| NULL |
++-----------------+
+</codeblock>
+
+ <p>
+ If the left-hand argument is <codeph>NULL</codeph>, <codeph>IN</codeph> always returns
+ <codeph>NULL</codeph>. This rule applies even if the comparison values include <codeph>NULL</codeph>.
+ </p>
+
+<codeblock>[localhost:21000] > select null in (1,2,3);
++-------------------+
+| null in (1, 2, 3) |
++-------------------+
+| NULL |
++-------------------+
+[localhost:21000] > select null not in (1,2,3);
++-----------------------+
+| null not in (1, 2, 3) |
++-----------------------+
+| NULL |
++-----------------------+
+[localhost:21000] > select null in (null);
++----------------+
+| null in (null) |
++----------------+
+| NULL |
++----------------+
+[localhost:21000] > select null not in (null);
++--------------------+
+| null not in (null) |
++--------------------+
+| NULL |
++--------------------+
+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/enhanced_in_20"/>
+
+ <p conref="../shared/impala_common.xml#common/complex_types_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/complex_types_caveat_no_operator"/>
+
+ <p rev="2.3.0">
+ The following example shows how to do an arithmetic operation using a numeric field of a <codeph>STRUCT</codeph> type
+ that is an item within an <codeph>ARRAY</codeph> column. Once the scalar numeric value <codeph>R_NATIONKEY</codeph>
+ is extracted, it can be used in an arithmetic expression, such as multiplying by 10:
+ </p>
+
+<codeblock rev="2.3.0">
+-- The SMALLINT is a field within an array of structs.
+describe region;
++-------------+-------------------------+---------+
+| name | type | comment |
++-------------+-------------------------+---------+
+| r_regionkey | smallint | |
+| r_name | string | |
+| r_comment | string | |
+| r_nations | array<struct< | |
+| | n_nationkey:smallint, | |
+| | n_name:string, | |
+| | n_comment:string | |
+| | >> | |
++-------------+-------------------------+---------+
+
+-- When we refer to the scalar value using dot notation,
+-- we can use arithmetic and comparison operators on it
+-- like any other number.
+select r_name, nation.item.n_name, nation.item.n_nationkey
+from region, region.r_nations as nation
+where nation.item.n_nationkey in (1,3,5);
++---------+-------------+------------------+
+| r_name | item.n_name | item.n_nationkey |
++---------+-------------+------------------+
+| AMERICA | CANADA | 3 |
+| AMERICA | ARGENTINA | 1 |
+| AFRICA | ETHIOPIA | 5 |
++---------+-------------+------------------+
+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/restrictions_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/subquery_no_limit"/>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+<codeblock>-- Using IN is concise and self-documenting.
+SELECT * FROM t1 WHERE c1 IN (1,2,10);
+-- Equivalent to series of = comparisons ORed together.
+SELECT * FROM t1 WHERE c1 = 1 OR c1 = 2 OR c1 = 10;
+
+SELECT c1 AS "starts with vowel" FROM t2 WHERE upper(substr(c1,1,1)) IN ('A','E','I','O','U');
+
+SELECT COUNT(DISTINCT(visitor_id)) FROM web_traffic WHERE month IN ('January','June','July');</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ <xref href="impala_subqueries.xml#subqueries"/>
+ </p>
+ </conbody>
+ </concept>
+
+ <concept audience="Cloudera" rev="2.1.0" id="intersect">
+
+ <title>INTERSECT Operator</title>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">INTERSECT operator</indexterm>
+ </p>
+ </conbody>
+ </concept>
+
+ <concept id="is_null">
+
+ <title>IS NULL Operator</title>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">IS NULL operator</indexterm>
+ <indexterm audience="Cloudera">IS NOT NULL operator</indexterm>
+ The <codeph>IS NULL</codeph> operator, and its converse the <codeph>IS NOT NULL</codeph> operator, test
+ whether a specified value is <codeph><xref href="impala_literals.xml#null">NULL</xref></codeph>. Because
+ using <codeph>NULL</codeph> with any of the other comparison operators such as <codeph>=</codeph> or
+ <codeph>!=</codeph> also returns <codeph>NULL</codeph> rather than <codeph>TRUE</codeph> or
+ <codeph>FALSE</codeph>, you use a special-purpose comparison operator to check for this special condition.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock><varname>expression</varname> IS NULL
+<varname>expression</varname> IS NOT NULL
+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+
+ <p>
+ In many cases, <codeph>NULL</codeph> values indicate some incorrect or incomplete processing during data
+ ingestion or conversion. You might check whether any values in a column are <codeph>NULL</codeph>, and if
+ so take some followup action to fill them in.
+ </p>
+
+ <p>
+ With sparse data, often represented in <q>wide</q> tables, it is common for most values to be
+ <codeph>NULL</codeph> with only an occasional non-<codeph>NULL</codeph> value. In those cases, you can use
+ the <codeph>IS NOT NULL</codeph> operator to identify the rows containing any data at all for a particular
+ column, regardless of the actual value.
+ </p>
+
+ <p>
+ With a well-designed database schema, effective use of <codeph>NULL</codeph> values and <codeph>IS
+ NULL</codeph> and <codeph>IS NOT NULL</codeph> operators can save having to design custom logic around
+ special values such as 0, -1, <codeph>'N/A'</codeph>, empty string, and so on. <codeph>NULL</codeph> lets
+ you distinguish between a value that is known to be 0, false, or empty, and a truly unknown value.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+<codeblock>-- If this value is non-zero, something is wrong.
+select count(*) from employees where employee_id is null;
+
+-- With data from disparate sources, some fields might be blank.
+-- Not necessarily an error condition.
+select count(*) from census where household_income is null;
+
+-- Sometimes we expect fields to be null, and followup action
+-- is needed when they are not.
+select count(*) from web_traffic where weird_http_code is not null;</codeblock>
+ </conbody>
+ </concept>
+
+ <concept id="like">
+
+ <title>LIKE Operator</title>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">LIKE operator</indexterm>
+ A comparison operator for <codeph>STRING</codeph> data, with basic wildcard capability using
+ <codeph>_</codeph> to match a single character and <codeph>%</codeph> to match multiple characters. The
+ argument expression must match the entire string value. Typically, it is more efficient to put any
+ <codeph>%</codeph> wildcard match at the end of the string.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock><varname>string_expression</varname> LIKE <varname>wildcard_expression</varname>
+<varname>string_expression</varname> NOT LIKE <varname>wildcard_expression</varname>
+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/complex_types_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/complex_types_caveat_no_operator"/>
+
+<!-- To do: construct a LIKE example for complex types. -->
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+<codeblock>select distinct c_last_name from customer where c_last_name like 'Mc%' or c_last_name like 'Mac%';
+select count(c_last_name) from customer where c_last_name like 'M%';
+select c_email_address from customer where c_email_address like '%.edu';
+
+-- We can find 4-letter names beginning with 'M' by calling functions...
+select distinct c_last_name from customer where length(c_last_name) = 4 and substr(c_last_name,1,1) = 'M';
+-- ...or in a more readable way by matching M followed by exactly 3 characters.
+select distinct c_last_name from customer where c_last_name like 'M___';</codeblock>
+
+ <p>
+ For a more general kind of search operator using regular expressions, see
+ <xref href="impala_operators.xml#regexp"/>.
+ </p>
+ </conbody>
+ </concept>
+
+ <concept id="logical_operators">
+
+ <title>Logical Operators</title>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">logical operators</indexterm>
+ Logical operators return a <codeph>BOOLEAN</codeph> value, based on a binary or unary logical operation
+ between arguments that are also Booleans. Typically, the argument expressions use
+ <xref href="impala_operators.xml#comparison_operators">comparison operators</xref>.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock><varname>boolean_expression</varname> <varname>binary_logical_operator</varname> <varname>boolean_expression</varname>
+<varname>unary_logical_operator</varname> <varname>boolean_expression</varname>
+</codeblock>
+
+ <p>
+ The Impala logical operators are:
+ </p>
+
+ <ul>
+ <li>
+ <codeph>AND</codeph>: A binary operator that returns <codeph>true</codeph> if its left-hand and
+ right-hand arguments both evaluate to <codeph>true</codeph>, <codeph>NULL</codeph> if either argument is
+ <codeph>NULL</codeph>, and <codeph>false</codeph> otherwise.
+ </li>
+
+ <li>
+ <codeph>OR</codeph>: A binary operator that returns <codeph>true</codeph> if either of its left-hand and
+ right-hand arguments evaluate to <codeph>true</codeph>, <codeph>NULL</codeph> if one argument is
+ <codeph>NULL</codeph> and the other is either <codeph>NULL</codeph> or <codeph>false</codeph>, and
+ <codeph>false</codeph> otherwise.
+ </li>
+
+ <li>
+ <codeph>NOT</codeph>: A unary operator that flips the state of a Boolean expression from
+ <codeph>true</codeph> to <codeph>false</codeph>, or <codeph>false</codeph> to <codeph>true</codeph>. If
+ the argument expression is <codeph>NULL</codeph>, the result remains <codeph>NULL</codeph>. (When
+ <codeph>NOT</codeph> is used this way as a unary logical operator, it works differently than the
+ <codeph>IS NOT NULL</codeph> comparison operator, which returns <codeph>true</codeph> when applied to a
+ <codeph>NULL</codeph>.)
+ </li>
+ </ul>
+
+ <p conref="../shared/impala_common.xml#common/complex_types_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/complex_types_caveat_no_operator"/>
+
+ <p rev="2.3.0">
+ The following example shows how to do an arithmetic operation using a numeric field of a <codeph>STRUCT</codeph> type
+ that is an item within an <codeph>ARRAY</codeph> column. Once the scalar numeric value <codeph>R_NATIONKEY</codeph>
+ is extracted, it can be used in an arithmetic expression, such as multiplying by 10:
+ </p>
+
+<codeblock rev="2.3.0">
+-- The SMALLINT is a field within an array of structs.
+describe region;
++-------------+-------------------------+---------+
+| name | type | comment |
++-------------+-------------------------+---------+
+| r_regionkey | smallint | |
+| r_name | string | |
+| r_comment | string | |
+| r_nations | array&lt;struct&lt; | |
+| | n_nationkey:smallint, | |
+| | n_name:string, | |
+| | n_comment:string | |
+| | >> | |
++-------------+-------------------------+---------+
+
+-- When we refer to the scalar value using dot notation,
+-- we can use arithmetic and comparison operators on it
+-- like any other number.
+select r_name, nation.item.n_name, nation.item.n_nationkey
+ from region, region.r_nations as nation
+where
+ nation.item.n_nationkey between 3 and 5
+ or nation.item.n_nationkey &lt; 15;
++-------------+----------------+------------------+
+| r_name | item.n_name | item.n_nationkey |
++-------------+----------------+------------------+
+| EUROPE | UNITED KINGDOM | 23 |
+| EUROPE | RUSSIA | 22 |
+| EUROPE | ROMANIA | 19 |
+| ASIA | VIETNAM | 21 |
+| ASIA | CHINA | 18 |
+| AMERICA | UNITED STATES | 24 |
+| AMERICA | PERU | 17 |
+| AMERICA | CANADA | 3 |
+| MIDDLE EAST | SAUDI ARABIA | 20 |
+| MIDDLE EAST | EGYPT | 4 |
+| AFRICA | MOZAMBIQUE | 16 |
+| AFRICA | ETHIOPIA | 5 |
++-------------+----------------+------------------+
+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+ <p>
+ These examples demonstrate the <codeph>AND</codeph> operator:
+ </p>
+
+<codeblock>[localhost:21000] > select true and true;
++---------------+
+| true and true |
++---------------+
+| true |
++---------------+
+[localhost:21000] > select true and false;
++----------------+
+| true and false |
++----------------+
+| false |
++----------------+
+[localhost:21000] > select false and false;
++-----------------+
+| false and false |
++-----------------+
+| false |
++-----------------+
+[localhost:21000] > select true and null;
++---------------+
+| true and null |
++---------------+
+| NULL |
++---------------+
+[localhost:21000] > select (10 > 2) and (6 != 9);
++-----------------------+
+| (10 > 2) and (6 != 9) |
++-----------------------+
+| true |
++-----------------------+
+</codeblock>
+
+ <p>
+ These examples demonstrate the <codeph>OR</codeph> operator:
+ </p>
+
+<codeblock>[localhost:21000] > select true or true;
++--------------+
+| true or true |
++--------------+
+| true |
++--------------+
+[localhost:21000] > select true or false;
++---------------+
+| true or false |
++---------------+
+| true |
++---------------+
+[localhost:21000] > select false or false;
++----------------+
+| false or false |
++----------------+
+| false |
++----------------+
+[localhost:21000] > select true or null;
++--------------+
+| true or null |
++--------------+
+| true |
++--------------+
+[localhost:21000] > select null or true;
++--------------+
+| null or true |
++--------------+
+| true |
++--------------+
+[localhost:21000] > select false or null;
++---------------+
+| false or null |
++---------------+
+| NULL |
++---------------+
+[localhost:21000] > select (1 = 1) or ('hello' = 'world');
++--------------------------------+
+| (1 = 1) or ('hello' = 'world') |
++--------------------------------+
+| true |
++--------------------------------+
+[localhost:21000] > select (2 + 2 != 4) or (-1 > 0);
++--------------------------+
+| (2 + 2 != 4) or (-1 > 0) |
++--------------------------+
+| false |
++--------------------------+
+</codeblock>
+
+ <p>
+ These examples demonstrate the <codeph>NOT</codeph> operator:
+ </p>
+
+<codeblock>[localhost:21000] > select not true;
++----------+
+| not true |
++----------+
+| false |
++----------+
+[localhost:21000] > select not false;
++-----------+
+| not false |
++-----------+
+| true |
++-----------+
+[localhost:21000] > select not null;
++----------+
+| not null |
++----------+
+| NULL |
++----------+
+[localhost:21000] > select not (1=1);
++-------------+
+| not (1 = 1) |
++-------------+
+| false |
++-------------+
+</codeblock>
+ </conbody>
+ </concept>
+
+ <concept id="regexp">
+
+ <title>REGEXP Operator</title>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">REGEXP operator</indexterm>
+ Tests whether a value matches a regular expression. Uses the POSIX regular expression syntax where
+ <codeph>^</codeph> and <codeph>$</codeph> match the beginning and end of the string, <codeph>.</codeph>
+ represents any single character, <codeph>*</codeph> represents a sequence of zero or more items,
+ <codeph>+</codeph> represents a sequence of one or more items, <codeph>?</codeph> produces a non-greedy
+ match, and so on.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+<codeblock><varname>string_expression</varname> REGEXP <varname>regular_expression</varname>
+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+
+ <p>
+ The regular expression must match the entire value, not just occur somewhere inside it. Use
+ <codeph>.*</codeph> at the beginning and/or the end if you only need to match characters anywhere in the
+ middle. Thus, the <codeph>^</codeph> and <codeph>$</codeph> atoms are often redundant, although you might
+ already have them in your expression strings that you reuse from elsewhere.
+ </p>
+
+ <p>
+ The <codeph>RLIKE</codeph> operator is a synonym for <codeph>REGEXP</codeph>.
+ </p>
+
+ <p>
+ The <codeph>|</codeph> symbol is the alternation operator, typically used within <codeph>()</codeph> to
+ match different sequences. The <codeph>()</codeph> groups do not allow backreferences. To retrieve the part
+ of a value matched within a <codeph>()</codeph> section, use the
+ <codeph><xref href="impala_string_functions.xml#string_functions/regexp_extract">regexp_extract()</xref></codeph>
+ built-in function.
+ </p>
+
+ <note rev="1.3.1">
+ <p conref="../shared/impala_common.xml#common/regexp_matching"/>
+ </note>
+
+ <p conref="../shared/impala_common.xml#common/regexp_re2"/>
+
+ <p conref="../shared/impala_common.xml#common/regexp_re2_warning"/>
+
+ <p conref="../shared/impala_common.xml#common/complex_types_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/complex_types_caveat_no_operator"/>
+
+<!-- To do: construct a REGEXP example for complex types. -->
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+ <p>
+ The following examples demonstrate the identical syntax for the <codeph>REGEXP</codeph> and
+ <codeph>RLIKE</codeph> operators.
+ </p>
+
+<!-- Same examples shown for both REGEXP and RLIKE operators. -->
+
+<codeblock conref="../shared/impala_common.xml#common/regexp_rlike_examples"/>
+ </conbody>
+ </concept>
+
+ <concept id="rlike">
+
+ <title>RLIKE Operator</title>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">RLIKE operator</indexterm>
+ Synonym for the <codeph>REGEXP</codeph> operator. See <xref href="impala_operators.xml#regexp"/> for
+ details.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+ <p>
+ The following examples demonstrate the identical syntax for the <codeph>REGEXP</codeph> and
+ <codeph>RLIKE</codeph> operators.
+ </p>
+
+<!-- Same examples shown for both REGEXP and RLIKE operators. -->
+
+<codeblock conref="../shared/impala_common.xml#common/regexp_rlike_examples"/>
+ </conbody>
+ </concept>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_order_by.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_order_by.xml b/docs/topics/impala_order_by.xml
new file mode 100644
index 0000000..f3042e5
--- /dev/null
+++ b/docs/topics/impala_order_by.xml
@@ -0,0 +1,316 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="order_by">
+
+ <title>ORDER BY Clause</title>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Querying"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ The familiar <codeph>ORDER BY</codeph> clause of a <codeph>SELECT</codeph> statement sorts the result set
+ based on the values from one or more columns.
+ </p>
+
+ <p>
+ For distributed queries, this is a relatively expensive operation, because the entire result set must be
+ produced and transferred to one node before the sorting can happen. This can require more memory capacity
+ than a query without <codeph>ORDER BY</codeph>. Even if the query takes approximately the same time to finish
+ with or without the <codeph>ORDER BY</codeph> clause, subjectively it can appear slower because no results
+ are available until all processing is finished, rather than results coming back gradually as rows matching
+ the <codeph>WHERE</codeph> clause are found. Therefore, if you only need the first N results from the sorted
+ result set, also include the <codeph>LIMIT</codeph> clause, which reduces network overhead and the memory
+ requirement on the coordinator node.
+ </p>
+
+ <note>
+ <p rev="1.4.0 obwl">
+ In Impala 1.4.0 and higher, the <codeph>LIMIT</codeph> clause is now optional (rather than required) for
+ queries that use the <codeph>ORDER BY</codeph> clause. Impala automatically uses a temporary disk work area
+ to perform the sort if the sort operation would otherwise exceed the Impala memory limit for a particular
+ data node.
+ </p>
+ </note>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+ <p>
+ The full syntax for the <codeph>ORDER BY</codeph> clause is:
+ </p>
+
+<codeblock rev="1.2.1">ORDER BY <varname>col_ref</varname> [, <varname>col_ref</varname> ...] [ASC | DESC] [NULLS FIRST | NULLS LAST]
+
+col_ref ::= <varname>column_name</varname> | <varname>integer_literal</varname>
+</codeblock>
+
+ <p>
+ Although the most common usage is <codeph>ORDER BY <varname>column_name</varname></codeph>, you can also
+ specify <codeph>ORDER BY 1</codeph> to sort by the first column of the result set, <codeph>ORDER BY
+ 2</codeph> to sort by the second column, and so on. The number must be a numeric literal, not some other kind
+ of constant expression. (If the argument is some other expression, even a <codeph>STRING</codeph> value, the
+ query succeeds but the order of results is undefined.)
+ </p>
+
+ <p>
+ <codeph>ORDER BY <varname>column_number</varname></codeph> can only be used when the query explicitly lists
+ the columns in the <codeph>SELECT</codeph> list, not with <codeph>SELECT *</codeph> queries.
+ </p>
+
+ <p>
+ <b>Ascending and descending sorts:</b>
+ </p>
+
+ <p>
+ The default sort order (the same as using the <codeph>ASC</codeph> keyword) puts the smallest values at the
+ start of the result set, and the largest values at the end. Specifying the <codeph>DESC</codeph> keyword
+ reverses that order.
+ </p>
+
+ <p>
+ <b>Sort order for NULL values:</b>
+ </p>
+
+ <p rev="1.2.1">
+ See <xref href="impala_literals.xml#null"/> for details about how <codeph>NULL</codeph> values are positioned
+ in the sorted result set, and how to use the <codeph>NULLS FIRST</codeph> and <codeph>NULLS LAST</codeph>
+ clauses. (The sort position for <codeph>NULL</codeph> values in <codeph>ORDER BY ... DESC</codeph> queries is
+ changed in Impala 1.2.1 and higher to be more standards-compliant, and the <codeph>NULLS FIRST</codeph> and
+ <codeph>NULLS LAST</codeph> keywords are new in Impala 1.2.1.)
+ </p>
+
+ <p rev="obwl" conref="../shared/impala_common.xml#common/order_by_limit"/>
+
+ <!-- Good to show an example of cases where ORDER BY does and doesn't work with complex types. -->
+ <p conref="../shared/impala_common.xml#common/complex_types_blurb"/>
+
+ <p rev="2.3.0">
+ In CDH 5.5 / Impala 2.3 and higher, the complex data types <codeph>STRUCT</codeph>,
+ <codeph>ARRAY</codeph>, and <codeph>MAP</codeph> are available. These columns cannot
+ be referenced directly in the <codeph>ORDER BY</codeph> clause.
+ When you query a complex type column, you use join notation to <q>unpack</q> the elements
+ of the complex type, and within the join query you can include an <codeph>ORDER BY</codeph>
+ clause to control the order in the result set of the scalar elements from the complex type.
+ See <xref href="impala_complex_types.xml#complex_types"/> for details about Impala support for complex types.
+ </p>
+
+ <p>
+ The following query shows how a complex type column cannot be directly used in an <codeph>ORDER BY</codeph> clause:
+ </p>
+
+<codeblock>CREATE TABLE games (id BIGINT, score ARRAY &lt;BIGINT&gt;) STORED AS PARQUET;
+...use LOAD DATA to load externally created Parquet files into the table...
+SELECT id FROM games ORDER BY score DESC;
+ERROR: AnalysisException: ORDER BY expression 'score' with complex type 'ARRAY&lt;BIGINT&gt;' is not supported.
+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+ <p>
+ The following query retrieves the user ID and score, only for scores greater than one million,
+ with the highest scores for each user listed first.
+ Because the individual array elements are now represented as separate rows in the result set,
+ they can be used in the <codeph>ORDER BY</codeph> clause, referenced using the <codeph>ITEM</codeph>
+ pseudocolumn that represents each array element.
+ </p>
+
+<codeblock>SELECT id, item FROM games, games.score
+ WHERE item > 1000000
+ORDER BY id, item desc;
+</codeblock>
+
+ <p>
+ The following queries use similar <codeph>ORDER BY</codeph> techniques with variations of the <codeph>GAMES</codeph>
+ table, where the complex type is an <codeph>ARRAY</codeph> containing <codeph>STRUCT</codeph> or <codeph>MAP</codeph>
+ elements to represent additional details about each game that was played.
+ For an array of structures, the fields of the structure are referenced as <codeph>ITEM.<varname>field_name</varname></codeph>.
+ For an array of maps, the keys and values within each array element are referenced as <codeph>ITEM.KEY</codeph>
+ and <codeph>ITEM.VALUE</codeph>.
+ </p>
+
+<codeblock>CREATE TABLE games2 (id BIGINT, play array &lt; struct &lt;game_name: string, score: BIGINT, high_score: boolean&gt; &gt;) STORED AS PARQUET;
+...use LOAD DATA to load externally created Parquet files into the table...
+SELECT id, item.game_name, item.score FROM games2, games2.play
+ WHERE item.score > 1000000
+ORDER BY id, item.score DESC;
+
+CREATE TABLE games3 (id BIGINT, play ARRAY &lt; MAP &lt;STRING, BIGINT&gt; &gt;) STORED AS PARQUET;
+...use LOAD DATA to load externally created Parquet files into the table...
+SELECT id, info.key AS k, info.value AS v from games3, games3.play AS plays, games3.play.item AS info
+ WHERE info.KEY = 'score' AND info.VALUE > 1000000
+ORDER BY id, info.value desc;
+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+
+ <p>
+ Although the <codeph>LIMIT</codeph> clause is now optional on <codeph>ORDER BY</codeph> queries, if your
+ query only needs some number of rows that you can predict in advance, use the <codeph>LIMIT</codeph> clause
+ to reduce unnecessary processing. For example, if the query has a clause <codeph>LIMIT 10</codeph>, each data
+ node sorts its portion of the relevant result set and only returns 10 rows to the coordinator node. The
+ coordinator node picks the 10 highest or lowest row values out of this small intermediate result set.
+ </p>
+
+ <p>
+ If an <codeph>ORDER BY</codeph> clause is applied to an early phase of query processing, such as a subquery
+ or a view definition, Impala ignores the <codeph>ORDER BY</codeph> clause. To get ordered results from a
+ subquery or view, apply an <codeph>ORDER BY</codeph> clause to the outermost or final <codeph>SELECT</codeph>
+ level.
+ </p>
+
+ <p>
+ <codeph>ORDER BY</codeph> is often used in combination with <codeph>LIMIT</codeph> to perform <q>top-N</q>
+ queries:
+ </p>
+
+<codeblock>SELECT user_id AS "Top 10 Visitors", SUM(page_views) FROM web_stats
+ GROUP BY user_id
+ ORDER BY SUM(page_views) DESC LIMIT 10;
+</codeblock>
+
+ <p>
+ <codeph>ORDER BY</codeph> is sometimes used in combination with <codeph>OFFSET</codeph> and
+ <codeph>LIMIT</codeph> to paginate query results, although it is relatively inefficient to issue multiple
+ queries like this against the large tables typically used with Impala:
+ </p>
+
+<codeblock>SELECT page_title AS "Page 1 of search results", page_url FROM search_content
+ WHERE LOWER(page_title) LIKE '%game%'
+ ORDER BY page_title LIMIT 10 OFFSET 0;
+SELECT page_title AS "Page 2 of search results", page_url FROM search_content
+ WHERE LOWER(page_title) LIKE '%game%'
+ ORDER BY page_title LIMIT 10 OFFSET 10;
+SELECT page_title AS "Page 3 of search results", page_url FROM search_content
+ WHERE LOWER(page_title) LIKE '%game%'
+ ORDER BY page_title LIMIT 10 OFFSET 20;
+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/internals_blurb"/>
+
+ <p>
+ Impala sorts the intermediate results of an <codeph>ORDER BY</codeph> clause in memory whenever practical. In
+ a cluster of N data nodes, each node sorts roughly 1/Nth of the result set, the exact proportion varying
+ depending on how the data matching the query is distributed in HDFS.
+ </p>
+
+ <p>
+ If the size of the sorted intermediate result set on any data node would cause the query to exceed the Impala
+ memory limit, Impala sorts as much as practical in memory, then writes partially sorted data to disk. (This
+ technique is known in industry terminology as <q>external sorting</q> and <q>spilling to disk</q>.) As each
+ 8 MB batch of data is written to disk, Impala frees the corresponding memory to sort a new 8 MB batch of
+ data. When all the data has been processed, a final merge sort operation is performed to correctly order the
+ in-memory and on-disk results as the result set is transmitted back to the coordinator node. When external
+ sorting becomes necessary, Impala requires approximately 60 MB of RAM at a minimum for the buffers needed to
+ read, write, and sort the intermediate results. If more RAM is available on the data node, Impala will use
+ the additional RAM to minimize the amount of disk I/O for sorting.
+ </p>
+
+ <p>
+ This external sort technique is used as appropriate on each data node (possibly including the coordinator
+ node) to sort the portion of the result set that is processed on that node. When the sorted intermediate
+ results are sent back to the coordinator node to produce the final result set, the coordinator node uses a
+ merge sort technique to produce a final sorted result set without using any extra resources on the
+ coordinator node.
+ </p>
+
+ <p rev="obwl">
+ <b>Configuration for disk usage:</b>
+ </p>
+
+ <p rev="obwl" conref="../shared/impala_common.xml#common/order_by_scratch_dir"/>
+
+<!-- Here is actually the more logical place to collect all those examples, move them from SELECT and cross-reference to here. -->
+
+<!-- <p rev="obwl" conref="/Content/impala_common_xi44078.xml#common/restrictions_blurb"/> -->
+
+ <p rev="obwl" conref="../shared/impala_common.xml#common/insert_sort_blurb"/>
+
+ <p rev="obwl" conref="../shared/impala_common.xml#common/order_by_view_restriction"/>
+
+ <p>
+ With the lifting of the requirement to include a <codeph>LIMIT</codeph> clause in every <codeph>ORDER
+ BY</codeph> query (in Impala 1.4 and higher):
+ </p>
+
+ <ul>
+ <li>
+ <p>
+ Now the use of scratch disk space raises the possibility of an <q>out of disk space</q> error on a
+ particular data node, as opposed to the previous possibility of an <q>out of memory</q> error. Make sure
+ to keep at least 1 GB free on the filesystem used for temporary sorting work.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ The query options
+ <xref href="impala_default_order_by_limit.xml#default_order_by_limit">DEFAULT_ORDER_BY_LIMIT</xref> and
+ <xref href="impala_abort_on_default_limit_exceeded.xml#abort_on_default_limit_exceeded">ABORT_ON_DEFAULT_LIMIT_EXCEEDED</xref>,
+ which formerly controlled the behavior of <codeph>ORDER BY</codeph> queries with no limit specified, are
+ now ignored.
+ </p>
+ </li>
+ </ul>
+
+ <p rev="obwl" conref="../shared/impala_common.xml#common/null_sorting_change"/>
+<codeblock>[localhost:21000] > create table numbers (x int);
+[localhost:21000] > insert into numbers values (1), (null), (2), (null), (3);
+[localhost:21000] > select x from numbers order by x nulls first;
++------+
+| x |
++------+
+| NULL |
+| NULL |
+| 1 |
+| 2 |
+| 3 |
++------+
+[localhost:21000] > select x from numbers order by x desc nulls first;
++------+
+| x |
++------+
+| NULL |
+| NULL |
+| 3 |
+| 2 |
+| 1 |
++------+
+[localhost:21000] > select x from numbers order by x nulls last;
++------+
+| x |
++------+
+| 1 |
+| 2 |
+| 3 |
+| NULL |
+| NULL |
++------+
+[localhost:21000] > select x from numbers order by x desc nulls last;
++------+
+| x |
++------+
+| 3 |
+| 2 |
+| 1 |
+| NULL |
+| NULL |
++------+
+</codeblock>
+
+ <p rev="obwl" conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p rev="obwl">
+ See <xref href="impala_select.xml#select"/> for further examples of queries with the <codeph>ORDER
+ BY</codeph> clause.
+ </p>
+
+ <p>
+ Analytic functions use the <codeph>ORDER BY</codeph> clause in a different context to define the sequence in
+ which rows are analyzed. See <xref href="impala_analytic_functions.xml#analytic_functions"/> for details.
+ </p>
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_parquet_compression_codec.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_parquet_compression_codec.xml b/docs/topics/impala_parquet_compression_codec.xml
new file mode 100644
index 0000000..d178a0d
--- /dev/null
+++ b/docs/topics/impala_parquet_compression_codec.xml
@@ -0,0 +1,25 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="parquet_compression_codec">
+
+ <title>PARQUET_COMPRESSION_CODEC Query Option</title>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="Parquet"/>
+ <data name="Category" value="File Formats"/>
+ <data name="Category" value="Impala Query Options"/>
+ <data name="Category" value="Deprecated Features"/>
+ <data name="Category" value="Compression"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">PARQUET_COMPRESSION_CODEC query option</indexterm>
+ Deprecated. Use <codeph>COMPRESSION_CODEC</codeph> in Impala 2.0 and later. See
+ <xref href="impala_compression_codec.xml#compression_codec"/> for details.
+ </p>
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_parquet_file_size.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_parquet_file_size.xml b/docs/topics/impala_parquet_file_size.xml
new file mode 100644
index 0000000..396fa92
--- /dev/null
+++ b/docs/topics/impala_parquet_file_size.xml
@@ -0,0 +1,82 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept rev="parquet_block_size" id="parquet_file_size">
+
+ <title>PARQUET_FILE_SIZE Query Option</title>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="Parquet"/>
+ <data name="Category" value="File Formats"/>
+ <data name="Category" value="Impala Query Options"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">PARQUET_FILE_SIZE query option</indexterm>
+ Specifies the maximum size of each Parquet data file produced by Impala <codeph>INSERT</codeph> statements.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+ <p>
+ Specify the size in bytes, or with a trailing <codeph>m</codeph> or <codeph>g</codeph> character to indicate
+ megabytes or gigabytes. For example:
+ </p>
+
+<codeblock>-- 128 megabytes.
+set PARQUET_FILE_SIZE=134217728;
+INSERT OVERWRITE parquet_table SELECT * FROM text_table;
+
+-- 512 megabytes.
+set PARQUET_FILE_SIZE=512m;
+INSERT OVERWRITE parquet_table SELECT * FROM text_table;
+
+-- 1 gigabyte.
+set PARQUET_FILE_SIZE=1g;
+INSERT OVERWRITE parquet_table SELECT * FROM text_table;
+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+
+ <p>
+ With tables that are small or finely partitioned, the default Parquet block size (formerly 1 GB, now 256 MB
+ in Impala 2.0 and later) could be much larger than needed for each data file. For <codeph>INSERT</codeph>
+ operations into such tables, you can increase parallelism by specifying a smaller
+ <codeph>PARQUET_FILE_SIZE</codeph> value, resulting in more HDFS blocks that can be processed by different
+ nodes.
+<!-- Reducing the file size also reduces the memory required to buffer each block before writing it to disk. -->
+ </p>
+
+ <p>
+ <b>Type:</b> numeric, with optional unit specifier
+ </p>
+
+ <note type="important">
+ <p>
+ Currently, the maximum value for this setting is 1 gigabyte (<codeph>1g</codeph>).
+ Setting a value higher than 1 gigabyte could result in errors during
+ an <codeph>INSERT</codeph> operation.
+ </p>
+ </note>
+
+ <p>
+ <b>Default:</b> 0 (produces files with a target size of 256 MB; files might be larger for very wide tables)
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/isilon_blurb"/>
+ <p conref="../shared/impala_common.xml#common/isilon_block_size_caveat"/>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ For information about the Parquet file format, and how the number and size of data files affects query
+ performance, see <xref href="impala_parquet.xml#parquet"/>.
+ </p>
+
+<!-- Examples actually folded into Syntax earlier. <p conref="../shared/impala_common.xml#common/example_blurb"/> -->
+
+ </conbody>
+</concept>
[02/22] incubator-impala git commit: First try at porting over the
source files necessary for the Impala SQL Reference.
Posted by jr...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_udf.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_udf.xml b/docs/topics/impala_udf.xml
new file mode 100644
index 0000000..53dd8eb
--- /dev/null
+++ b/docs/topics/impala_udf.xml
@@ -0,0 +1,1759 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept rev="1.2" id="udfs">
+
+ <title>Impala User-Defined Functions (UDFs)</title>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="Impala Functions"/>
+ <data name="Category" value="UDFs"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ User-defined functions (frequently abbreviated as UDFs) let you code your own application logic for
+ processing column values during an Impala query. For example, a UDF could perform calculations using an
+ external math library, combine several column values into one, do geospatial calculations, or other kinds of
+ tests and transformations that are outside the scope of the built-in SQL operators and functions.
+ </p>
+
+ <p>
+ You can use UDFs to simplify query logic when producing reports, or to transform data in flexible ways when
+ copying from one table to another with the <codeph>INSERT ... SELECT</codeph> syntax.
+ </p>
+
+ <p>
+ You might be familiar with this feature from other database products, under names such as stored functions or
+ stored routines.
+<!--
+ , user-defined aggregate functions (UDAFs), table functions, or window functions.
+ -->
+ </p>
+
+ <p>
+ Impala support for UDFs is available in Impala 1.2 and higher:
+ </p>
+
+ <ul>
+ <li>
+ In Impala 1.1, using UDFs in a query required using the Hive shell. (Because Impala and Hive share the same
+ metastore database, you could switch to Hive to run just those queries requiring UDFs, then switch back to
+ Impala.)
+ </li>
+
+ <li>
+ Starting in Impala 1.2, Impala can run both high-performance native code UDFs written in C++, and
+ Java-based Hive UDFs that you might already have written.
+ </li>
+
+ <li>
+ Impala can run scalar UDFs that return a single value for each row of the result set, and user-defined
+ aggregate functions (UDAFs) that return a value based on a set of rows. Currently, Impala does not support
+ user-defined table functions (UDTFs) or window functions.
+ </li>
+ </ul>
+
+ <p outputclass="toc inpage"/>
+ </conbody>
+
+ <concept id="udf_concepts">
+
+ <title>UDF Concepts</title>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Concepts"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ Depending on your use case, you might write all-new functions, reuse Java UDFs that you have already
+ written for Hive, or port Hive Java UDF code to higher-performance native Impala UDFs in C++. You can code
+ either scalar functions for producing results one row at a time, or more complex aggregate functions for
+ doing analysis across sets of rows. The following sections discuss these different aspects of working with UDFs.
+ </p>
+
+ <p outputclass="toc inpage"/>
+ </conbody>
+
+ <concept id="udfs_udafs">
+
+ <title>UDFs and UDAFs</title>
+
+ <conbody>
+
+ <p>
+ Depending on your use case, the user-defined functions (UDFs) you write might accept or produce different
+ numbers of input and output values:
+ </p>
+
+ <ul>
+ <li>
+ The most general kind of user-defined function (the one typically referred to by the abbreviation UDF)
+ takes a single input value and produces a single output value. When used in a query, it is called once
+ for each row in the result set. For example:
+<codeblock>select customer_name, is_frequent_customer(customer_id) from customers;
+select obfuscate(sensitive_column) from sensitive_data;</codeblock>
+ </li>
+
+ <li>
+ A user-defined aggregate function (UDAF) accepts a group of values and returns a single value. You use
+ UDAFs to summarize and condense sets of rows, in the same style as the built-in <codeph>COUNT()</codeph>,
+ <codeph>MAX()</codeph>, <codeph>SUM()</codeph>, and <codeph>AVG()</codeph> functions. When called in a
+ query that uses the <codeph>GROUP BY</codeph> clause, the function is called once for each combination
+ of <codeph>GROUP BY</codeph> values. For example:
+<codeblock>-- Evaluates multiple rows but returns a single value.
+select closest_restaurant(latitude, longitude) from places;
+
+-- Evaluates batches of rows and returns a separate value for each batch.
+select most_profitable_location(store_id, sales, expenses, tax_rate, depreciation) from franchise_data group by year;</codeblock>
+ </li>
+
+ <li>
+ Currently, Impala does not support other categories of user-defined functions, such as user-defined
+ table functions (UDTFs) or window functions.
+ </li>
+
+<!--
+<li>
+A user-defined table function (UDTF) returns an arbitrary number of rows (zero, one, or many) for each input row.
+These functions filter, explode, or transform the input data in a variety of ways.
+Currently, Impala does not support UDTFs.
+For example:
+<codeblock>select anomalous_event() from web_traffic;
+select price_change() from stock_ticker;
+select real_words(letters) from word_games;</codeblock>
+</li>
+-->
+ </ul>
+ </conbody>
+ </concept>
+
+ <concept id="native_udfs">
+
+ <title>Native Impala UDFs</title>
+
+ <conbody>
+
+ <p>
+ Impala supports UDFs written in C++, in addition to supporting existing Hive UDFs written in Java.
+ Cloudera recommends using C++ UDFs because the compiled native code can yield higher performance, with
+ UDF execution time often 10x faster for a C++ UDF than the equivalent Java UDF.
+ </p>
+ </conbody>
+ </concept>
+
+ <concept id="udfs_hive">
+
+ <title>Using Hive UDFs with Impala</title>
+
+ <conbody>
+
+ <p>
+ Impala can run Java-based user-defined functions (UDFs), originally written for Hive, with no changes,
+ subject to the following conditions:
+ </p>
+
+ <ul>
+ <li>
+ The parameters and return value must all use scalar data types supported by Impala. For example, complex or nested
+ types are not supported.
+ </li>
+
+ <li>
+ Currently, Hive UDFs that accept or return the <codeph>TIMESTAMP</codeph> type are not supported.
+ </li>
+
+ <li>
+ The return type must be a <q>Writable</q> type such as <codeph>Text</codeph> or
+ <codeph>IntWritable</codeph>, rather than a Java primitive type such as <codeph>String</codeph> or
+ <codeph>int</codeph>. Otherwise, the UDF will return <codeph>NULL</codeph>.
+ </li>
+
+ <li>
+ Hive UDAFs and UDTFs are not supported.
+ </li>
+
+ <li>
+ Typically, a Java UDF will execute several times slower in Impala than the equivalent native UDF
+ written in C++.
+ </li>
+ </ul>
+
+ <p>
+ To take full advantage of the Impala architecture and performance features, you can also write
+ Impala-specific UDFs in C++.
+ </p>
+
+ <p>
+ For background about Java-based Hive UDFs, see the
+ <xref href="https://cwiki.apache.org/confluence/display/Hive/LanguageManual+UDF" scope="external" format="html">Hive
+ documentation for UDFs</xref>. For examples or tutorials for writing such UDFs, search the web for
+ related blog posts.
+ </p>
+
+ <p>
+ The ideal way to understand how to reuse Java-based UDFs (originally written for Hive) with Impala is to
+ take some of the Hive built-in functions (implemented as Java UDFs) and take the applicable JAR files
+ through the UDF deployment process for Impala, creating new UDFs with different names:
+ </p>
+
+ <ol>
+ <li>
+ Take a copy of the Hive JAR file containing the Hive built-in functions. For example, the path might be
+ like <filepath>/usr/lib/hive/lib/hive-exec-0.10.0-cdh4.2.0.jar</filepath>, with different version
+ numbers corresponding to your specific level of CDH.
+ </li>
+
+ <li>
+ Use <codeph>jar tf <varname>jar_file</varname></codeph> to see a list of the classes inside the JAR.
+ You will see names like <codeph>org/apache/hadoop/hive/ql/udf/UDFLower.class</codeph> and
+ <codeph>org/apache/hadoop/hive/ql/udf/UDFOPNegative.class</codeph>. Make a note of the names of the
+ functions you want to experiment with. When you specify the entry points for the Impala <codeph>CREATE
+ FUNCTION</codeph> statement, change the slash characters to dots and strip off the
+ <codeph>.class</codeph> suffix, for example <codeph>org.apache.hadoop.hive.ql.udf.UDFLower</codeph> and
+ <codeph>org.apache.hadoop.hive.ql.udf.UDFOPNegative</codeph>.
+ </li>
+
+ <li>
+ Copy that file to an HDFS location that Impala can read. (In the examples here, we renamed the file to
+ <filepath>hive-builtins.jar</filepath> in HDFS for simplicity.)
+ </li>
+
+ <li>
+ For each Java-based UDF that you want to call through Impala, issue a <codeph>CREATE FUNCTION</codeph>
+ statement, with a <codeph>LOCATION</codeph> clause containing the full HDFS path of the JAR file, and a
+ <codeph>SYMBOL</codeph> clause with the fully qualified name of the class, using dots as separators and
+ without the <codeph>.class</codeph> extension. Remember that user-defined functions are associated with
+ a particular database, so issue a <codeph>USE</codeph> statement for the appropriate database first, or
+ specify the SQL function name as
+ <codeph><varname>db_name</varname>.<varname>function_name</varname></codeph>. Use completely new names
+ for the SQL functions, because Impala UDFs cannot have the same name as Impala built-in functions.
+ </li>
+
+ <li>
+ Call the function from your queries, passing arguments of the correct type to match the function
+ signature. These arguments could be references to columns, arithmetic or other kinds of expressions,
+ the results of <codeph>CAST</codeph> functions to ensure correct data types, and so on.
+ </li>
+ </ol>
+
+ <example>
+
+ <title>Java UDF Example: Reusing lower() Function</title>
+
+ <p>
+ For example, the following <cmdname>impala-shell</cmdname> session creates an Impala UDF
+ <codeph>my_lower()</codeph> that reuses the Java code for the Hive <codeph>lower()</codeph> built-in
+ function. We cannot call it <codeph>lower()</codeph> because Impala does not allow UDFs to have the
+ same name as built-in functions. From SQL, we call the function in a basic way (in a query with no
+ <codeph>WHERE</codeph> clause), directly on a column, and on the results of a string expression:
+ </p>
+
+<codeblock>[localhost:21000] > create database udfs;
+[localhost:21000] > use udfs;
+[localhost:21000] > create function lower(string) returns string location '/user/hive/udfs/hive.jar' symbol='org.apache.hadoop.hive.ql.udf.UDFLower';
+ERROR: AnalysisException: Function cannot have the same name as a builtin: lower
+[localhost:21000] > create function my_lower(string) returns string location '/user/hive/udfs/hive.jar' symbol='org.apache.hadoop.hive.ql.udf.UDFLower';
+[localhost:21000] > select my_lower('Some String NOT ALREADY LOWERCASE');
++----------------------------------------------------+
+| udfs.my_lower('some string not already lowercase') |
++----------------------------------------------------+
+| some string not already lowercase |
++----------------------------------------------------+
+Returned 1 row(s) in 0.11s
+[localhost:21000] > create table t2 (s string);
+[localhost:21000] > insert into t2 values ('lower'),('UPPER'),('Init cap'),('CamelCase');
+Inserted 4 rows in 2.28s
+[localhost:21000] > select * from t2;
++-----------+
+| s |
++-----------+
+| lower |
+| UPPER |
+| Init cap |
+| CamelCase |
++-----------+
+Returned 4 row(s) in 0.47s
+[localhost:21000] > select my_lower(s) from t2;
++------------------+
+| udfs.my_lower(s) |
++------------------+
+| lower |
+| upper |
+| init cap |
+| camelcase |
++------------------+
+Returned 4 row(s) in 0.54s
+[localhost:21000] > select my_lower(concat('ABC ',s,' XYZ')) from t2;
++------------------------------------------+
+| udfs.my_lower(concat('abc ', s, ' xyz')) |
++------------------------------------------+
+| abc lower xyz |
+| abc upper xyz |
+| abc init cap xyz |
+| abc camelcase xyz |
++------------------------------------------+
+Returned 4 row(s) in 0.22s</codeblock>
+
+ </example>
+
+ <example>
+
+ <title>Java UDF Example: Reusing negative() Function</title>
+
+ <p>
+ Here is an example that reuses the Hive Java code for the <codeph>negative()</codeph> built-in
+ function. This example demonstrates how the data types of the arguments must match precisely with the
+ function signature. At first, we create an Impala SQL function that can only accept an integer
+ argument. Impala cannot find a matching function when the query passes a floating-point argument,
+ although we can call the integer version of the function by casting the argument. Then we overload the
+ same function name to also accept a floating-point argument.
+ </p>
+
+<codeblock>[localhost:21000] > create table t (x int);
+[localhost:21000] > insert into t values (1), (2), (4), (100);
+Inserted 4 rows in 1.43s
+[localhost:21000] > create function my_neg(bigint) returns bigint location '/user/hive/udfs/hive.jar' symbol='org.apache.hadoop.hive.ql.udf.UDFOPNegative';
+[localhost:21000] > select my_neg(4);
++----------------+
+| udfs.my_neg(4) |
++----------------+
+| -4 |
++----------------+
+[localhost:21000] > select my_neg(x) from t;
++----------------+
+| udfs.my_neg(x) |
++----------------+
+| -1 |
+| -2 |
+| -4 |
+| -100 |
++----------------+
+Returned 4 row(s) in 0.60s
+[localhost:21000] > select my_neg(4.0);
+ERROR: AnalysisException: No matching function with signature: udfs.my_neg(FLOAT).
+[localhost:21000] > select my_neg(cast(4.0 as int));
++-------------------------------+
+| udfs.my_neg(cast(4.0 as int)) |
++-------------------------------+
+| -4 |
++-------------------------------+
+Returned 1 row(s) in 0.11s
+[localhost:21000] > create function my_neg(double) returns double location '/user/hive/udfs/hive.jar' symbol='org.apache.hadoop.hive.ql.udf.UDFOPNegative';
+[localhost:21000] > select my_neg(4.0);
++------------------+
+| udfs.my_neg(4.0) |
++------------------+
+| -4 |
++------------------+
+Returned 1 row(s) in 0.11s</codeblock>
+
+ <p>
+ You can find the sample files mentioned here in
+ <xref href="https://github.com/cloudera/impala/tree/master/be/src/udf_samples" scope="external" format="html">the
+ Impala github repo</xref>.
+<!-- Internal-only repo, don't know an external equivalent.
+and other examples demonstrating this technique in
+<xref href="http://github.sf.cloudera.com/CDH/Impala/blob/master/testdata/workloads/functional-query/queries/QueryTest/load-hive-udfs.test" scope="external" format="html">the Impala test files</xref>.
+-->
+ </p>
+
+ </example>
+ </conbody>
+ </concept>
+ </concept>
+
+ <concept id="udf_runtime">
+ <title>Runtime Environment for UDFs</title>
+ <conbody>
+ <p>
+ By default, Impala copies UDFs into <filepath>/tmp</filepath>,
+ and you can configure this location through the <codeph>--local_library_dir</codeph>
+ startup flag for the <cmdname>impalad</cmdname> daemon.
+ </p>
+ </conbody>
+ </concept>
+
+
+ <concept id="udf_demo_env">
+
+ <title>Installing the UDF Development Package</title>
+
+ <conbody>
+
+ <p>
+ To develop UDFs for Impala, download and install the <codeph>impala-udf-devel</codeph> package containing
+ header files, sample source, and build configuration files.
+ </p>
+
+ <ol>
+ <li>
+ Start at <xref href="https://archive.cloudera.com/cdh5/" scope="external" format="html"/> for the CDH 5
+ package, or <xref href="https://archive.cloudera.com/impala/" scope="external" format="html"/> for the CDH
+ 4 package.
+ </li>
+
+ <li>
+ Locate the appropriate <codeph>.repo</codeph> or list file for your operating system version, such as
+ <xref href="https://archive.cloudera.com/impala/redhat/6/x86_64/impala/cloudera-impala.repo" scope="external" format="html">the
+ <codeph>.repo</codeph> file for CDH 4 on RHEL 6</xref>.
+ </li>
+
+ <li>
+ Use the familiar <codeph>yum</codeph>, <codeph>zypper</codeph>, or <codeph>apt-get</codeph> commands
+ depending on your operating system, with <codeph>impala-udf-devel</codeph> for the package name.
+ </li>
+ </ol>
+
+ <note>
+ The UDF development code does not rely on Impala being installed on the same machine. You can write and
+ compile UDFs on a minimal development system, then deploy them on a different one for use with Impala. If
+ you develop UDFs on a server managed by Cloudera Manager through the parcel mechanism, you still install
+ the UDF development kit through the package mechanism; this small standalone package does not interfere
+ with the parcels containing the main Impala code.
+ </note>
+
+ <p>
+ When you are ready to start writing your own UDFs, download the sample code and build scripts from
+ <xref href="https://github.com/cloudera/impala-udf-samples" scope="external" format="html">the Cloudera
+ sample UDF github</xref>. Then see <xref href="impala_udf.xml#udf_coding"/> for how to code UDFs, and
+ <xref href="impala_udf.xml#udf_tutorial"/> for how to build and run UDFs.
+ </p>
+ </conbody>
+ </concept>
+
+ <concept id="udf_coding">
+
+ <title>Writing User-Defined Functions (UDFs)</title>
+
+ <conbody>
+
+ <p>
+ Before starting UDF development, make sure to install the development package and download the UDF code
+ samples, as described in <xref href="#udf_demo_env"/>.
+ </p>
+
+ <p>
+ When writing UDFs:
+ </p>
+
+ <ul>
+ <li>
+ Keep in mind the data type differences as you transfer values from the high-level SQL to your lower-level
+ UDF code. For example, in the UDF code you might be much more aware of how many bytes different kinds of
+ integers require.
+ </li>
+
+ <li>
+ Use best practices for function-oriented programming: choose arguments carefully, avoid side effects,
+ make each function do a single thing, and so on.
+ </li>
+ </ul>
+
+ <p outputclass="toc inpage"/>
+ </conbody>
+
+ <concept id="udf_exploring">
+
+ <title>Getting Started with UDF Coding</title>
+ <prolog>
+ <metadata>
+ <!-- OK, this is not something a Hadoop newbie would tackle, but being lenient and inclusive in this initial pass, so including the GS tag. -->
+ <data name="Category" value="Getting Started"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ To understand the layout and member variables and functions of the predefined UDF data types, examine the
+ header file <filepath>/usr/include/impala_udf/udf.h</filepath>:
+ </p>
+
+<codeblock>// This is the only Impala header required to develop UDFs and UDAs. This header
+// contains the types that need to be used and the FunctionContext object. The context
+// object serves as the interface object between the UDF/UDA and the impala process. </codeblock>
+
+ <p>
+ For the basic declarations needed to write a scalar UDF, see the header file
+ <filepath>udf-sample.h</filepath> within the sample build environment, which defines a simple function
+ named <codeph>AddUdf()</codeph>:
+ </p>
+
+<codeblock>#ifndef IMPALA_UDF_SAMPLE_UDF_H
+#define IMPALA_UDF_SAMPLE_UDF_H
+
+#include &lt;impala_udf/udf.h&gt;
+
+using namespace impala_udf;
+
+IntVal AddUdf(FunctionContext* context, const IntVal&amp; arg1, const IntVal&amp; arg2);
+
+#endif</codeblock>
+
+ <p>
+ For sample C++ code for a simple function named <codeph>AddUdf()</codeph>, see the source file
+ <filepath>udf-sample.cc</filepath> within the sample build environment:
+ </p>
+
+<codeblock>#include "udf-sample.h"
+
+// In this sample we are declaring a UDF that adds two ints and returns an int.
+IntVal AddUdf(FunctionContext* context, const IntVal&amp; arg1, const IntVal&amp; arg2) {
+ if (arg1.is_null || arg2.is_null) return IntVal::null();
+ return IntVal(arg1.val + arg2.val);
+}
+
+// Multiple UDFs can be defined in the same file</codeblock>
+ </conbody>
+ </concept>
+
+ <concept id="udfs_args">
+
+ <title>Data Types for Function Arguments and Return Values</title>
+
+ <conbody>
+
+ <p>
+ Each value that a user-defined function can accept as an argument or return as a result value must map to
+ a SQL data type that you could specify for a table column.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/udfs_no_complex_types"/>
+
+ <p>
+ Each data type has a corresponding structure defined in the C++ and Java header files, with two member
+ fields and some predefined comparison operators and constructors:
+ </p>
+
+ <ul>
+ <li>
+ <p>
+ <codeph>is_null</codeph> indicates whether the value is <codeph>NULL</codeph> or not.
+ <codeph>val</codeph> holds the actual argument or return value when it is non-<codeph>NULL</codeph>.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ Each struct also defines a <codeph>null()</codeph> member function that constructs an instance of the
+ struct with the <codeph>is_null</codeph> flag set.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ The built-in SQL comparison operators and clauses such as <codeph>&lt;</codeph>,
+ <codeph>&gt;=</codeph>, <codeph>BETWEEN</codeph>, and <codeph>ORDER BY</codeph> all work
+ automatically based on the SQL return type of each UDF. For example, Impala knows how to evaluate
+ <codeph>BETWEEN 1 AND udf_returning_int(col1)</codeph> or <codeph>ORDER BY
+ udf_returning_string(col2)</codeph> without you declaring any comparison operators within the UDF
+ itself.
+ </p>
+ <p>
+ For convenience within your UDF code, each struct defines <codeph>==</codeph> and <codeph>!=</codeph>
+ operators for comparisons with other structs of the same type. These are for typical C++ comparisons
+ within your own code, not necessarily reproducing SQL semantics. For example, if the
+ <codeph>is_null</codeph> flag is set in both structs, they compare as equal. That behavior of
+ <codeph>null</codeph> comparisons is different from SQL (where <codeph>NULL == NULL</codeph> is
+ <codeph>NULL</codeph> rather than <codeph>true</codeph>), but more in line with typical C++ behavior.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ Each kind of struct has one or more constructors that define a filled-in instance of the struct,
+ optionally with default values.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ Each kind of struct has a <codeph>null()</codeph> member function that returns an instance of the
+ struct with the <codeph>is_null</codeph> flag set.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ Because Impala currently does not support composite or nested types, Impala cannot process UDFs that
+ accept such types as arguments or return them as result values. This limitation applies both to
+ Impala UDFs written in C++ and Java-based Hive UDFs.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ You can overload functions by creating multiple functions with the same SQL name but different
+ argument types. For overloaded functions, you must use different C++ or Java entry point names in the
+ underlying functions.
+ </p>
+ </li>
+ </ul>
+
+ <p>
+ The data types defined on the C++ side (in <filepath>/usr/include/impala_udf/udf.h</filepath>) are:
+ </p>
+
+ <ul>
+ <li>
+ <p>
+ <codeph>IntVal</codeph> represents an <codeph>INT</codeph> column.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ <codeph>BigIntVal</codeph> represents a <codeph>BIGINT</codeph> column. Even if you do not need the
+ full range of a <codeph>BIGINT</codeph> value, it can be useful to code your function arguments as
+ <codeph>BigIntVal</codeph> to make it convenient to call the function with different kinds of integer
+ columns and expressions as arguments. Impala automatically casts smaller integer types to larger ones
+ when appropriate, but does not implicitly cast large integer types to smaller ones.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ <codeph>SmallIntVal</codeph> represents a <codeph>SMALLINT</codeph> column.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ <codeph>TinyIntVal</codeph> represents a <codeph>TINYINT</codeph> column.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ <codeph>StringVal</codeph> represents a <codeph>STRING</codeph> column. It has a <codeph>len</codeph>
+ field representing the length of the string, and a <codeph>ptr</codeph> field pointing to the string
+ data. It has constructors that create a new <codeph>StringVal</codeph> struct based on a
+ null-terminated C-style string, or a pointer plus a length; these new structs still refer to the
+ original string data rather than allocating a new buffer for the data. It also has a constructor that
+ takes a pointer to a <codeph>FunctionContext</codeph> struct and a length, that does allocate space
+ for a new copy of the string data, for use in UDFs that return string values.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ <codeph>BooleanVal</codeph> represents a <codeph>BOOLEAN</codeph> column.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ <codeph>FloatVal</codeph> represents a <codeph>FLOAT</codeph> column.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ <codeph>DoubleVal</codeph> represents a <codeph>DOUBLE</codeph> column.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ <codeph>TimestampVal</codeph> represents a <codeph>TIMESTAMP</codeph> column. It has a
+ <codeph>date</codeph> field, a 32-bit integer representing the Gregorian date, that is, the days past
+ the epoch date. It also has a <codeph>time_of_day</codeph> field, a 64-bit integer representing the
+ current time of day in nanoseconds.
+ </p>
+ </li>
+
+<!--
+ <li>
+ <p>
+ <codeph>AnyVal</codeph> is the parent type of all the other
+ structs. They inherit the <codeph>is_null</codeph> field from it.
+ You do not use this type directly in your code.
+ </p>
+ </li>
+-->
+ </ul>
+ </conbody>
+ </concept>
+
+ <concept id="udf_varargs">
+
+ <title>Variable-Length Argument Lists</title>
+
+ <conbody>
+
+ <p>
+ UDFs typically take a fixed number of arguments, with each one named explicitly in the signature of your
+ C++ function. Your function can also accept additional optional arguments, all of the same type. For
+ example, you can concatenate two strings, three strings, four strings, and so on. Or you can compare two
+ numbers, three numbers, four numbers, and so on.
+ </p>
+
+ <p>
+ To accept a variable-length argument list, code the signature of your function like this:
+ </p>
+
+<codeblock>StringVal Concat(FunctionContext* context, const StringVal&amp; separator,
+ int num_var_args, const StringVal* args);</codeblock>
+
+ <p>
+ In the <codeph>CREATE FUNCTION</codeph> statement, after the type of the first optional argument, include
+ <codeph>...</codeph> to indicate it could be followed by more arguments of the same type. For example,
+ the following function accepts a <codeph>STRING</codeph> argument, followed by one or more additional
+ <codeph>STRING</codeph> arguments:
+ </p>
+
+<codeblock>[localhost:21000] > create function my_concat(string, string ...) returns string location '/user/test_user/udfs/sample.so' symbol='Concat';
+</codeblock>
+
+ <p>
+ The call from the SQL query must pass at least one argument to the variable-length portion of the
+ argument list.
+ </p>
+
+ <p>
+ When Impala calls the function, it fills in the initial set of required arguments, then passes the number
+ of extra arguments and a pointer to the first of those optional arguments.
+ </p>
+ </conbody>
+ </concept>
+
+ <concept id="udf_null">
+
+ <title>Handling NULL Values</title>
+
+ <conbody>
+
+ <p>
+ For correctness, performance, and reliability, it is important for each UDF to handle all situations
+ where any <codeph>NULL</codeph> values are passed to your function. For example, when passed a
+ <codeph>NULL</codeph>, UDFs typically also return <codeph>NULL</codeph>. In an aggregate function, which
+ could be passed a combination of real and <codeph>NULL</codeph> values, you might make the final value
+ into a <codeph>NULL</codeph> (as in <codeph>CONCAT()</codeph>), ignore the <codeph>NULL</codeph> value
+ (as in <codeph>AVG()</codeph>), or treat it the same as a numeric zero or empty string.
+ </p>
+
+ <p>
+ Each parameter type, such as <codeph>IntVal</codeph> or <codeph>StringVal</codeph>, has an
+ <codeph>is_null</codeph> Boolean member.
+<!--
+If your function has no effect when passed <codeph>NULL</codeph>
+values,
+-->
+ Test this flag immediately for each argument to your function, and if it is set, do not refer to the
+ <codeph>val</codeph> field of the argument structure. The <codeph>val</codeph> field is undefined when
+ the argument is <codeph>NULL</codeph>, so your function could go into an infinite loop or produce
+ incorrect results if you skip the special handling for <codeph>NULL</codeph>.
+<!-- and return if so.
+For <codeph>void</codeph> intermediate functions
+within UDAs, you can return without specifying a value.
+-->
+ </p>
+
+ <p>
+ If your function returns <codeph>NULL</codeph> when passed a <codeph>NULL</codeph> value, or in other
+ cases such as when a search string is not found, you can construct a null instance of the return type by
+ using its <codeph>null()</codeph> member function.
+ </p>
+ </conbody>
+ </concept>
+
+ <concept id="udf_malloc">
+
+ <title>Memory Allocation for UDFs</title>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Memory"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ By default, memory allocated within a UDF is deallocated when the function exits, which could be before
+ the query is finished. The input arguments remain allocated for the lifetime of the function, so you can
+ refer to them in the expressions for your return values. If you use temporary variables to construct
+ all-new string values, use the <codeph>StringVal()</codeph> constructor that takes an initial
+ <codeph>FunctionContext*</codeph> argument followed by a length, and copy the data into the newly
+ allocated memory buffer.
+ </p>
+ </conbody>
+ </concept>
+
+ <concept rev="1.3.0" id="udf_threads">
+
+ <title>Thread-Safe Work Area for UDFs</title>
+
+ <conbody>
+
+ <p>
+ One way to improve performance of UDFs is to specify the optional <codeph>PREPARE_FN</codeph> and
+ <codeph>CLOSE_FN</codeph> clauses on the <codeph>CREATE FUNCTION</codeph> statement. The <q>prepare</q>
+ function sets up a thread-safe data structure in memory that you can use as a work area. The <q>close</q>
+ function deallocates that memory. Each subsequent call to the UDF within the same thread can access that
+ same memory area. There might be several such memory areas allocated on the same host, as UDFs are
+ parallelized using multiple threads.
+ </p>
+
+ <p>
+ Within this work area, you can set up predefined lookup tables, or record the results of complex
+ operations on data types such as <codeph>STRING</codeph> or <codeph>TIMESTAMP</codeph>. Saving the
+ results of previous computations rather than repeating the computation each time is an optimization known
+ as <xref href="http://en.wikipedia.org/wiki/Memoization" scope="external" format="html">memoization</xref>. For example,
+ if your UDF performs a regular expression match or date manipulation on a column that repeats the same
+ value over and over, you could store the last-computed value or a hash table of already-computed values,
+ and do a fast lookup to find the result for subsequent iterations of the UDF.
+ </p>
+
+ <p>
+ Each such function must have the signature:
+ </p>
+
+<codeblock>void <varname>function_name</varname>(impala_udf::FunctionContext*, impala_udf::FunctionContext::FunctionScope)
+</codeblock>
+
+ <p>
+ Currently, only <codeph>THREAD_SCOPE</codeph> is implemented, not <codeph>FRAGMENT_SCOPE</codeph>. See
+ <filepath>udf.h</filepath> for details about the scope values.
+ </p>
+ </conbody>
+ </concept>
+
+ <concept id="udf_error_handling">
+
+ <title>Error Handling for UDFs</title>
+ <prolog>
+ <metadata>
+ <!-- A little bit of a stretch, but if you're doing UDFs and you need to debug you might look up Troubleshooting. -->
+ <data name="Category" value="Troubleshooting"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ To handle errors in UDFs, you call functions that are members of the initial
+ <codeph>FunctionContext*</codeph> argument passed to your function.
+ </p>
+
+ <p>
+ A UDF can record one or more warnings, for conditions that indicate minor, recoverable problems that do
+ not cause the query to stop. The signature for this function is:
+ </p>
+
+<codeblock>bool AddWarning(const char* warning_msg);</codeblock>
+
+ <p>
+ For a serious problem that requires cancelling the query, a UDF can set an error flag that prevents the
+ query from returning any results. The signature for this function is:
+ </p>
+
+<codeblock>void SetError(const char* error_msg);</codeblock>
+ </conbody>
+ </concept>
+ </concept>
+
+ <concept id="udafs">
+
+ <title>Writing User-Defined Aggregate Functions (UDAFs)</title>
+
+ <conbody>
+
+ <p>
+ User-defined aggregate functions (UDAFs or UDAs) are a powerful and flexible category of user-defined
+ functions. If a query processes N rows, calling a UDAF during the query condenses the result set,
+ producing anywhere from a single value (such as with the <codeph>SUM</codeph> or <codeph>MAX</codeph>
+ functions) to some number of values less than or equal to N (as in queries using the
+ <codeph>GROUP BY</codeph> or <codeph>HAVING</codeph> clause).
+ </p>
+
+ <p outputclass="toc inpage"/>
+ </conbody>
+
+ <concept id="uda_functions">
+
+ <title>The Underlying Functions for a UDA</title>
+
+ <conbody>
+
+ <p>
+ A UDAF must maintain a state value across subsequent calls, so that it can accumulate a result across a
+ set of calls, rather than derive it purely from one set of arguments. For that reason, a UDAF is
+ represented by multiple underlying functions:
+ </p>
+
+ <ul>
+ <li>
+ An initialization function that sets any counters to zero, creates empty buffers, and does any other
+ one-time setup for a query.
+ </li>
+
+ <li>
+ An update function that processes the arguments for each row in the query result set and accumulates an
+ intermediate result for each node. For example, this function might increment a counter, append to a
+ string buffer, or set flags.
+ </li>
+
+ <li>
+ A merge function that combines the intermediate results from two different nodes.
+ </li>
+
+ <li rev="2.0.0">
+ A serialize function that flattens any intermediate values containing pointers, and frees any memory
+ allocated during the init, update, and merge phases.
+ </li>
+
+ <li>
+ A finalize function that either passes through the combined result unchanged, or does one final
+ transformation.
+ </li>
+ </ul>
+
+ <p>
+ In the SQL syntax, you create a UDAF by using the statement <codeph>CREATE AGGREGATE FUNCTION</codeph>.
+ You specify the entry points of the underlying C++ functions using the clauses <codeph>INIT_FN</codeph>,
+ <codeph>UPDATE_FN</codeph>, <codeph>MERGE_FN</codeph>, <codeph rev="2.0.0">SERIALIZE_FN</codeph>, and
+ <codeph>FINALIZE_FN</codeph>.
+ </p>
+
+ <p>
+ <draft-comment translate="no">
+Need an example to demonstrate exactly what tokens are used for init, merge, finalize in
+this substitution.
+</draft-comment>
+ For convenience, you can use a naming convention for the underlying functions and Impala automatically
+ recognizes those entry points. Specify the <codeph>UPDATE_FN</codeph> clause, using an entry point name
+ containing the string <codeph>update</codeph> or <codeph>Update</codeph>. When you omit the other
+ <codeph>_FN</codeph> clauses from the SQL statement, Impala looks for entry points with names formed by
+ substituting <codeph>init</codeph>, <codeph>merge</codeph>, <codeph>serialize</codeph>, or
+ <codeph>finalize</codeph> in place of the <codeph>update</codeph> or <codeph>Update</codeph> portion of
+ the specified name.
+ </p>
+
+<!--
+[INIT_FN '<varname>function</varname>]
+[UPDATE_FN '<varname>function</varname>]
+[MERGE_FN '<varname>function</varname>]
+[FINALIZE_FN '<varname>function</varname>]
+-->
+
+ <p>
+ <filepath>uda-sample.h</filepath>:
+ </p>
+
+ <p>
+ See this file online at:
+ <xref href="https://github.com/cloudera/impala-udf-samples/blob/master/uda-sample.h" scope="external" format="html"/>
+ </p>
+
+<codeblock audience="Cloudera">#ifndef IMPALA_UDF_SAMPLE_UDA_H
+#define IMPALA_UDF_SAMPLE_UDA_H
+
+#include <impala_udf/udf.h>
+
+using namespace impala_udf;
+
+// This is an example of the COUNT aggregate function.
+void CountInit(FunctionContext* context, BigIntVal* val);
+void CountUpdate(FunctionContext* context, const AnyVal& input, BigIntVal* val);
+void CountMerge(FunctionContext* context, const BigIntVal& src, BigIntVal* dst);
+BigIntVal CountFinalize(FunctionContext* context, const BigIntVal& val);
+
+// This is an example of the AVG(double) aggregate function. This function needs to
+// maintain two pieces of state, the current sum and the count. We do this using
+// the BufferVal intermediate type. When this UDA is registered, it would specify
+// 16 bytes (8 byte sum + 8 byte count) as the size for this buffer.
+void AvgInit(FunctionContext* context, BufferVal* val);
+void AvgUpdate(FunctionContext* context, const DoubleVal& input, BufferVal* val);
+void AvgMerge(FunctionContext* context, const BufferVal& src, BufferVal* dst);
+DoubleVal AvgFinalize(FunctionContext* context, const BufferVal& val);
+
+// This is a sample of implementing the STRING_CONCAT aggregate function.
+// Example: select string_concat(string_col, ",") from table
+void StringConcatInit(FunctionContext* context, StringVal* val);
+void StringConcatUpdate(FunctionContext* context, const StringVal& arg1,
+ const StringVal& arg2, StringVal* val);
+void StringConcatMerge(FunctionContext* context, const StringVal& src, StringVal* dst);
+StringVal StringConcatFinalize(FunctionContext* context, const StringVal& val);
+
+#endif</codeblock>
+
+ <p>
+ <filepath>uda-sample.cc</filepath>:
+ </p>
+
+ <p>
+ See this file online at:
+ <xref href="https://github.com/cloudera/impala-udf-samples/blob/master/uda-sample.cc" scope="external" format="html"/>
+ </p>
+
+<codeblock audience="Cloudera">#include "uda-sample.h"
+#include <assert.h>
+
+using namespace impala_udf;
+
+// ---------------------------------------------------------------------------
+// This is a sample of implementing a COUNT aggregate function.
+// ---------------------------------------------------------------------------
+void CountInit(FunctionContext* context, BigIntVal* val) {
+ val->is_null = false;
+ val->val = 0;
+}
+
+void CountUpdate(FunctionContext* context, const AnyVal& input, BigIntVal* val) {
+ if (input.is_null) return;
+ ++val->val;
+}
+
+void CountMerge(FunctionContext* context, const BigIntVal& src, BigIntVal* dst) {
+ dst->val += src.val;
+}
+
+BigIntVal CountFinalize(FunctionContext* context, const BigIntVal& val) {
+ return val;
+}
+
+// ---------------------------------------------------------------------------
+// This is a sample of implementing an AVG aggregate function.
+// ---------------------------------------------------------------------------
+struct AvgStruct {
+ double sum;
+ int64_t count;
+};
+
+void AvgInit(FunctionContext* context, BufferVal* val) {
+ assert(sizeof(AvgStruct) == 16);
+ memset(*val, 0, sizeof(AvgStruct));
+}
+
+void AvgUpdate(FunctionContext* context, const DoubleVal& input, BufferVal* val) {
+ if (input.is_null) return;
+ AvgStruct* avg = reinterpret_cast<AvgStruct*>(*val);
+ avg->sum += input.val;
+ ++avg->count;
+}
+
+void AvgMerge(FunctionContext* context, const BufferVal& src, BufferVal* dst) {
+ if (src == NULL) return;
+ const AvgStruct* src_struct = reinterpret_cast<const AvgStruct*>(src);
+ AvgStruct* dst_struct = reinterpret_cast<AvgStruct*>(*dst);
+ dst_struct->sum += src_struct->sum;
+ dst_struct->count += src_struct->count;
+}
+
+DoubleVal AvgFinalize(FunctionContext* context, const BufferVal& val) {
+ if (val == NULL) return DoubleVal::null();
+ AvgStruct* val_struct = reinterpret_cast<AvgStruct*>(val);
+ return DoubleVal(val_struct->sum / val_struct->count);
+}
+
+// ---------------------------------------------------------------------------
+// This is a sample of implementing the STRING_CONCAT aggregate function.
+// Example: select string_concat(string_col, ",") from table
+// ---------------------------------------------------------------------------
+void StringConcatInit(FunctionContext* context, StringVal* val) {
+ val->is_null = true;
+}
+
+void StringConcatUpdate(FunctionContext* context, const StringVal& arg1,
+ const StringVal& arg2, StringVal* val) {
+ if (val->is_null) {
+ val->is_null = false;
+ *val = StringVal(context, arg1.len);
+ memcpy(val->ptr, arg1.ptr, arg1.len);
+ } else {
+ int new_len = val->len + arg1.len + arg2.len;
+ StringVal new_val(context, new_len);
+ memcpy(new_val.ptr, val->ptr, val->len);
+ memcpy(new_val.ptr + val->len, arg2.ptr, arg2.len);
+ memcpy(new_val.ptr + val->len + arg2.len, arg1.ptr, arg1.len);
+ *val = new_val;
+ }
+}
+
+void StringConcatMerge(FunctionContext* context, const StringVal& src, StringVal* dst) {
+ if (src.is_null) return;
+ StringConcatUpdate(context, src, ",", dst);
+}
+
+StringVal StringConcatFinalize(FunctionContext* context, const StringVal& val) {
+ return val;
+}</codeblock>
+ </conbody>
+ </concept>
+
+ <concept audience="Cloudera" id="udf_intermediate">
+
+ <title>Intermediate Results for UDAs</title>
+
+ <conbody>
+
+ <p>
+ A user-defined aggregate function might produce and combine intermediate results during some phases of
+ processing, using a different data type than the final return value. For example, if you implement a
+ function similar to the built-in <codeph>AVG()</codeph> function, it must keep track of two values, the
+ number of values counted and the sum of those values. Or, you might accumulate a string value over the
+ course of a UDA, then in the end return a numeric or Boolean result.
+ </p>
+
+ <p>
+ In such a case, specify the data type of the intermediate results using the optional <codeph>INTERMEDIATE
+ <varname>type_name</varname></codeph> clause of the <codeph>CREATE AGGREGATE FUNCTION</codeph> statement.
+ If the intermediate data is a typeless byte array (for example, to represent a C++ struct or array),
+ specify the type name as <codeph>CHAR(<varname>n</varname>)</codeph>, with <varname>n</varname>
+ representing the number of bytes in the intermediate result buffer.
+ </p>
+ </conbody>
+ </concept>
+ </concept>
+
+ <concept id="udf_building">
+
+ <title>Building and Deploying UDFs</title>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Deploying"/>
+ <data name="Category" value="Building"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ This section explains the steps to compile Impala UDFs from C++ source code, and deploy the resulting
+ libraries for use in Impala queries.
+ </p>
+
+ <p>
+ Impala ships with a sample build environment for UDFs that you can study, experiment with, and adapt for
+ your own use. This sample build environment starts with the <cmdname>cmake</cmdname> configuration command,
+ which reads the file <filepath>CMakeLists.txt</filepath> and generates a <filepath>Makefile</filepath>
+ customized for your particular directory paths. Then the <cmdname>make</cmdname> command runs the actual
+ build steps based on the rules in the <filepath>Makefile</filepath>.
+ </p>
+
+ <p>
+ Impala loads the shared library from an HDFS location. After building a shared library containing one or
+ more UDFs, use <codeph>hdfs dfs</codeph> or <codeph>hadoop fs</codeph> commands to copy the binary file to
+ an HDFS location readable by Impala.
+ </p>
+
+ <p>
+ The final step in deployment is to issue a <codeph>CREATE FUNCTION</codeph> statement in the
+ <cmdname>impala-shell</cmdname> interpreter to make Impala aware of the new function. See
+ <xref href="impala_create_function.xml#create_function"/> for syntax details. Because each function is
+ associated with a particular database, always issue a <codeph>USE</codeph> statement to the appropriate
+ database before creating a function, or specify a fully qualified name, that is, <codeph>CREATE FUNCTION
+ <varname>db_name</varname>.<varname>function_name</varname></codeph>.
+ </p>
+
+ <p>
+ As you update the UDF code and redeploy updated versions of a shared library, use <codeph>DROP
+ FUNCTION</codeph> and <codeph>CREATE FUNCTION</codeph> to let Impala pick up the latest version of the
+ code.
+ </p>
+
+ <note>
+ <p conref="../shared/impala_common.xml#common/udf_persistence_restriction"/>
+ </note>
+
+ <p>
+ Prerequisites for the build environment are:
+ </p>
+
+<codeblock># Use the appropriate package installation command for your Linux distribution.
+sudo yum install gcc-c++ cmake boost-devel
+sudo yum install impala-udf-devel</codeblock>
+
+ <p>
+ Then, unpack the sample code in <filepath>udf_samples.tar.gz</filepath> and use that as a template to set
+ up your build environment.
+ </p>
+
+ <p>
+ To build the original samples:
+ </p>
+
+<codeblock># Process CMakeLists.txt and set up appropriate Makefiles.
+cmake .
+# Generate shared libraries from UDF and UDAF sample code,
+# udf_samples/libudfsample.so and udf_samples/libudasample.so
+make</codeblock>
+
+ <p>
+ The sample code to examine, experiment with, and adapt is in these files:
+ </p>
+
+ <ul>
+ <li>
+ <filepath>udf-sample.h</filepath>: Header file that declares the signature for a scalar UDF
+ (<codeph>AddUDF</codeph>).
+ </li>
+
+ <li>
+ <filepath>udf-sample.cc</filepath>: Sample source for a simple UDF that adds two integers. Because
+ Impala can reference multiple function entry points from the same shared library, you could add other UDF
+ functions in this file and add their signatures to the corresponding header file.
+ </li>
+
+ <li>
+ <filepath>udf-sample-test.cc</filepath>: Basic unit tests for the sample UDF.
+ </li>
+
+ <li>
+ <filepath>uda-sample.h</filepath>: Header file that declares the signature for sample aggregate
+ functions. The SQL functions will be called <codeph>COUNT</codeph>, <codeph>AVG</codeph>, and
+ <codeph>STRINGCONCAT</codeph>. Because aggregate functions require more elaborate coding to handle the
+ processing for multiple phases, there are several underlying C++ functions such as
+ <codeph>CountInit</codeph>, <codeph>AvgUpdate</codeph>, and <codeph>StringConcatFinalize</codeph>.
+ </li>
+
+ <li>
+ <filepath>uda-sample.cc</filepath>: Sample source for simple UDAFs that demonstrate how to manage the
+ state transitions as the underlying functions are called during the different phases of query processing.
+ <ul>
+ <li>
+ The UDAF that imitates the <codeph>COUNT</codeph> function keeps track of a single incrementing
+ number; the merge functions combine the intermediate count values from each Impala node, and the
+ combined number is returned verbatim by the finalize function.
+ </li>
+
+ <li>
+ The UDAF that imitates the <codeph>AVG</codeph> function keeps track of two numbers, a count of rows
+ processed and the sum of values for a column. These numbers are updated and merged as with
+ <codeph>COUNT</codeph>, then the finalize function divides them to produce and return the final
+ average value.
+ </li>
+
+ <li>
+ The UDAF that concatenates string values into a comma-separated list demonstrates how to manage
+ storage for a string that increases in length as the function is called for multiple rows.
+ </li>
+ </ul>
+ </li>
+
+ <li>
+ <filepath>uda-sample-test.cc</filepath>: Basic unit tests for the sample UDAFs.
+ </li>
+ </ul>
+ </conbody>
+ </concept>
+
+ <concept id="udf_performance">
+
+ <title>Performance Considerations for UDFs</title>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Performance"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ Because a UDF typically processes each row of a table, potentially being called billions of times, the
+ performance of each UDF is a critical factor in the speed of the overall ETL or ELT pipeline. Tiny
+ optimizations you can make within the function body can pay off in a big way when the function is called
+ over and over when processing a huge result set.
+ </p>
+ </conbody>
+ </concept>
+
+ <concept id="udf_tutorial">
+
+ <title>Examples of Creating and Using UDFs</title>
+
+ <conbody>
+
+ <p>
+ This section demonstrates how to create and use all kinds of user-defined functions (UDFs).
+ </p>
+
+ <p>
+ For downloadable examples that you can experiment with, adapt, and use as templates for your own functions,
+ see <xref href="https://github.com/cloudera/impala-udf-samples" scope="external" format="html">the Cloudera
+ sample UDF github</xref>. You must have already installed the appropriate header files, as explained in
+ <xref href="impala_udf.xml#udf_demo_env"/>.
+ </p>
+
+<!-- Limitation: mini-TOC currently doesn't include the <example> tags. -->
+
+<!-- <p outputclass="toc inpage"/> -->
+
+ <example id="udf_sample_udf">
+
+ <title>Sample C++ UDFs: HasVowels, CountVowels, StripVowels</title>
+
+ <p>
+ This example shows 3 separate UDFs that operate on strings and return different data types. In the C++
+ code, the functions are <codeph>HasVowels()</codeph> (checks if a string contains any vowels),
+ <codeph>CountVowels()</codeph> (returns the number of vowels in a string), and
+ <codeph>StripVowels()</codeph> (returns a new string with vowels removed).
+ </p>
+
+ <p>
+ First, we add the signatures for these functions to <filepath>udf-sample.h</filepath> in the demo build
+ environment:
+ </p>
+
+<codeblock>BooleanVal HasVowels(FunctionContext* context, const StringVal& input);
+IntVal CountVowels(FunctionContext* context, const StringVal& arg1);
+StringVal StripVowels(FunctionContext* context, const StringVal& arg1);</codeblock>
+
+ <p>
+ Then, we add the bodies of these functions to <filepath>udf-sample.cc</filepath>:
+ </p>
+
+<codeblock>BooleanVal HasVowels(FunctionContext* context, const StringVal& input)
+{
+ if (input.is_null) return BooleanVal::null();
+
+ int index;
+ uint8_t *ptr;
+
+ for (ptr = input.ptr, index = 0; index <= input.len; index++, ptr++)
+ {
+ uint8_t c = tolower(*ptr);
+ if (c == 'a' || c == 'e' || c == 'i' || c == 'o' || c == 'u')
+ {
+ return BooleanVal(true);
+ }
+ }
+ return BooleanVal(false);
+}
+
+IntVal CountVowels(FunctionContext* context, const StringVal& arg1)
+{
+ if (arg1.is_null) return IntVal::null();
+
+ int count;
+ int index;
+ uint8_t *ptr;
+
+ for (ptr = arg1.ptr, count = 0, index = 0; index <= arg1.len; index++, ptr++)
+ {
+ uint8_t c = tolower(*ptr);
+ if (c == 'a' || c == 'e' || c == 'i' || c == 'o' || c == 'u')
+ {
+ count++;
+ }
+ }
+ return IntVal(count);
+}
+
+StringVal StripVowels(FunctionContext* context, const StringVal& arg1)
+{
+ if (arg1.is_null) return StringVal::null();
+
+ int index;
+ std::string original((const char *)arg1.ptr,arg1.len);
+ std::string shorter("");
+
+ for (index = 0; index < original.length(); index++)
+ {
+ uint8_t c = original[index];
+ uint8_t l = tolower(c);
+
+ if (l == 'a' || l == 'e' || l == 'i' || l == 'o' || l == 'u')
+ {
+ ;
+ }
+ else
+ {
+ shorter.append(1, (char)c);
+ }
+ }
+// The modified string is stored in 'shorter', which is destroyed when this function ends. We need to make a string val
+// and copy the contents.
+ StringVal result(context, shorter.size()); // Only the version of the ctor that takes a context object allocates new memory
+ memcpy(result.ptr, shorter.c_str(), shorter.size());
+ return result;
+}</codeblock>
+
+ <p>
+ We build a shared library, <filepath>libudfsample.so</filepath>, and put the library file into HDFS
+ where Impala can read it:
+ </p>
+
+<codeblock>$ make
+[ 0%] Generating udf_samples/uda-sample.ll
+[ 16%] Built target uda-sample-ir
+[ 33%] Built target udasample
+[ 50%] Built target uda-sample-test
+[ 50%] Generating udf_samples/udf-sample.ll
+[ 66%] Built target udf-sample-ir
+Scanning dependencies of target udfsample
+[ 83%] Building CXX object CMakeFiles/udfsample.dir/udf-sample.o
+Linking CXX shared library udf_samples/libudfsample.so
+[ 83%] Built target udfsample
+Linking CXX executable udf_samples/udf-sample-test
+[100%] Built target udf-sample-test
+$ hdfs dfs -put ./udf_samples/libudfsample.so /user/hive/udfs/libudfsample.so</codeblock>
+
+ <p>
+ Finally, we go into the <cmdname>impala-shell</cmdname> interpreter where we set up some sample data,
+ issue <codeph>CREATE FUNCTION</codeph> statements to set up the SQL function names, and call the
+ functions in some queries:
+ </p>
+
+<codeblock>[localhost:21000] > create database udf_testing;
+[localhost:21000] > use udf_testing;
+
+[localhost:21000] > create function has_vowels (string) returns boolean location '/user/hive/udfs/libudfsample.so' symbol='HasVowels';
+[localhost:21000] > select has_vowels('abc');
++------------------------+
+| udfs.has_vowels('abc') |
++------------------------+
+| true |
++------------------------+
+Returned 1 row(s) in 0.13s
+[localhost:21000] > select has_vowels('zxcvbnm');
++----------------------------+
+| udfs.has_vowels('zxcvbnm') |
++----------------------------+
+| false |
++----------------------------+
+Returned 1 row(s) in 0.12s
+[localhost:21000] > select has_vowels(null);
++-----------------------+
+| udfs.has_vowels(null) |
++-----------------------+
+| NULL |
++-----------------------+
+Returned 1 row(s) in 0.11s
+[localhost:21000] > select s, has_vowels(s) from t2;
++-----------+--------------------+
+| s | udfs.has_vowels(s) |
++-----------+--------------------+
+| lower | true |
+| UPPER | true |
+| Init cap | true |
+| CamelCase | true |
++-----------+--------------------+
+Returned 4 row(s) in 0.24s
+
+[localhost:21000] > create function count_vowels (string) returns int location '/user/hive/udfs/libudfsample.so' symbol='CountVowels';
+[localhost:21000] > select count_vowels('cat in the hat');
++-------------------------------------+
+| udfs.count_vowels('cat in the hat') |
++-------------------------------------+
+| 4 |
++-------------------------------------+
+Returned 1 row(s) in 0.12s
+[localhost:21000] > select s, count_vowels(s) from t2;
++-----------+----------------------+
+| s | udfs.count_vowels(s) |
++-----------+----------------------+
+| lower | 2 |
+| UPPER | 2 |
+| Init cap | 3 |
+| CamelCase | 4 |
++-----------+----------------------+
+Returned 4 row(s) in 0.23s
+[localhost:21000] > select count_vowels(null);
++-------------------------+
+| udfs.count_vowels(null) |
++-------------------------+
+| NULL |
++-------------------------+
+Returned 1 row(s) in 0.12s
+
+[localhost:21000] > create function strip_vowels (string) returns string location '/user/hive/udfs/libudfsample.so' symbol='StripVowels';
+[localhost:21000] > select strip_vowels('abcdefg');
++------------------------------+
+| udfs.strip_vowels('abcdefg') |
++------------------------------+
+| bcdfg |
++------------------------------+
+Returned 1 row(s) in 0.11s
+[localhost:21000] > select strip_vowels('ABCDEFG');
++------------------------------+
+| udfs.strip_vowels('ABCDEFG') |
++------------------------------+
+| BCDFG |
++------------------------------+
+Returned 1 row(s) in 0.12s
+[localhost:21000] > select strip_vowels(null);
++-------------------------+
+| udfs.strip_vowels(null) |
++-------------------------+
+| NULL |
++-------------------------+
+Returned 1 row(s) in 0.16s
+[localhost:21000] > select s, strip_vowels(s) from t2;
++-----------+----------------------+
+| s | udfs.strip_vowels(s) |
++-----------+----------------------+
+| lower | lwr |
+| UPPER | PPR |
+| Init cap | nt cp |
+| CamelCase | CmlCs |
++-----------+----------------------+
+Returned 4 row(s) in 0.24s</codeblock>
+
+ </example>
+
+ <example id="udf_sample_uda">
+
+ <title>Sample C++ UDA: SumOfSquares</title>
+
+ <p>
+ This example demonstrates a user-defined aggregate function (UDA) that produces the sum of the squares of
+ its input values.
+ </p>
+
+ <p>
+ The coding for a UDA is a little more involved than a scalar UDF, because the processing is split into
+ several phases, each implemented by a different function. Each phase is relatively straightforward: the
+ <q>update</q> and <q>merge</q> phases, where most of the work is done, read an input value and combine it
+ with some accumulated intermediate value.
+ </p>
+
+ <p>
+ As in our sample UDF from the previous example, we add function signatures to a header file (in this
+ case, <filepath>uda-sample.h</filepath>). Because this is a math-oriented UDA, we make two versions of
+ each function, one accepting an integer value and the other accepting a floating-point value.
+ </p>
+
+<codeblock>void SumOfSquaresInit(FunctionContext* context, BigIntVal* val);
+void SumOfSquaresInit(FunctionContext* context, DoubleVal* val);
+
+void SumOfSquaresUpdate(FunctionContext* context, const BigIntVal& input, BigIntVal* val);
+void SumOfSquaresUpdate(FunctionContext* context, const DoubleVal& input, DoubleVal* val);
+
+void SumOfSquaresMerge(FunctionContext* context, const BigIntVal& src, BigIntVal* dst);
+void SumOfSquaresMerge(FunctionContext* context, const DoubleVal& src, DoubleVal* dst);
+
+BigIntVal SumOfSquaresFinalize(FunctionContext* context, const BigIntVal& val);
+DoubleVal SumOfSquaresFinalize(FunctionContext* context, const DoubleVal& val);</codeblock>
+
+ <p>
+ We add the function bodies to a C++ source file (in this case, <filepath>uda-sample.cc</filepath>):
+ </p>
+
+<codeblock>void SumOfSquaresInit(FunctionContext* context, BigIntVal* val) {
+ val->is_null = false;
+ val->val = 0;
+}
+void SumOfSquaresInit(FunctionContext* context, DoubleVal* val) {
+ val->is_null = false;
+ val->val = 0.0;
+}
+
+void SumOfSquaresUpdate(FunctionContext* context, const BigIntVal& input, BigIntVal* val) {
+ if (input.is_null) return;
+ val->val += input.val * input.val;
+}
+void SumOfSquaresUpdate(FunctionContext* context, const DoubleVal& input, DoubleVal* val) {
+ if (input.is_null) return;
+ val->val += input.val * input.val;
+}
+
+void SumOfSquaresMerge(FunctionContext* context, const BigIntVal& src, BigIntVal* dst) {
+ dst->val += src.val;
+}
+void SumOfSquaresMerge(FunctionContext* context, const DoubleVal& src, DoubleVal* dst) {
+ dst->val += src.val;
+}
+
+BigIntVal SumOfSquaresFinalize(FunctionContext* context, const BigIntVal& val) {
+ return val;
+}
+DoubleVal SumOfSquaresFinalize(FunctionContext* context, const DoubleVal& val) {
+ return val;
+}</codeblock>
+
+ <p>
+ As with the sample UDF, we build a shared library and put it into HDFS:
+ </p>
+
+<codeblock>$ make
+[ 0%] Generating udf_samples/uda-sample.ll
+[ 16%] Built target uda-sample-ir
+Scanning dependencies of target udasample
+[ 33%] Building CXX object CMakeFiles/udasample.dir/uda-sample.o
+Linking CXX shared library udf_samples/libudasample.so
+[ 33%] Built target udasample
+Scanning dependencies of target uda-sample-test
+[ 50%] Building CXX object CMakeFiles/uda-sample-test.dir/uda-sample-test.o
+Linking CXX executable udf_samples/uda-sample-test
+[ 50%] Built target uda-sample-test
+[ 50%] Generating udf_samples/udf-sample.ll
+[ 66%] Built target udf-sample-ir
+[ 83%] Built target udfsample
+[100%] Built target udf-sample-test
+$ hdfs dfs -put ./udf_samples/libudasample.so /user/hive/udfs/libudasample.so</codeblock>
+
+ <p>
+ To create the SQL function, we issue a <codeph>CREATE AGGREGATE FUNCTION</codeph> statement and specify
+ the underlying C++ function names for the different phases:
+ </p>
+
+<codeblock>[localhost:21000] > use udf_testing;
+
+[localhost:21000] > create table sos (x bigint, y double);
+[localhost:21000] > insert into sos values (1, 1.1), (2, 2.2), (3, 3.3), (4, 4.4);
+Inserted 4 rows in 1.10s
+
+[localhost:21000] > create aggregate function sum_of_squares(bigint) returns bigint
+ > location '/user/hive/udfs/libudasample.so'
+ > init_fn='SumOfSquaresInit'
+ > update_fn='SumOfSquaresUpdate'
+ > merge_fn='SumOfSquaresMerge'
+ > finalize_fn='SumOfSquaresFinalize';
+
+[localhost:21000] > -- Compute the same value using literals or the UDA;
+[localhost:21000] > select 1*1 + 2*2 + 3*3 + 4*4;
++-------------------------------+
+| 1 * 1 + 2 * 2 + 3 * 3 + 4 * 4 |
++-------------------------------+
+| 30 |
++-------------------------------+
+Returned 1 row(s) in 0.12s
+[localhost:21000] > select sum_of_squares(x) from sos;
++------------------------+
+| udfs.sum_of_squares(x) |
++------------------------+
+| 30 |
++------------------------+
+Returned 1 row(s) in 0.35s</codeblock>
+
+ <p>
+ Until we create the overloaded version of the UDA, it can only handle a single data type. To allow it to
+ handle <codeph>DOUBLE</codeph> as well as <codeph>BIGINT</codeph>, we issue another <codeph>CREATE
+ AGGREGATE FUNCTION</codeph> statement:
+ </p>
+
+<codeblock>[localhost:21000] > select sum_of_squares(y) from sos;
+ERROR: AnalysisException: No matching function with signature: udfs.sum_of_squares(DOUBLE).
+
+[localhost:21000] > create aggregate function sum_of_squares(double) returns double
+ > location '/user/hive/udfs/libudasample.so'
+ > init_fn='SumOfSquaresInit'
+ > update_fn='SumOfSquaresUpdate'
+ > merge_fn='SumOfSquaresMerge'
+ > finalize_fn='SumOfSquaresFinalize';
+
+[localhost:21000] > -- Compute the same value using literals or the UDA;
+[localhost:21000] > select 1.1*1.1 + 2.2*2.2 + 3.3*3.3 + 4.4*4.4;
++-----------------------------------------------+
+| 1.1 * 1.1 + 2.2 * 2.2 + 3.3 * 3.3 + 4.4 * 4.4 |
++-----------------------------------------------+
+| 36.3 |
++-----------------------------------------------+
+Returned 1 row(s) in 0.12s
+[localhost:21000] > select sum_of_squares(y) from sos;
++------------------------+
+| udfs.sum_of_squares(y) |
++------------------------+
+| 36.3 |
++------------------------+
+Returned 1 row(s) in 0.35s</codeblock>
+
+ <p>
+ Typically, you use a UDA in queries with <codeph>GROUP BY</codeph> clauses, to produce a result set with
+ a separate aggregate value for each combination of values from the <codeph>GROUP BY</codeph> clause.
+ Let's change our sample table to use <codeph>0</codeph> to indicate rows containing even values, and
+ <codeph>1</codeph> to flag rows containing odd values. Then the <codeph>GROUP BY</codeph> query can
+ return two values, the sum of the squares for the even values, and the sum of the squares for the odd
+ values:
+ </p>
+
+<codeblock>[localhost:21000] > insert overwrite sos values (1, 1), (2, 0), (3, 1), (4, 0);
+Inserted 4 rows in 1.24s
+
+[localhost:21000] > -- Compute 1 squared + 3 squared, and 2 squared + 4 squared;
+[localhost:21000] > select y, sum_of_squares(x) from sos group by y;
++---+------------------------+
+| y | udfs.sum_of_squares(x) |
++---+------------------------+
+| 1 | 10 |
+| 0 | 20 |
++---+------------------------+
+Returned 2 row(s) in 0.43s</codeblock>
+
+ </example>
+ </conbody>
+ </concept>
+
+ <concept id="udf_security">
+
+ <title>Security Considerations for User-Defined Functions</title>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Security"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ When the Impala authorization feature is enabled:
+ </p>
+
+ <ul>
+ <li>
+ To call a UDF in a query, you must have the required read privilege for any databases and tables used in
+ the query.
+ </li>
+
+ <li>
+ Because incorrectly coded UDFs could cause performance or capacity problems, for example by going into
+ infinite loops or allocating excessive amounts of memory, only an administrative user can create UDFs.
+ That is, to execute the <codeph>CREATE FUNCTION</codeph> statement requires the <codeph>ALL</codeph>
+ privilege on the server.
+ </li>
+ </ul>
+
+ <p>
+ See <xref href="impala_authorization.xml#authorization"/> for details about authorization in Impala.
+ </p>
+ </conbody>
+ </concept>
+
+ <concept id="udf_limits">
+
+ <title>Limitations and Restrictions for Impala UDFs</title>
+
+ <conbody>
+
+ <p>
+ The following limitations and restrictions apply to Impala UDFs in the current release:
+ </p>
+
+ <ul>
+ <li>
+ Impala does not support Hive UDFs that accept or return composite or nested types, or other types not
+ available in Impala tables.
+ </li>
+
+ <li>
+ All Impala UDFs must be deterministic, that is, produce the same output each time when passed the same
+ argument values. For example, an Impala UDF must not call functions such as <codeph>rand()</codeph> to
+ produce different values for each invocation. It must not retrieve data from external sources, such as
+ from disk or over the network.
+ </li>
+
+ <li>
+ An Impala UDF must not spawn other threads or processes.
+ </li>
+
+ <li>
+ When the <cmdname>catalogd</cmdname> process is restarted, all UDFs become undefined and must be
+ reloaded.
+ </li>
+
+ <li>
+ Impala currently does not support user-defined table functions (UDTFs).
+ </li>
+
+ <li rev="2.0.0">
+ The <codeph>CHAR</codeph> and <codeph>VARCHAR</codeph> types cannot be used as input arguments or return
+ values for UDFs.
+ </li>
+ </ul>
+ </conbody>
+ </concept>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_union.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_union.xml b/docs/topics/impala_union.xml
new file mode 100644
index 0000000..29a0b45
--- /dev/null
+++ b/docs/topics/impala_union.xml
@@ -0,0 +1,150 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="union">
+
+ <title>UNION Clause</title>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Querying"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+      The <codeph>UNION</codeph> clause lets you combine the result sets of multiple queries. By default, the
+      result sets are combined as if the <codeph>DISTINCT</codeph> operator had been applied.
+<!--
+Because duplicate elimination can be a memory-intensive process, the more useful
+variation for most Impala queries is <codeph>UNION ALL</codeph>, which returns
+all results from both queries, even if there are duplicates.
+-->
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock><varname>query_1</varname> UNION [DISTINCT | ALL] <varname>query_2</varname></codeblock>
+
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+
+ <p>
+ The <codeph>UNION</codeph> keyword by itself is the same as <codeph>UNION DISTINCT</codeph>. Because
+ eliminating duplicates can be a memory-intensive process for a large result set, prefer <codeph>UNION
+ ALL</codeph> where practical. (That is, when you know the different queries in the union will not produce any
+ duplicates, or where the duplicate values are acceptable.)
+ </p>
+
+ <p rev="obwl">
+ When an <codeph>ORDER BY</codeph> clause applies to a <codeph>UNION ALL</codeph> or <codeph>UNION</codeph>
+ query, in Impala 1.4 and higher, the <codeph>LIMIT</codeph> clause is no longer required. To make the
+ <codeph>ORDER BY</codeph> and <codeph>LIMIT</codeph> clauses apply to the entire result set, turn the
+ <codeph>UNION</codeph> query into a subquery, <codeph>SELECT</codeph> from the subquery, and put the
+ <codeph>ORDER BY</codeph> clause at the end, outside the subquery.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+ <p>
+ First, we set up some sample data, including duplicate <codeph>1</codeph> values.
+ </p>
+
+<codeblock rev="obwl">[localhost:21000] > create table few_ints (x int);
+[localhost:21000] > insert into few_ints values (1), (1), (2), (3);
+[localhost:21000] > set default_order_by_limit=1000;</codeblock>
+
+ <p>
+ This example shows how <codeph>UNION ALL</codeph> returns all rows from both queries, without any additional
+ filtering to eliminate duplicates. For the large result sets common with Impala queries, this is the most
+ memory-efficient technique.
+ </p>
+
+<codeblock>[localhost:21000] > select x from few_ints order by x;
++---+
+| x |
++---+
+| 1 |
+| 1 |
+| 2 |
+| 3 |
++---+
+Returned 4 row(s) in 0.41s
+[localhost:21000] > select x from few_ints union all select x from few_ints;
++---+
+| x |
++---+
+| 1 |
+| 1 |
+| 2 |
+| 3 |
+| 1 |
+| 1 |
+| 2 |
+| 3 |
++---+
+Returned 8 row(s) in 0.42s
+[localhost:21000] > select * from (select x from few_ints union all select x from few_ints) as t1 order by x;
++---+
+| x |
++---+
+| 1 |
+| 1 |
+| 1 |
+| 1 |
+| 2 |
+| 2 |
+| 3 |
+| 3 |
++---+
+Returned 8 row(s) in 0.53s
+[localhost:21000] > select x from few_ints union all select 10;
++----+
+| x |
++----+
+| 10 |
+| 1 |
+| 1 |
+| 2 |
+| 3 |
++----+
+Returned 5 row(s) in 0.38s</codeblock>
+
+ <p>
+ This example shows how the <codeph>UNION</codeph> clause without the <codeph>ALL</codeph> keyword condenses
+ the result set to eliminate all duplicate values, making the query take more time and potentially more
+ memory. The extra processing typically makes this technique not recommended for queries that return result
+ sets with millions or billions of values.
+ </p>
+
+<codeblock>[localhost:21000] > select x from few_ints union select x+1 from few_ints;
++---+
+| x |
++---+
+| 3 |
+| 4 |
+| 1 |
+| 2 |
++---+
+Returned 4 row(s) in 0.51s
+[localhost:21000] > select x from few_ints union select 10;
++----+
+| x |
++----+
+| 2 |
+| 10 |
+| 1 |
+| 3 |
++----+
+Returned 4 row(s) in 0.49s
+[localhost:21000] > select * from (select x from few_ints union select x from few_ints) as t1 order by x;
++---+
+| x |
++---+
+| 1 |
+| 2 |
+| 3 |
++---+
+Returned 3 row(s) in 0.53s</codeblock>
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_update.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_update.xml b/docs/topics/impala_update.xml
new file mode 100644
index 0000000..3b9e330
--- /dev/null
+++ b/docs/topics/impala_update.xml
@@ -0,0 +1,64 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="update">
+
+ <title>UPDATE Statement (CDH 5.5 and higher only)</title>
+ <titlealts><navtitle>UPDATE</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Kudu"/>
+ <data name="Category" value="ETL"/>
+ <data name="Category" value="Ingest"/>
+ <data name="Category" value="DML"/>
+ <data name="Category" value="Data Analysts"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+    <p>
+      <indexterm audience="Cloudera">UPDATE statement</indexterm>
+      Updates one or more rows in a Kudu table.
+      Although updating a single row or a range of rows would be inefficient for tables using HDFS
+      data files, Kudu is able to perform this operation efficiently. Therefore, this statement
+      only works for Impala tables that use the Kudu storage engine.
+    </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock>
+</codeblock>
+
+ <p rev="kudu" audience="impala_next">
+ Normally, an <codeph>UPDATE</codeph> operation for a Kudu table fails if
+ some partition key columns are not found, due to their being deleted or changed
+ by a concurrent <codeph>UPDATE</codeph> or <codeph>DELETE</codeph> operation.
+ Specify <codeph>UPDATE IGNORE <varname>rest_of_statement</varname></codeph> to
+ make the <codeph>UPDATE</codeph> continue in this case. The rows with the nonexistent
+ duplicate partition key column values are not changed.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/dml_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/sync_ddl_blurb"/>
+
+ <note conref="../shared/impala_common.xml#common/compute_stats_next"/>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+<codeblock>
+
+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ <xref href="impala_kudu.xml#impala_kudu"/>
+ </p>
+
+ </conbody>
+
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_use.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_use.xml b/docs/topics/impala_use.xml
new file mode 100644
index 0000000..9e0b654
--- /dev/null
+++ b/docs/topics/impala_use.xml
@@ -0,0 +1,77 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="use">
+
+ <title>USE Statement</title>
+ <titlealts><navtitle>USE</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Databases"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">USE statement</indexterm>
+ Switches the current session to a specified database. The <term>current database</term> is where any
+ <codeph>CREATE TABLE</codeph>, <codeph>INSERT</codeph>, <codeph>SELECT</codeph>, or other statements act when
+ you specify a table or other object name, without prefixing it with a database name. The new current database
+      applies for the duration of the session or until another <codeph>USE</codeph> statement is executed.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock>USE <varname>db_name</varname></codeblock>
+
+ <p>
+ By default, when you connect to an Impala instance, you begin in a database named <codeph>default</codeph>.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+
+ <p>
+ Switching the default database is convenient in the following situations:
+ </p>
+
+ <ul>
+ <li>
+ To avoid qualifying each reference to a table with the database name. For example, <codeph>SELECT * FROM t1
+ JOIN t2</codeph> rather than <codeph>SELECT * FROM db.t1 JOIN db.t2</codeph>.
+ </li>
+
+ <li>
+ To do a sequence of operations all within the same database, such as creating a table, inserting data, and
+ querying the table.
+ </li>
+ </ul>
+
+ <p>
+ To start the <cmdname>impala-shell</cmdname> interpreter and automatically issue a <codeph>USE</codeph>
+ statement for a particular database, specify the option <codeph>-d <varname>db_name</varname></codeph> for
+ the <cmdname>impala-shell</cmdname> command. The <codeph>-d</codeph> option is useful to run SQL scripts,
+ such as setup or test scripts, against multiple databases without hardcoding a <codeph>USE</codeph> statement
+ into the SQL source.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+ <p>
+ See <xref href="impala_create_database.xml#create_database"/> for examples covering <codeph>CREATE
+ DATABASE</codeph>, <codeph>USE</codeph>, and <codeph>DROP DATABASE</codeph>.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/cancel_blurb_no"/>
+
+ <p conref="../shared/impala_common.xml#common/permissions_blurb_no"/>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ <xref href="impala_create_database.xml#create_database"/>,
+ <xref href="impala_drop_database.xml#drop_database"/>, <xref href="impala_show.xml#show_databases"/>
+ </p>
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_v_cpu_cores.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_v_cpu_cores.xml b/docs/topics/impala_v_cpu_cores.xml
new file mode 100644
index 0000000..41be3af
--- /dev/null
+++ b/docs/topics/impala_v_cpu_cores.xml
@@ -0,0 +1,37 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept rev="1.2" id="v_cpu_cores">
+
+ <title>V_CPU_CORES Query Option (CDH 5 only)</title>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="Resource Management"/>
+ <data name="Category" value="YARN"/>
+ <data name="Category" value="Llama"/>
+ <data name="Category" value="Impala Query Options"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">V_CPU_CORES query option</indexterm>
+ The number of per-host virtual CPU cores to request from YARN. If set, the query option overrides the
+ automatic estimate from Impala.
+<!-- This sentence is used in a few places and could be conref'ed. -->
+ Used in conjunction with the Impala resource management feature in Impala 1.2 and higher and CDH 5.
+ </p>
+
+ <p>
+ <b>Type:</b> numeric
+ </p>
+
+ <p>
+ <b>Default:</b> 0 (use automatic estimates)
+ </p>
+
+<!-- Worth adding a couple of related info links here. -->
+
+ </conbody>
+</concept>
[13/22] incubator-impala git commit: First try at porting over the
source files necessary for the Impala SQL Reference.
Posted by jr...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_decimal.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_decimal.xml b/docs/topics/impala_decimal.xml
new file mode 100644
index 0000000..c0c98d9
--- /dev/null
+++ b/docs/topics/impala_decimal.xml
@@ -0,0 +1,836 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept rev="1.4.0" id="decimal">
+
+ <title>DECIMAL Data Type (CDH 5.1 or higher only)</title>
+ <titlealts><navtitle>DECIMAL (CDH 5.1 or higher only)</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="Impala Data Types"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Data Analysts"/>
+ <data name="Category" value="Developers"/>
+ <data name="Category" value="Schemas"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ A numeric data type with fixed scale and precision, used in <codeph>CREATE TABLE</codeph> and <codeph>ALTER
+ TABLE</codeph> statements. Suitable for financial and other arithmetic calculations where the imprecise
+ representation and rounding behavior of <codeph>FLOAT</codeph> and <codeph>DOUBLE</codeph> make those types
+ impractical.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+ <p>
+ In the column definition of a <codeph>CREATE TABLE</codeph> statement:
+ </p>
+
+<codeblock><varname>column_name</varname> DECIMAL[(<varname>precision</varname>[,<varname>scale</varname>])]</codeblock>
+
+ <p>
+ <codeph>DECIMAL</codeph> with no precision or scale values is equivalent to <codeph>DECIMAL(9,0)</codeph>.
+ </p>
+
+ <p>
+ <b>Precision and Scale:</b>
+ </p>
+
+ <p>
+ <varname>precision</varname> represents the total number of digits that can be represented by the column,
+ regardless of the location of the decimal point. This value must be between 1 and 38. For example,
+ representing integer values up to 9999, and floating-point values up to 99.99, both require a precision of 4.
+ You can also represent corresponding negative values, without any change in the precision. For example, the
+ range -9999 to 9999 still only requires a precision of 4.
+ </p>
+
+ <p>
+ <varname>scale</varname> represents the number of fractional digits. This value must be less than or equal to
+ <varname>precision</varname>. A scale of 0 produces integral values, with no fractional part. If precision
+ and scale are equal, all the digits come after the decimal point, making all the values between 0 and
+ 0.999... or 0 and -0.999...
+ </p>
+
+ <p>
+ When <varname>precision</varname> and <varname>scale</varname> are omitted, a <codeph>DECIMAL</codeph> value
+ is treated as <codeph>DECIMAL(9,0)</codeph>, that is, an integer value ranging from
+ <codeph>-999,999,999</codeph> to <codeph>999,999,999</codeph>. This is the largest <codeph>DECIMAL</codeph>
+ value that can still be represented in 4 bytes. If precision is specified but scale is omitted, Impala uses a
+ value of zero for the scale.
+ </p>
+
+ <p>
+ Both <varname>precision</varname> and <varname>scale</varname> must be specified as integer literals, not any
+ other kind of constant expressions.
+ </p>
+
+ <p>
+ To check the precision or scale for arbitrary values, you can call the
+ <xref href="impala_math_functions.xml#math_functions"><codeph>precision()</codeph> and
+ <codeph>scale()</codeph> built-in functions</xref>. For example, you might use these values to figure out how
+ many characters are required for various fields in a report, or to understand the rounding characteristics of
+ a formula as applied to a particular <codeph>DECIMAL</codeph> column.
+ </p>
+
+ <p>
+ <b>Range:</b>
+ </p>
+
+ <p>
+ The maximum precision value is 38. Thus, the largest integral value is represented by
+ <codeph>DECIMAL(38,0)</codeph> (999... with 9 repeated 38 times). The most precise fractional value (between
+ 0 and 1, or 0 and -1) is represented by <codeph>DECIMAL(38,38)</codeph>, with 38 digits to the right of the
+ decimal point. The value closest to 0 would be .0000...1 (37 zeros and the final 1). The value closest to 1
+ would be .999... (9 repeated 38 times).
+ </p>
+
+ <p>
+ For a given precision and scale, the range of <codeph>DECIMAL</codeph> values is the same in the positive and
+ negative directions. For example, <codeph>DECIMAL(4,2)</codeph> can represent from -99.99 to 99.99. This is
+ different from other integral numeric types where the positive and negative bounds differ slightly.
+ </p>
+
+ <p>
+ When you use <codeph>DECIMAL</codeph> values in arithmetic expressions, the precision and scale of the result
+ value are determined as follows:
+ </p>
+
+ <ul>
+ <li>
+ <p>
+ For addition and subtraction, the precision and scale are based on the maximum possible result, that is,
+ if all the digits of the input values were 9s and the absolute values were added together.
+ </p>
+<!-- Seems like buggy output from this first query, so hiding the example for the time being. -->
+<codeblock audience="Cloudera">[localhost:21000] > select 50000.5 + 12.444, precision(50000.5 + 12.444), scale(50000.5 + 12.444);
++------------------+-----------------------------+-------------------------+
+| 50000.5 + 12.444 | precision(50000.5 + 12.444) | scale(50000.5 + 12.444) |
++------------------+-----------------------------+-------------------------+
+| 50012.944 | 9 | 3 |
++------------------+-----------------------------+-------------------------+
+[localhost:21000] > select 99999.9 + 99.999, precision(99999.9 + 99.999), scale(99999.9 + 99.999);
++------------------+-----------------------------+-------------------------+
+| 99999.9 + 99.999 | precision(99999.9 + 99.999) | scale(99999.9 + 99.999) |
++------------------+-----------------------------+-------------------------+
+| 100099.899 | 9 | 3 |
++------------------+-----------------------------+-------------------------+
+</codeblock>
+ </li>
+
+ <li>
+ <p>
+ For multiplication, the precision is the sum of the precisions of the input values. The scale is the sum
+ of the scales of the input values.
+ </p>
+ </li>
+
+<!-- Need to add some specifics to discussion of division. Details here: http://blogs.msdn.com/b/sqlprogrammability/archive/2006/03/29/564110.aspx -->
+
+ <li>
+ <p>
+ For division, Impala sets the precision and scale to values large enough to represent the whole and
+ fractional parts of the result.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ For <codeph>UNION</codeph>, the scale is the larger of the scales of the input values, and the precision
+ is increased if necessary to accommodate any additional fractional digits. If the same input value has
+ the largest precision and the largest scale, the result value has the same precision and scale. If one
+ value has a larger precision but smaller scale, the scale of the result value is increased. For example,
+ <codeph>DECIMAL(20,2) UNION DECIMAL(8,6)</codeph> produces a result of type
+ <codeph>DECIMAL(24,6)</codeph>. The extra 4 fractional digits of scale (6-2) are accommodated by
+ extending the precision by the same amount (20+4).
+ </p>
+ </li>
+
+ <li>
+ <p>
+ To doublecheck, you can always call the <codeph>PRECISION()</codeph> and <codeph>SCALE()</codeph>
+ functions on the results of an arithmetic expression to see the relevant values, or use a <codeph>CREATE
+ TABLE AS SELECT</codeph> statement to define a column based on the return type of the expression.
+ </p>
+ </li>
+ </ul>
+
+ <p conref="../shared/impala_common.xml#common/compatibility_blurb"/>
+
+ <ul>
+ <li>
+ Using the <codeph>DECIMAL</codeph> type is only supported under CDH 5.1.0 and higher.
+<!--
+ Although Impala-created tables containing <codeph>DECIMAL</codeph> columns are
+ readable in CDH 5.1, <codeph>DECIMAL</codeph> data is not interoperable with
+ other Hadoop components in CDH 4, and some Impala operations such as
+ <codeph>COMPUTE STATS</codeph> are not possible on such tables in CDH 4.
+ If you create a Parquet table with a <codeph>DECIMAL</codeph>
+ column under CDH 4, Impala issues a warning because the data files might not be readable from other CDH 4 components.
+-->
+ </li>
+
+<!--
+ <li>
+ The <codeph>DECIMAL</codeph> data type is a relatively new addition to the
+ Parquet file format. To read Impala-created Parquet files containing
+ <codeph>DECIMAL</codeph> columns from another Hadoop component such as
+ MapReduce, Pig, or Hive, use CDH 5.1 or higher, or the equivalent levels of the relevant components and Parquet
+ JARs from CDH 5.1.
+ If you create a Parquet table with a <codeph>DECIMAL</codeph>
+ column under CDH 4, Impala issues a warning because the data files might not be readable from other CDH 4 components.
+ </li>
+
+ <li>
+ In particular, Impala-created tables with <codeph>DECIMAL</codeph> columns are
+ not readable by Hive under CDH 4.
+ </li>
+-->
+
+ <li>
+ Use the <codeph>DECIMAL</codeph> data type in Impala for applications where you used the
+ <codeph>NUMBER</codeph> data type in Oracle. The Impala <codeph>DECIMAL</codeph> type does not support the
+ Oracle idioms of <codeph>*</codeph> for scale or negative values for precision.
+ </li>
+ </ul>
+
+ <p>
+ <b>Conversions and casting:</b>
+ </p>
+
+ <p>
+ <ph conref="../shared/impala_common.xml#common/cast_int_to_timestamp"/>
+ </p>
+
+ <p>
+ Impala automatically converts between <codeph>DECIMAL</codeph> and other numeric types where possible. A
+ <codeph>DECIMAL</codeph> with zero scale is converted to or from the smallest appropriate integral type. A
+ <codeph>DECIMAL</codeph> with a fractional part is automatically converted to or from the smallest
+ appropriate floating-point type. If the destination type does not have sufficient precision or scale to hold
+ all possible values of the source type, Impala raises an error and does not convert the value.
+ </p>
+
+ <p>
+ For example, these statements show how expressions of <codeph>DECIMAL</codeph> and other types are reconciled
+ to the same type in the context of <codeph>UNION</codeph> queries and <codeph>INSERT</codeph> statements:
+ </p>
+
+<codeblock>[localhost:21000] > select cast(1 as int) as x union select cast(1.5 as decimal(9,4)) as x;
++----------------+
+| x |
++----------------+
+| 1.5000 |
+| 1.0000 |
++----------------+
+[localhost:21000] > create table int_vs_decimal as select cast(1 as int) as x union select cast(1.5 as decimal(9,4)) as x;
++-------------------+
+| summary |
++-------------------+
+| Inserted 2 row(s) |
++-------------------+
+[localhost:21000] > desc int_vs_decimal;
++------+---------------+---------+
+| name | type | comment |
++------+---------------+---------+
+| x | decimal(14,4) | |
++------+---------------+---------+
+</codeblock>
+
+ <p>
+ To avoid potential conversion errors, you can use <codeph>CAST()</codeph> to convert <codeph>DECIMAL</codeph>
+ values to <codeph>FLOAT</codeph>, <codeph>TINYINT</codeph>, <codeph>SMALLINT</codeph>, <codeph>INT</codeph>,
+ <codeph>BIGINT</codeph>, <codeph>STRING</codeph>, <codeph>TIMESTAMP</codeph>, or <codeph>BOOLEAN</codeph>.
+ You can use exponential notation in <codeph>DECIMAL</codeph> literals or when casting from
+ <codeph>STRING</codeph>, for example <codeph>1.0e6</codeph> to represent one million.
+ </p>
+
+ <p>
+ If you cast a value with more fractional digits than the scale of the destination type, any extra fractional
+ digits are truncated (not rounded). Casting a value to a target type with not enough precision produces a
+ result of <codeph>NULL</codeph> and displays a runtime warning.
+ </p>
+
+<codeblock>[localhost:21000] > select cast(1.239 as decimal(3,2));
++-----------------------------+
+| cast(1.239 as decimal(3,2)) |
++-----------------------------+
+| 1.23 |
++-----------------------------+
+[localhost:21000] > select cast(1234 as decimal(3));
++----------------------------+
+| cast(1234 as decimal(3,0)) |
++----------------------------+
+| NULL |
++----------------------------+
+WARNINGS: Expression overflowed, returning NULL
+</codeblock>
+
+ <p>
+ When you specify integer literals, for example in <codeph>INSERT ... VALUES</codeph> statements or arithmetic
+ expressions, those numbers are interpreted as the smallest applicable integer type. You must use
+ <codeph>CAST()</codeph> calls for some combinations of integer literals and <codeph>DECIMAL</codeph>
+ precision. For example, <codeph>INT</codeph> has a maximum value that is 10 digits long,
+ <codeph>TINYINT</codeph> has a maximum value that is 3 digits long, and so on. If you specify a value such as
+ 123456 to go into a <codeph>DECIMAL</codeph> column, Impala checks if the column has enough precision to
+ represent the largest value of that integer type, and raises an error if not. Therefore, use an expression
+      like <codeph>CAST(123456 AS DECIMAL(9,0))</codeph> for <codeph>DECIMAL</codeph> columns with precision 9 or
+      less, <codeph>CAST(50 AS DECIMAL(2,0))</codeph> for <codeph>DECIMAL</codeph> columns with precision 2 or
+ less, and so on. For <codeph>DECIMAL</codeph> columns with precision 10 or greater, Impala automatically
+ interprets the value as the correct <codeph>DECIMAL</codeph> type; however, because
+ <codeph>DECIMAL(10)</codeph> requires 8 bytes of storage while <codeph>DECIMAL(9)</codeph> requires only 4
+ bytes, only use precision of 10 or higher when actually needed.
+ </p>
+
+<codeblock>[localhost:21000] > create table decimals_9_0 (x decimal);
+[localhost:21000] > insert into decimals_9_0 values (1), (2), (4), (8), (16), (1024), (32768), (65536), (1000000);
+ERROR: AnalysisException: Possible loss of precision for target table 'decimal_testing.decimals_9_0'.
+Expression '1' (type: INT) would need to be cast to DECIMAL(9,0) for column 'x'
+[localhost:21000] > insert into decimals_9_0 values (cast(1 as decimal)), (cast(2 as decimal)), (cast(4 as decimal)), (cast(8 as decimal)), (cast(16 as decimal)), (cast(1024 as decimal)), (cast(32768 as decimal)), (cast(65536 as decimal)), (cast(1000000 as decimal));
+
+[localhost:21000] > create table decimals_10_0 (x decimal(10,0));
+[localhost:21000] > insert into decimals_10_0 values (1), (2), (4), (8), (16), (1024), (32768), (65536), (1000000);
+[localhost:21000] >
+</codeblock>
+
+ <p>
+ Be aware that in memory and for binary file formats such as Parquet or Avro, <codeph>DECIMAL(10)</codeph> or
+ higher consumes 8 bytes while <codeph>DECIMAL(9)</codeph> (the default for <codeph>DECIMAL</codeph>) or lower
+ consumes 4 bytes. Therefore, to conserve space in large tables, use the smallest-precision
+ <codeph>DECIMAL</codeph> type that is appropriate and <codeph>CAST()</codeph> literal values where necessary,
+ rather than declaring <codeph>DECIMAL</codeph> columns with high precision for convenience.
+ </p>
+
+ <p>
+ To represent a very large or precise <codeph>DECIMAL</codeph> value as a literal, for example one that
+ contains more digits than can be represented by a <codeph>BIGINT</codeph> literal, use a quoted string or a
+ floating-point value for the number, and <codeph>CAST()</codeph> to the desired <codeph>DECIMAL</codeph>
+ type:
+ </p>
+
+<codeblock>insert into decimals_38_5 values (1), (2), (4), (8), (16), (1024), (32768), (65536), (1000000),
+ (cast("999999999999999999999999999999" as decimal(38,5))),
+ (cast(999999999999999999999999999999. as decimal(38,5)));
+</codeblock>
+
+ <ul>
+ <li>
+ <p>
+          The result of an aggregate function such as <codeph>MAX()</codeph>, <codeph>SUM()</codeph>, or
+          <codeph>AVG()</codeph> on <codeph>DECIMAL</codeph> values is promoted to a precision of 38, with the same
+          scale as the underlying column. Thus, the result can represent the largest possible value at that
+          particular scale.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ <codeph>STRING</codeph> columns, literals, or expressions can be converted to <codeph>DECIMAL</codeph> as
+ long as the overall number of digits and digits to the right of the decimal point fit within the
+ specified precision and scale for the declared <codeph>DECIMAL</codeph> type. By default, a
+ <codeph>DECIMAL</codeph> value with no specified scale or precision can hold a maximum of 9 digits of an
+ integer value. If there are more digits in the string value than are allowed by the
+ <codeph>DECIMAL</codeph> scale and precision, the result is <codeph>NULL</codeph>.
+ </p>
+ <p>
+ The following examples demonstrate how <codeph>STRING</codeph> values with integer and fractional parts
+          are represented when converted to <codeph>DECIMAL</codeph>. If the scale is 0, the number is treated
+          as an integer value with a maximum of <varname>precision</varname> digits. If the scale is greater than
+          0, the precision must be increased to account for the digits both to the left and right of the decimal point.
+          As the scale increases, output values are printed with additional trailing zeros after the decimal
+          point if needed. Any trailing zeros after the decimal point in the <codeph>STRING</codeph> value must fit
+          within the number of digits specified by the scale.
+ </p>
+<codeblock>[localhost:21000] > select cast('100' as decimal); -- Small integer value fits within 9 digits of scale.
++-----------------------------+
+| cast('100' as decimal(9,0)) |
++-----------------------------+
+| 100 |
++-----------------------------+
+[localhost:21000] > select cast('100' as decimal(3,0)); -- Small integer value fits within 3 digits of scale.
++-----------------------------+
+| cast('100' as decimal(3,0)) |
++-----------------------------+
+| 100 |
++-----------------------------+
+[localhost:21000] > select cast('100' as decimal(2,0)); -- 2 digits of scale is not enough!
++-----------------------------+
+| cast('100' as decimal(2,0)) |
++-----------------------------+
+| NULL |
++-----------------------------+
+[localhost:21000] > select cast('100' as decimal(3,1)); -- (3,1) = 2 digits left of the decimal point, 1 to the right. Not enough.
++-----------------------------+
+| cast('100' as decimal(3,1)) |
++-----------------------------+
+| NULL |
++-----------------------------+
+[localhost:21000] > select cast('100' as decimal(4,1)); -- 4 digits total, 1 to the right of the decimal point.
++-----------------------------+
+| cast('100' as decimal(4,1)) |
++-----------------------------+
+| 100.0 |
++-----------------------------+
+[localhost:21000] > select cast('98.6' as decimal(3,1)); -- (3,1) can hold a 3 digit number with 1 fractional digit.
++------------------------------+
+| cast('98.6' as decimal(3,1)) |
++------------------------------+
+| 98.6 |
++------------------------------+
+[localhost:21000] > select cast('98.6' as decimal(15,1)); -- Larger scale allows bigger numbers but still only 1 fractional digit.
++-------------------------------+
+| cast('98.6' as decimal(15,1)) |
++-------------------------------+
+| 98.6 |
++-------------------------------+
+[localhost:21000] > select cast('98.6' as decimal(15,5)); -- Larger precision allows more fractional digits, outputs trailing zeros.
++-------------------------------+
+| cast('98.6' as decimal(15,5)) |
++-------------------------------+
+| 98.60000 |
++-------------------------------+
+[localhost:21000] > select cast('98.60000' as decimal(15,1)); -- Trailing zeros in the string must fit within 'scale' digits (1 in this case).
++-----------------------------------+
+| cast('98.60000' as decimal(15,1)) |
++-----------------------------------+
+| NULL |
++-----------------------------------+
+</codeblock>
+ </li>
+
+ <li>
+ Most built-in arithmetic functions such as <codeph>SIN()</codeph> and <codeph>COS()</codeph> continue to
+        accept only <codeph>DOUBLE</codeph> values because they are so commonly used in scientific context for
+        calculations of IEEE 754-compliant values. The built-in functions that accept and return
+ <codeph>DECIMAL</codeph> are:
+<!-- List from Skye: positive, negative, least, greatest, fnv_hash, if, nullif, zeroifnull, isnull, coalesce -->
+<!-- Nong had already told me about abs, ceil, floor, round, truncate -->
+ <ul>
+ <li>
+ <codeph>ABS()</codeph>
+ </li>
+
+ <li>
+ <codeph>CEIL()</codeph>
+ </li>
+
+ <li>
+ <codeph>COALESCE()</codeph>
+ </li>
+
+ <li>
+ <codeph>FLOOR()</codeph>
+ </li>
+
+ <li>
+ <codeph>FNV_HASH()</codeph>
+ </li>
+
+ <li>
+ <codeph>GREATEST()</codeph>
+ </li>
+
+ <li>
+ <codeph>IF()</codeph>
+ </li>
+
+ <li>
+ <codeph>ISNULL()</codeph>
+ </li>
+
+ <li>
+ <codeph>LEAST()</codeph>
+ </li>
+
+ <li>
+ <codeph>NEGATIVE()</codeph>
+ </li>
+
+ <li>
+ <codeph>NULLIF()</codeph>
+ </li>
+
+ <li>
+ <codeph>POSITIVE()</codeph>
+ </li>
+
+ <li>
+ <codeph>PRECISION()</codeph>
+ </li>
+
+ <li>
+ <codeph>ROUND()</codeph>
+ </li>
+
+ <li>
+ <codeph>SCALE()</codeph>
+ </li>
+
+ <li>
+ <codeph>TRUNCATE()</codeph>
+ </li>
+
+ <li>
+ <codeph>ZEROIFNULL()</codeph>
+ </li>
+ </ul>
+ See <xref href="impala_functions.xml#builtins"/> for details.
+ </li>
+
+ <li>
+ <p>
+ <codeph>BIGINT</codeph>, <codeph>INT</codeph>, <codeph>SMALLINT</codeph>, and <codeph>TINYINT</codeph>
+ values can all be cast to <codeph>DECIMAL</codeph>. The number of digits to the left of the decimal point
+ in the <codeph>DECIMAL</codeph> type must be sufficient to hold the largest value of the corresponding
+ integer type. Note that integer literals are treated as the smallest appropriate integer type, meaning
+ there is sometimes a range of values that require one more digit of <codeph>DECIMAL</codeph> precision than
+ you might expect. For integer values, the scale of the <codeph>DECIMAL</codeph> type can be zero; if
+ the scale is greater than zero, remember to increase the precision value by an equivalent amount to hold
+ the required number of digits to the left of the decimal point.
+ </p>
+ <p>
+ The following examples show how different integer types are converted to <codeph>DECIMAL</codeph>.
+ </p>
+<!-- According to Nong, it's a bug that so many integer digits can be converted to a DECIMAL
+ value with small (s,p) spec. So expect to re-do this example. -->
+<codeblock>[localhost:21000] > select cast(1 as decimal(1,0));
++-------------------------+
+| cast(1 as decimal(1,0)) |
++-------------------------+
+| 1 |
++-------------------------+
+[localhost:21000] > select cast(9 as decimal(1,0));
++-------------------------+
+| cast(9 as decimal(1,0)) |
++-------------------------+
+| 9 |
++-------------------------+
+[localhost:21000] > select cast(10 as decimal(1,0));
++--------------------------+
+| cast(10 as decimal(1,0)) |
++--------------------------+
+| 10 |
++--------------------------+
+[localhost:21000] > select cast(10 as decimal(1,1));
++--------------------------+
+| cast(10 as decimal(1,1)) |
++--------------------------+
+| 10.0 |
++--------------------------+
+[localhost:21000] > select cast(100 as decimal(1,1));
++---------------------------+
+| cast(100 as decimal(1,1)) |
++---------------------------+
+| 100.0 |
++---------------------------+
+[localhost:21000] > select cast(1000 as decimal(1,1));
++----------------------------+
+| cast(1000 as decimal(1,1)) |
++----------------------------+
+| 1000.0 |
++----------------------------+
+</codeblock>
+ </li>
+
+ <li>
+ <p>
+ When a <codeph>DECIMAL</codeph> value is converted to any of the integer types, any fractional part is
+ truncated (that is, rounded towards zero):
+ </p>
+<codeblock>[localhost:21000] > create table num_dec_days (x decimal(4,1));
+[localhost:21000] > insert into num_dec_days values (1), (2), (cast(4.5 as decimal(4,1)));
+[localhost:21000] > insert into num_dec_days values (cast(0.1 as decimal(4,1))), (cast(.9 as decimal(4,1))), (cast(9.1 as decimal(4,1))), (cast(9.9 as decimal(4,1)));
+[localhost:21000] > select cast(x as int) from num_dec_days;
++----------------+
+| cast(x as int) |
++----------------+
+| 1 |
+| 2 |
+| 4 |
+| 0 |
+| 0 |
+| 9 |
+| 9 |
++----------------+
+</codeblock>
+ </li>
+
+ <li>
+ <p>
+ You cannot directly cast <codeph>TIMESTAMP</codeph> or <codeph>BOOLEAN</codeph> values to or from
+ <codeph>DECIMAL</codeph> values. You can turn a <codeph>DECIMAL</codeph> value into a time-related
+ representation using a two-step process, by converting it to an integer value and then using that result
+ in a call to a date and time function such as <codeph>from_unixtime()</codeph>.
+ </p>
+<codeblock>[localhost:21000] > select from_unixtime(cast(cast(1000.0 as decimal) as bigint));
++-------------------------------------------------------------+
+| from_unixtime(cast(cast(1000.0 as decimal(9,0)) as bigint)) |
++-------------------------------------------------------------+
+| 1970-01-01 00:16:40 |
++-------------------------------------------------------------+
+[localhost:21000] > select now() + interval cast(x as int) days from num_dec_days; -- x is a DECIMAL column.
+
+[localhost:21000] > create table num_dec_days (x decimal(4,1));
+[localhost:21000] > insert into num_dec_days values (1), (2), (cast(4.5 as decimal(4,1)));
+[localhost:21000] > select now() + interval cast(x as int) days from num_dec_days; -- The 4.5 value is truncated to 4 and becomes '4 days'.
++--------------------------------------+
+| now() + interval cast(x as int) days |
++--------------------------------------+
+| 2014-05-13 23:11:55.163284000 |
+| 2014-05-14 23:11:55.163284000 |
+| 2014-05-16 23:11:55.163284000 |
++--------------------------------------+
+</codeblock>
+ </li>
+
+ <li>
+ <p>
+ Because values in <codeph>INSERT</codeph> statements are checked rigorously for type compatibility, be
+ prepared to use <codeph>CAST()</codeph> function calls around literals, column references, or other
+ expressions that you are inserting into a <codeph>DECIMAL</codeph> column.
+ </p>
+ </li>
+ </ul>
+
+ <p conref="../shared/impala_common.xml#common/null_bad_numeric_cast"/>
+
+ <p>
+ <b>DECIMAL differences from integer and floating-point types:</b>
+ </p>
+
+ <p>
+ With the <codeph>DECIMAL</codeph> type, you are concerned with the number of overall digits of a number
+ rather than powers of 2 (as in <codeph>TINYINT</codeph>, <codeph>SMALLINT</codeph>, and so on). Therefore,
+ the limits with integral values of <codeph>DECIMAL</codeph> types fall around 99, 999, 9999, and so on rather
+ than 32767, 65535, 2
+ <sup>32</sup>
+ -1, and so on. For fractional values, you do not need to account for imprecise representation of the
+ fractional part according to the IEEE-754 standard (as in <codeph>FLOAT</codeph> and
+ <codeph>DOUBLE</codeph>). Therefore, when you insert a fractional value into a <codeph>DECIMAL</codeph>
+ column, you can compare, sum, query, <codeph>GROUP BY</codeph>, and so on that column and get back the
+ original values rather than some <q>close but not identical</q> value.
+ </p>
+
+ <p>
+ <codeph>FLOAT</codeph> and <codeph>DOUBLE</codeph> can cause problems or unexpected behavior due to inability
+ to precisely represent certain fractional values, for example dollar and cents values for currency. You might
+ find output values slightly different than you inserted, equality tests that do not match precisely, or
+ unexpected values for <codeph>GROUP BY</codeph> columns. <codeph>DECIMAL</codeph> can help reduce unexpected
+ behavior and rounding errors, at the expense of some performance overhead for assignments and comparisons.
+ </p>
+
+ <p>
+ <b>Literals and expressions:</b>
+ <ul>
+ <li>
+ <p>
+ When you use an integer literal such as <codeph>1</codeph> or <codeph>999</codeph> in a SQL statement,
+ depending on the context, Impala will treat it as either the smallest appropriate
+ <codeph>DECIMAL</codeph> type, or the smallest integer type (<codeph>TINYINT</codeph>,
+ <codeph>SMALLINT</codeph>, <codeph>INT</codeph>, or <codeph>BIGINT</codeph>). To minimize memory usage,
+ Impala prefers to treat the literal as the smallest appropriate integer type.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ When you use a floating-point literal such as <codeph>1.1</codeph> or <codeph>999.44</codeph> in a SQL
+ statement, depending on the context, Impala will treat it as either the smallest appropriate
+ <codeph>DECIMAL</codeph> type, or the smallest floating-point type (<codeph>FLOAT</codeph> or
+ <codeph>DOUBLE</codeph>). To avoid loss of accuracy, Impala prefers to treat the literal as a
+ <codeph>DECIMAL</codeph>.
+ </p>
+ </li>
+ </ul>
+ </p>
+
+ <p>
+ <b>Storage considerations:</b>
+ </p>
+
+ <ul>
+ <li>
+ Only the precision determines the storage size for <codeph>DECIMAL</codeph> values; the scale setting has
+ no effect on the storage size.
+ </li>
+
+ <li>
+ Text, RCFile, and SequenceFile tables all use ASCII-based formats. In these text-based file formats,
+ leading zeros are not stored, but trailing zeros are stored. In these tables, each <codeph>DECIMAL</codeph>
+ value takes up as many bytes as there are digits in the value, plus an extra byte if the decimal point is
+ present and an extra byte for negative values. Once the values are loaded into memory, they are represented
+ in 4, 8, or 16 bytes as described in the following list items. The on-disk representation varies depending
+ on the file format of the table.
+ </li>
+
+<!-- Next couple of points can be conref'ed with identical list bullets farther down under File Format Considerations. -->
+
+ <li>
+ Parquet and Avro tables use binary formats. In these tables, Impala stores each value in as few bytes as
+ possible
+<!-- 4, 8, or 16 bytes -->
+ depending on the precision specified for the <codeph>DECIMAL</codeph> column.
+ <ul>
+ <li>
+ In memory, <codeph>DECIMAL</codeph> values with precision of 9 or less are stored in 4 bytes.
+ </li>
+
+ <li>
+ In memory, <codeph>DECIMAL</codeph> values with precision of 10 through 18 are stored in 8 bytes.
+ </li>
+
+ <li>
+ In memory, <codeph>DECIMAL</codeph> values with precision greater than 18 are stored in 16 bytes.
+ </li>
+ </ul>
+ </li>
+ </ul>
+
+ <p conref="../shared/impala_common.xml#common/file_format_blurb"/>
+
+ <ul>
+ <li>
+ The <codeph>DECIMAL</codeph> data type can be stored in any of the file formats supported by Impala, as
+ described in <xref href="impala_file_formats.xml#file_formats"/>. Impala only writes to tables that use the
+ Parquet and text formats, so those formats are the focus for file format compatibility.
+ </li>
+
+ <li>
+ Impala can query Avro, RCFile, or SequenceFile tables containing <codeph>DECIMAL</codeph> columns, created
+ by other Hadoop components, on CDH 5 only.
+ </li>
+
+ <li>
+ You can use <codeph>DECIMAL</codeph> columns in Impala tables that are mapped to HBase tables. Impala can
+ query and insert into such tables.
+ </li>
+
+ <li>
+ Text, RCFile, and SequenceFile tables all use ASCII-based formats. In these tables, each
+ <codeph>DECIMAL</codeph> value takes up as many bytes as there are digits in the value, plus an extra byte
+ if the decimal point is present. The binary format of Parquet or Avro files offers more compact storage for
+ <codeph>DECIMAL</codeph> columns.
+ </li>
+
+ <li>
+ Parquet and Avro tables use binary formats. In these tables, Impala stores each value in 4, 8, or 16 bytes
+ depending on the precision specified for the <codeph>DECIMAL</codeph> column.
+ </li>
+
+ <li>
+ Parquet files containing <codeph>DECIMAL</codeph> columns are not expected to be readable under CDH 4. See
+ the <b>Compatibility</b> section for details.
+ </li>
+ </ul>
+
+ <p>
+ <b>UDF considerations:</b> When writing a C++ UDF, use the <codeph>DecimalVal</codeph> data type defined in
+ <filepath>/usr/include/impala_udf/udf.h</filepath>.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/partitioning_blurb"/>
+
+ <p>
+ You can use a <codeph>DECIMAL</codeph> column as a partition key. Doing so provides a better match between
+ the partition key values and the HDFS directory names than using a <codeph>DOUBLE</codeph> or
+ <codeph>FLOAT</codeph> partitioning column.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/schema_evolution_blurb"/>
+
+ <ul>
+ <li>
+ For text-based formats (text, RCFile, and SequenceFile tables), you can issue an <codeph>ALTER TABLE ...
+ REPLACE COLUMNS</codeph> statement to change the precision and scale of an existing
+ <codeph>DECIMAL</codeph> column. As long as the values in the column fit within the new precision and
+ scale, they are returned correctly by a query. Any values that do not fit within the new precision and
+ scale are returned as <codeph>NULL</codeph>, and Impala reports the conversion error. Leading zeros do not
+ count against the precision value, but trailing zeros after the decimal point do.
+<codeblock>[localhost:21000] > create table text_decimals (x string);
+[localhost:21000] > insert into text_decimals values ("1"), ("2"), ("99.99"), ("1.234"), ("000001"), ("1.000000000");
+[localhost:21000] > select * from text_decimals;
++-------------+
+| x |
++-------------+
+| 1 |
+| 2 |
+| 99.99 |
+| 1.234 |
+| 000001 |
+| 1.000000000 |
++-------------+
+[localhost:21000] > alter table text_decimals replace columns (x decimal(4,2));
+[localhost:21000] > select * from text_decimals;
++-------+
+| x |
++-------+
+| 1.00 |
+| 2.00 |
+| 99.99 |
+| NULL |
+| 1.00 |
+| NULL |
++-------+
+ERRORS:
+Backend 0:Error converting column: 0 TO DECIMAL(4, 2) (Data is: 1.234)
+file: hdfs://127.0.0.1:8020/user/hive/warehouse/decimal_testing.db/text_decimals/634d4bd3aa0
+e8420-b4b13bab7f1be787_56794587_data.0
+record: 1.234
+Error converting column: 0 TO DECIMAL(4, 2) (Data is: 1.000000000)
+file: hdfs://127.0.0.1:8020/user/hive/warehouse/decimal_testing.db/text_decimals/cd40dc68e20
+c565a-cc4bd86c724c96ba_311873428_data.0
+record: 1.000000000
+</codeblock>
+ </li>
+
+ <li>
+ For binary formats (Parquet and Avro tables), although an <codeph>ALTER TABLE ... REPLACE COLUMNS</codeph>
+ statement that changes the precision or scale of a <codeph>DECIMAL</codeph> column succeeds, any subsequent
+ attempt to query the changed column results in a fatal error. (The other columns can still be queried
+ successfully.) This is because the metadata about the columns is stored in the data files themselves, and
+ <codeph>ALTER TABLE</codeph> does not actually make any updates to the data files. If the metadata in the
+ data files disagrees with the metadata in the metastore database, Impala cancels the query.
+ </li>
+ </ul>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+<codeblock>CREATE TABLE t1 (x DECIMAL, y DECIMAL(5,2), z DECIMAL(25,0));
+INSERT INTO t1 VALUES (5, 99.44, 123456), (300, 6.7, 999999999);
+SELECT x+y, ROUND(y,1), z/98.6 FROM t1;
+SELECT CAST(1000.5 AS DECIMAL);
+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/restrictions_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/decimal_no_stats"/>
+
+<!-- <p conref="/Content/impala_common_xi44078.xml#common/partitioning_good"/> -->
+
+ <p conref="../shared/impala_common.xml#common/hbase_ok"/>
+
+ <p conref="../shared/impala_common.xml#common/parquet_ok"/>
+
+ <p conref="../shared/impala_common.xml#common/text_bulky"/>
+
+<!-- <p conref="/Content/impala_common_xi44078.xml#common/compatibility_blurb"/> -->
+
+<!-- <p conref="/Content/impala_common_xi44078.xml#common/internals_blurb"/> -->
+
+<!-- <p conref="/Content/impala_common_xi44078.xml#common/added_in_20"/> -->
+
+ <p conref="../shared/impala_common.xml#common/column_stats_constant"/>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ <xref href="impala_literals.xml#numeric_literals"/>, <xref href="impala_tinyint.xml#tinyint"/>,
+ <xref href="impala_smallint.xml#smallint"/>, <xref href="impala_int.xml#int"/>,
+ <xref href="impala_bigint.xml#bigint"/>, <xref href="impala_decimal.xml#decimal"/>,
+ <xref href="impala_math_functions.xml#math_functions"/> (especially <codeph>PRECISION()</codeph> and
+ <codeph>SCALE()</codeph>)
+ </p>
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_default_order_by_limit.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_default_order_by_limit.xml b/docs/topics/impala_default_order_by_limit.xml
new file mode 100644
index 0000000..def0335
--- /dev/null
+++ b/docs/topics/impala_default_order_by_limit.xml
@@ -0,0 +1,34 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept rev="obwl" id="default_order_by_limit">
+
+ <title>DEFAULT_ORDER_BY_LIMIT Query Option</title>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="Impala Query Options"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p conref="../shared/impala_common.xml#common/obwl_query_options"/>
+
+ <p rev="1.4.0">
+ Prior to Impala 1.4.0, Impala queries that use the <codeph><xref href="impala_order_by.xml#order_by">ORDER
+ BY</xref></codeph> clause must also include a
+ <codeph><xref href="impala_limit.xml#limit">LIMIT</xref></codeph> clause, to avoid accidentally producing
+ huge result sets that must be sorted. Sorting a huge result set is a memory-intensive operation. In Impala
+ 1.4.0 and higher, Impala uses a temporary disk work area to perform the sort if that operation would
+ otherwise exceed the Impala memory limit on a particular host.
+ </p>
+
+ <p>
+ <b>Type: numeric</b>
+ </p>
+
+ <p>
+ <b>Default:</b> -1 (no default limit)
+ </p>
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_delete.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_delete.xml b/docs/topics/impala_delete.xml
new file mode 100644
index 0000000..fcac5e4
--- /dev/null
+++ b/docs/topics/impala_delete.xml
@@ -0,0 +1,64 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="delete">
+
+ <title>DELETE Statement (CDH 5.5 and higher only)</title>
+ <titlealts><navtitle>DELETE</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Kudu"/>
+ <data name="Category" value="ETL"/>
+ <data name="Category" value="Ingest"/>
+ <data name="Category" value="DML"/>
+ <data name="Category" value="Data Analysts"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">DELETE statement</indexterm>
+ Deletes one or more rows from a Kudu table.
+ Although deleting a single row or a range of rows would be inefficient for tables using HDFS
+ data files, Kudu is able to perform this operation efficiently. Therefore, this statement
+ only works for Impala tables that use the Kudu storage engine.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock>
+</codeblock>
+
+ <p rev="kudu" audience="impala_next">
+ Normally, a <codeph>DELETE</codeph> operation for a Kudu table fails if
+ some partition key columns are not found, due to their being deleted or changed
+ by a concurrent <codeph>UPDATE</codeph> or <codeph>DELETE</codeph> operation.
+ Specify <codeph>DELETE IGNORE <varname>rest_of_statement</varname></codeph> to
+ make the <codeph>DELETE</codeph> continue in this case. The rows with the nonexistent
+ duplicate partition key column values are not removed.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/dml_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/sync_ddl_blurb"/>
+
+ <note conref="../shared/impala_common.xml#common/compute_stats_next"/>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+<codeblock>
+
+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ <xref href="impala_kudu.xml#impala_kudu"/>
+ </p>
+
+ </conbody>
+
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_describe.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_describe.xml b/docs/topics/impala_describe.xml
new file mode 100644
index 0000000..ffdb505
--- /dev/null
+++ b/docs/topics/impala_describe.xml
@@ -0,0 +1,561 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="describe">
+
+ <title id="desc">DESCRIBE Statement</title>
+ <titlealts><navtitle>DESCRIBE</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="Impala Data Types"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Tables"/>
+ <data name="Category" value="Reports"/>
+ <data name="Category" value="Schemas"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">DESCRIBE statement</indexterm>
+ The <codeph>DESCRIBE</codeph> statement displays metadata about a table, such as the column names and their
+ data types. Its syntax is:
+ </p>
+
+<codeblock rev="2.3.0">DESCRIBE [FORMATTED] [<varname>db_name</varname>.]<varname>table_name</varname>[.<varname>complex_col_name</varname> ...]</codeblock>
+
+ <p>
+ You can use the abbreviation <codeph>DESC</codeph> for the <codeph>DESCRIBE</codeph> statement.
+ </p>
+
+ <p rev="1.1">
+ The <codeph>DESCRIBE FORMATTED</codeph> variation displays additional information, in a format familiar to
+ users of Apache Hive. The extra information includes low-level details such as whether the table is internal
+ or external, when it was created, the file format, the location of the data in HDFS, whether the object is a
+ table or a view, and (for views) the text of the query from the view definition.
+ </p>
+
+ <note>
+ The <codeph>Compressed</codeph> field is not a reliable indicator of whether the table contains compressed
+ data. It typically shows <codeph>No</codeph>, because the compression settings only apply during the
+ session that loads data and are not stored persistently with the table metadata.
+ </note>
+
+ <p conref="../shared/impala_common.xml#common/complex_types_blurb"/>
+
+ <p rev="2.3.0">
+ For the <codeph>ARRAY</codeph>, <codeph>STRUCT</codeph>, and <codeph>MAP</codeph> types available in
+ CDH 5.5 / Impala 2.3 and higher, the <codeph>DESCRIBE</codeph> output is formatted to avoid
+ excessively long lines for multiple fields within a <codeph>STRUCT</codeph>, or a nested sequence of
+ complex types.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/complex_types_describe"/>
+
+ <p rev="2.3.0">
+ For example, here is the <codeph>DESCRIBE</codeph> output for a table containing a single top-level column
+ of each complex type:
+ </p>
+
+<codeblock rev="2.3.0"><![CDATA[create table t1 (x int, a array<int>, s struct<f1: string, f2: bigint>, m map<string,int>) stored as parquet;
+
+describe t1;
++------+-----------------+---------+
+| name | type | comment |
++------+-----------------+---------+
+| x | int | |
+| a | array<int> | |
+| s | struct< | |
+| | f1:string, | |
+| | f2:bigint | |
+| | > | |
+| m | map<string,int> | |
++------+-----------------+---------+
+]]>
+</codeblock>
+
+ <p rev="2.3.0">
+ Here are examples showing how to <q>drill down</q> into the layouts of complex types, including
+ using multi-part names to examine the definitions of nested types.
+ The <codeph>< ></codeph> delimiters identify the columns with complex types;
+ these are the columns where you can descend another level to see the parts that make up
+ the complex type.
+ This technique helps you to understand the multi-part names you use as table references in queries
+ involving complex types, and the corresponding column names you refer to in the <codeph>SELECT</codeph> list.
+ These tables are from the <q>nested TPC-H</q> schema, shown in detail in
+ <xref href="impala_complex_types.xml#complex_sample_schema"/>.
+ </p>
+
+ <p>
+ The <codeph>REGION</codeph> table contains an <codeph>ARRAY</codeph> of <codeph>STRUCT</codeph>
+ elements:
+ </p>
+
+ <ul>
+ <li>
+ <p>
+ The first <codeph>DESCRIBE</codeph> specifies the table name, to display the definition
+ of each top-level column.
+ </p>
+ </li>
+ <li>
+ <p>
+ The second <codeph>DESCRIBE</codeph> specifies the name of a complex
+ column, <codeph>REGION.R_NATIONS</codeph>, showing that when you include the name of an <codeph>ARRAY</codeph>
+ column in a <codeph>FROM</codeph> clause, that table reference acts like a two-column table with
+ columns <codeph>ITEM</codeph> and <codeph>POS</codeph>.
+ </p>
+ </li>
+ <li>
+ <p>
+ The final <codeph>DESCRIBE</codeph> specifies the fully qualified name of the <codeph>ITEM</codeph> field,
+ to display the layout of its underlying <codeph>STRUCT</codeph> type in table format, with the fields
+ mapped to column names.
+ </p>
+ </li>
+ </ul>
+
+<codeblock rev="2.3.0"><![CDATA[
+-- #1: The overall layout of the entire table.
+describe region;
++-------------+-------------------------+---------+
+| name | type | comment |
++-------------+-------------------------+---------+
+| r_regionkey | smallint | |
+| r_name | string | |
+| r_comment | string | |
+| r_nations | array<struct< | |
+| | n_nationkey:smallint, | |
+| | n_name:string, | |
+| | n_comment:string | |
+| | >> | |
++-------------+-------------------------+---------+
+
+-- #2: The ARRAY column within the table.
+describe region.r_nations;
++------+-------------------------+---------+
+| name | type | comment |
++------+-------------------------+---------+
+| item | struct< | |
+| | n_nationkey:smallint, | |
+| | n_name:string, | |
+| | n_comment:string | |
+| | > | |
+| pos | bigint | |
++------+-------------------------+---------+
+
+-- #3: The STRUCT that makes up each ARRAY element.
+-- The fields of the STRUCT act like columns of a table.
+describe region.r_nations.item;
++-------------+----------+---------+
+| name | type | comment |
++-------------+----------+---------+
+| n_nationkey | smallint | |
+| n_name | string | |
+| n_comment | string | |
++-------------+----------+---------+
+]]>
+</codeblock>
+
+ <p>
+ The <codeph>CUSTOMER</codeph> table contains an <codeph>ARRAY</codeph> of <codeph>STRUCT</codeph>
+ elements, where one field in the <codeph>STRUCT</codeph> is another <codeph>ARRAY</codeph> of
+ <codeph>STRUCT</codeph> elements:
+ </p>
+ <ul>
+ <li>
+ <p>
+ Again, the initial <codeph>DESCRIBE</codeph> specifies only the table name.
+ </p>
+ </li>
+ <li>
+ <p>
+ The second <codeph>DESCRIBE</codeph> specifies the qualified name of the complex
+ column, <codeph>CUSTOMER.C_ORDERS</codeph>, showing how an <codeph>ARRAY</codeph>
+ is represented as a two-column table with columns <codeph>ITEM</codeph> and <codeph>POS</codeph>.
+ </p>
+ </li>
+ <li>
+ <p>
+ The third <codeph>DESCRIBE</codeph> specifies the qualified name of the <codeph>ITEM</codeph>
+ of the <codeph>ARRAY</codeph> column, to see the structure of the nested <codeph>ARRAY</codeph>.
+ Again, it has two parts, <codeph>ITEM</codeph> and <codeph>POS</codeph>. Because the
+ <codeph>ARRAY</codeph> contains a <codeph>STRUCT</codeph>, the layout of the <codeph>STRUCT</codeph>
+ is shown.
+ </p>
+ </li>
+ <li>
+ <p>
+ The fourth and fifth <codeph>DESCRIBE</codeph> statements drill down into a <codeph>STRUCT</codeph> field that
+ is itself a complex type, an <codeph>ARRAY</codeph> of <codeph>STRUCT</codeph>.
+ The <codeph>ITEM</codeph> portion of the qualified name is only required when the <codeph>ARRAY</codeph>
+ elements are anonymous. The fields of the <codeph>STRUCT</codeph> give names to any other complex types
+ nested inside the <codeph>STRUCT</codeph>. Therefore, the <codeph>DESCRIBE</codeph> parameters
+ <codeph>CUSTOMER.C_ORDERS.ITEM.O_LINEITEMS</codeph> and <codeph>CUSTOMER.C_ORDERS.O_LINEITEMS</codeph>
+ are equivalent. (For brevity, Cloudera recommends leaving out the <codeph>ITEM</codeph> portion of
+ a qualified name when it is not required.)
+ </p>
+ </li>
+ <li>
+ <p>
+ The final <codeph>DESCRIBE</codeph> shows the layout of the deeply nested <codeph>STRUCT</codeph> type.
+ Because there are no more complex types nested inside this <codeph>STRUCT</codeph>, this is as far
+ as you can drill down into the layout for this table.
+ </p>
+ </li>
+ </ul>
+
+<codeblock rev="2.3.0"><![CDATA[-- #1: The overall layout of the entire table.
+describe customer;
++--------------+------------------------------------+
+| name | type |
++--------------+------------------------------------+
+| c_custkey | bigint |
+... more scalar columns ...
+| c_orders | array<struct< |
+| | o_orderkey:bigint, |
+| | o_orderstatus:string, |
+| | o_totalprice:decimal(12,2), |
+| | o_orderdate:string, |
+| | o_orderpriority:string, |
+| | o_clerk:string, |
+| | o_shippriority:int, |
+| | o_comment:string, |
+| | o_lineitems:array<struct< |
+| | l_partkey:bigint, |
+| | l_suppkey:bigint, |
+| | l_linenumber:int, |
+| | l_quantity:decimal(12,2), |
+| | l_extendedprice:decimal(12,2), |
+| | l_discount:decimal(12,2), |
+| | l_tax:decimal(12,2), |
+| | l_returnflag:string, |
+| | l_linestatus:string, |
+| | l_shipdate:string, |
+| | l_commitdate:string, |
+| | l_receiptdate:string, |
+| | l_shipinstruct:string, |
+| | l_shipmode:string, |
+| | l_comment:string |
+| | >> |
+| | >> |
++--------------+------------------------------------+
+
+-- #2: The ARRAY column within the table.
+describe customer.c_orders;
++------+------------------------------------+
+| name | type |
++------+------------------------------------+
+| item | struct< |
+| | o_orderkey:bigint, |
+| | o_orderstatus:string, |
+... more struct fields ...
+| | o_lineitems:array<struct< |
+| | l_partkey:bigint, |
+| | l_suppkey:bigint, |
+... more nested struct fields ...
+| | l_comment:string |
+| | >> |
+| | > |
+| pos | bigint |
++------+------------------------------------+
+
+-- #3: The STRUCT that makes up each ARRAY element.
+-- The fields of the STRUCT act like columns of a table.
+describe customer.c_orders.item;
++-----------------+----------------------------------+
+| name | type |
++-----------------+----------------------------------+
+| o_orderkey | bigint |
+| o_orderstatus | string |
+| o_totalprice | decimal(12,2) |
+| o_orderdate | string |
+| o_orderpriority | string |
+| o_clerk | string |
+| o_shippriority | int |
+| o_comment | string |
+| o_lineitems | array<struct< |
+| | l_partkey:bigint, |
+| | l_suppkey:bigint, |
+... more struct fields ...
+| | l_comment:string |
+| | >> |
++-----------------+----------------------------------+
+
+-- #4: The ARRAY nested inside the STRUCT elements of the first ARRAY.
+describe customer.c_orders.item.o_lineitems;
++------+----------------------------------+
+| name | type |
++------+----------------------------------+
+| item | struct< |
+| | l_partkey:bigint, |
+| | l_suppkey:bigint, |
+... more struct fields ...
+| | l_comment:string |
+| | > |
+| pos | bigint |
++------+----------------------------------+
+
+-- #5: Shorter form of the previous DESCRIBE. Omits the .ITEM portion of the name
+-- because O_LINEITEMS and other field names provide a way to refer to things
+-- inside the ARRAY element.
+describe customer.c_orders.o_lineitems;
++------+----------------------------------+
+| name | type |
++------+----------------------------------+
+| item | struct< |
+| | l_partkey:bigint, |
+| | l_suppkey:bigint, |
+... more struct fields ...
+| | l_comment:string |
+| | > |
+| pos | bigint |
++------+----------------------------------+
+
+-- #6: The STRUCT representing ARRAY elements nested inside
+-- another ARRAY of STRUCTs. The lack of any complex types
+-- in this output means this is as far as DESCRIBE can
+-- descend into the table layout.
+describe customer.c_orders.o_lineitems.item;
++-----------------+---------------+
+| name | type |
++-----------------+---------------+
+| l_partkey | bigint |
+| l_suppkey | bigint |
+... more scalar columns ...
+| l_comment | string |
++-----------------+---------------+
+]]>
+</codeblock>
+
+<p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+
+<p>
+ After the <cmdname>impalad</cmdname> daemons are restarted, the first query against a table can take longer
+ than subsequent queries, because the metadata for the table is loaded before the query is processed. This
+ one-time delay for each table can cause misleading results in benchmark tests or cause unnecessary concern.
+ To <q>warm up</q> the Impala metadata cache, you can issue a <codeph>DESCRIBE</codeph> statement in advance
+ for each table you intend to access later.
+</p>
+
+<p>
+ When you are dealing with data files stored in HDFS, sometimes it is important to know details such as the
+ path of the data files for an Impala table, and the host name for the namenode. You can get this information
+ from the <codeph>DESCRIBE FORMATTED</codeph> output. You specify HDFS URIs or path specifications with
+ statements such as <codeph>LOAD DATA</codeph> and the <codeph>LOCATION</codeph> clause of <codeph>CREATE
+ TABLE</codeph> or <codeph>ALTER TABLE</codeph>. You might also use HDFS URIs or paths with Linux commands
+ such as <cmdname>hadoop</cmdname> and <cmdname>hdfs</cmdname> to copy, rename, and so on, data files in HDFS.
+</p>
+
+<p conref="../shared/impala_common.xml#common/sync_ddl_blurb"/>
+
+<p rev="1.2.1">
+ Each table can also have associated table statistics and column statistics. To see these categories of
+ information, use the <codeph>SHOW TABLE STATS <varname>table_name</varname></codeph> and <codeph>SHOW COLUMN
+ STATS <varname>table_name</varname></codeph> statements.
+<!--
+For example, the table statistics can often show you the number
+and total size of the files in the table, even if you have not
+run <codeph>COMPUTE STATS</codeph>.
+-->
+ See <xref href="impala_show.xml#show"/> for details.
+</p>
+
+<p conref="../shared/impala_common.xml#common/complex_types_blurb"/>
+
+<p rev="2.3.0">
+ Because the column definitions for complex types can become long, particularly when such types are nested,
+ the <codeph>DESCRIBE</codeph> statement uses special formatting for complex type columns to make the output readable.
+</p>
+
+<note conref="../shared/impala_common.xml#common/compute_stats_next"/>
+
+<p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+<p>
+ The following example shows the results of both a standard <codeph>DESCRIBE</codeph> and <codeph>DESCRIBE
+ FORMATTED</codeph> for different kinds of schema objects:
+</p>
+
+<ul>
+ <li>
+ <codeph>DESCRIBE</codeph> for a table or a view returns the name, type, and comment for each of the
+ columns. For a view, if the column value is computed by an expression, the column name is automatically
+ generated as <codeph>_c0</codeph>, <codeph>_c1</codeph>, and so on depending on the ordinal number of the
+ column.
+ </li>
+
+ <li>
+ A table created with no special format or storage clauses is designated as a <codeph>MANAGED_TABLE</codeph>
+ (an <q>internal table</q> in Impala terminology). Its data files are stored in an HDFS directory under the
+ default Hive data directory. By default, it uses Text data format.
+ </li>
+
+ <li>
+ A view is designated as <codeph>VIRTUAL_VIEW</codeph> in <codeph>DESCRIBE FORMATTED</codeph> output. Some
+ of its properties are <codeph>NULL</codeph> or blank because they are inherited from the base table. The
+ text of the query that defines the view is part of the <codeph>DESCRIBE FORMATTED</codeph> output.
+ </li>
+
+ <li>
+ A table with additional clauses in the <codeph>CREATE TABLE</codeph> statement has differences in
+ <codeph>DESCRIBE FORMATTED</codeph> output. The output for <codeph>T2</codeph> includes the
+ <codeph>EXTERNAL_TABLE</codeph> keyword because of the <codeph>CREATE EXTERNAL TABLE</codeph> syntax, and
+ different <codeph>InputFormat</codeph> and <codeph>OutputFormat</codeph> fields to reflect the Parquet file
+ format.
+ </li>
+ </ul>
+
+<codeblock>[localhost:21000] > create table t1 (x int, y int, s string);
+Query: create table t1 (x int, y int, s string)
+[localhost:21000] > describe t1;
+Query: describe t1
+Query finished, fetching results ...
++------+--------+---------+
+| name | type | comment |
++------+--------+---------+
+| x | int | |
+| y | int | |
+| s | string | |
++------+--------+---------+
+Returned 3 row(s) in 0.13s
+[localhost:21000] > describe formatted t1;
+Query: describe formatted t1
+Query finished, fetching results ...
++------------------------------+--------------------------------------------------------------------+----------------------+
+| name | type | comment |
++------------------------------+--------------------------------------------------------------------+----------------------+
+| # col_name | data_type | comment |
+| | NULL | NULL |
+| x | int | None |
+| y | int | None |
+| s | string | None |
+| | NULL | NULL |
+| # Detailed Table Information | NULL | NULL |
+| Database: | describe_formatted | NULL |
+| Owner: | cloudera | NULL |
+| CreateTime: | Mon Jul 22 17:03:16 EDT 2013 | NULL |
+| LastAccessTime: | UNKNOWN | NULL |
+| Protect Mode: | None | NULL |
+| Retention: | 0 | NULL |
+| Location: | hdfs://127.0.0.1:8020/user/hive/warehouse/describe_formatted.db/t1 | NULL |
+| Table Type: | MANAGED_TABLE | NULL |
+| Table Parameters: | NULL | NULL |
+| | transient_lastDdlTime | 1374526996 |
+| | NULL | NULL |
+| # Storage Information | NULL | NULL |
+| SerDe Library: | org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe | NULL |
+| InputFormat: | org.apache.hadoop.mapred.TextInputFormat | NULL |
+| OutputFormat: | org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat | NULL |
+| Compressed: | No | NULL |
+| Num Buckets: | 0 | NULL |
+| Bucket Columns: | [] | NULL |
+| Sort Columns: | [] | NULL |
++------------------------------+--------------------------------------------------------------------+----------------------+
+Returned 26 row(s) in 0.03s
+[localhost:21000] > create view v1 as select x, upper(s) from t1;
+Query: create view v1 as select x, upper(s) from t1
+[localhost:21000] > describe v1;
+Query: describe v1
+Query finished, fetching results ...
++------+--------+---------+
+| name | type | comment |
++------+--------+---------+
+| x | int | |
+| _c1 | string | |
++------+--------+---------+
+Returned 2 row(s) in 0.10s
+[localhost:21000] > describe formatted v1;
+Query: describe formatted v1
+Query finished, fetching results ...
++------------------------------+------------------------------+----------------------+
+| name | type | comment |
++------------------------------+------------------------------+----------------------+
+| # col_name | data_type | comment |
+| | NULL | NULL |
+| x | int | None |
+| _c1 | string | None |
+| | NULL | NULL |
+| # Detailed Table Information | NULL | NULL |
+| Database: | describe_formatted | NULL |
+| Owner: | cloudera | NULL |
+| CreateTime: | Mon Jul 22 16:56:38 EDT 2013 | NULL |
+| LastAccessTime: | UNKNOWN | NULL |
+| Protect Mode: | None | NULL |
+| Retention: | 0 | NULL |
+| Table Type: | VIRTUAL_VIEW | NULL |
+| Table Parameters: | NULL | NULL |
+| | transient_lastDdlTime | 1374526598 |
+| | NULL | NULL |
+| # Storage Information | NULL | NULL |
+| SerDe Library: | null | NULL |
+| InputFormat: | null | NULL |
+| OutputFormat: | null | NULL |
+| Compressed: | No | NULL |
+| Num Buckets: | 0 | NULL |
+| Bucket Columns: | [] | NULL |
+| Sort Columns: | [] | NULL |
+| | NULL | NULL |
+| # View Information | NULL | NULL |
+| View Original Text: | SELECT x, upper(s) FROM t1 | NULL |
+| View Expanded Text: | SELECT x, upper(s) FROM t1 | NULL |
++------------------------------+------------------------------+----------------------+
+Returned 28 row(s) in 0.03s
+[localhost:21000] > create external table t2 (x int, y int, s string) stored as parquet location '/user/cloudera/sample_data';
+[localhost:21000] > describe formatted t2;
+Query: describe formatted t2
+Query finished, fetching results ...
++------------------------------+----------------------------------------------------+----------------------+
+| name | type | comment |
++------------------------------+----------------------------------------------------+----------------------+
+| # col_name | data_type | comment |
+| | NULL | NULL |
+| x | int | None |
+| y | int | None |
+| s | string | None |
+| | NULL | NULL |
+| # Detailed Table Information | NULL | NULL |
+| Database: | describe_formatted | NULL |
+| Owner: | cloudera | NULL |
+| CreateTime: | Mon Jul 22 17:01:47 EDT 2013 | NULL |
+| LastAccessTime: | UNKNOWN | NULL |
+| Protect Mode: | None | NULL |
+| Retention: | 0 | NULL |
+| Location: | hdfs://127.0.0.1:8020/user/cloudera/sample_data | NULL |
+| Table Type: | EXTERNAL_TABLE | NULL |
+| Table Parameters: | NULL | NULL |
+| | EXTERNAL | TRUE |
+| | transient_lastDdlTime | 1374526907 |
+| | NULL | NULL |
+| # Storage Information | NULL | NULL |
+| SerDe Library: | org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe | NULL |
+| InputFormat: | com.cloudera.impala.hive.serde.ParquetInputFormat | NULL |
+| OutputFormat: | com.cloudera.impala.hive.serde.ParquetOutputFormat | NULL |
+| Compressed: | No | NULL |
+| Num Buckets: | 0 | NULL |
+| Bucket Columns: | [] | NULL |
+| Sort Columns: | [] | NULL |
++------------------------------+----------------------------------------------------+----------------------+
+Returned 27 row(s) in 0.17s</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/cancel_blurb_no"/>
+
+ <p conref="../shared/impala_common.xml#common/permissions_blurb"/>
+ <p rev="CDH-19187">
+ The user ID that the <cmdname>impalad</cmdname> daemon runs under,
+ typically the <codeph>impala</codeph> user, must have read and execute
+ permissions for all directories that are part of the table.
+ (A table could span multiple different HDFS directories if it is partitioned.
+ The directories could be widely scattered because a partition can reside
+ in an arbitrary HDFS directory based on its <codeph>LOCATION</codeph> attribute.)
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ <xref href="impala_tables.xml#tables"/>, <xref href="impala_create_table.xml#create_table"/>,
+ <xref href="impala_show.xml#show_tables"/>, <xref href="impala_show.xml#show_create_table"/>
+ </p>
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_disable_codegen.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_disable_codegen.xml b/docs/topics/impala_disable_codegen.xml
new file mode 100644
index 0000000..844d49d
--- /dev/null
+++ b/docs/topics/impala_disable_codegen.xml
@@ -0,0 +1,36 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="disable_codegen">
+
+ <title>DISABLE_CODEGEN Query Option</title>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="Impala Query Options"/>
+ <data name="Category" value="Troubleshooting"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">DISABLE_CODEGEN query option</indexterm>
+ This is a debug option, intended for diagnosing and working around issues that cause crashes. If a query
+ fails with an <q>illegal instruction</q> or other hardware-specific message, try setting
+ <codeph>DISABLE_CODEGEN=true</codeph> and running the query again. If the query succeeds only when the
+ <codeph>DISABLE_CODEGEN</codeph> option is turned on, submit the problem to Cloudera support and include that
+ detail in the problem report. Do not otherwise run with this setting turned on, because it results in lower
+ overall performance.
+ </p>
+
+ <p>
+ Because the code generation phase adds a small amount of overhead for each query, you might turn on the
+ <codeph>DISABLE_CODEGEN</codeph> option to achieve maximum throughput when running many short-lived queries
+ against small tables.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/type_boolean"/>
+ <p conref="../shared/impala_common.xml#common/default_false_0"/>
+
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_disable_unsafe_spills.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_disable_unsafe_spills.xml b/docs/topics/impala_disable_unsafe_spills.xml
new file mode 100644
index 0000000..f251d65
--- /dev/null
+++ b/docs/topics/impala_disable_unsafe_spills.xml
@@ -0,0 +1,48 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept rev="2.0.0" id="disable_unsafe_spills">
+
+ <title>DISABLE_UNSAFE_SPILLS Query Option</title>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="Impala Query Options"/>
+ <data name="Category" value="Scalability"/>
+ <data name="Category" value="Memory"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">DISABLE_UNSAFE_SPILLS query option</indexterm>
+ Enable this option if you prefer to have queries fail when they exceed the Impala memory limit, rather than
+ write temporary data to disk.
+ </p>
+
+ <p>
+ Queries that <q>spill</q> to disk typically complete successfully, whereas in earlier Impala releases they would have failed.
+ However, queries with exorbitant memory requirements due to missing statistics or inefficient join clauses could
+ become so slow as a result that you would rather have them cancelled automatically and reduce the memory
+ usage through standard Impala tuning techniques.
+ </p>
+
+ <p>
+ This option prevents only <q>unsafe</q> spill operations, meaning that one or more tables are missing
+ statistics or the query does not include a hint to set the most efficient mechanism for a join or
+ <codeph>INSERT ... SELECT</codeph> into a partitioned table. These are the tables most likely to result in
+ suboptimal execution plans that could cause unnecessary spilling. Therefore, leaving this option enabled is a
+ good way to find tables on which to run the <codeph>COMPUTE STATS</codeph> statement.
+ </p>
+
+ <p>
+ See <xref href="impala_scalability.xml#spill_to_disk"/> for information about the <q>spill to disk</q>
+ feature for queries processing large result sets with joins, <codeph>ORDER BY</codeph>, <codeph>GROUP
+ BY</codeph>, <codeph>DISTINCT</codeph>, aggregation functions, or analytic functions.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/type_boolean"/>
+ <p conref="../shared/impala_common.xml#common/default_false_0"/>
+
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_distinct.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_distinct.xml b/docs/topics/impala_distinct.xml
new file mode 100644
index 0000000..d49e400
--- /dev/null
+++ b/docs/topics/impala_distinct.xml
@@ -0,0 +1,59 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="distinct">
+
+ <title>DISTINCT Operator</title>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Querying"/>
+ <data name="Category" value="Aggregate Functions"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">DISTINCT operator</indexterm>
+ The <codeph>DISTINCT</codeph> operator in a <codeph>SELECT</codeph> statement filters the result set to
+ remove duplicates:
+ </p>
+
+<codeblock>-- Returns the unique values from one column.
+-- NULL is included in the set of values if any rows have a NULL in this column.
+select distinct c_birth_country from customer;
+-- Returns the unique combinations of values from multiple columns.
+select distinct c_salutation, c_last_name from customer;</codeblock>
+
+ <p>
+ You can use <codeph>DISTINCT</codeph> in combination with an aggregation function, typically
+ <codeph>COUNT()</codeph>, to find how many different values a column contains:
+ </p>
+
+<codeblock>-- Counts the unique values from one column.
+-- NULL is not included as a distinct value in the count.
+select count(distinct c_birth_country) from customer;
+-- Counts the unique combinations of values from multiple columns.
+select count(distinct c_salutation, c_last_name) from customer;</codeblock>
+
+ <p>
+ One construct that Impala SQL does <i>not</i> support is using <codeph>DISTINCT</codeph> in more than one
+ aggregation function in the same query. For example, you could not have a single query with both
+ <codeph>COUNT(DISTINCT c_first_name)</codeph> and <codeph>COUNT(DISTINCT c_last_name)</codeph> in the
+ <codeph>SELECT</codeph> list.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/zero_length_strings"/>
+
+ <note conref="../shared/impala_common.xml#common/multiple_count_distinct"/>
+
+ <note>
+ <p>
+ In contrast with some database systems that always return <codeph>DISTINCT</codeph> values in sorted order,
+ Impala does not do any ordering of <codeph>DISTINCT</codeph> values. Always include an <codeph>ORDER
+ BY</codeph> clause if you need the values in alphabetical or numeric sorted order.
+ </p>
+ </note>
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_dml.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_dml.xml b/docs/topics/impala_dml.xml
new file mode 100644
index 0000000..66d4022
--- /dev/null
+++ b/docs/topics/impala_dml.xml
@@ -0,0 +1,85 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="dml">
+
+ <title>DML Statements</title>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="DML"/>
+ <data name="Category" value="Data Analysts"/>
+ <data name="Category" value="Developers"/>
+ <data name="Category" value="Tables"/>
+ <data name="Category" value="ETL"/>
+ <data name="Category" value="Ingest"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ DML refers to <q>Data Manipulation Language</q>, a subset of SQL statements that modify the data stored in
+ tables. Because Impala focuses on query performance and leverages the append-only nature of HDFS storage,
+ currently Impala only supports a small set of DML statements:
+ </p>
+
+ <ul>
+ <li audience="impala_next">
+ <xref href="impala_delete.xml#delete"/>; works for Kudu tables only
+ </li>
+
+ <li>
+ <xref href="impala_insert.xml#insert"/>
+ </li>
+
+ <li>
+ <xref href="impala_load_data.xml#load_data"/>
+ </li>
+
+ <li audience="impala_next">
+ <xref href="impala_update.xml#update"/>; works for Kudu tables only
+ </li>
+ </ul>
+
+ <p>
+ <codeph>INSERT</codeph> in Impala is primarily optimized for inserting large volumes of data in a single
+ statement, to make effective use of the multi-megabyte HDFS blocks. This is the way in Impala to create new
+ data files. If you intend to insert one or a few rows at a time, such as using the <codeph>INSERT ...
+ VALUES</codeph> syntax, that technique is much more efficient for Impala tables stored in HBase. See
+ <xref href="impala_hbase.xml#impala_hbase"/> for details.
+ </p>
+
+ <p>
+ <codeph>LOAD DATA</codeph> moves existing data files into the directory for an Impala table, making them
+ immediately available for Impala queries. This is one way in Impala to work with data files produced by other
+ Hadoop components. (<codeph>CREATE EXTERNAL TABLE</codeph> is the other alternative; with external tables,
+ you can query existing data files, while the files remain in their original location.)
+ </p>
+
+ <p>
+ To simulate the effects of an <codeph>UPDATE</codeph> or <codeph>DELETE</codeph> statement in other database
+ systems, typically you use <codeph>INSERT</codeph> or <codeph>CREATE TABLE AS SELECT</codeph> to copy data
+ from one table to another, filtering out or changing the appropriate rows during the copy operation.
+ </p>
+
+ <p>
+ Although Impala currently does not have an <codeph>UPDATE</codeph> statement, you can achieve a similar
+ result by using Impala tables stored in HBase. When you insert a row into an HBase table, and the table
+ already contains a row with the same value for the key column, the older row is hidden, effectively the same
+ as a single-row <codeph>UPDATE</codeph>.
+ </p>
+
+ <p rev="2.2.0">
+ Currently, Impala cannot perform DML operations for tables or partitions stored in the Amazon S3 filesystem.
+ See <xref href="impala_s3.xml#s3"/> for details.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ The other major classifications of SQL statements are data definition language (see
+ <xref href="impala_ddl.xml#ddl"/>) and queries (see <xref href="impala_select.xml#select"/>).
+ </p>
+ </conbody>
+</concept>
[15/22] incubator-impala git commit: First try at porting over the
source files necessary for the Impala SQL Reference.
Posted by jr...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_create_function.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_create_function.xml b/docs/topics/impala_create_function.xml
new file mode 100644
index 0000000..4140289
--- /dev/null
+++ b/docs/topics/impala_create_function.xml
@@ -0,0 +1,291 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept rev="1.2" id="create_function">
+
+ <title>CREATE FUNCTION Statement</title>
+ <titlealts><navtitle>CREATE FUNCTION</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="DDL"/>
+ <data name="Category" value="Schemas"/>
+ <data name="Category" value="Impala Functions"/>
+ <data name="Category" value="UDFs"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">CREATE FUNCTION statement</indexterm>
+ Creates a user-defined function (UDF), which you can use to implement custom logic during
+ <codeph>SELECT</codeph> or <codeph>INSERT</codeph> operations.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+ <p>
+ The syntax is different depending on whether you create a scalar UDF, which is called once for each row and
+ implemented by a single function, or a user-defined aggregate function (UDA), which is implemented by
+ multiple functions that compute intermediate results across sets of rows.
+ </p>
+
+ <p>
+ To create a scalar UDF, issue a <codeph>CREATE FUNCTION</codeph> statement:
+ </p>
+
+<codeblock>CREATE FUNCTION [IF NOT EXISTS] [<varname>db_name</varname>.]<varname>function_name</varname>([<varname>arg_type</varname>[, <varname>arg_type</varname>...]])
+ RETURNS <varname>return_type</varname>
+ LOCATION '<varname>hdfs_path</varname>'
+ SYMBOL='<varname>symbol_or_class</varname>'</codeblock>
+
+ <p>
+ To create a UDA, issue a <codeph>CREATE AGGREGATE FUNCTION</codeph> statement:
+ </p>
+
+<codeblock>CREATE [AGGREGATE] FUNCTION [IF NOT EXISTS] [<varname>db_name</varname>.]<varname>function_name</varname>([<varname>arg_type</varname>[, <varname>arg_type</varname>...]])
+ RETURNS <varname>return_type</varname>
+ LOCATION '<varname>hdfs_path</varname>'
+ [INIT_FN='<varname>function</varname>']
+ UPDATE_FN='<varname>function</varname>'
+ MERGE_FN='<varname>function</varname>'
+ [PREPARE_FN='<varname>function</varname>']
+ [CLOSEFN='<varname>function</varname>']
+ <ph rev="2.0.0">[SERIALIZE_FN='<varname>function</varname>']</ph>
+ [FINALIZE_FN='<varname>function</varname>']
+<!-- [INTERMEDIATE <varname>type_spec</varname>] --></codeblock>
+
+ <p conref="../shared/impala_common.xml#common/ddl_blurb"/>
+
+ <p>
+ <b>Varargs notation:</b>
+ </p>
+
+ <p>
+ If the underlying implementation of your function accepts a variable number of arguments:
+ </p>
+
+ <ul>
+ <li>
+ The variable arguments must go last in the argument list.
+ </li>
+
+ <li>
+ The variable arguments must all be of the same type.
+ </li>
+
+ <li>
+ You must include at least one instance of the variable arguments in every function call invoked from SQL.
+ </li>
+
+ <li>
+ You designate the variable portion of the argument list in the <codeph>CREATE FUNCTION</codeph> statement
+ by including <codeph>...</codeph> immediately after the type name of the first variable argument. For
+ example, to create a function that accepts an <codeph>INT</codeph> argument, followed by a
+ <codeph>BOOLEAN</codeph>, followed by one or more <codeph>STRING</codeph> arguments, your <codeph>CREATE
+ FUNCTION</codeph> statement would look like:
+<codeblock>CREATE FUNCTION <varname>func_name</varname> (INT, BOOLEAN, STRING ...)
+ RETURNS <varname>type</varname> LOCATION '<varname>path</varname>' SYMBOL='<varname>entry_point</varname>';
+</codeblock>
+ </li>
+ </ul>
+
+ <p>
+ See <xref href="impala_udf.xml#udf_varargs"/> for how to code the C++ or Java function to accept
+ variable-length argument lists.
+ </p>
+
+ <p>
+ <b>Scalar and aggregate functions:</b>
+ </p>
+
+ <p>
+ The simplest kind of user-defined function returns a single scalar value each time it is called, typically
+ once for each row in the result set. This general kind of function is what is usually meant by UDF.
+ User-defined aggregate functions (UDAs) are a specialized kind of UDF that produce a single value based on
+ the contents of multiple rows. You usually use UDAs in combination with a <codeph>GROUP BY</codeph> clause to
+ condense a large result set into a smaller one, or even a single row summarizing column values across an
+ entire table.
+ </p>
+
+ <p>
+ You create UDAs by using the <codeph>CREATE AGGREGATE FUNCTION</codeph> syntax. The clauses
+ <codeph>INIT_FN</codeph>, <codeph>UPDATE_FN</codeph>, <codeph>MERGE_FN</codeph>,
+ <ph rev="2.0.0"><codeph>SERIALIZE_FN</codeph>,</ph> <codeph>FINALIZE_FN</codeph>, and
+ <codeph>INTERMEDIATE</codeph> only apply when you create a UDA rather than a scalar UDF.
+ </p>
+
+ <p>
+ The <codeph>*_FN</codeph> clauses specify functions to call at different phases of function processing.
+ </p>
+
+ <ul>
+ <li>
+ <b>Initialize:</b> The function you specify with the <codeph>INIT_FN</codeph> clause does any initial
+ setup, such as initializing member variables in internal data structures. This function is often a stub for
+ simple UDAs. You can omit this clause and a default (no-op) function will be used.
+ </li>
+
+ <li>
+ <b>Update:</b> The function you specify with the <codeph>UPDATE_FN</codeph> clause is called once for each
+ row in the original result set, that is, before any <codeph>GROUP BY</codeph> clause is applied. A separate
+ instance of the function is called for each different value returned by the <codeph>GROUP BY</codeph>
+ clause. The final argument passed to this function is a pointer, to which you write an updated value based
+ on its original value and the value of the first argument.
+ </li>
+
+ <li>
+ <b>Merge:</b> The function you specify with the <codeph>MERGE_FN</codeph> clause is called an arbitrary
+ number of times, to combine intermediate values produced by different nodes or different threads as Impala
+ reads and processes data files in parallel. The final argument passed to this function is a pointer, to
+ which you write an updated value based on its original value and the value of the first argument.
+ </li>
+
+ <li rev="2.0.0">
+ <b>Serialize:</b> The function you specify with the <codeph>SERIALIZE_FN</codeph> clause frees memory
+ allocated to intermediate results. It is required if any memory was allocated by the Allocate function in
+ the Init, Update, or Merge functions, or if the intermediate type contains any pointers. See
+ <xref href="https://github.com/cloudera/impala-udf-samples/blob/master/uda-sample.cc" scope="external" format="html">the
+ UDA code samples</xref> for details.
+ </li>
+
+ <li>
+ <b>Finalize:</b> The function you specify with the <codeph>FINALIZE_FN</codeph> clause does any required
+ teardown for resources acquired by your UDF, such as freeing memory, closing file handles if you explicitly
+ opened any files, and so on. This function is often a stub for simple UDAs. You can omit this clause and a
+ default (no-op) function will be used. It is required in UDAs where the final return type is different from
+ the intermediate type, or if any memory was allocated by the Allocate function in the Init, Update, or
+ Merge functions. See
+ <xref href="https://github.com/cloudera/impala-udf-samples/blob/master/uda-sample.cc" scope="external" format="html">the
+ UDA code samples</xref> for details.
+ </li>
+ </ul>
+
+ <p>
+ If you use a consistent naming convention for each of the underlying functions, Impala can automatically
+ determine the names based on the first such clause, so the others are optional.
+ </p>
+
+ <p audience="Cloudera">
+ The <codeph>INTERMEDIATE</codeph> clause specifies the data type of intermediate values passed from the
+ <q>update</q> phase to the <q>merge</q> phase, and from the <q>merge</q> phase to the <q>finalize</q> phase.
+ You can use any of the existing Impala data types, or the special notation
+ <codeph>CHAR(<varname>n</varname>)</codeph> to allocate a scratch area of <varname>n</varname> bytes for the
+ intermediate result. For example, if the different phases of your UDA pass strings to each other but in the
+ end the function returns a <codeph>BIGINT</codeph> value, you would specify <codeph>INTERMEDIATE
+ STRING</codeph>. Likewise, if the different phases of your UDA pass 2 separate <codeph>BIGINT</codeph> values
+ between them (8 bytes each), you would specify <codeph>INTERMEDIATE CHAR(16)</codeph> so that each function
+ could read from and write to a 16-byte buffer.
+ </p>
+
+ <p>
+ For end-to-end examples of UDAs, see <xref href="impala_udf.xml#udfs"/>.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/complex_types_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/udfs_no_complex_types"/>
+
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+
+ <ul>
+ <li>
+ You can write Impala UDFs in either C++ or Java. C++ UDFs are new to Impala, and are the recommended format
+ for high performance utilizing native code. Java-based UDFs are compatible between Impala and Hive, and are
+ most suited to reusing existing Hive UDFs. (Impala can run Java-based Hive UDFs but not Hive UDAs.)
+ </li>
+
+ <li>
+ The body of the UDF is represented by a <codeph>.so</codeph> or <codeph>.jar</codeph> file, which you store
+ in HDFS and the <codeph>CREATE FUNCTION</codeph> statement distributes to each Impala node.
+ </li>
+
+ <li>
+ Impala calls the underlying code during SQL statement evaluation, as many times as needed to process all
+ the rows from the result set. All UDFs are assumed to be deterministic, that is, to always return the same
+ result when passed the same argument values. Impala might or might not skip some invocations of a UDF if
+ the result value is already known from a previous call. Therefore, do not rely on the UDF being called a
+ specific number of times, and do not return different result values based on some external factor such as
+ the current time, a random number function, or an external data source that could be updated while an
+ Impala query is in progress.
+ </li>
+
+ <li>
+ The names of the function arguments in the UDF are not significant, only their number, positions, and data
+ types.
+ </li>
+
+ <li>
+ You can overload the same function name by creating multiple versions of the function, each with a
+ different argument signature. For security reasons, you cannot make a UDF with the same name as any
+ built-in function.
+ </li>
+
+ <li>
+ In the UDF code, you represent the function return result as a <codeph>struct</codeph>. This
+ <codeph>struct</codeph> contains 2 fields. The first field is a <codeph>boolean</codeph> representing
+ whether the value is <codeph>NULL</codeph> or not. (When this field is <codeph>true</codeph>, the return
+ value is interpreted as <codeph>NULL</codeph>.) The second field is the same type as the specified function
+ return type, and holds the return value when the function returns something other than
+ <codeph>NULL</codeph>.
+ </li>
+
+ <li>
+ In the UDF code, you represent the function arguments as an initial pointer to a UDF context structure,
+ followed by references to zero or more <codeph>struct</codeph>s, corresponding to each of the arguments.
+ Each <codeph>struct</codeph> has the same 2 fields as with the return value, a <codeph>boolean</codeph>
+ field representing whether the argument is <codeph>NULL</codeph>, and a field of the appropriate type
+ holding any non-<codeph>NULL</codeph> argument value.
+ </li>
+
+ <li>
+ For sample code and build instructions for UDFs,
+ see <xref href="https://github.com/cloudera/impala/tree/master/be/src/udf_samples" scope="external" format="html">the sample UDFs in the Impala github repo</xref>.
+ </li>
+
+ <li>
+ Because the file representing the body of the UDF is stored in HDFS, it is automatically available to all
+ the Impala nodes. You do not need to manually copy any UDF-related files between servers.
+ </li>
+
+ <li>
+ Because Impala currently does not have any <codeph>ALTER FUNCTION</codeph> statement, if you need to rename
+ a function, move it to a different database, or change its signature or other properties, issue a
+ <codeph>DROP FUNCTION</codeph> statement for the original function followed by a <codeph>CREATE
+ FUNCTION</codeph> with the desired properties.
+ </li>
+
+ <li>
+ Because each UDF is associated with a particular database, either issue a <codeph>USE</codeph> statement
+ before doing any <codeph>CREATE FUNCTION</codeph> statements, or specify the name of the function as
+ <codeph><varname>db_name</varname>.<varname>function_name</varname></codeph>.
+ </li>
+ </ul>
+
+ <p conref="../shared/impala_common.xml#common/sync_ddl_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/compatibility_blurb"/>
+
+ <p>
+ Impala can run UDFs that were created through Hive, as long as they refer to Impala-compatible data types
+ (not composite or nested column types). Hive can run Java-based UDFs that were created through Impala, but
+ not Impala UDFs written in C++.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/restrictions_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/udf_persistence_restriction"/>
+
+ <p conref="../shared/impala_common.xml#common/cancel_blurb_no"/>
+
+ <p conref="../shared/impala_common.xml#common/permissions_blurb_no"/>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ <xref href="impala_udf.xml#udfs"/> for more background information, usage instructions, and examples for
+ Impala UDFs; <xref href="impala_drop_function.xml#drop_function"/>
+ </p>
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_create_role.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_create_role.xml b/docs/topics/impala_create_role.xml
new file mode 100644
index 0000000..975ce15
--- /dev/null
+++ b/docs/topics/impala_create_role.xml
@@ -0,0 +1,66 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept rev="1.4.0" id="create_role">
+
+ <title>CREATE ROLE Statement (CDH 5.2 or higher only)</title>
+ <titlealts><navtitle>CREATE ROLE (CDH 5.2 or higher only)</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="DDL"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Sentry"/>
+ <data name="Category" value="Roles"/>
+ <!-- Consider whether to go deeper into categories like Security for the Sentry-related statements. -->
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">CREATE ROLE statement</indexterm>
+<!-- Copied from Sentry docs. Turn into conref. -->
+ The <codeph>CREATE ROLE</codeph> statement creates a role to which privileges can be granted. Privileges can
+ be granted to roles, which can then be assigned to users. A user that has been assigned a role will only be
+ able to exercise the privileges of that role. Only users that have administrative privileges can create/drop
+ roles. By default, the <codeph>hive</codeph>, <codeph>impala</codeph> and <codeph>hue</codeph> users have
+ administrative privileges in Sentry.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock>CREATE ROLE <varname>role_name</varname>
+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/privileges_blurb"/>
+
+ <p>
+ Only administrative users (those with <codeph>ALL</codeph> privileges on the server, defined in the Sentry
+ policy file) can use this statement.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/compatibility_blurb"/>
+
+ <p>
+ Impala makes use of any roles and privileges specified by the <codeph>GRANT</codeph> and
+ <codeph>REVOKE</codeph> statements in Hive, and Hive makes use of any roles and privileges specified by the
+ <codeph>GRANT</codeph> and <codeph>REVOKE</codeph> statements in Impala. The Impala <codeph>GRANT</codeph>
+ and <codeph>REVOKE</codeph> statements for privileges do not require the <codeph>ROLE</codeph> keyword to be
+ repeated before each role name, unlike the equivalent Hive statements.
+ </p>
+
+<!-- To do: nail down the new SHOW syntax, e.g. SHOW ROLES, SHOW CURRENT ROLES, SHOW GROUPS. -->
+
+ <p conref="../shared/impala_common.xml#common/cancel_blurb_no"/>
+
+ <p conref="../shared/impala_common.xml#common/permissions_blurb_no"/>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ <xref href="impala_authorization.xml#authorization"/>, <xref href="impala_grant.xml#grant"/>,
+ <xref href="impala_revoke.xml#revoke"/>, <xref href="impala_drop_role.xml#drop_role"/>,
+ <xref href="impala_show.xml#show"/>
+ </p>
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_create_table.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_create_table.xml b/docs/topics/impala_create_table.xml
new file mode 100644
index 0000000..cdaee4a
--- /dev/null
+++ b/docs/topics/impala_create_table.xml
@@ -0,0 +1,650 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="create_table">
+
+ <title>CREATE TABLE Statement</title>
+ <titlealts><navtitle>CREATE TABLE</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="DDL"/>
+ <data name="Category" value="Impala Data Types"/>
+ <data name="Category" value="HDFS Caching"/>
+ <data name="Category" value="Tables"/>
+ <data name="Category" value="Schemas"/>
+ <data audience="impala_next" name="Category" value="Kudu"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">CREATE TABLE statement</indexterm>
+ Creates a new table and specifies its characteristics. While creating a table, you optionally specify aspects
+ such as:
+ </p>
+
+ <ul>
+ <li>
+ Whether the table is internal or external.
+ </li>
+
+ <li>
+ The columns and associated data types.
+ </li>
+
+ <li>
+ The columns used for physically partitioning the data.
+ </li>
+
+ <li>
+ The file format for data files.
+ </li>
+
+ <li>
+ The HDFS directory where the data files are located.
+ </li>
+ </ul>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+ <p>
+ The general syntax for creating a table and specifying its columns is as follows:
+ </p>
+
+ <p>
+ <b>Explicit column definitions:</b>
+ </p>
+
+<codeblock>CREATE [EXTERNAL] TABLE [IF NOT EXISTS] [<varname>db_name</varname>.]<varname>table_name</varname>
+ [(<varname>col_name</varname> <varname>data_type</varname> [COMMENT '<varname>col_comment</varname>'], ...)]
+ [COMMENT '<varname>table_comment</varname>']
+ [PARTITIONED BY (<varname>col_name</varname> <varname>data_type</varname> [COMMENT '<varname>col_comment</varname>'], ...)]
+ [WITH SERDEPROPERTIES ('<varname>key1</varname>'='<varname>value1</varname>', '<varname>key2</varname>'='<varname>value2</varname>', ...)]
+ [
+ [ROW FORMAT <varname>row_format</varname>] [STORED AS <varname>file_format</varname>]
+ ]
+ [LOCATION '<varname>hdfs_path</varname>']
+ [TBLPROPERTIES ('<varname>key1</varname>'='<varname>value1</varname>', '<varname>key2</varname>'='<varname>value2</varname>', ...)]
+<ph rev="1.4.0"> [CACHED IN '<varname>pool_name</varname>'</ph> <ph rev="2.2.0">[WITH REPLICATION = <varname>integer</varname>]</ph> | UNCACHED]
+</codeblock>
+
+ <p>
+ <b>Column definitions inferred from data file:</b>
+ </p>
+
+<codeblock>CREATE [EXTERNAL] TABLE [IF NOT EXISTS] [<varname>db_name</varname>.]<varname>table_name</varname>
+ LIKE PARQUET '<varname>hdfs_path_of_parquet_file</varname>'
+ [COMMENT '<varname>table_comment</varname>']
+ [PARTITIONED BY (<varname>col_name</varname> <varname>data_type</varname> [COMMENT '<varname>col_comment</varname>'], ...)]
+ [WITH SERDEPROPERTIES ('<varname>key1</varname>'='<varname>value1</varname>', '<varname>key2</varname>'='<varname>value2</varname>', ...)]
+ [
+ [ROW FORMAT <varname>row_format</varname>] [STORED AS <varname>file_format</varname>]
+ ]
+ [LOCATION '<varname>hdfs_path</varname>']
+ [TBLPROPERTIES ('<varname>key1</varname>'='<varname>value1</varname>', '<varname>key2</varname>'='<varname>value2</varname>', ...)]
+<ph rev="1.4.0"> [CACHED IN '<varname>pool_name</varname>'</ph> <ph rev="2.2.0">[WITH REPLICATION = <varname>integer</varname>]</ph> | UNCACHED]
+data_type:
+ <varname>primitive_type</varname>
+ | array_type
+ | map_type
+ | struct_type
+</codeblock>
+
+ <p>
+ <b>CREATE TABLE AS SELECT:</b>
+ </p>
+
+<codeblock>CREATE [EXTERNAL] TABLE [IF NOT EXISTS] [<varname>db_name</varname>.]<varname>table_name</varname>
+ [COMMENT '<varname>table_comment</varname>']
+ [WITH SERDEPROPERTIES ('<varname>key1</varname>'='<varname>value1</varname>', '<varname>key2</varname>'='<varname>value2</varname>', ...)]
+ [
+ [ROW FORMAT <varname>row_format</varname>] [STORED AS <varname>file_format</varname>]
+ ]
+ [LOCATION '<varname>hdfs_path</varname>']
+ [TBLPROPERTIES ('<varname>key1</varname>'='<varname>value1</varname>', '<varname>key2</varname>'='<varname>value2</varname>', ...)]
+<ph rev="1.4.0"> [CACHED IN '<varname>pool_name</varname>'</ph> <ph rev="2.2.0">[WITH REPLICATION = <varname>integer</varname>]</ph> | UNCACHED]
+AS
+ <varname>select_statement</varname></codeblock>
+
+<codeblock>primitive_type:
+ TINYINT
+ | SMALLINT
+ | INT
+ | BIGINT
+ | BOOLEAN
+ | FLOAT
+ | DOUBLE
+ <ph rev="1.4.0">| DECIMAL</ph>
+ | STRING
+ <ph rev="2.0.0">| CHAR</ph>
+ <ph rev="2.0.0">| VARCHAR</ph>
+ | TIMESTAMP
+
+<ph rev="2.3.0">complex_type:
+ struct_type
+ | array_type
+ | map_type
+
+struct_type: STRUCT &lt; <varname>name</varname> : <varname>primitive_or_complex_type</varname> [COMMENT '<varname>comment_string</varname>'], ... &gt;
+
+array_type: ARRAY &lt; <varname>primitive_or_complex_type</varname> &gt;
+
+map_type: MAP &lt; <varname>primitive_type</varname>, <varname>primitive_or_complex_type</varname> &gt;
+</ph>
+row_format:
+ DELIMITED [FIELDS TERMINATED BY '<varname>char</varname>' [ESCAPED BY '<varname>char</varname>']]
+ [LINES TERMINATED BY '<varname>char</varname>']
+
+file_format:
+ PARQUET
+ | TEXTFILE
+ | AVRO
+ | SEQUENCEFILE
+ | RCFILE
+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/ddl_blurb"/>
+
+ <!-- Should really have some info up front about all the data types and file formats.
+ Consider adding here, or at least making inline links to the relevant keywords
+ in the syntax spec above. -->
+
+ <p conref="../shared/impala_common.xml#common/complex_types_blurb"/>
+
+ <p rev="2.3.0">
+ The Impala complex types (<codeph>STRUCT</codeph>, <codeph>ARRAY</codeph>, or <codeph>MAP</codeph>)
+ are available in CDH 5.5 / Impala 2.3 and higher.
+ Because you can nest these types (for example, to make an array of maps or a struct
+ with an array field), these types are also sometimes referred to as nested types.
+ See <xref href="impala_complex_types.xml#complex_types"/> for usage details.
+ </p>
+
+ <!-- This is kind of an obscure and rare usage scenario. Consider moving all the complex type stuff further down
+ after some of the more common clauses. -->
+ <p rev="2.3.0">
+ Impala can create tables containing complex type columns, with any supported file format.
+ Because currently Impala can only query complex type columns in Parquet tables, creating
+ tables with complex type columns and other file formats such as text is of limited use.
+ For example, you might create a text table including some columns with complex types with Impala, and use Hive
+ as part of your ETL pipeline to ingest the nested type data and copy it to an identical Parquet table.
+ Or you might create a partitioned table containing complex type columns using one file format, and
+ use <codeph>ALTER TABLE</codeph> to change the file format of individual partitions to Parquet; Impala
+ can then query only the Parquet-format partitions in that table.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/complex_types_partitioning"/>
+
+ <p>
+ <b>Internal and external tables (EXTERNAL and LOCATION clauses):</b>
+ </p>
+
+ <p>
+ By default, Impala creates an <q>internal</q> table, where Impala manages the underlying data files for the
+ table, and physically deletes the data files when you drop the table. If you specify the
+ <codeph>EXTERNAL</codeph> clause, Impala treats the table as an <q>external</q> table, where the data files
+ are typically produced outside Impala and queried from their original locations in HDFS, and Impala leaves
+ the data files in place when you drop the table. For details about internal and external tables, see
+ <xref href="impala_tables.xml#tables"/>.
+ </p>
+
+ <p>
+ Typically, for an external table you include a <codeph>LOCATION</codeph> clause to specify the path to the
+ HDFS directory where Impala reads and writes files for the table. For example, if your data pipeline produces
+ Parquet files in the HDFS directory <filepath>/user/etl/destination</filepath>, you might create an external
+ table as follows:
+ </p>
+
+<codeblock>CREATE EXTERNAL TABLE external_parquet (c1 INT, c2 STRING, c3 TIMESTAMP)
+ STORED AS PARQUET LOCATION '/user/etl/destination';
+</codeblock>
+
+ <p>
+ Although the <codeph>EXTERNAL</codeph> and <codeph>LOCATION</codeph> clauses are often specified together,
+ <codeph>LOCATION</codeph> is optional for external tables, and you can also specify <codeph>LOCATION</codeph>
+ for internal tables. The difference is all about whether Impala <q>takes control</q> of the underlying data
+ files and moves them when you rename the table, or deletes them when you drop the table. For more about
+ internal and external tables and how they interact with the <codeph>LOCATION</codeph> attribute, see
+ <xref href="impala_tables.xml#tables"/>.
+ </p>
+
+ <p>
+ <b>Partitioned tables (PARTITIONED BY clause):</b>
+ </p>
+
+ <p>
+ The <codeph>PARTITIONED BY</codeph> clause divides the data files based on the values from one or more
+ specified columns. Impala queries can use the partition metadata to minimize the amount of data that is read
+ from disk or transmitted across the network, particularly during join queries. For details about
+ partitioning, see <xref href="impala_partitioning.xml#partitioning"/>.
+ </p>
+
+ <p rev="kudu" audience="impala_next">
+ <b>Partitioning for Kudu tables (DISTRIBUTE BY clause):</b>
+ </p>
+
+ <p rev="kudu" audience="impala_next">
+ For Kudu tables, you specify logical partitioning across one or more columns using the
+ <codeph>DISTRIBUTE BY</codeph> clause. In contrast to partitioning for HDFS-based tables,
+ multiple values for a partition key column can be located in the same partition.
+ The optional <codeph>HASH</codeph> clause lets you divide one or a set of partition key columns
+ into a specified number of buckets; you can use more than one <codeph>HASH</codeph>
+ clause, specifying a distinct set of partition key columns for each.
+ The optional <codeph>RANGE</codeph> clause further subdivides the partitions, based on
+ a set of literal values for the partition key columns.
+ </p>
+
+ <p>
+ <b>Specifying file format (STORED AS and ROW FORMAT clauses):</b>
+ </p>
+
+ <p>
+ The <codeph>STORED AS</codeph> clause identifies the format of the underlying data files. Currently, Impala
+ can query more types of file formats than it can create or insert into. Use Hive to perform any create or
+ data load operations that are not currently available in Impala. For example, Impala can create a
+ SequenceFile table but cannot insert data into it. There are also Impala-specific procedures for using
+ compression with each kind of file format. For details about working with data files of various formats, see
+ <xref href="impala_file_formats.xml#file_formats"/>.
+ </p>
+
+ <note>
+ In Impala 1.4.0 and higher, Impala can create Avro tables, which formerly required doing the <codeph>CREATE
+ TABLE</codeph> statement in Hive. See <xref href="impala_avro.xml#avro"/> for details and examples.
+ </note>
+
+ <p>
+ By default (when no <codeph>STORED AS</codeph> clause is specified), data files in Impala tables are created
+ as text files with Ctrl-A (hex 01) characters as the delimiter.
+<!-- Verify if ROW FORMAT is entirely ignored outside of text tables, or does it apply somehow to SequenceFile and/or RCFile too? -->
+ Specify the <codeph>ROW FORMAT DELIMITED</codeph> clause to produce or ingest data files that use a different
+ delimiter character such as tab or <codeph>|</codeph>, or a different line end character such as carriage
+ return or newline. When specifying delimiter and line end characters with the <codeph>FIELDS TERMINATED
+ BY</codeph> and <codeph>LINES TERMINATED BY</codeph> clauses, use <codeph>'\t'</codeph> for tab,
+ <codeph>'\n'</codeph> for newline or linefeed, <codeph>'\r'</codeph> for carriage return, and
+ <codeph>\</codeph><codeph>0</codeph> for ASCII <codeph>nul</codeph> (hex 00). For more examples of text
+ tables, see <xref href="impala_txtfile.xml#txtfile"/>.
+ </p>
+
+ <p>
+ The <codeph>ESCAPED BY</codeph> clause applies both to text files that you create through an
+ <codeph>INSERT</codeph> statement to an Impala <codeph>TEXTFILE</codeph> table, and to existing data files
+ that you put into an Impala table directory. (You can ingest existing data files either by creating the table
+ with <codeph>CREATE EXTERNAL TABLE ... LOCATION</codeph>, the <codeph>LOAD DATA</codeph> statement, or
+ through an HDFS operation such as <codeph>hdfs dfs -put <varname>file</varname>
+ <varname>hdfs_path</varname></codeph>.) Choose an escape character that is not used anywhere else in the
+ file, and put it in front of each instance of the delimiter character that occurs within a field value.
+ Surrounding field values with quotation marks does not help Impala to parse fields with embedded delimiter
+ characters; the quotation marks are considered to be part of the column value. If you want to use
+ <codeph>\</codeph> as the escape character, specify the clause in <cmdname>impala-shell</cmdname> as
+ <codeph>ESCAPED BY '\\'</codeph>.
+ </p>
+
+ <note conref="../shared/impala_common.xml#common/thorn"/>
+
+ <p>
+ <b>Cloning tables (LIKE clause):</b>
+ </p>
+
+ <p>
+ To create an empty table with the same columns, comments, and other attributes as another table, use the
+ following variation. The <codeph>CREATE TABLE ... LIKE</codeph> form allows a restricted set of clauses,
+ currently only the <codeph>LOCATION</codeph>, <codeph>COMMENT</codeph>, and <codeph>STORED AS</codeph>
+ clauses.
+ </p>
+
+<codeblock>CREATE [EXTERNAL] TABLE [IF NOT EXISTS] [<varname>db_name</varname>.]<varname>table_name</varname>
+ <ph rev="1.4.0">LIKE { [<varname>db_name</varname>.]<varname>table_name</varname> | PARQUET '<varname>hdfs_path_of_parquet_file</varname>' }</ph>
+ [COMMENT '<varname>table_comment</varname>']
+ [STORED AS <varname>file_format</varname>]
+ [LOCATION '<varname>hdfs_path</varname>']</codeblock>
+
+ <note rev="1.2">
+ To clone the structure of a table and transfer data into it in a single operation, use the <codeph>CREATE
+ TABLE AS SELECT</codeph> syntax described in the next subsection.
+ </note>
+
+ <p>
+ When you clone the structure of an existing table using the <codeph>CREATE TABLE ... LIKE</codeph> syntax,
+ the new table keeps the same file format as the original one, so you only need to specify the <codeph>STORED
+ AS</codeph> clause if you want to use a different file format, or when specifying a view as the original
+ table. (Creating a table <q>like</q> a view produces a text table by default.)
+ </p>
+
+ <p>
+ Although normally Impala cannot create an HBase table directly, Impala can clone the structure of an existing
+ HBase table with the <codeph>CREATE TABLE ... LIKE</codeph> syntax, preserving the file format and metadata
+ from the original table.
+ </p>
+
+ <p>
+ There are some exceptions to the ability to use <codeph>CREATE TABLE ... LIKE</codeph> with an Avro table.
+ For example, you cannot use this technique for an Avro table that is specified with an Avro schema but no
+ columns. When in doubt, check if a <codeph>CREATE TABLE ... LIKE</codeph> operation works in Hive; if not, it
+ typically will not work in Impala either.
+ </p>
+
+ <p>
+ If the original table is partitioned, the new table inherits the same partition key columns. Because the new
+ table is initially empty, it does not inherit the actual partitions that exist in the original one. To create
+ partitions in the new table, insert data or issue <codeph>ALTER TABLE ... ADD PARTITION</codeph> statements.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/create_table_like_view"/>
+
+ <p>
+ Because <codeph>CREATE TABLE ... LIKE</codeph> only manipulates table metadata, not the physical data of the
+ table, issue <codeph>INSERT INTO TABLE</codeph> statements afterward to copy any data from the original table
+ into the new one, optionally converting the data to a new file format. (For some file formats, Impala can do
+ a <codeph>CREATE TABLE ... LIKE</codeph> to create the table, but Impala cannot insert data in that file
+ format; in these cases, you must load the data in Hive. See
+ <xref href="impala_file_formats.xml#file_formats"/> for details.)
+ </p>
+
+ <p rev="1.2" id="ctas">
+ <b>CREATE TABLE AS SELECT:</b>
+ </p>
+
+ <p>
+ The <codeph>CREATE TABLE AS SELECT</codeph> syntax is a shorthand notation to create a table based on column
+ definitions from another table, and copy data from the source table to the destination table without issuing
+ any separate <codeph>INSERT</codeph> statement. This idiom is so popular that it has its own acronym,
+ <q>CTAS</q>.
+<!--
+ The <codeph>CREATE TABLE AS SELECT</codeph> syntax is as follows:
+ -->
+ </p>
+
+<!-- CREATE TABLE AS <select> now incorporated up higher in the original syntax diagram,
+ thus commented out here.
+ Does CTAS only accept a limited subset of clauses? -->
+
+<!--
+<codeblock rev="1.2">CREATE [EXTERNAL] TABLE [IF NOT EXISTS] <varname>db_name</varname>.]<varname>table_name</varname>
+ [COMMENT '<varname>table_comment</varname>']
+ [STORED AS <varname>file_format</varname>]
+ [LOCATION '<varname>hdfs_path</varname>']
+AS
+ <varname>select_statement</varname></codeblock>
+-->
+
+ <p rev="1.2">
+ See <xref href="impala_select.xml#select"/> for details about query syntax for the <codeph>SELECT</codeph>
+ portion of a <codeph>CREATE TABLE AS SELECT</codeph> statement.
+ </p>
+
+ <p rev="1.2">
+ The newly created table inherits the column names that you select from the original table, which you can
+ override by specifying column aliases in the query. Any column or table comments from the original table are
+ not carried over to the new table.
+ </p>
+
+ <p rev="obwl" conref="../shared/impala_common.xml#common/insert_sort_blurb"/>
+
+ <p rev="1.2">
+ For example, the following statements show how you can clone all the data in a table, or a subset of the
+ columns and/or rows, or reorder columns, rename them, or construct them out of expressions:
+ </p>
+
+<codeblock rev="1.2">-- Create new table and copy all data.
+CREATE TABLE clone_of_t1 AS SELECT * FROM t1;
+-- Same idea as CREATE TABLE LIKE, don't copy any data.
+CREATE TABLE empty_clone_of_t1 AS SELECT * FROM t1 WHERE 1=0;
+-- Copy some data.
+CREATE TABLE subset_of_t1 AS SELECT * FROM t1 WHERE x > 100 AND y LIKE 'A%';
+CREATE TABLE summary_of_t1 AS SELECT c1, sum(c2) AS total, avg(c2) AS average FROM t1 GROUP BY c1;
+-- Switch file format.
+CREATE TABLE parquet_version_of_t1 STORED AS PARQUET AS SELECT * FROM t1;
+-- Create tables with different column order, names, or types than the original.
+CREATE TABLE some_columns_from_t1 AS SELECT c1, c3, c5 FROM t1;
+CREATE TABLE reordered_columns_from_t1 AS SELECT c4, c3, c1, c2 FROM t1;
+CREATE TABLE synthesized_columns AS SELECT upper(c1) AS all_caps, c2+c3 AS total, "California" AS state FROM t1;</codeblock>
+
+ <p rev="1.2">
+ As part of a CTAS operation, you can convert the data to any file format that Impala can write (currently,
+ <codeph>TEXTFILE</codeph> and <codeph>PARQUET</codeph>). You cannot specify the lower-level properties of a
+ text table, such as the delimiter. Although you can use a partitioned table as the source and copy data from
+ it, you cannot specify any partitioning clauses for the new table.
+ </p>
+
+ <p rev="1.4.0">
+ <b>CREATE TABLE LIKE PARQUET:</b>
+ </p>
+
+ <p rev="1.4.0">
+ The variation <codeph>CREATE TABLE ... LIKE PARQUET '<varname>hdfs_path_of_parquet_file</varname>'</codeph>
+ lets you skip the column definitions of the <codeph>CREATE TABLE</codeph> statement. The column names and
+ data types are automatically configured based on the organization of the specified Parquet data file, which
+ must already reside in HDFS. You can use a data file located outside the Impala database directories, or a
+ file from an existing Impala Parquet table; either way, Impala only uses the column definitions from the file
+ and does not use the HDFS location for the <codeph>LOCATION</codeph> attribute of the new table. (Although
+ you can also specify the enclosing directory with the <codeph>LOCATION</codeph> attribute, to both use the
+ same schema as the data file and point the Impala table at the associated directory for querying.)
+ </p>
+
+ <p rev="1.4.0">
+ The following considerations apply when you use the <codeph>CREATE TABLE LIKE PARQUET</codeph> technique:
+ </p>
+
+ <ul rev="1.4.0">
+ <li>
+ Any column comments from the original table are not preserved in the new table. Each column in the new
+ table has a comment stating the low-level Parquet field type used to deduce the appropriate SQL column
+ type.
+ </li>
+
+ <li>
+ If you use a data file from a partitioned Impala table, any partition key columns from the original table
+ are left out of the new table, because they are represented in HDFS directory names rather than stored in
+ the data file. To preserve the partition information, repeat the same <codeph>PARTITION</codeph> clause as
+ in the original <codeph>CREATE TABLE</codeph> statement.
+ </li>
+
+ <li>
+ The file format of the new table defaults to text, as with other kinds of <codeph>CREATE TABLE</codeph>
+ statements. To make the new table also use Parquet format, include the clause <codeph>STORED AS
+ PARQUET</codeph> in the <codeph>CREATE TABLE LIKE PARQUET</codeph> statement.
+ </li>
+
+ <li>
+ If the Parquet data file comes from an existing Impala table, currently, any <codeph>TINYINT</codeph> or
+ <codeph>SMALLINT</codeph> columns are turned into <codeph>INT</codeph> columns in the new table.
+ Internally, Parquet stores such values as 32-bit integers.
+ </li>
+
+ <li>
+ When the destination table uses the Parquet file format, the <codeph>CREATE TABLE AS SELECT</codeph> and
+ <codeph>INSERT ... SELECT</codeph> statements always create at least one data file, even if the
+ <codeph>SELECT</codeph> part of the statement does not match any rows. You can use such an empty Parquet
+ data file as a template for subsequent <codeph>CREATE TABLE LIKE PARQUET</codeph> statements.
+ </li>
+ </ul>
+
+ <p>
+ For more details about creating Parquet tables, and examples of the <codeph>CREATE TABLE LIKE
+ PARQUET</codeph> syntax, see <xref href="impala_parquet.xml#parquet"/>.
+ </p>
+
+ <p>
+ <b>Visibility and Metadata (TBLPROPERTIES and WITH SERDEPROPERTIES clauses):</b>
+ </p>
+
+ <p rev="1.2">
+ You can associate arbitrary items of metadata with a table by specifying the <codeph>TBLPROPERTIES</codeph>
+ clause. This clause takes a comma-separated list of key-value pairs and stores those items in the metastore
+ database. You can also change the table properties later with an <codeph>ALTER TABLE</codeph> statement. You
+ can observe the table properties for different delimiter and escape characters using the <codeph>DESCRIBE
+ FORMATTED</codeph> command, and change those settings for an existing table with <codeph>ALTER TABLE ... SET
+ TBLPROPERTIES</codeph>.
+ </p>
+
+ <p rev="1.2">
+ You can also associate SerDes properties with the table by specifying key-value pairs through the
+ <codeph>WITH SERDEPROPERTIES</codeph> clause. This metadata is not used by Impala, which has its own built-in
+ serializer and deserializer for the file formats it supports. Particular property values might be needed for
+ Hive compatibility with certain variations of file formats, particularly Avro.
+ </p>
+
+ <p>
+ Some DDL operations that interact with other Hadoop components require specifying particular values in the
+ <codeph>SERDEPROPERTIES</codeph> or <codeph>TBLPROPERTIES</codeph> fields, such as creating an Avro table or
+ an HBase table. (You typically create HBase tables in Hive, because they require additional clauses not
+ currently available in Impala.)
+<!-- Haven't got a working example from Lenni, so suppressing this recommendation for now.
+ The Avro schema properties can be specified through either
+ <codeph>TBLPROPERTIES</codeph> or <codeph>SERDEPROPERTIES</codeph>;
+ for best compatibility with future versions of Hive,
+ use <codeph>SERDEPROPERTIES</codeph> in this case.
+-->
+ </p>
+
+ <p>
+ To see the column definitions and column comments for an existing table, for example before issuing a
+ <codeph>CREATE TABLE ... LIKE</codeph> or a <codeph>CREATE TABLE ... AS SELECT</codeph> statement, issue the
+ statement <codeph>DESCRIBE <varname>table_name</varname></codeph>. To see even more detail, such as the
+ location of data files and the values for clauses such as <codeph>ROW FORMAT</codeph> and <codeph>STORED
+ AS</codeph>, issue the statement <codeph>DESCRIBE FORMATTED <varname>table_name</varname></codeph>.
+ <codeph>DESCRIBE FORMATTED</codeph> is also needed to see any overall table comment (as opposed to individual
+ column comments).
+ </p>
+
+ <p>
+ After creating a table, your <cmdname>impala-shell</cmdname> session or another
+ <cmdname>impala-shell</cmdname> connected to the same node can immediately query that table. There might be a
+ brief interval (one statestore heartbeat) before the table can be queried through a different Impala node. To
+ make the <codeph>CREATE TABLE</codeph> statement return only when the table is recognized by all Impala nodes
+ in the cluster, enable the <codeph>SYNC_DDL</codeph> query option.
+ </p>
+
+ <p rev="1.4.0">
+ <b>HDFS caching (CACHED IN clause):</b>
+ </p>
+
+ <p rev="1.4.0">
+ If you specify the <codeph>CACHED IN</codeph> clause, any existing or future data files in the table
+ directory or the partition subdirectories are designated to be loaded into memory with the HDFS caching
+ mechanism. See <xref href="impala_perf_hdfs_caching.xml#hdfs_caching"/> for details about using the HDFS
+ caching feature.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/impala_cache_replication_factor"/>
+
+<!-- Say something in here about the SHOW statement, e.g. SHOW TABLES, SHOW TABLE/COLUMN STATS, SHOW PARTITIONS. -->
+
+ <p>
+ <b>Column order</b>:
+ </p>
+
+ <p>
+ If you intend to use the table to hold data files produced by some external source, specify the columns in
+ the same order as they appear in the data files.
+ </p>
+
+ <p>
+ If you intend to insert or copy data into the table through Impala, or if you have control over the way
+ externally produced data files are arranged, use your judgment to specify columns in the most convenient
+ order:
+ </p>
+
+ <ul>
+ <li>
+ <p>
+ If certain columns are often <codeph>NULL</codeph>, specify those columns last. You might produce data
+ files that omit these trailing columns entirely. Impala automatically fills in the <codeph>NULL</codeph>
+ values if so.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ If an unpartitioned table will be used as the source for an <codeph>INSERT ... SELECT</codeph> operation
+ into a partitioned table, specify last in the unpartitioned table any columns that correspond to
+ partition key columns in the partitioned table, and in the same order as the partition key columns are
+ declared in the partitioned table. This technique lets you use <codeph>INSERT ... SELECT *</codeph> when
+ copying data to the partitioned table, rather than specifying each column name individually.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ If you specify columns in an order that you later discover is suboptimal, you can sometimes work around
+ the problem without recreating the table. You can create a view that selects columns from the original
+ table in a permuted order, then do a <codeph>SELECT *</codeph> from the view. When inserting data into a
+ table, you can specify a permuted order for the inserted columns to match the order in the destination
+ table.
+ </p>
+ </li>
+ </ul>
+
+ <p conref="../shared/impala_common.xml#common/hive_blurb"/>
+
+ <p>
+ Impala queries can make use of metadata about the table and columns, such as the number of rows in a table or
+ the number of different values in a column. Prior to Impala 1.2.2, to create this metadata, you issued the
+ <codeph>ANALYZE TABLE</codeph> statement in Hive to gather this information, after creating the table and
+ loading representative data into it. In Impala 1.2.2 and higher, the <codeph>COMPUTE STATS</codeph> statement
+ produces these statistics within Impala, without needing to use Hive at all.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/hbase_blurb"/>
+
+ <note>
+ <p>
+ The Impala <codeph>CREATE TABLE</codeph> statement cannot create an HBase table, because it currently does
+ not support the <codeph>STORED BY</codeph> clause needed for HBase tables. Create such tables in Hive, then
+ query them through Impala. For information on using Impala with HBase tables, see
+ <xref href="impala_hbase.xml#impala_hbase"/>.
+ </p>
+ </note>
+
+ <p conref="../shared/impala_common.xml#common/s3_blurb"/>
+ <p rev="2.2.0">
+ To create a table where the data resides in the Amazon Simple Storage Service (S3),
+ specify an <codeph>s3a://</codeph> prefix in the <codeph>LOCATION</codeph> attribute pointing to the data files in S3.
+ You can use this special <codeph>LOCATION</codeph> syntax when creating an empty table,
+ but not as part of a <codeph>CREATE TABLE AS SELECT</codeph> statement.
+ See <xref href="impala_s3.xml#s3"/> for details.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/insert_sort_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/hdfs_blurb"/>
+
+ <p>
+ The <codeph>CREATE TABLE</codeph> statement for an internal table creates a directory in HDFS. The
+ <codeph>CREATE EXTERNAL TABLE</codeph> statement associates the table with an existing HDFS directory, and
+ does not create any new directory in HDFS. To locate the HDFS data directory for a table, issue a
+ <codeph>DESCRIBE FORMATTED <varname>table</varname></codeph> statement. To examine the contents of that HDFS
+ directory, use an OS command such as <codeph>hdfs dfs -ls hdfs://<varname>path</varname></codeph>, either
+ from the OS command line or through the <codeph>shell</codeph> or <codeph>!</codeph> commands in
+ <cmdname>impala-shell</cmdname>.
+ </p>
+
+ <p>
+ The <codeph>CREATE TABLE AS SELECT</codeph> syntax creates data files under the table data directory to hold
+ any data copied by the <codeph>INSERT</codeph> portion of the statement. (Even if no data is copied, Impala
+ might create one or more empty data files.)
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/permissions_blurb"/>
+ <p rev="CDH-19187">
+ <!-- TBD. -->
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/security_blurb"/>
+ <p conref="../shared/impala_common.xml#common/redaction_yes"/>
+
+ <p conref="../shared/impala_common.xml#common/cancel_blurb_maybe"/>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ <xref href="impala_tables.xml#tables"/>,
+ <xref href="impala_alter_table.xml#alter_table"/>, <xref href="impala_drop_table.xml#drop_table"/>,
+ <xref href="impala_partitioning.xml#partitioning"/>, <xref href="impala_tables.xml#internal_tables"/>,
+ <xref href="impala_tables.xml#external_tables"/>, <xref href="impala_compute_stats.xml#compute_stats"/>,
+ <xref href="impala_sync_ddl.xml#sync_ddl"/>, <xref href="impala_show.xml#show_tables"/>,
+ <xref href="impala_show.xml#show_create_table"/>, <xref href="impala_describe.xml#describe"/>
+ </p>
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_create_view.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_create_view.xml b/docs/topics/impala_create_view.xml
new file mode 100644
index 0000000..2458279
--- /dev/null
+++ b/docs/topics/impala_create_view.xml
@@ -0,0 +1,136 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept rev="1.1" id="create_view">
+
+ <title>CREATE VIEW Statement</title>
+ <titlealts><navtitle>CREATE VIEW</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="DDL"/>
+ <data name="Category" value="Tables"/>
+ <data name="Category" value="Schemas"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">CREATE VIEW statement</indexterm>
+ The <codeph>CREATE VIEW</codeph> statement lets you create a shorthand abbreviation for a more complicated
+ query. The base query can involve joins, expressions, reordered columns, column aliases, and other SQL
+ features that can make a query hard to understand or maintain.
+ </p>
+
+ <p>
+ Because a view is purely a logical construct (an alias for a query) with no physical data behind it,
+ <codeph>ALTER VIEW</codeph> only involves changes to metadata in the metastore database, not any data files
+ in HDFS.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock>CREATE VIEW [IF NOT EXISTS] <varname>view_name</varname> [(<varname>column_list</varname>)]
+ AS <varname>select_statement</varname></codeblock>
+
+ <p conref="../shared/impala_common.xml#common/ddl_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+
+ <p>
+ The <codeph>CREATE VIEW</codeph> statement can be useful in scenarios such as the following:
+ </p>
+
+ <ul>
+ <li>
+ To turn even the most lengthy and complicated SQL query into a one-liner. You can issue simple queries
+ against the view from applications, scripts, or interactive queries in <cmdname>impala-shell</cmdname>.
+ For example:
+<codeblock>select * from <varname>view_name</varname>;
+select * from <varname>view_name</varname> order by c1 desc limit 10;</codeblock>
+ The more complicated and hard-to-read the original query, the more benefit there is to simplifying the
+ query using a view.
+ </li>
+
+ <li>
+ To hide the underlying table and column names, to minimize maintenance problems if those names change. In
+ that case, you re-create the view using the new names, and all queries that use the view rather than the
+ underlying tables keep running with no changes.
+ </li>
+
+ <li>
+ To experiment with optimization techniques and make the optimized queries available to all applications.
+ For example, if you find a combination of <codeph>WHERE</codeph> conditions, join order, join hints, and so
+ on that works the best for a class of queries, you can establish a view that incorporates the
+ best-performing techniques. Applications can then make relatively simple queries against the view, without
+ repeating the complicated and optimized logic over and over. If you later find a better way to optimize the
+ original query, when you re-create the view, all the applications immediately take advantage of the
+ optimized base query.
+ </li>
+
+ <li>
+ To simplify a whole class of related queries, especially complicated queries involving joins between
+ multiple tables, complicated expressions in the column list, and other SQL syntax that makes the query
+ difficult to understand and debug. For example, you might create a view that joins several tables, filters
+ using several <codeph>WHERE</codeph> conditions, and selects several columns from the result set.
+ Applications might issue queries against this view that only vary in their <codeph>LIMIT</codeph>,
+ <codeph>ORDER BY</codeph>, and similar simple clauses.
+ </li>
+ </ul>
+
+ <p>
+ For queries that require repeating complicated clauses over and over again, for example in the select list,
+ <codeph>ORDER BY</codeph>, and <codeph>GROUP BY</codeph> clauses, you can use the <codeph>WITH</codeph>
+ clause as an alternative to creating a view.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/complex_types_blurb"/>
+ <p conref="../shared/impala_common.xml#common/complex_types_views"/>
+ <p conref="../shared/impala_common.xml#common/complex_types_views_caveat"/>
+
+ <p conref="../shared/impala_common.xml#common/sync_ddl_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/security_blurb"/>
+ <p conref="../shared/impala_common.xml#common/redaction_yes"/>
+
+ <p conref="../shared/impala_common.xml#common/cancel_blurb_no"/>
+
+ <p conref="../shared/impala_common.xml#common/permissions_blurb_no"/>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+<!-- TK: Elaborate on these, show queries and real output. -->
+
+<codeblock>-- Create a view that is exactly the same as the underlying table.
+create view v1 as select * from t1;
+
+-- Create a view that includes only certain columns from the underlying table.
+create view v2 as select c1, c3, c7 from t1;
+
+-- Create a view that filters the values from the underlying table.
+create view v3 as select distinct c1, c3, c7 from t1 where c1 is not null and c5 > 0;
+
+-- Create a view that reorders and renames columns from the underlying table.
+create view v4 as select c4 as last_name, c6 as address, c2 as birth_date from t1;
+
+-- Create a view that runs functions to convert or transform certain columns.
+create view v5 as select c1, cast(c3 as string) c3, concat(c4,c5) c5, trim(c6) c6, "Constant" c8 from t1;
+
+-- Create a view that hides the complexity of a view query.
+create view v6 as select t1.c1, t2.c2 from t1 join t2 on t1.id = t2.id;
+</codeblock>
+
+<!-- These examples show CREATE VIEW and corresponding DROP VIEW statements, with different combinations
+ of qualified and unqualified names. -->
+
+ <p conref="../shared/impala_common.xml#common/create_drop_view_examples"/>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ <xref href="impala_views.xml#views"/>, <xref href="impala_alter_view.xml#alter_view"/>,
+ <xref href="impala_drop_view.xml#drop_view"/>
+ </p>
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_databases.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_databases.xml b/docs/topics/impala_databases.xml
new file mode 100644
index 0000000..ad0511f
--- /dev/null
+++ b/docs/topics/impala_databases.xml
@@ -0,0 +1,65 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="databases">
+
+ <title>Overview of Impala Databases</title>
+ <titlealts><navtitle>Databases</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="Databases"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Data Analysts"/>
+ <data name="Category" value="Developers"/>
+ <data name="Category" value="Querying"/>
+ <data name="Category" value="Schemas"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ In Impala, a database is a logical container for a group of tables. Each database defines a separate
+ namespace. Within a database, you can refer to the tables inside it using their unqualified names. Different
+ databases can contain tables with identical names.
+ </p>
+
+ <p>
+ Creating a database is a lightweight operation. There are minimal database-specific properties to configure,
+ only <codeph>LOCATION</codeph> and <codeph>COMMENT</codeph>. There is no <codeph>ALTER DATABASE</codeph> statement.
+ </p>
+
+ <p>
+ Typically, you create a separate database for each project or application, to avoid naming conflicts between
+ tables and to make clear which tables are related to each other. The <codeph>USE</codeph> statement lets
+ you switch between databases. Unqualified references to tables, views, and functions refer to objects
+ within the current database. You can also refer to objects in other databases by using qualified names
+ of the form <codeph><varname>dbname</varname>.<varname>object_name</varname></codeph>.
+ </p>
+
+ <p>
+ Each database is physically represented by a directory in HDFS. When you do not specify a <codeph>LOCATION</codeph>
+ attribute, the directory is located in the Impala data directory with the associated tables managed by Impala.
+ When you do specify a <codeph>LOCATION</codeph> attribute, any read and write operations for tables in that
+ database are relative to the specified HDFS directory.
+ </p>
+
+ <p>
+ There is a special database, named <codeph>default</codeph>, where you begin when you connect to Impala.
+ Tables created in <codeph>default</codeph> are physically located one level higher in HDFS than all the
+ user-created databases.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/builtins_db"/>
+
+ <p>
+ <b>Related statements:</b>
+ </p>
+
+ <p>
+ <xref href="impala_create_database.xml#create_database"/>,
+ <xref href="impala_drop_database.xml#drop_database"/>, <xref href="impala_use.xml#use"/>,
+ <xref href="impala_show.xml#show_databases"/>
+ </p>
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_datatypes.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_datatypes.xml b/docs/topics/impala_datatypes.xml
new file mode 100644
index 0000000..e45867e
--- /dev/null
+++ b/docs/topics/impala_datatypes.xml
@@ -0,0 +1,43 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="datatypes">
+
+ <title>Data Types</title>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="Impala Data Types"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Data Analysts"/>
+ <data name="Category" value="Developers"/>
+ <data name="Category" value="Schemas"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">data types</indexterm>
+ Impala supports a set of data types that you can use for table columns, expression values, and function
+ arguments and return values.
+ </p>
+
+ <note>
+ Currently, Impala supports only scalar types, not composite or nested types. Accessing a table containing any
+ columns with unsupported types causes an error.
+ </note>
+
+ <p outputclass="toc"/>
+
+ <p>
+ For the notation to write literals of each of these data types, see
+ <xref href="impala_literals.xml#literals"/>.
+ </p>
+
+ <p>
+ See <xref href="impala_langref_unsupported.xml#langref_hiveql_delta"/> for differences between Impala and
+ Hive data types.
+ </p>
+ </conbody>
+</concept>
[14/22] incubator-impala git commit: First try at porting over the
source files necessary for the Impala SQL Reference.
Posted by jr...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_datetime_functions.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_datetime_functions.xml b/docs/topics/impala_datetime_functions.xml
new file mode 100644
index 0000000..16ae088
--- /dev/null
+++ b/docs/topics/impala_datetime_functions.xml
@@ -0,0 +1,1505 @@
+<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="datetime_functions">
+
+ <title>Impala Date and Time Functions</title>
+ <titlealts><navtitle>Date and Time Functions</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="Impala Functions"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Data Analysts"/>
+ <data name="Category" value="Developers"/>
+ <data name="Category" value="Dates and Times"/>
+ <data name="Category" value="Querying"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ The underlying Impala data type for date and time data is
+ <codeph><xref href="impala_timestamp.xml#timestamp">TIMESTAMP</xref></codeph>, which has both a date and a
+ time portion. Functions that extract a single field, such as <codeph>hour()</codeph> or
+ <codeph>minute()</codeph>, typically return an integer value. Functions that format the date portion, such as
+ <codeph>date_add()</codeph> or <codeph>to_date()</codeph>, typically return a string value.
+ </p>
+
+ <p>
+ You can also adjust a <codeph>TIMESTAMP</codeph> value by adding or subtracting an <codeph>INTERVAL</codeph>
+ expression. See <xref href="impala_timestamp.xml#timestamp"/> for details. <codeph>INTERVAL</codeph>
+ expressions are also allowed as the second argument for the <codeph>date_add()</codeph> and
+ <codeph>date_sub()</codeph> functions, rather than integers.
+ </p>
+
+ <p rev="2.2.0">
+ Some of these functions are affected by the setting of the
+ <codeph>-use_local_tz_for_unix_timestamp_conversions</codeph> startup flag for the
+ <cmdname>impalad</cmdname> daemon. This setting is off by default, meaning that
+ functions such as <codeph>from_unixtime()</codeph> and <codeph>unix_timestamp()</codeph>
+ consider the input values to always represent the UTC time zone.
+ This setting also applies when you <codeph>CAST()</codeph> a <codeph>BIGINT</codeph>
+ value to <codeph>TIMESTAMP</codeph>, or a <codeph>TIMESTAMP</codeph>
+ value to <codeph>BIGINT</codeph>.
+ When this setting is enabled, these functions and operations convert to and from
+ values representing the local time zone.
+ See <xref href="impala_timestamp.xml#timestamp"/> for details about how
+ Impala handles time zone considerations for the <codeph>TIMESTAMP</codeph> data type.
+ </p>
+
+ <p>
+ <b>Function reference:</b>
+ </p>
+
+ <p>
+ Impala supports the following date and time functions:
+ </p>
+
+<!-- New for 2.3:
+int_months_between
+timeofday
+timestamp_cmp
+months_between
+-->
+
+ <dl>
+ <dlentry rev="1.4.0" id="add_months">
+
+ <dt>
+ <codeph>add_months(timestamp date, int months)</codeph>, <codeph>add_months(timestamp date, bigint
+ months)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">add_months() function</indexterm>
+ <b>Purpose:</b> Returns the specified date and time plus some number of months.
+ <p>
+ <b>Return type:</b> <codeph>timestamp</codeph>
+ </p>
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+ <p>
+ Same as <codeph>months_add()</codeph>. Available in Impala 1.4 and higher. For
+ compatibility when porting code with vendor extensions.
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry rev="1.3.0" id="adddate">
+
+ <dt>
+ <codeph>adddate(timestamp startdate, int days)</codeph>, <codeph>adddate(timestamp startdate, bigint
+ days)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">adddate() function</indexterm>
+ <b>Purpose:</b> Adds a specified number of days to a <codeph>TIMESTAMP</codeph> value. Similar to
+ <codeph>date_add()</codeph>, but starts with an actual <codeph>TIMESTAMP</codeph> value instead of a
+ string that is converted to a <codeph>TIMESTAMP</codeph>.
+ <p>
+ <b>Return type:</b> <codeph>timestamp</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="current_timestamp">
+
+ <dt>
+ <codeph>current_timestamp()</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">current_timestamp() function</indexterm>
+ <b>Purpose:</b> Alias for the <codeph>now()</codeph> function.
+ <p>
+ <b>Return type:</b> <codeph>timestamp</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="date_add">
+
+ <dt>
+ <codeph>date_add(timestamp startdate, int days)</codeph>, <codeph>date_add(timestamp startdate,
+ <varname>interval_expression</varname>)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">date_add() function</indexterm>
+ <b>Purpose:</b> Adds a specified number of days to a <codeph>TIMESTAMP</codeph> value. The first argument
+ can be a string, which is automatically cast to <codeph>TIMESTAMP</codeph> if it uses the recognized
+ format, as described in <xref href="impala_timestamp.xml#timestamp"/>. With an <codeph>INTERVAL</codeph>
+ expression as the second argument, you can calculate a delta value using other units such as weeks,
+ years, hours, seconds, and so on; see <xref href="impala_timestamp.xml#timestamp"/> for details.
+ <p>
+ <b>Return type:</b> <codeph>timestamp</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry rev="2.0.0" id="date_part">
+
+ <dt>
+ <codeph>date_part(string, timestamp)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">date_part() function</indexterm>
+ <b>Purpose:</b> Similar to <codeph>EXTRACT()</codeph>, with the argument order reversed. Supports the
+ same date and time units as <codeph>EXTRACT()</codeph>. For compatibility with SQL code containing vendor
+ extensions.
+ <p>
+ <b>Return type:</b> <codeph>int</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="date_sub">
+
+ <dt>
+ <codeph>date_sub(timestamp startdate, int days)</codeph>, <codeph>date_sub(timestamp startdate,
+ <varname>interval_expression</varname>)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">date_sub() function</indexterm>
+ <b>Purpose:</b> Subtracts a specified number of days from a <codeph>TIMESTAMP</codeph> value. The first
+ argument can be a string, which is automatically cast to <codeph>TIMESTAMP</codeph> if it uses the
+ recognized format, as described in <xref href="impala_timestamp.xml#timestamp"/>. With an
+ <codeph>INTERVAL</codeph> expression as the second argument, you can calculate a delta value using other
+ units such as weeks, years, hours, seconds, and so on; see <xref href="impala_timestamp.xml#timestamp"/>
+ for details.
+ <p>
+ <b>Return type:</b> <codeph>timestamp</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="datediff">
+
+ <dt>
+ <codeph>datediff(string enddate, string startdate)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">datediff() function</indexterm>
+ <b>Purpose:</b> Returns the number of days between two dates represented as strings.
+ <p>
+ <b>Return type:</b> <codeph>int</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="day">
+
+ <dt>
+ <!-- <codeph>day(string date), <ph id="dayofmonth">dayofmonth(string date)</ph></codeph> -->
+ <codeph>day(timestamp date), <ph id="dayofmonth">dayofmonth(timestamp date)</ph></codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">day() function</indexterm>
+ <b>Purpose:</b> Returns the day field from the date portion of a <codeph>TIMESTAMP</codeph>.
+ <p>
+ <b>Return type:</b> <codeph>int</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry rev="1.2" id="dayname">
+
+ <dt>
+ <!-- <codeph>dayname(string date)</codeph> -->
+ <codeph>dayname(timestamp date)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">dayname() function</indexterm>
+ <b>Purpose:</b> Returns the day field from the date portion of a <codeph>TIMESTAMP</codeph>, converted to the string
+ corresponding to that day name. The range of return values is <codeph>'Sunday'</codeph> to
+ <codeph>'Saturday'</codeph>. Used in report-generating queries, as an alternative to calling
+ <codeph>dayofweek()</codeph> and turning that numeric return value into a string using a
+ <codeph>CASE</codeph> expression.
+ <p>
+ <b>Return type:</b> <codeph>string</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry rev="1.1" id="dayofweek">
+
+ <dt>
+ <!-- <codeph>dayofweek(string date)</codeph> -->
+ <codeph>dayofweek(timestamp date)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">dayofweek() function</indexterm>
+ <b>Purpose:</b> Returns the day field from the date portion of a <codeph>TIMESTAMP</codeph>, corresponding to the day of
+ the week. The range of return values is 1 (Sunday) to 7 (Saturday).
+ <p>
+ <b>Return type:</b> <codeph>int</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry rev="1.3.0" id="dayofyear">
+
+ <dt>
+ <codeph>dayofyear(timestamp date)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">dayofyear() function</indexterm>
+ <b>Purpose:</b> Returns the day field from a <codeph>TIMESTAMP</codeph> value, corresponding to the day
+ of the year. The range of return values is 1 (January 1) to 366 (December 31 of a leap year).
+ <p>
+ <b>Return type:</b> <codeph>int</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry rev="1.3.0" id="days_add">
+
+ <dt>
+ <codeph>days_add(timestamp startdate, int days)</codeph>, <codeph>days_add(timestamp startdate, bigint
+ days)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">days_add() function</indexterm>
+ <b>Purpose:</b> Adds a specified number of days to a <codeph>TIMESTAMP</codeph> value. Similar to
+ <codeph>date_add()</codeph>, but starts with an actual <codeph>TIMESTAMP</codeph> value instead of a
+ string that is converted to a <codeph>TIMESTAMP</codeph>.
+ <p>
+ <b>Return type:</b> <codeph>timestamp</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry rev="1.3.0" id="days_sub">
+
+ <dt>
+ <codeph>days_sub(timestamp startdate, int days)</codeph>, <codeph>days_sub(timestamp startdate, bigint
+ days)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">days_sub() function</indexterm>
+ <b>Purpose:</b> Subtracts a specified number of days from a <codeph>TIMESTAMP</codeph> value. Similar to
+ <codeph>date_sub()</codeph>, but starts with an actual <codeph>TIMESTAMP</codeph> value instead of a
+ string that is converted to a <codeph>TIMESTAMP</codeph>.
+ <p>
+ <b>Return type:</b> <codeph>timestamp</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry rev="1.4.0" id="extract">
+
+ <dt>
+ <codeph>extract(timestamp, string unit)</codeph>, <codeph rev="2.0.0">extract(unit FROM timestamp)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">extract() function</indexterm>
+ <b>Purpose:</b> Returns one of the numeric date or time fields from a <codeph>TIMESTAMP</codeph> value.
+ <p>
+ <b>Unit argument:</b> The <codeph>unit</codeph> string can be one of <codeph>year</codeph>,
+ <codeph>month</codeph>, <codeph>day</codeph>, <codeph>hour</codeph>, <codeph>minute</codeph>,
+ <codeph>second</codeph>, or <codeph>millisecond</codeph>. This argument value is case-insensitive.
+ </p>
+ <p rev="2.0.0">
+ In Impala 2.0 and higher, you can use special syntax rather than a regular function call, for
+ compatibility with code that uses the SQL-99 format with the <codeph>FROM</codeph> keyword. With this
+ style, the unit names are identifiers rather than <codeph>STRING</codeph> literals. For example, the
+ following calls are both equivalent:
+<codeblock>extract(year from now());
+extract(now(), "year");
+</codeblock>
+ </p>
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+ <p>
+ Typically used in <codeph>GROUP BY</codeph> queries to arrange results by hour,
+ day, month, and so on. You can also use this function in an <codeph>INSERT ... SELECT</codeph> into a
+ partitioned table to split up <codeph>TIMESTAMP</codeph> values into individual parts, if the
+ partitioned table has separate partition key columns representing year, month, day, and so on. If you
+ need to divide by more complex units of time, such as by week or by quarter, use the
+ <codeph>TRUNC()</codeph> function instead.
+ </p>
+ <p>
+ <b>Return type:</b> <codeph>int</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="from_unixtime">
+
+ <dt>
+ <codeph>from_unixtime(bigint unixtime[, string format])</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">from_unixtime() function</indexterm>
+ <b>Purpose:</b> Converts the number of seconds from the Unix epoch to the specified time into a string in
+ the local time zone.
+ <p>
+ <b>Return type:</b> <codeph>string</codeph>
+ </p>
+ <p conref="../shared/impala_common.xml#common/y2k38"/>
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+ <p>
+ The format string accepts the variations allowed for the <codeph>TIMESTAMP</codeph>
+ data type: date plus time, date by itself, time by itself, and optional fractional seconds for the
+ time. See <xref href="impala_timestamp.xml#timestamp"/> for details.
+ </p>
+ <p rev="1.3.0">
+ Currently, the format string is case-sensitive, especially to distinguish <codeph>m</codeph> for
+ minutes and <codeph>M</codeph> for months. In Impala 1.3 and later, you can switch the order of
+ elements, use alternative separator characters, and use a different number of placeholders for each
+ unit. Adding more instances of <codeph>y</codeph>, <codeph>d</codeph>, <codeph>H</codeph>, and so on
+ produces output strings zero-padded to the requested number of characters. The exception is
+ <codeph>M</codeph> for months, where <codeph>M</codeph> produces a non-padded value such as
+ <codeph>3</codeph>, <codeph>MM</codeph> produces a zero-padded value such as <codeph>03</codeph>,
+ <codeph>MMM</codeph> produces an abbreviated month name such as <codeph>Mar</codeph>, and sequences of
+ 4 or more <codeph>M</codeph> are not allowed. A date string including all fields could be
+ <codeph>"yyyy-MM-dd HH:mm:ss.SSSSSS"</codeph>, <codeph>"dd/MM/yyyy HH:mm:ss.SSSSSS"</codeph>,
+ <codeph>"MMM dd, yyyy HH.mm.ss (SSSSSS)"</codeph> or other combinations of placeholders and separator
+ characters.
+ </p>
+ <p conref="../shared/impala_common.xml#common/timezone_conversion_caveat"/>
+ <note rev="1.3.0">
+ The more flexible format strings allowed with the built-in functions do not change the rules about
+ using <codeph>CAST()</codeph> to convert from a string to a <codeph>TIMESTAMP</codeph> value. Strings
+ being converted through <codeph>CAST()</codeph> must still have the elements in the specified order and use the specified delimiter
+ characters, as described in <xref href="impala_timestamp.xml#timestamp"/>.
+ </note>
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+<codeblock>[localhost:21000] > select from_unixtime(1392394861,"yyyy-MM-dd HH:mm:ss.SSSS");
++-------------------------------------------------------+
+| from_unixtime(1392394861, 'yyyy-mm-dd hh:mm:ss.ssss') |
++-------------------------------------------------------+
+| 2014-02-14 16:21:01.0000 |
++-------------------------------------------------------+
+[localhost:21000] > select from_unixtime(1392394861,"yyyy-MM-dd");
++-----------------------------------------+
+| from_unixtime(1392394861, 'yyyy-mm-dd') |
++-----------------------------------------+
+| 2014-02-14 |
++-----------------------------------------+
+[localhost:21000] > select from_unixtime(1392394861,"HH:mm:ss.SSSS");
++--------------------------------------------+
+| from_unixtime(1392394861, 'hh:mm:ss.ssss') |
++--------------------------------------------+
+| 16:21:01.0000 |
++--------------------------------------------+
+[localhost:21000] > select from_unixtime(1392394861,"HH:mm:ss");
++---------------------------------------+
+| from_unixtime(1392394861, 'hh:mm:ss') |
++---------------------------------------+
+| 16:21:01 |
++---------------------------------------+</codeblock>
+ <p conref="../shared/impala_common.xml#common/datetime_function_chaining"/>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="from_utc_timestamp">
+
+ <dt>
+ <codeph>from_utc_timestamp(timestamp, string timezone)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">from_utc_timestamp() function</indexterm>
+ <b>Purpose:</b> Converts a specified UTC timestamp value into the appropriate value for a specified time
+ zone.
+ <p>
+ <b>Return type:</b> <codeph>timestamp</codeph>
+ </p>
+ <p>
+ <b>Usage notes:</b> Often used to translate UTC time zone data stored in a table back to the local
+ date and time for reporting. The opposite of the <codeph>to_utc_timestamp()</codeph> function.
+ </p>
+ <p>
+ <b>Examples:</b> See discussion of time zones in <xref href="impala_timestamp.xml#timestamp"/>
+ for information about using this function for conversions between the local time zone and UTC.
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="hour">
+
+ <dt>
+ <codeph>hour(string date)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">hour() function</indexterm>
+ <b>Purpose:</b> Returns the hour field from a date represented as a string.
+ <p>
+ <b>Return type:</b> <codeph>int</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry rev="1.3.0" id="hours_add">
+
+ <dt>
+ <codeph>hours_add(timestamp date, int hours)</codeph>, <codeph>hours_add(timestamp date, bigint
+ hours)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">hours_add() function</indexterm>
+ <b>Purpose:</b> Returns the specified date and time plus some number of hours.
+ <p>
+ <b>Return type:</b> <codeph>timestamp</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry rev="1.3.0" id="hours_sub">
+
+ <dt>
+ <codeph>hours_sub(timestamp date, int hours)</codeph>, <codeph>hours_sub(timestamp date, bigint
+ hours)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">hours_sub() function</indexterm>
+ <b>Purpose:</b> Returns the specified date and time minus some number of hours.
+ <p>
+ <b>Return type:</b> <codeph>timestamp</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry rev="2.3.0" id="int_months_between">
+
+ <dt>
+ <codeph>int_months_between(timestamp newer, timestamp older)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">int_months_between() function</indexterm>
+ <b>Purpose:</b> Returns the number of months between the date portions of two <codeph>TIMESTAMP</codeph> values,
+ as an <codeph>INT</codeph> representing only the full months that passed.
+ <p>
+ <b>Return type:</b> <codeph>int</codeph>
+ </p>
+ <p conref="../shared/impala_common.xml#common/added_in_230"/>
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+ <p>
+ Typically used in business contexts, for example to determine whether
+ a specified number of months have passed or whether some end-of-month deadline was reached.
+ </p>
+ <p>
+ The method of determining the number of elapsed months includes some special handling of
+ months with different numbers of days that creates edge cases for dates between the
+ 28th and 31st days of certain months. See <codeph>months_between()</codeph> for details.
+ The <codeph>int_months_between()</codeph> result is essentially the <codeph>floor()</codeph>
+ of the <codeph>months_between()</codeph> result.
+ </p>
+ <p>
+ If either value is <codeph>NULL</codeph>, which could happen for example when converting a
+ nonexistent date string such as <codeph>'2015-02-29'</codeph> to a <codeph>TIMESTAMP</codeph>,
+ the result is also <codeph>NULL</codeph>.
+ </p>
+ <p>
+ If the first argument represents an earlier time than the second argument, the result is negative.
+ </p>
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+<codeblock>/* Less than a full month = 0. */
+select int_months_between('2015-02-28', '2015-01-29');
++------------------------------------------------+
+| int_months_between('2015-02-28', '2015-01-29') |
++------------------------------------------------+
+| 0 |
++------------------------------------------------+
+
+/* Last day of month to last day of next month = 1. */
+select int_months_between('2015-02-28', '2015-01-31');
++------------------------------------------------+
+| int_months_between('2015-02-28', '2015-01-31') |
++------------------------------------------------+
+| 1 |
++------------------------------------------------+
+
+/* Slightly less than 2 months = 1. */
+select int_months_between('2015-03-28', '2015-01-31');
++------------------------------------------------+
+| int_months_between('2015-03-28', '2015-01-31') |
++------------------------------------------------+
+| 1 |
++------------------------------------------------+
+
+/* 2 full months (identical days of the month) = 2. */
+select int_months_between('2015-03-31', '2015-01-31');
++------------------------------------------------+
+| int_months_between('2015-03-31', '2015-01-31') |
++------------------------------------------------+
+| 2 |
++------------------------------------------------+
+
+/* Last day of month to last day of month-after-next = 2. */
+select int_months_between('2015-03-31', '2015-01-30');
++------------------------------------------------+
+| int_months_between('2015-03-31', '2015-01-30') |
++------------------------------------------------+
+| 2 |
++------------------------------------------------+
+</codeblock>
+ </dd>
+
+ </dlentry>
+
+ <dlentry rev="1.3.0" id="microseconds_add">
+
+ <dt>
+ <codeph>microseconds_add(timestamp date, int microseconds)</codeph>, <codeph>microseconds_add(timestamp
+ date, bigint microseconds)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">microseconds_add() function</indexterm>
+ <b>Purpose:</b> Returns the specified date and time plus some number of microseconds.
+ <p>
+ <b>Return type:</b> <codeph>timestamp</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry rev="1.3.0" id="microseconds_sub">
+
+ <dt>
+ <codeph>microseconds_sub(timestamp date, int microseconds)</codeph>, <codeph>microseconds_sub(timestamp
+ date, bigint microseconds)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">microseconds_sub() function</indexterm>
+ <b>Purpose:</b> Returns the specified date and time minus some number of microseconds.
+ <p>
+ <b>Return type:</b> <codeph>timestamp</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry rev="1.3.0" id="milliseconds_add">
+
+ <dt>
+ <codeph>milliseconds_add(timestamp date, int milliseconds)</codeph>, <codeph>milliseconds_add(timestamp
+ date, bigint milliseconds)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">milliseconds_add() function</indexterm>
+ <b>Purpose:</b> Returns the specified date and time plus some number of milliseconds.
+ <p>
+ <b>Return type:</b> <codeph>timestamp</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry rev="1.3.0" id="milliseconds_sub">
+
+ <dt>
+ <codeph>milliseconds_sub(timestamp date, int milliseconds)</codeph>, <codeph>milliseconds_sub(timestamp
+ date, bigint milliseconds)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">milliseconds_sub() function</indexterm>
+ <b>Purpose:</b> Returns the specified date and time minus some number of milliseconds.
+ <p>
+ <b>Return type:</b> <codeph>timestamp</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="minute">
+
+ <dt>
+ <codeph>minute(string date)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">minute() function</indexterm>
+ <b>Purpose:</b> Returns the minute field from a date represented as a string.
+ <p>
+ <b>Return type:</b> <codeph>int</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry rev="1.3.0" id="minutes_add">
+
+ <dt>
+ <codeph>minutes_add(timestamp date, int minutes)</codeph>, <codeph>minutes_add(timestamp date, bigint
+ minutes)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">minutes_add() function</indexterm>
+ <b>Purpose:</b> Returns the specified date and time plus some number of minutes.
+ <p>
+ <b>Return type:</b> <codeph>timestamp</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry rev="1.3.0" id="minutes_sub">
+
+ <dt>
+ <codeph>minutes_sub(timestamp date, int minutes)</codeph>, <codeph>minutes_sub(timestamp date, bigint
+ minutes)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">minutes_sub() function</indexterm>
+ <b>Purpose:</b> Returns the specified date and time minus some number of minutes.
+ <p>
+ <b>Return type:</b> <codeph>timestamp</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="month">
+
+ <dt>
+ <!-- <codeph>month(string date)</codeph> -->
+ <codeph>month(timestamp date)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">month() function</indexterm>
+ <b>Purpose:</b> Returns the month field from the date portion of a <codeph>TIMESTAMP</codeph>.
+ <p>
+ <b>Return type:</b> <codeph>int</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry rev="1.3.0" id="months_add">
+
+ <dt>
+ <codeph>months_add(timestamp date, int months)</codeph>, <codeph>months_add(timestamp date, bigint
+ months)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">months_add() function</indexterm>
+ <b>Purpose:</b> Returns the specified date and time plus some number of months.
+ <p>
+ <b>Return type:</b> <codeph>timestamp</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry rev="2.3.0" id="months_between">
+
+ <dt>
+ <codeph>months_between(timestamp newer, timestamp older)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">months_between() function</indexterm>
+ <b>Purpose:</b> Returns the number of months between the date portions of two <codeph>TIMESTAMP</codeph> values.
+ Can include a fractional part representing extra days in addition to the full months
+ between the dates. The fractional component is computed by dividing the difference in days by 31 (regardless of the month).
+ <p>
+ <b>Return type:</b> <codeph>double</codeph>
+ </p>
+ <p conref="../shared/impala_common.xml#common/added_in_230"/>
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+ <p>
+ Typically used in business contexts, for example to determine whether
+ a specified number of months have passed or whether some end-of-month deadline was reached.
+ </p>
+ <p>
+ If the only consideration is the number of full months and any fractional value is
+ not significant, use <codeph>int_months_between()</codeph> instead.
+ </p>
+ <p>
+ The method of determining the number of elapsed months includes some special handling of
+ months with different numbers of days that creates edge cases for dates between the
+ 28th and 31st days of certain months.
+ </p>
+ <p>
+ If either value is <codeph>NULL</codeph>, which could happen for example when converting a
+ nonexistent date string such as <codeph>'2015-02-29'</codeph> to a <codeph>TIMESTAMP</codeph>,
+ the result is also <codeph>NULL</codeph>.
+ </p>
+ <p>
+ If the first argument represents an earlier time than the second argument, the result is negative.
+ </p>
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+ <p>
+ The following examples show how dates that are on the same day of the month
+ are considered to be exactly N months apart, even if the months have different
+ numbers of days.
+ </p>
+<codeblock>select months_between('2015-02-28', '2015-01-28');
++--------------------------------------------+
+| months_between('2015-02-28', '2015-01-28') |
++--------------------------------------------+
+| 1 |
++--------------------------------------------+
+
+select months_between(now(), now() + interval 1 month);
++-------------------------------------------------+
+| months_between(now(), now() + interval 1 month) |
++-------------------------------------------------+
+| -1 |
++-------------------------------------------------+
+
+select months_between(now() + interval 1 year, now());
++------------------------------------------------+
+| months_between(now() + interval 1 year, now()) |
++------------------------------------------------+
+| 12 |
++------------------------------------------------+
+</codeblock>
+ <p>
+ The following examples show how dates that are on the last day of the month
+ are considered to be exactly N months apart, even if the months have different
+ numbers of days. For example, from January 28th to February 28th is exactly one
+ month because the day of the month is identical; January 31st to February 28th
+ is exactly one month because in both cases it is the last day of the month;
+ but January 29th or 30th to February 28th is considered a fractional month.
+ </p>
+<codeblock>select months_between('2015-02-28', '2015-01-31');
++--------------------------------------------+
+| months_between('2015-02-28', '2015-01-31') |
++--------------------------------------------+
+| 1 |
++--------------------------------------------+
+
+select months_between('2015-02-28', '2015-01-29');
++--------------------------------------------+
+| months_between('2015-02-28', '2015-01-29') |
++--------------------------------------------+
+| 0.967741935483871 |
++--------------------------------------------+
+
+select months_between('2015-02-28', '2015-01-30');
++--------------------------------------------+
+| months_between('2015-02-28', '2015-01-30') |
++--------------------------------------------+
+| 0.935483870967742 |
++--------------------------------------------+
+</codeblock>
+ <p>
+ The following examples show how dates that are not a precise number
+ of months apart result in a fractional return value.
+ </p>
+<codeblock>select months_between('2015-03-01', '2015-01-28');
++--------------------------------------------+
+| months_between('2015-03-01', '2015-01-28') |
++--------------------------------------------+
+| 1.129032258064516 |
++--------------------------------------------+
+
+select months_between('2015-03-01', '2015-02-28');
++--------------------------------------------+
+| months_between('2015-03-01', '2015-02-28') |
++--------------------------------------------+
+| 0.1290322580645161 |
++--------------------------------------------+
+
+select months_between('2015-06-02', '2015-05-29');
++--------------------------------------------+
+| months_between('2015-06-02', '2015-05-29') |
++--------------------------------------------+
+| 0.1290322580645161 |
++--------------------------------------------+
+
+select months_between('2015-03-01', '2015-01-25');
++--------------------------------------------+
+| months_between('2015-03-01', '2015-01-25') |
++--------------------------------------------+
+| 1.225806451612903 |
++--------------------------------------------+
+
+select months_between('2015-03-01', '2015-02-25');
++--------------------------------------------+
+| months_between('2015-03-01', '2015-02-25') |
++--------------------------------------------+
+| 0.2258064516129032 |
++--------------------------------------------+
+
+select months_between('2015-02-28', '2015-02-01');
++--------------------------------------------+
+| months_between('2015-02-28', '2015-02-01') |
++--------------------------------------------+
+| 0.8709677419354839 |
++--------------------------------------------+
+
+select months_between('2015-03-28', '2015-03-01');
++--------------------------------------------+
+| months_between('2015-03-28', '2015-03-01') |
++--------------------------------------------+
+| 0.8709677419354839 |
++--------------------------------------------+
+</codeblock>
+ <p>
+ The following examples show how the time portion of the <codeph>TIMESTAMP</codeph>
+ values are irrelevant for calculating the month interval. Even the fractional part
+ of the result only depends on the number of full days between the argument values,
+ regardless of the time portion.
+ </p>
+<codeblock>select months_between('2015-05-28 23:00:00', '2015-04-28 11:45:00');
++--------------------------------------------------------------+
+| months_between('2015-05-28 23:00:00', '2015-04-28 11:45:00') |
++--------------------------------------------------------------+
+| 1 |
++--------------------------------------------------------------+
+
+select months_between('2015-03-28', '2015-03-01');
++--------------------------------------------+
+| months_between('2015-03-28', '2015-03-01') |
++--------------------------------------------+
+| 0.8709677419354839 |
++--------------------------------------------+
+
+select months_between('2015-03-28 23:00:00', '2015-03-01 11:45:00');
++--------------------------------------------------------------+
+| months_between('2015-03-28 23:00:00', '2015-03-01 11:45:00') |
++--------------------------------------------------------------+
+| 0.8709677419354839 |
++--------------------------------------------------------------+
+</codeblock>
+ </dd>
+
+ </dlentry>
+
+ <dlentry rev="1.3.0" id="months_sub">
+
+ <dt>
+ <codeph>months_sub(timestamp date, int months)</codeph>, <codeph>months_sub(timestamp date, bigint
+ months)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">months_sub() function</indexterm>
+ <b>Purpose:</b> Returns the specified date and time minus some number of months.
+ <p>
+ <b>Return type:</b> <codeph>timestamp</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry rev="1.3.0" id="nanoseconds_add">
+
+ <dt>
+ <codeph>nanoseconds_add(timestamp date, int nanoseconds)</codeph>, <codeph>nanoseconds_add(timestamp
+ date, bigint nanoseconds)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">nanoseconds_add() function</indexterm>
+ <b>Purpose:</b> Returns the specified date and time plus some number of nanoseconds.
+ <p>
+ <b>Return type:</b> <codeph>timestamp</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry rev="1.3.0" id="nanoseconds_sub">
+
+ <dt>
+ <codeph>nanoseconds_sub(timestamp date, int nanoseconds)</codeph>, <codeph>nanoseconds_sub(timestamp
+ date, bigint nanoseconds)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">nanoseconds_sub() function</indexterm>
+ <b>Purpose:</b> Returns the specified date and time minus some number of nanoseconds.
+ <p>
+ <b>Return type:</b> <codeph>timestamp</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="now">
+
+ <dt>
+ <codeph>now()</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">now() function</indexterm>
+<!-- <b>Purpose:</b> Returns the current date and time (in the UTC time zone) as a <codeph>timestamp</codeph> value. -->
+ <b>Purpose:</b> Returns the current date and time (in the local time zone) as a
+ <codeph>timestamp</codeph> value.
+ <p>
+ <b>Return type:</b> <codeph>timestamp</codeph>
+ </p>
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+ <p>
+ To find a date/time value in the future or the past relative to the current date
+ and time, add or subtract an <codeph>INTERVAL</codeph> expression to the return value of
+ <codeph>now()</codeph>. See <xref href="impala_timestamp.xml#timestamp"/> for examples.
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="second">
+
+ <dt>
+ <codeph>second(string date)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">second() function</indexterm>
+ <b>Purpose:</b> Returns the second field from a date represented as a string.
+ <p>
+ <b>Return type:</b> <codeph>int</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry rev="1.3.0" id="seconds_add">
+
+ <dt>
+ <codeph>seconds_add(timestamp date, int seconds)</codeph>, <codeph>seconds_add(timestamp date, bigint
+ seconds)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">seconds_add() function</indexterm>
+ <b>Purpose:</b> Returns the specified date and time plus some number of seconds.
+ <p>
+ <b>Return type:</b> <codeph>timestamp</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry rev="1.3.0" id="seconds_sub">
+
+ <dt>
+ <codeph>seconds_sub(timestamp date, int seconds)</codeph>, <codeph>seconds_sub(timestamp date, bigint
+ seconds)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">seconds_sub() function</indexterm>
+ <b>Purpose:</b> Returns the specified date and time minus some number of seconds.
+ <p>
+ <b>Return type:</b> <codeph>timestamp</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry rev="1.3.0" id="subdate">
+
+ <dt>
+ <codeph>subdate(timestamp startdate, int days)</codeph>, <codeph>subdate(timestamp startdate, bigint
+ days)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">subdate() function</indexterm>
+ <b>Purpose:</b> Subtracts a specified number of days from a <codeph>TIMESTAMP</codeph> value. Similar to
+ <codeph>date_sub()</codeph>, but starts with an actual <codeph>TIMESTAMP</codeph> value instead of a
+ string that is converted to a <codeph>TIMESTAMP</codeph>.
+ <p>
+ <b>Return type:</b> <codeph>timestamp</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry rev="2.3.0" id="timeofday">
+
+ <dt>
+ <codeph>timeofday()</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">timeofday() function</indexterm>
+ <b>Purpose:</b> Returns a string representation of the current date and time, according to the time of the local system,
+ including any time zone designation.
+ <p>
+ <b>Return type:</b> <codeph>string</codeph>
+ </p>
+ <p conref="../shared/impala_common.xml#common/added_in_230"/>
+ <p>
+ <b>Usage notes:</b> The result value represents similar information as the
+ <codeph>now()</codeph> function, only as a <codeph>STRING</codeph> type
+ and with somewhat different formatting. For example, the day of the week
+ and the time zone identifier are included. This function is intended
+ primarily for compatibility with SQL code from other systems that
+ also have a <codeph>timeofday()</codeph> function. Prefer to use
+ <codeph>now()</codeph> if practical for any new Impala code.
+ </p>
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+ <p>
+ The following examples show the format of the <codeph>timeofday()</codeph>
+ return value, illustrate how that value is represented as a <codeph>STRING</codeph>
+ that you can manipulate with string processing functions, and how the format
+ compares with the return value from the <codeph>now()</codeph> function.
+ </p>
+<codeblock>/* Date and time fields in a STRING return value. */
+select timeofday();
++------------------------------+
+| timeofday() |
++------------------------------+
+| Tue Sep 01 15:13:18 2015 PDT |
++------------------------------+
+
+/* The return value can be processed by other string functions. */
+select upper(timeofday());
++------------------------------+
+| upper(timeofday()) |
++------------------------------+
+| TUE SEP 01 15:13:38 2015 PDT |
++------------------------------+
+
+/* The TIMEOFDAY() result is formatted differently than NOW(). NOW() returns a TIMESTAMP. */
+select now(), timeofday();
++-------------------------------+------------------------------+
+| now() | timeofday() |
++-------------------------------+------------------------------+
+| 2015-09-01 15:15:25.930021000 | Tue Sep 01 15:15:25 2015 PDT |
++-------------------------------+------------------------------+
+</codeblock>
+ </dd>
+
+ </dlentry>
+
+ <dlentry rev="2.3.0" id="timestamp_cmp">
+
+ <dt>
+ <codeph>timestamp_cmp(timestamp t1, timestamp t2)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">timestamp_cmp() function</indexterm>
+ <b>Purpose:</b> Tests if one <codeph>TIMESTAMP</codeph> value is
+ newer than, older than, or identical to another <codeph>TIMESTAMP</codeph> value.
+ <p>
+ <b>Return type:</b> <codeph>int</codeph> (either -1, 0, 1, or <codeph>NULL</codeph>)
+ </p>
+ <p conref="../shared/impala_common.xml#common/added_in_230"/>
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+ <p>
+ <b>Usage notes:</b> A comparison function for <codeph>TIMESTAMP</codeph>
+ values that only tests whether the date and time increases, decreases,
+ or stays the same. Similar to the <codeph>sign()</codeph> function
+ for numeric values.
+ </p>
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+ <p>
+ The following examples show all the possible return values for <codeph>timestamp_cmp()</codeph>.
+ If the first argument represents a later point in time than the second argument, the result is 1.
+ The amount of the difference is irrelevant, only the fact that one argument is greater than or less than the other.
+ If the first argument represents an earlier point in time than the second argument, the result is -1.
+ If the first and second arguments represent identical points in time, the result is 0.
+ If either argument is <codeph>NULL</codeph>, the result is <codeph>NULL</codeph>.
+ </p>
+<codeblock>/* First argument 'later' than second argument. */
+
+select timestamp_cmp(now() + interval 70 minutes, now());
++---------------------------------------------------+
+| timestamp_cmp(now() + interval 70 minutes, now()) |
++---------------------------------------------------+
+| 1 |
++---------------------------------------------------+
+
+select timestamp_cmp(now() + interval 3 days + interval 5 hours, now());
++------------------------------------------------------------------+
+| timestamp_cmp(now() + interval 3 days + interval 5 hours, now()) |
++------------------------------------------------------------------+
+| 1 |
++------------------------------------------------------------------+
+
+/* First argument 'earlier' than second argument. */
+select timestamp_cmp(now(), now() + interval 2 hours);
++------------------------------------------------+
+| timestamp_cmp(now(), now() + interval 2 hours) |
++------------------------------------------------+
+| -1 |
++------------------------------------------------+
+
+/* Both arguments represent the same point in time. */
+
+select timestamp_cmp(now(), now());
++-----------------------------+
+| timestamp_cmp(now(), now()) |
++-----------------------------+
+| 0 |
++-----------------------------+
+
+select timestamp_cmp(now() + interval 1 hour, now() + interval 60 minutes);
++---------------------------------------------------------------------+
+| timestamp_cmp(now() + interval 1 hour, now() + interval 60 minutes) |
++---------------------------------------------------------------------+
+| 0 |
++---------------------------------------------------------------------+
+
+/* Either argument NULL. */
+
+select timestamp_cmp(now(), null);
++----------------------------+
+| timestamp_cmp(now(), null) |
++----------------------------+
+| NULL |
++----------------------------+
+</codeblock>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="to_date">
+
+ <dt>
+ <codeph>to_date(timestamp)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">to_date() function</indexterm>
+ <b>Purpose:</b> Returns a string representation of the date field from a timestamp value.
+ <p>
+ <b>Return type:</b> <codeph>string</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="to_utc_timestamp">
+
+ <dt>
+ <codeph>to_utc_timestamp(timestamp, string timezone)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">to_utc_timestamp() function</indexterm>
+ <b>Purpose:</b> Converts a specified timestamp value in a specified time zone into the corresponding
+ value for the UTC time zone.
+ <p>
+ <b>Return type:</b> <codeph>timestamp</codeph>
+ </p>
+ <p>
+ <b>Usage notes:</b> Often used in combination with the <codeph>now()</codeph> function,
+ to translate local date and time values to the UTC time zone for consistent representation
+ on disk. The opposite of the <codeph>from_utc_timestamp()</codeph> function.
+ </p>
+ <p>
+ <b>Examples:</b> See discussion of time zones in <xref href="impala_timestamp.xml#timestamp"/>
+ for information about using this function for conversions between the local time zone and UTC.
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry rev="1.4.0" id="trunc">
+
+ <dt>
+ <codeph>trunc(timestamp, string unit)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">trunc() function</indexterm>
+ <b>Purpose:</b> Strips off fields from a <codeph>TIMESTAMP</codeph> value.
+ <p>
+ <b>Unit argument:</b> The <codeph>unit</codeph> argument value is case-sensitive. This argument string
+ can be one of:
+<!-- Some but not all of the arguments from http://docs.oracle.com/cd/B19306_01/server.102/b14200/functions230.htm#i1002084 are supported here.
+ Impala doesn't support 2-digit years or ISO-related years or values derived from ISO years.
+-->
+ <ul>
+ <li>
+ <codeph>SYYYY</codeph>, <codeph>YYYY</codeph>, <codeph>YEAR</codeph>, <codeph>SYEAR</codeph>,
+ <codeph>YYY</codeph>, <codeph>YY</codeph>, <codeph>Y</codeph>: Year.
+ </li>
+
+ <li>
+ <codeph>Q</codeph>: Quarter.
+ </li>
+
+ <li>
+ <codeph>MONTH</codeph>, <codeph>MON</codeph>, <codeph>MM</codeph>, <codeph>RM</codeph>: Month.
+ </li>
+
+ <li>
+ <codeph>WW</codeph>, <codeph>W</codeph>: Same day of the week as the first day of the month.
+ </li>
+
+ <li>
+ <codeph>DDD</codeph>, <codeph>DD</codeph>, <codeph>J</codeph>: Day.
+ </li>
+
+ <li>
+ <codeph>DAY</codeph>, <codeph>DY</codeph>, <codeph>D</codeph>: Starting day of the week.
+ (Not necessarily the current day.)
+ </li>
+
+ <li>
+ <codeph>HH</codeph>, <codeph>HH12</codeph>, <codeph>HH24</codeph>: Hour. A
+ <codeph>TIMESTAMP</codeph> value truncated to the hour is always represented in 24-hour
+ notation, even for the <codeph>HH12</codeph> argument string.
+ </li>
+
+ <li>
+ <codeph>MI</codeph>: Minute.
+ </li>
+ </ul>
+ </p>
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+ <p>
+ Typically used in <codeph>GROUP BY</codeph> queries to aggregate results from the
+ same hour, day, week, month, quarter, and so on. You can also use this function in an <codeph>INSERT
+ ... SELECT</codeph> into a partitioned table to divide <codeph>TIMESTAMP</codeph> values into the
+ correct partition.
+ </p>
+ <p>
+ Because the return value is a <codeph>TIMESTAMP</codeph>, if you cast the result of
+ <codeph>TRUNC()</codeph> to <codeph>STRING</codeph>, you will often see zeroed-out portions such as
+ <codeph>00:00:00</codeph> in the time field. If you only need the individual units such as hour, day,
+ month, or year, use the <codeph>EXTRACT()</codeph> function instead. If you need the individual units
+ from a truncated <codeph>TIMESTAMP</codeph> value, run the <codeph>TRUNCATE()</codeph> function on the
+ original value, then run <codeph>EXTRACT()</codeph> on the result.
+ </p>
+ <p>
+ <b>Return type:</b> <codeph>timestamp</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="unix_timestamp">
+
+ <dt>
+ <codeph>unix_timestamp(), unix_timestamp(string datetime), unix_timestamp(string datetime, string
+ format), unix_timestamp(timestamp datetime)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">unix_timestamp() function</indexterm>
+ <b>Purpose:</b> Returns an integer value representing the current date and time as a delta from the Unix
+ epoch, or converts from a specified date and time value represented as a <codeph>TIMESTAMP</codeph> or
+ <codeph>STRING</codeph>.
+ <p>
+ <b>Return type:</b> <codeph rev="2.2.0">bigint</codeph>
+ </p>
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+ <p rev="1.3.0">
+ See <codeph>from_unixtime()</codeph> for details about the patterns you can use in
+ the <codeph>format</codeph> string to represent the position of year, month, day, and so on in the
+ <codeph>date</codeph> string. In Impala 1.3 and higher, you have more flexibility to switch the
+ positions of elements and use different separator characters.
+ </p>
+ <p rev="2.2.3">
+ In CDH 5.4.3 and higher, you can include a trailing uppercase <codeph>Z</codeph> qualifier
+ to indicate <q>Zulu</q> time, a synonym for UTC.
+ </p>
+ <p rev="2.3.0">
+ In CDH 5.5.0 and higher, you can include a timezone offset specified as minutes and hours,
+ provided you also specify the details in the format string argument. The offset is specified in the format
+ string as a plus or minus sign followed by <codeph>hh:mm</codeph>, <codeph>hhmm</codeph>, or <codeph>hh</codeph>.
+ The <codeph>hh</codeph> must be lowercase, to distinguish it from the <codeph>HH</codeph> that represents
+ hours in the actual time value. Currently, only numeric timezone offsets are allowed, not symbolic names.
+ </p>
+ <p conref="../shared/impala_common.xml#common/y2k38"/>
+ <p conref="../shared/impala_common.xml#common/datetime_function_chaining"/>
+ <p conref="../shared/impala_common.xml#common/timezone_conversion_caveat"/>
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+ <p>
+ The following examples show different ways of turning the same date and time into an integer value.
+ A format string that Impala recognizes by default is interpreted as a UTC date and time.
+ The trailing <codeph>Z</codeph> is a confirmation that the timezone is UTC.
+ If the date and time string is formatted differently, a second argument specifies
+ the position and units for each of the date and time values.
+ </p>
+ <p>
+ The final two examples show how to specify a timezone offset of Pacific Daylight Time, which is 7 hours earlier than UTC.
+ You can use the numeric offset <codeph>-07:00</codeph> and the equivalent suffix of <codeph>-hh:mm</codeph>
+ in the format string, or specify the mnemonic name for the time zone in a call to <codeph>to_utc_timestamp()</codeph>.
+ This particular date and time expressed in PDT translates to a different number than the same date and time expressed in UTC.
+ </p>
+<codeblock rev="2.3.0">
+-- 3 ways of expressing the same date/time in UTC and converting to an integer.
+
+select unix_timestamp('2015-05-15 12:00:00');
++---------------------------------------+
+| unix_timestamp('2015-05-15 12:00:00') |
++---------------------------------------+
+| 1431691200 |
++---------------------------------------+
+
+select unix_timestamp('2015-05-15 12:00:00Z');
++----------------------------------------+
+| unix_timestamp('2015-05-15 12:00:00z') |
++----------------------------------------+
+| 1431691200 |
++----------------------------------------+
+
+select unix_timestamp('May 15, 2015 12:00:00', 'MMM dd, yyyy HH:mm:ss');
++------------------------------------------------------------------+
+| unix_timestamp('may 15, 2015 12:00:00', 'mmm dd, yyyy hh:mm:ss') |
++------------------------------------------------------------------+
+| 1431691200 |
++------------------------------------------------------------------+
+
+-- 2 ways of expressing the same date and time but in a different timezone.
+-- The resulting integer is different from the previous examples.
+
+select unix_timestamp('2015-05-15 12:00:00-07:00', 'yyyy-MM-dd HH:mm:ss-hh:mm');
++--------------------------------------------------------------------------+
+| unix_timestamp('2015-05-15 12:00:00-07:00', 'yyyy-mm-dd hh:mm:ss-hh:mm') |
++--------------------------------------------------------------------------+
+| 1431716400 |
++--------------------------------------------------------------------------+
+
+select unix_timestamp(to_utc_timestamp('2015-05-15 12:00:00', 'PDT'));
++----------------------------------------------------------------+
+| unix_timestamp(to_utc_timestamp('2015-05-15 12:00:00', 'pdt')) |
++----------------------------------------------------------------+
+| 1431716400 |
++----------------------------------------------------------------+
+</codeblock>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="weekofyear">
+
+ <dt>
+ <!-- <codeph>weekofyear(string date)</codeph> -->
+ <codeph>weekofyear(timestamp date)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">weekofyear() function</indexterm>
+ <b>Purpose:</b> Returns the corresponding week (1-53) from the date portion of a <codeph>TIMESTAMP</codeph>.
+ <p>
+ <b>Return type:</b> <codeph>int</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry rev="1.3.0" id="weeks_add">
+
+ <dt>
+ <codeph>weeks_add(timestamp date, int weeks)</codeph>, <codeph>weeks_add(timestamp date, bigint
+ weeks)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">weeks_add() function</indexterm>
+ <b>Purpose:</b> Returns the specified date and time plus some number of weeks.
+ <p>
+ <b>Return type:</b> <codeph>timestamp</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry rev="1.3.0" id="weeks_sub">
+
+ <dt>
+ <codeph>weeks_sub(timestamp date, int weeks)</codeph>, <codeph>weeks_sub(timestamp date, bigint
+ weeks)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">weeks_sub() function</indexterm>
+ <b>Purpose:</b> Returns the specified date and time minus some number of weeks.
+ <p>
+ <b>Return type:</b> <codeph>timestamp</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="year">
+
+ <dt>
+ <!-- <codeph>year(string date)</codeph> -->
+ <codeph>year(timestamp date)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">year() function</indexterm>
+ <b>Purpose:</b> Returns the year field from the date portion of a <codeph>TIMESTAMP</codeph>.
+ <p>
+ <b>Return type:</b> <codeph>int</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry rev="1.3.0" id="years_add">
+
+ <dt>
+ <codeph>years_add(timestamp date, int years)</codeph>, <codeph>years_add(timestamp date, bigint
+ years)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">years_add() function</indexterm>
+ <b>Purpose:</b> Returns the specified date and time plus some number of years.
+ <p>
+ <b>Return type:</b> <codeph>timestamp</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry rev="1.3.0" id="years_sub">
+
+ <dt>
+ <codeph>years_sub(timestamp date, int years)</codeph>, <codeph>years_sub(timestamp date, bigint
+ years)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">years_sub() function</indexterm>
+ <b>Purpose:</b> Returns the specified date and time minus some number of years.
+ <p>
+ <b>Return type:</b> <codeph>timestamp</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+ </dl>
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_ddl.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_ddl.xml b/docs/topics/impala_ddl.xml
new file mode 100644
index 0000000..8e6a3bd
--- /dev/null
+++ b/docs/topics/impala_ddl.xml
@@ -0,0 +1,150 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="ddl">
+
+ <title>DDL Statements</title>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="DDL"/>
+ <data name="Category" value="Data Analysts"/>
+ <data name="Category" value="Developers"/>
+ <data name="Category" value="Schemas"/>
+ <data name="Category" value="Tables"/>
+ <data name="Category" value="Databases"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ DDL refers to <q>Data Definition Language</q>, a subset of SQL statements that change the structure of the
+ database schema in some way, typically by creating, deleting, or modifying schema objects such as databases,
+ tables, and views. Most Impala DDL statements start with the keywords <codeph>CREATE</codeph>,
+ <codeph>DROP</codeph>, or <codeph>ALTER</codeph>.
+ </p>
+
+ <p>
+ The Impala DDL statements are:
+ </p>
+
+ <ul>
+ <li>
+ <xref href="impala_alter_table.xml#alter_table"/>
+ </li>
+
+ <li>
+ <xref href="impala_alter_view.xml#alter_view"/>
+ </li>
+
+ <li>
+ <xref href="impala_compute_stats.xml#compute_stats"/>
+ </li>
+
+ <li>
+ <xref href="impala_create_database.xml#create_database"/>
+ </li>
+
+ <li>
+ <xref href="impala_create_function.xml#create_function"/>
+ </li>
+
+ <li rev="2.0.0">
+ <xref href="impala_create_role.xml#create_role"/>
+ </li>
+
+ <li>
+ <xref href="impala_create_table.xml#create_table"/>
+ </li>
+
+ <li>
+ <xref href="impala_create_view.xml#create_view"/>
+ </li>
+
+ <li>
+ <xref href="impala_drop_database.xml#drop_database"/>
+ </li>
+
+ <li>
+ <xref href="impala_drop_function.xml#drop_function"/>
+ </li>
+
+ <li rev="2.0.0">
+ <xref href="impala_drop_role.xml#drop_role"/>
+ </li>
+
+ <li>
+ <xref href="impala_drop_table.xml#drop_table"/>
+ </li>
+
+ <li>
+ <xref href="impala_drop_view.xml#drop_view"/>
+ </li>
+
+ <li rev="2.0.0">
+ <xref href="impala_grant.xml#grant"/>
+ </li>
+
+ <li rev="2.0.0">
+ <xref href="impala_revoke.xml#revoke"/>
+ </li>
+ </ul>
+
+ <p>
+ After Impala executes a DDL command, information about available tables, columns, views, partitions, and so
+ on is automatically synchronized between all the Impala nodes in a cluster. (Prior to Impala 1.2, you had to
+ issue a <codeph>REFRESH</codeph> or <codeph>INVALIDATE METADATA</codeph> statement manually on the other
+ nodes to make them aware of the changes.)
+ </p>
+
+ <p>
+ If the timing of metadata updates is significant, for example if you use round-robin scheduling where each
+ query could be issued through a different Impala node, you can enable the
+ <xref href="impala_sync_ddl.xml#sync_ddl">SYNC_DDL</xref> query option to make the DDL statement wait until
+ all nodes have been notified about the metadata changes.
+ </p>
+
+ <p rev="2.2.0">
+ See <xref href="impala_s3.xml#s3"/> for details about how Impala DDL statements interact with
+ tables and partitions stored in the Amazon S3 filesystem.
+ </p>
+
+ <p>
+ Although the <codeph>INSERT</codeph> statement is officially classified as a DML (data manipulation language)
+ statement, it also involves metadata changes that must be broadcast to all Impala nodes, and so is also
+ affected by the <codeph>SYNC_DDL</codeph> query option.
+ </p>
+
+ <p>
+ Because the <codeph>SYNC_DDL</codeph> query option makes each DDL operation take longer than normal, you
+ might only enable it before the last DDL operation in a sequence. For example, if you are running a script
+ that issues multiple DDL operations to set up an entire new schema, add several new partitions, and so on,
+ you might minimize the performance overhead by enabling the query option only before the last
+ <codeph>CREATE</codeph>, <codeph>DROP</codeph>, <codeph>ALTER</codeph>, or <codeph>INSERT</codeph> statement.
+ The script only finishes when all the relevant metadata changes are recognized by all the Impala nodes, so
+ you could connect to any node and issue queries through it.
+ </p>
+
+ <p>
+ The classification of DDL, DML, and other statements is not necessarily the same between Impala and Hive.
+ Impala organizes these statements in a way intended to be familiar to people who work with relational
+ databases or data warehouse products. Statements that modify the metastore database, such as <codeph>COMPUTE
+ STATS</codeph>, are classified as DDL. Statements that only query the metastore database, such as
+ <codeph>SHOW</codeph> or <codeph>DESCRIBE</codeph>, are put into a separate category of utility statements.
+ </p>
+
+ <note>
+ The query types shown in the Impala debug web user interface might not match exactly the categories listed
+ here. For example, currently the <codeph>USE</codeph> statement is shown as DDL in the debug web UI. The
+ query types shown in the debug web UI are subject to change, for improved consistency.
+ </note>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ The other major classifications of SQL statements are data manipulation language (see
+ <xref href="impala_dml.xml#dml"/>) and queries (see <xref href="impala_select.xml#select"/>).
+ </p>
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_debug_action.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_debug_action.xml b/docs/topics/impala_debug_action.xml
new file mode 100644
index 0000000..b931979
--- /dev/null
+++ b/docs/topics/impala_debug_action.xml
@@ -0,0 +1,28 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="debug_action">
+
+ <title>DEBUG_ACTION Query Option</title>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="Impala Query Options"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">DEBUG_ACTION query option</indexterm>
+ Introduces artificial problem conditions within queries. For internal Cloudera debugging and troubleshooting.
+ </p>
+
+ <p>
+ <b>Type:</b> <codeph>STRING</codeph>
+ </p>
+
+ <p>
+ <b>Default:</b> empty string
+ </p>
+ </conbody>
+</concept>
[22/22] incubator-impala git commit: First try at porting over the
source files necessary for the Impala SQL Reference.
Posted by jr...@apache.org.
First try at porting over the source files necessary for the Impala SQL
Reference.
Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/463ddf92
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/463ddf92
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/463ddf92
Branch: refs/heads/doc_prototype
Commit: 463ddf9243da95f1ab68a4f9b489ef31094e6fc3
Parents: 0ad935b
Author: John Russell <jr...@cloudera.com>
Authored: Tue Jul 26 16:02:54 2016 -0700
Committer: John Russell <jr...@cloudera.com>
Committed: Tue Jul 26 16:02:54 2016 -0700
----------------------------------------------------------------------
docs/impala.ditamap | 252 ++
docs/impala_sqlref.ditamap | 146 +
docs/shared/ImpalaVariables.xml | 46 +
docs/shared/impala_common.xml | 2477 ++++++++++++++++
.../impala_abort_on_default_limit_exceeded.xml | 20 +
docs/topics/impala_abort_on_error.xml | 40 +
docs/topics/impala_aggregate_functions.xml | 33 +
docs/topics/impala_aliases.xml | 71 +
.../topics/impala_allow_unsupported_formats.xml | 29 +
docs/topics/impala_alter_table.xml | 411 +++
docs/topics/impala_alter_view.xml | 73 +
docs/topics/impala_analytic_functions.xml | 1742 +++++++++++
docs/topics/impala_appx_count_distinct.xml | 77 +
docs/topics/impala_appx_median.xml | 122 +
docs/topics/impala_array.xml | 266 ++
docs/topics/impala_avg.xml | 223 ++
docs/topics/impala_batch_size.xml | 33 +
docs/topics/impala_bigint.xml | 100 +
docs/topics/impala_bit_functions.xml | 798 +++++
docs/topics/impala_boolean.xml | 128 +
docs/topics/impala_char.xml | 275 ++
docs/topics/impala_comments.xml | 51 +
docs/topics/impala_complex_types.xml | 2725 ++++++++++++++++++
docs/topics/impala_compression_codec.xml | 95 +
docs/topics/impala_compute_stats.xml | 418 +++
docs/topics/impala_conditional_functions.xml | 443 +++
docs/topics/impala_conversion_functions.xml | 758 +++++
docs/topics/impala_count.xml | 230 ++
docs/topics/impala_create_database.xml | 115 +
docs/topics/impala_create_function.xml | 291 ++
docs/topics/impala_create_role.xml | 66 +
docs/topics/impala_create_table.xml | 650 +++++
docs/topics/impala_create_view.xml | 136 +
docs/topics/impala_databases.xml | 65 +
docs/topics/impala_datatypes.xml | 43 +
docs/topics/impala_datetime_functions.xml | 1505 ++++++++++
docs/topics/impala_ddl.xml | 150 +
docs/topics/impala_debug_action.xml | 28 +
docs/topics/impala_decimal.xml | 836 ++++++
docs/topics/impala_default_order_by_limit.xml | 34 +
docs/topics/impala_delete.xml | 64 +
docs/topics/impala_describe.xml | 561 ++++
docs/topics/impala_disable_codegen.xml | 36 +
docs/topics/impala_disable_unsafe_spills.xml | 48 +
docs/topics/impala_distinct.xml | 59 +
docs/topics/impala_dml.xml | 85 +
docs/topics/impala_double.xml | 100 +
docs/topics/impala_drop_database.xml | 124 +
docs/topics/impala_drop_function.xml | 60 +
docs/topics/impala_drop_role.xml | 67 +
docs/topics/impala_drop_stats.xml | 275 ++
docs/topics/impala_drop_table.xml | 142 +
docs/topics/impala_drop_view.xml | 48 +
.../impala_exec_single_node_rows_threshold.xml | 91 +
docs/topics/impala_explain.xml | 224 ++
docs/topics/impala_explain_level.xml | 338 +++
docs/topics/impala_float.xml | 94 +
docs/topics/impala_functions.xml | 162 ++
docs/topics/impala_functions_overview.xml | 116 +
docs/topics/impala_grant.xml | 117 +
docs/topics/impala_group_by.xml | 137 +
docs/topics/impala_group_concat.xml | 133 +
docs/topics/impala_having.xml | 42 +
docs/topics/impala_hbase_cache_blocks.xml | 34 +
docs/topics/impala_hbase_caching.xml | 39 +
docs/topics/impala_hints.xml | 247 ++
docs/topics/impala_identifiers.xml | 114 +
docs/topics/impala_insert.xml | 676 +++++
docs/topics/impala_int.xml | 95 +
docs/topics/impala_invalidate_metadata.xml | 236 ++
docs/topics/impala_joins.xml | 520 ++++
docs/topics/impala_langref.xml | 179 ++
docs/topics/impala_langref_sql.xml | 35 +
docs/topics/impala_langref_unsupported.xml | 296 ++
docs/topics/impala_limit.xml | 149 +
docs/topics/impala_literals.xml | 384 +++
docs/topics/impala_live_progress.xml | 81 +
docs/topics/impala_live_summary.xml | 207 ++
docs/topics/impala_load_data.xml | 237 ++
docs/topics/impala_map.xml | 264 ++
docs/topics/impala_math_functions.xml | 1336 +++++++++
docs/topics/impala_max.xml | 192 ++
docs/topics/impala_max_errors.xml | 44 +
docs/topics/impala_max_io_buffers.xml | 28 +
docs/topics/impala_max_scan_range_length.xml | 45 +
docs/topics/impala_mem_limit.xml | 208 ++
docs/topics/impala_min.xml | 191 ++
docs/topics/impala_misc_functions.xml | 148 +
docs/topics/impala_ndv.xml | 133 +
docs/topics/impala_num_nodes.xml | 45 +
docs/topics/impala_num_scanner_threads.xml | 32 +
docs/topics/impala_offset.xml | 64 +
docs/topics/impala_operators.xml | 1262 ++++++++
docs/topics/impala_order_by.xml | 316 ++
.../topics/impala_parquet_compression_codec.xml | 25 +
docs/topics/impala_parquet_file_size.xml | 82 +
docs/topics/impala_porting.xml | 622 ++++
docs/topics/impala_query_options.xml | 75 +
docs/topics/impala_query_timeout_s.xml | 51 +
docs/topics/impala_real.xml | 46 +
docs/topics/impala_refresh.xml | 234 ++
docs/topics/impala_request_pool.xml | 45 +
.../impala_reservation_request_timeout.xml | 35 +
docs/topics/impala_revoke.xml | 96 +
docs/topics/impala_schema_objects.xml | 57 +
docs/topics/impala_select.xml | 203 ++
docs/topics/impala_set.xml | 90 +
docs/topics/impala_show.xml | 1263 ++++++++
docs/topics/impala_smallint.xml | 101 +
docs/topics/impala_stddev.xml | 116 +
docs/topics/impala_string.xml | 161 ++
docs/topics/impala_string_functions.xml | 719 +++++
docs/topics/impala_struct.xml | 406 +++
docs/topics/impala_subqueries.xml | 318 ++
docs/topics/impala_sum.xml | 236 ++
docs/topics/impala_support_start_over.xml | 29 +
docs/topics/impala_sync_ddl.xml | 56 +
docs/topics/impala_tables.xml | 258 ++
docs/topics/impala_timestamp.xml | 441 +++
docs/topics/impala_tinyint.xml | 101 +
docs/topics/impala_truncate_table.xml | 151 +
docs/topics/impala_udf.xml | 1759 +++++++++++
docs/topics/impala_union.xml | 150 +
docs/topics/impala_update.xml | 64 +
docs/topics/impala_use.xml | 77 +
docs/topics/impala_v_cpu_cores.xml | 37 +
docs/topics/impala_varchar.xml | 215 ++
docs/topics/impala_variance.xml | 127 +
docs/topics/impala_views.xml | 185 ++
docs/topics/impala_with.xml | 64 +
130 files changed, 35656 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/impala.ditamap
----------------------------------------------------------------------
diff --git a/docs/impala.ditamap b/docs/impala.ditamap
new file mode 100644
index 0000000..f35f84a
--- /dev/null
+++ b/docs/impala.ditamap
@@ -0,0 +1,252 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE map PUBLIC "-//OASIS//DTD DITA Map//EN" "map.dtd">
+<map id="impala">
+ <title>Impala</title>
+ <topicmeta>
+ <prodinfo conref="shared/ImpalaVariables.xml#impala_vars/prodinfo_pmw_tmv_km">
+ <prodname/>
+ <vrmlist>
+ <vrm version="version_dlq_gry_sm"/>
+ </vrmlist>
+ </prodinfo>
+ </topicmeta>
+<!-- Here is the former site of the Release Notes. Experimenting with moving those to the end for better PDF experience. -->
+<!-- See if there's a way to move include the Release Notes here in HTML, but after Installing/Using for PDF. -->
+<!-- Bring the entire contents of the Installing and Using DITA map in here. -->
+<!--
+ <topicref audience="standalone" href="/Content/Installing-and-Using-Impala_xi42980.xml">
+ <mapref href="/Content/Installing-and-Using-Impala_xi42979.ditamap" format="ditamap"/>
+ </topicref>
+ -->
+ <topicref href="topics/impala_intro.xml" audience="standalone"/>
+ <topicref href="topics/impala_concepts.xml">
+ <topicref href="topics/impala_components.xml"/>
+ <topicref href="topics/impala_development.xml"/>
+ <topicref href="topics/impala_hadoop.xml"/>
+ </topicref>
+ <topicref href="topics/impala_planning.xml">
+ <topicref href="topics/impala_prereqs.xml#prereqs"/>
+ <topicref href="topics/impala_cluster_sizing.xml"/>
+ <topicref href="topics/impala_schema_design.xml"/>
+ </topicref>
+ <topicref audience="standalone" href="topics/impala_install.xml#install">
+ <topicref href="topics/impala_cm_installation.xml#cm_installation"/>
+ <topicref href="topics/impala_noncm_installation.xml#noncm_installation"/>
+ </topicref>
+ <topicref audience="standalone" href="topics/impala_config.xml">
+ <topicref href="topics/impala_config_performance.xml"/>
+ <topicref href="topics/impala_odbc.xml"/>
+ <topicref href="topics/impala_jdbc.xml"/>
+ </topicref>
+ <topicref audience="standalone" href="topics/impala_upgrading.xml"/>
+ <topicref audience="standalone" href="topics/impala_processes.xml">
+ <topicref href="topics/impala_config_options.xml"/>
+ </topicref>
+ <topicref href="topics/impala_tutorial.xml"/>
+ <topicref href="topics/impala_admin.xml">
+ <topicref audience="standalone" href="topics/impala_admission.xml"/>
+ <topicref audience="standalone" href="topics/impala_resource_management.xml"/>
+ <topicref href="topics/impala_timeouts.xml"/>
+ <topicref href="topics/impala_proxy.xml"/>
+ <topicref href="topics/impala_disk_space.xml"/>
+ <topicref audience="integrated" href="topics/impala_auditing.xml"/>
+ <topicref audience="integrated" href="topics/impala_lineage.xml"/>
+ </topicref>
+ <topicref audience="standalone" href="topics/impala_security.xml">
+ <topicref href="topics/impala_security_guidelines.xml"/>
+ <topicref href="topics/impala_security_files.xml"/>
+ <topicref href="topics/impala_security_install.xml"/>
+ <topicref href="topics/impala_security_metastore.xml"/>
+ <topicref href="topics/impala_security_webui.xml"/>
+ <topicref href="topics/impala_ssl.xml"/>
+ <topicref href="topics/impala_authorization.xml"/>
+ <topicref href="topics/impala_authentication.xml">
+ <topicref href="topics/impala_kerberos.xml"/>
+ <topicref href="topics/impala_ldap.xml"/>
+ <topicref href="topics/impala_mixed_security.xml"/>
+ <topicref href="topics/impala_delegation.xml"/>
+ </topicref>
+ <topicref href="topics/impala_auditing.xml"/>
+ <topicref href="topics/impala_lineage.xml"/>
+ </topicref>
+ <topicref href="topics/impala_langref.xml">
+ <topicref href="topics/impala_comments.xml"/>
+ <topicref href="topics/impala_datatypes.xml">
+ <topicref href="topics/impala_array.xml"/>
+ <topicref href="topics/impala_bigint.xml"/>
+ <topicref href="topics/impala_boolean.xml"/>
+ <topicref href="topics/impala_char.xml"/>
+ <topicref href="topics/impala_decimal.xml"/>
+ <topicref href="topics/impala_double.xml"/>
+ <topicref href="topics/impala_float.xml"/>
+ <topicref href="topics/impala_int.xml"/>
+ <topicref href="topics/impala_map.xml"/>
+ <topicref href="topics/impala_real.xml"/>
+ <topicref href="topics/impala_smallint.xml"/>
+ <topicref href="topics/impala_string.xml"/>
+ <topicref href="topics/impala_struct.xml"/>
+ <topicref href="topics/impala_timestamp.xml"/>
+ <topicref href="topics/impala_tinyint.xml"/>
+ <topicref href="topics/impala_varchar.xml"/>
+ <topicref href="topics/impala_complex_types.xml"/>
+ </topicref>
+ <topicref href="topics/impala_literals.xml"/>
+ <topicref href="topics/impala_operators.xml"/>
+ <topicref href="topics/impala_schema_objects.xml">
+ <topicref href="topics/impala_aliases.xml"/>
+ <topicref href="topics/impala_databases.xml"/>
+ <topicref href="topics/impala_functions_overview.xml"/>
+ <topicref href="topics/impala_identifiers.xml"/>
+ <topicref href="topics/impala_tables.xml"/>
+ <topicref href="topics/impala_views.xml"/>
+ </topicref>
+ <topicref href="topics/impala_langref_sql.xml">
+ <topicref href="topics/impala_ddl.xml"/>
+ <topicref href="topics/impala_dml.xml"/>
+ <topicref href="topics/impala_alter_table.xml"/>
+ <topicref href="topics/impala_alter_view.xml"/>
+ <topicref href="topics/impala_compute_stats.xml"/>
+ <topicref href="topics/impala_create_database.xml"/>
+ <topicref href="topics/impala_create_function.xml"/>
+ <topicref href="topics/impala_create_role.xml"/>
+ <topicref href="topics/impala_create_table.xml"/>
+ <topicref href="topics/impala_create_view.xml"/>
+ <topicref audience="impala_next" href="topics/impala_delete.xml"/>
+ <topicref href="topics/impala_describe.xml"/>
+ <topicref href="topics/impala_drop_database.xml"/>
+ <topicref href="topics/impala_drop_function.xml"/>
+ <topicref href="topics/impala_drop_role.xml"/>
+ <topicref href="topics/impala_drop_stats.xml"/>
+ <topicref href="topics/impala_drop_table.xml"/>
+ <topicref href="topics/impala_drop_view.xml"/>
+ <topicref href="topics/impala_explain.xml"/>
+ <topicref href="topics/impala_grant.xml"/>
+ <topicref href="topics/impala_insert.xml"/>
+ <topicref href="topics/impala_invalidate_metadata.xml"/>
+ <topicref href="topics/impala_load_data.xml"/>
+ <topicref href="topics/impala_refresh.xml"/>
+ <topicref href="topics/impala_revoke.xml"/>
+ <topicref href="topics/impala_select.xml">
+ <topicref href="topics/impala_joins.xml"/>
+ <topicref href="topics/impala_order_by.xml"/>
+ <topicref href="topics/impala_group_by.xml"/>
+ <topicref href="topics/impala_having.xml"/>
+ <topicref href="topics/impala_limit.xml"/>
+ <topicref href="topics/impala_offset.xml"/>
+ <topicref href="topics/impala_union.xml"/>
+ <topicref href="topics/impala_subqueries.xml"/>
+ <topicref href="topics/impala_with.xml"/>
+ <topicref href="topics/impala_distinct.xml"/>
+ <topicref href="topics/impala_hints.xml"/>
+ </topicref>
+ <topicref href="topics/impala_set.xml"/>
+ <topicref href="topics/impala_query_options.xml">
+ <topicref href="topics/impala_abort_on_default_limit_exceeded.xml"/>
+ <topicref href="topics/impala_abort_on_error.xml"/>
+ <topicref href="topics/impala_allow_unsupported_formats.xml"/>
+ <topicref href="topics/impala_appx_count_distinct.xml"/>
+ <topicref href="topics/impala_batch_size.xml"/>
+ <topicref href="topics/impala_compression_codec.xml"/>
+ <topicref href="topics/impala_debug_action.xml"/>
+ <topicref href="topics/impala_default_order_by_limit.xml"/>
+ <topicref href="topics/impala_disable_codegen.xml"/>
+ <topicref href="topics/impala_disable_unsafe_spills.xml"/>
+ <topicref href="topics/impala_exec_single_node_rows_threshold.xml"/>
+ <topicref href="topics/impala_explain_level.xml"/>
+ <topicref href="topics/impala_hbase_cache_blocks.xml"/>
+ <topicref href="topics/impala_hbase_caching.xml"/>
+ <topicref href="topics/impala_live_progress.xml"/>
+ <topicref href="topics/impala_live_summary.xml"/>
+ <topicref href="topics/impala_max_errors.xml"/>
+ <topicref href="topics/impala_max_io_buffers.xml"/>
+ <topicref href="topics/impala_max_scan_range_length.xml"/>
+ <topicref href="topics/impala_mem_limit.xml"/>
+ <topicref href="topics/impala_num_nodes.xml"/>
+ <topicref href="topics/impala_num_scanner_threads.xml"/>
+ <topicref href="topics/impala_parquet_compression_codec.xml"/>
+ <topicref href="topics/impala_parquet_file_size.xml"/>
+ <topicref href="topics/impala_query_timeout_s.xml"/>
+ <topicref href="topics/impala_request_pool.xml"/>
+ <topicref href="topics/impala_reservation_request_timeout.xml"/>
+ <topicref href="topics/impala_support_start_over.xml"/>
+ <topicref href="topics/impala_sync_ddl.xml"/>
+ <topicref href="topics/impala_v_cpu_cores.xml"/>
+ </topicref>
+ <topicref href="topics/impala_show.xml"/>
+ <topicref href="topics/impala_truncate_table.xml"/>
+ <topicref audience="impala_next" href="topics/impala_update.xml"/>
+ <topicref href="topics/impala_use.xml"/>
+ </topicref>
+ <topicref href="topics/impala_functions.xml">
+ <topicref href="topics/impala_math_functions.xml"/>
+ <topicref href="topics/impala_bit_functions.xml"/>
+ <topicref href="topics/impala_conversion_functions.xml"/>
+ <topicref href="topics/impala_datetime_functions.xml"/>
+ <topicref href="topics/impala_conditional_functions.xml"/>
+ <topicref href="topics/impala_string_functions.xml"/>
+ <topicref href="topics/impala_misc_functions.xml"/>
+ <topicref href="topics/impala_aggregate_functions.xml">
+ <topicref href="topics/impala_appx_median.xml"/>
+ <topicref href="topics/impala_avg.xml"/>
+ <topicref href="topics/impala_count.xml"/>
+ <topicref href="topics/impala_group_concat.xml"/>
+ <topicref href="topics/impala_max.xml"/>
+ <topicref href="topics/impala_min.xml"/>
+ <topicref href="topics/impala_ndv.xml"/>
+ <topicref href="topics/impala_stddev.xml"/>
+ <topicref href="topics/impala_sum.xml"/>
+ <topicref href="topics/impala_variance.xml"/>
+ </topicref>
+ <topicref href="topics/impala_analytic_functions.xml"/>
+ <topicref href="topics/impala_udf.xml"/>
+ </topicref>
+ <topicref href="topics/impala_langref_unsupported.xml"/>
+ <topicref href="topics/impala_porting.xml"/>
+ </topicref>
+ <topicref href="topics/impala_impala_shell.xml">
+ <topicref href="topics/impala_shell_options.xml"/>
+ <topicref href="topics/impala_connecting.xml"/>
+ <topicref href="topics/impala_shell_running_commands.xml"/>
+ <topicref href="topics/impala_shell_commands.xml"/>
+ </topicref>
+ <topicref href="topics/impala_performance.xml">
+ <topicref href="topics/impala_perf_cookbook.xml"/>
+ <topicref href="topics/impala_perf_joins.xml"/>
+ <topicref href="topics/impala_perf_stats.xml"/>
+ <topicref href="topics/impala_perf_benchmarking.xml"/>
+ <topicref href="topics/impala_perf_resources.xml"/>
+ <topicref href="topics/impala_perf_hdfs_caching.xml"/>
+ <topicref href="topics/impala_perf_testing.xml"/>
+ <topicref href="topics/impala_explain_plan.xml"/>
+ <topicref href="topics/impala_perf_skew.xml"/>
+ <topicref audience="Cloudera" href="topics/impala_perf_ddl.xml"/>
+ </topicref>
+ <topicref href="topics/impala_scalability.xml"/>
+ <topicref href="topics/impala_partitioning.xml"/>
+ <topicref href="topics/impala_file_formats.xml">
+ <topicref href="topics/impala_txtfile.xml"/>
+ <topicref href="topics/impala_parquet.xml"/>
+ <topicref href="topics/impala_avro.xml"/>
+ <topicref href="topics/impala_rcfile.xml"/>
+ <topicref href="topics/impala_seqfile.xml"/>
+ </topicref>
+ <topicref audience="impala_next" href="topics/impala_kudu.xml"/>
+ <topicref href="topics/impala_hbase.xml"/>
+ <topicref href="topics/impala_s3.xml"/>
+ <topicref href="topics/impala_isilon.xml"/>
+ <topicref href="topics/impala_logging.xml"/>
+ <topicref href="topics/impala_troubleshooting.xml">
+ <topicref href="topics/impala_webui.xml"/>
+ </topicref>
+ <topicref href="topics/impala_ports.xml"/>
+ <topicref href="topics/impala_reserved_words.xml"/>
+<!-- End of former contents of Installing-and-Using-Impala_xi42979.ditamap. -->
+<!-- Need to make this rg_ topic disappear from the Impala PDF. Put audience="standalone"
+ inside the topic itself? -->
+ <topicref audience="standalone" href="topics/rg_impala_vd.xml"/>
+ <topicref audience="standalone" href="topics/impala_faq.xml"/>
+ <topicref audience="standalone" href="topics/impala_release_notes.xml">
+ <mapref href="Cloudera-Impala-Release-Notes.ditamap" format="ditamap"
+ audience="standalone"/>
+ </topicref>
+</map>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/impala_sqlref.ditamap
----------------------------------------------------------------------
diff --git a/docs/impala_sqlref.ditamap b/docs/impala_sqlref.ditamap
new file mode 100644
index 0000000..1b1c345
--- /dev/null
+++ b/docs/impala_sqlref.ditamap
@@ -0,0 +1,146 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE map PUBLIC "-//OASIS//DTD DITA Map//EN" "map.dtd">
+<map id="impala_sqlref">
+ <title>Impala SQL Reference</title>
+ <topicmeta>
+ <prodinfo conref="shared/ImpalaVariables.xml#impala_vars/prodinfo_pmw_tmv_km">
+ <prodname/>
+ <vrmlist>
+ <vrm version="version_dlq_gry_sm"/>
+ </vrmlist>
+ </prodinfo>
+ </topicmeta>
+ <topicref href="topics/impala_langref.xml"/>
+ <topicref href="topics/impala_comments.xml"/>
+ <topicref href="topics/impala_datatypes.xml">
+ <topicref href="topics/impala_array.xml"/>
+ <topicref href="topics/impala_bigint.xml"/>
+ <topicref href="topics/impala_boolean.xml"/>
+ <topicref href="topics/impala_char.xml"/>
+ <topicref href="topics/impala_decimal.xml"/>
+ <topicref href="topics/impala_double.xml"/>
+ <topicref href="topics/impala_float.xml"/>
+ <topicref href="topics/impala_int.xml"/>
+ <topicref href="topics/impala_map.xml"/>
+ <topicref href="topics/impala_real.xml"/>
+ <topicref href="topics/impala_smallint.xml"/>
+ <topicref href="topics/impala_string.xml"/>
+ <topicref href="topics/impala_struct.xml"/>
+ <topicref href="topics/impala_timestamp.xml"/>
+ <topicref href="topics/impala_tinyint.xml"/>
+ <topicref href="topics/impala_varchar.xml"/>
+ <topicref href="topics/impala_complex_types.xml"/>
+ </topicref>
+ <topicref href="topics/impala_literals.xml"/>
+ <topicref href="topics/impala_operators.xml"/>
+ <topicref href="topics/impala_schema_objects.xml">
+ <topicref href="topics/impala_aliases.xml"/>
+ <topicref href="topics/impala_databases.xml"/>
+ <topicref href="topics/impala_functions_overview.xml"/>
+ <topicref href="topics/impala_identifiers.xml"/>
+ <topicref href="topics/impala_tables.xml"/>
+ <topicref href="topics/impala_views.xml"/>
+ </topicref>
+ <topicref href="topics/impala_langref_sql.xml">
+ <topicref href="topics/impala_ddl.xml"/>
+ <topicref href="topics/impala_dml.xml"/>
+ <topicref href="topics/impala_alter_table.xml"/>
+ <topicref href="topics/impala_alter_view.xml"/>
+ <topicref href="topics/impala_compute_stats.xml"/>
+ <topicref href="topics/impala_create_database.xml"/>
+ <topicref href="topics/impala_create_function.xml"/>
+ <topicref href="topics/impala_create_role.xml"/>
+ <topicref href="topics/impala_create_table.xml"/>
+ <topicref href="topics/impala_create_view.xml"/>
+ <topicref audience="impala_next" href="topics/impala_delete.xml"/>
+ <topicref href="topics/impala_describe.xml"/>
+ <topicref href="topics/impala_drop_database.xml"/>
+ <topicref href="topics/impala_drop_function.xml"/>
+ <topicref href="topics/impala_drop_role.xml"/>
+ <topicref href="topics/impala_drop_stats.xml"/>
+ <topicref href="topics/impala_drop_table.xml"/>
+ <topicref href="topics/impala_drop_view.xml"/>
+ <topicref href="topics/impala_explain.xml"/>
+ <topicref href="topics/impala_grant.xml"/>
+ <topicref href="topics/impala_insert.xml"/>
+ <topicref href="topics/impala_invalidate_metadata.xml"/>
+ <topicref href="topics/impala_load_data.xml"/>
+ <topicref href="topics/impala_refresh.xml"/>
+ <topicref href="topics/impala_revoke.xml"/>
+ <topicref href="topics/impala_select.xml">
+ <topicref href="topics/impala_joins.xml"/>
+ <topicref href="topics/impala_order_by.xml"/>
+ <topicref href="topics/impala_group_by.xml"/>
+ <topicref href="topics/impala_having.xml"/>
+ <topicref href="topics/impala_limit.xml"/>
+ <topicref href="topics/impala_offset.xml"/>
+ <topicref href="topics/impala_union.xml"/>
+ <topicref href="topics/impala_subqueries.xml"/>
+ <topicref href="topics/impala_with.xml"/>
+ <topicref href="topics/impala_distinct.xml"/>
+ <topicref href="topics/impala_hints.xml"/>
+ </topicref>
+ <topicref href="topics/impala_set.xml"/>
+ <topicref href="topics/impala_query_options.xml">
+ <topicref href="topics/impala_abort_on_default_limit_exceeded.xml"/>
+ <topicref href="topics/impala_abort_on_error.xml"/>
+ <topicref href="topics/impala_allow_unsupported_formats.xml"/>
+ <topicref href="topics/impala_appx_count_distinct.xml"/>
+ <topicref href="topics/impala_batch_size.xml"/>
+ <topicref href="topics/impala_compression_codec.xml"/>
+ <topicref href="topics/impala_debug_action.xml"/>
+ <topicref href="topics/impala_default_order_by_limit.xml"/>
+ <topicref href="topics/impala_disable_codegen.xml"/>
+ <topicref href="topics/impala_disable_unsafe_spills.xml"/>
+ <topicref href="topics/impala_exec_single_node_rows_threshold.xml"/>
+ <topicref href="topics/impala_explain_level.xml"/>
+ <topicref href="topics/impala_hbase_cache_blocks.xml"/>
+ <topicref href="topics/impala_hbase_caching.xml"/>
+ <topicref href="topics/impala_live_progress.xml"/>
+ <topicref href="topics/impala_live_summary.xml"/>
+ <topicref href="topics/impala_max_errors.xml"/>
+ <topicref href="topics/impala_max_io_buffers.xml"/>
+ <topicref href="topics/impala_max_scan_range_length.xml"/>
+ <topicref href="topics/impala_mem_limit.xml"/>
+ <topicref href="topics/impala_num_nodes.xml"/>
+ <topicref href="topics/impala_num_scanner_threads.xml"/>
+ <topicref href="topics/impala_parquet_compression_codec.xml"/>
+ <topicref href="topics/impala_parquet_file_size.xml"/>
+ <topicref href="topics/impala_query_timeout_s.xml"/>
+ <topicref href="topics/impala_request_pool.xml"/>
+ <topicref href="topics/impala_reservation_request_timeout.xml"/>
+ <topicref href="topics/impala_support_start_over.xml"/>
+ <topicref href="topics/impala_sync_ddl.xml"/>
+ <topicref href="topics/impala_v_cpu_cores.xml"/>
+ </topicref>
+ <topicref href="topics/impala_show.xml"/>
+ <topicref href="topics/impala_truncate_table.xml"/>
+ <topicref audience="impala_next" href="topics/impala_update.xml"/>
+ <topicref href="topics/impala_use.xml"/>
+ </topicref>
+ <topicref href="topics/impala_functions.xml">
+ <topicref href="topics/impala_math_functions.xml"/>
+ <topicref href="topics/impala_bit_functions.xml"/>
+ <topicref href="topics/impala_conversion_functions.xml"/>
+ <topicref href="topics/impala_datetime_functions.xml"/>
+ <topicref href="topics/impala_conditional_functions.xml"/>
+ <topicref href="topics/impala_string_functions.xml"/>
+ <topicref href="topics/impala_misc_functions.xml"/>
+ <topicref href="topics/impala_aggregate_functions.xml">
+ <topicref href="topics/impala_appx_median.xml"/>
+ <topicref href="topics/impala_avg.xml"/>
+ <topicref href="topics/impala_count.xml"/>
+ <topicref href="topics/impala_group_concat.xml"/>
+ <topicref href="topics/impala_max.xml"/>
+ <topicref href="topics/impala_min.xml"/>
+ <topicref href="topics/impala_ndv.xml"/>
+ <topicref href="topics/impala_stddev.xml"/>
+ <topicref href="topics/impala_sum.xml"/>
+ <topicref href="topics/impala_variance.xml"/>
+ </topicref>
+ <topicref href="topics/impala_analytic_functions.xml"/>
+ <topicref href="topics/impala_udf.xml"/>
+ </topicref>
+ <topicref href="topics/impala_langref_unsupported.xml"/>
+ <topicref href="topics/impala_porting.xml"/>
+</map>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/shared/ImpalaVariables.xml
----------------------------------------------------------------------
diff --git a/docs/shared/ImpalaVariables.xml b/docs/shared/ImpalaVariables.xml
new file mode 100644
index 0000000..226eee9
--- /dev/null
+++ b/docs/shared/ImpalaVariables.xml
@@ -0,0 +1,46 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept xmlns:ditaarch="http://dita.oasis-open.org/architecture/2005/" id="impala_vars" ditaarch:DITAArchVersion="1.2" domains="(topic concept) (topic hi-d) (topic ut-d) (topic indexing-d) (topic hazard-d) (topic abbrev-d) (topic pr-d) (topic sw-d) (topic ui-d) " xml:lang="en-US">
+ <title>Cloudera Impala Variables</title>
+ <prolog id="prolog_slg_nmv_km">
+ <metadata id="metadata_ecq_qmv_km">
+ <prodinfo id="prodinfo_pmw_tmv_km">
+ <prodname>Apache Impala (incubating)</prodname>
+ <vrmlist>
+ <vrm version="Impala 2.3.x (separated)" id="vrm_pj3_3hv_impala"/>
+ <vrm version="CDH 5.5.x (separated)" id="vrm_pj3_3hv_cdh"/>
+ </vrmlist>
+ </prodinfo>
+ </metadata>
+ </prolog>
+ <conbody>
+ <p>Release Version Variable - <ph id="ReleaseVersion">Impala 2.3.x / CDH 5.5.x (combined)</ph></p>
+ <p>Substitution variables for denoting features available in release X or higher.
+ The upstream docs can refer to the Impala release number.
+ The docs included with a distro can refer to the distro release number by
+ editing the values here.
+ <ul>
+ <li><ph id="impala26">CDH 5.8</ph></li>
+ <li><ph id="impala25">CDH 5.7</ph></li>
+ <li><ph id="impala24">CDH 5.6</ph></li>
+ <li><ph id="impala23">CDH 5.5</ph></li>
+ <li><ph id="impala22">CDH 5.4</ph></li>
+ <li><ph id="impala21">CDH 5.3</ph></li>
+ <li><ph id="impala20">CDH 5.2</ph></li>
+ <li><ph id="impala14">CDH 5.1</ph></li>
+ <li><ph id="impala13">CDH 5.0</ph></li>
+ </ul>
+ </p>
+ <p>Banner for examples showing shell version - <ph id="ShellBanner">(Shell
+ build version: Impala Shell v2.3.x (<varname>hash</varname>) built on
+ <varname>date</varname>)</ph></p>
+ <p>Banner for examples showing impalad version - <ph id="ImpaladBanner">Server version: impalad version 2.3.x (build
+ x.y.z)</ph></p>
+ <data name="version-message" id="version-message">
+ <foreign>
+ <lines xml:space="preserve">This is the documentation for <data name="version"/>.
+Documentation for other versions is available at <xref href="http://www.cloudera.com/content/support/en/documentation.html" scope="external" format="html">Cloudera Documentation</xref>.</lines>
+ </foreign>
+ </data>
+ </conbody>
+</concept>
[04/22] incubator-impala git commit: First try at porting over the
source files necessary for the Impala SQL Reference.
Posted by jr...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_string_functions.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_string_functions.xml b/docs/topics/impala_string_functions.xml
new file mode 100644
index 0000000..a051ed5
--- /dev/null
+++ b/docs/topics/impala_string_functions.xml
@@ -0,0 +1,719 @@
+<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="string_functions">
+
+ <title>Impala String Functions</title>
+ <titlealts><navtitle>String Functions</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="Impala Functions"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Data Analysts"/>
+ <data name="Category" value="Developers"/>
+ <data name="Category" value="Querying"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p rev="2.0.0">
+ String functions are classified as those primarily accepting or returning <codeph>STRING</codeph>,
+ <codeph>VARCHAR</codeph>, or <codeph>CHAR</codeph> data types, for example to measure the length of a string
+ or concatenate two strings together.
+ <ul>
+ <li>
+ All the functions that accept <codeph>STRING</codeph> arguments also accept the <codeph>VARCHAR</codeph>
+ and <codeph>CHAR</codeph> types introduced in Impala 2.0.
+ </li>
+
+ <li>
+ Whenever <codeph>VARCHAR</codeph> or <codeph>CHAR</codeph> values are passed to a function that returns a
+ string value, the return type is normalized to <codeph>STRING</codeph>. For example, a call to
+ <codeph>concat()</codeph> with a mix of <codeph>STRING</codeph>, <codeph>VARCHAR</codeph>, and
+ <codeph>CHAR</codeph> arguments produces a <codeph>STRING</codeph> result.
+ </li>
+ </ul>
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ The string functions operate mainly on these data types: <xref href="impala_string.xml#string"/>,
+ <xref href="impala_varchar.xml#varchar"/>, and <xref href="impala_char.xml#char"/>.
+ </p>
+
+ <p>
+ <b>Function reference:</b>
+ </p>
+
+ <p>
+ Impala supports the following string functions:
+ </p>
+
+ <dl>
+ <dlentry id="ascii">
+
+ <dt>
+ <codeph>ascii(string str)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">ascii() function</indexterm>
+ <b>Purpose:</b> Returns the numeric ASCII code of the first character of the argument.
+ <p>
+ <b>Return type:</b> <codeph>int</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry rev="2.3.0" id="btrim">
+
+ <dt>
+ <codeph>btrim(string a)</codeph>,
+ <codeph>btrim(string a, string chars_to_trim)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">btrim() function</indexterm>
+ <b>Purpose:</b> Removes all instances of one or more characters
+ from the start and end of a <codeph>STRING</codeph> value.
+ By default, removes only spaces.
+ If a non-<codeph>NULL</codeph> optional second argument is specified, the function removes all
+ occurrences of characters in that second argument from the beginning and
+ end of the string.
+ <p><b>Return type:</b> <codeph>string</codeph></p>
+ <p conref="../shared/impala_common.xml#common/added_in_230"/>
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+ <p>
+ The following examples show the default <codeph>btrim()</codeph> behavior,
+ and what changes when you specify the optional second argument.
+ All the examples bracket the output value with <codeph>[ ]</codeph>
+ so that you can see any leading or trailing spaces in the <codeph>btrim()</codeph> result.
+ By default, the function removes any number of both leading and trailing spaces.
+ When the second argument is specified, any number of occurrences of any
+ character in the second argument are removed from the start and end of the
+ input string; in this case, spaces are not removed (unless they are part of the second
+ argument) and any instances of the characters are not removed if they do not come
+ right at the beginning or end of the string.
+ </p>
+<codeblock>-- Remove multiple spaces before and one space after.
+select concat('[',btrim(' hello '),']');
++---------------------------------------+
+| concat('[', btrim(' hello '), ']') |
++---------------------------------------+
+| [hello] |
++---------------------------------------+
+
+-- Remove any instances of x or y or z at beginning or end. Leave spaces alone.
+select concat('[',btrim('xy hello zyzzxx','xyz'),']');
++------------------------------------------------------+
+| concat('[', btrim('xy hello zyzzxx', 'xyz'), ']') |
++------------------------------------------------------+
+| [ hello ] |
++------------------------------------------------------+
+
+-- Remove any instances of x or y or z at beginning or end.
+-- Leave x, y, z alone in the middle of the string.
+select concat('[',btrim('xyhelxyzlozyzzxx','xyz'),']');
++----------------------------------------------------+
+| concat('[', btrim('xyhelxyzlozyzzxx', 'xyz'), ']') |
++----------------------------------------------------+
+| [helxyzlo] |
++----------------------------------------------------+
+</codeblock>
+ </dd>
+
+ </dlentry>
+
+ <dlentry rev="1.3.0" id="char_length">
+
+ <dt>
+ <codeph>char_length(string a), <ph rev="1.3.0" id="character_length">character_length(string a)</ph></codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">char_length() function</indexterm>
+ <indexterm audience="Cloudera">character_length() function</indexterm>
+ <b>Purpose:</b> Returns the length in characters of the argument string. Aliases for the
+ <codeph>length()</codeph> function.
+ <p>
+ <b>Return type:</b> <codeph>int</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry rev="2.3.0" id="chr">
+
+ <dt>
+ <codeph>chr(int character_code)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">chr() function</indexterm>
+ <b>Purpose:</b> Returns a character specified by a decimal code point value.
+ The interpretation and display of the resulting character depends on your system locale.
+ Because consistent processing of Impala string values is only guaranteed
+ for values within the ASCII range, only use this function for values
+ corresponding to ASCII characters.
+ In particular, parameter values greater than 255 return an empty string.
+ <p><b>Return type:</b> <codeph>string</codeph></p>
+ <p>
+ <b>Usage notes:</b> Can be used as the inverse of the <codeph>ascii()</codeph> function, which
+ converts a character to its numeric ASCII code.
+ </p>
+ <p conref="../shared/impala_common.xml#common/added_in_230"/>
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+<codeblock>SELECT chr(65);
++---------+
+| chr(65) |
++---------+
+| A |
++---------+
+
+SELECT chr(97);
++---------+
+| chr(97) |
++---------+
+| a |
++---------+
+</codeblock>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="concat">
+
+ <dt>
+ <codeph>concat(string a, string b...)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">concat() function</indexterm>
+ <b>Purpose:</b> Returns a single string representing all the argument values joined together.
+ <p>
+ <b>Return type:</b> <codeph>string</codeph>
+ </p>
+ <p conref="../shared/impala_common.xml#common/concat_blurb"/>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="concat_ws">
+
+ <dt>
+ <codeph>concat_ws(string sep, string a, string b...)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">concat_ws() function</indexterm>
+ <b>Purpose:</b> Returns a single string representing the second and following argument values joined
+ together, delimited by a specified separator.
+ <p>
+ <b>Return type:</b> <codeph>string</codeph>
+ </p>
+ <p conref="../shared/impala_common.xml#common/concat_blurb"/>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="find_in_set">
+
+ <dt>
+ <codeph>find_in_set(string str, string strList)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">find_in_set() function</indexterm>
+ <b>Purpose:</b> Returns the position (starting from 1) of the first occurrence of a specified string
+ within a comma-separated string. Returns <codeph>NULL</codeph> if either argument is
+ <codeph>NULL</codeph>, 0 if the search string is not found, or 0 if the search string contains a comma.
+ <p>
+ <b>Return type:</b> <codeph>int</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry rev="1.2" id="group_concat">
+
+ <dt>
+ <codeph>group_concat(string s [, string sep])</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">group_concat() function</indexterm>
+ <b>Purpose:</b> Returns a single string representing the argument value concatenated together for each
+ row of the result set. If the optional separator string is specified, the separator is added between each
+ pair of concatenated values.
+ <p>
+ <b>Return type:</b> <codeph>string</codeph>
+ </p>
+ <p conref="../shared/impala_common.xml#common/concat_blurb"/>
+ <p>
+ By default, returns a single string covering the whole result set. To include other columns or values
+ in the result set, or to produce multiple concatenated strings for subsets of rows, include a
+ <codeph>GROUP BY</codeph> clause in the query.
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry rev="1.2" id="initcap">
+
+ <dt>
+ <codeph>initcap(string str)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">initcap() function</indexterm>
+ <b>Purpose:</b> Returns the input string with the first letter capitalized.
+ <p>
+ <b>Return type:</b> <codeph>string</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="instr">
+
+ <dt>
+ <codeph>instr(string str, string substr)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">instr() function</indexterm>
+ <b>Purpose:</b> Returns the position (starting from 1) of the first occurrence of a substring within a
+ longer string.
+ <p>
+ <b>Return type:</b> <codeph>int</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="length">
+
+ <dt>
+ <codeph>length(string a)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">length() function</indexterm>
+ <b>Purpose:</b> Returns the length in characters of the argument string.
+ <p>
+ <b>Return type:</b> <codeph>int</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="locate">
+
+ <dt>
+ <codeph>locate(string substr, string str[, int pos])</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">locate() function</indexterm>
+ <b>Purpose:</b> Returns the position (starting from 1) of the first occurrence of a substring within a
+ longer string, optionally after a particular position.
+ <p>
+ <b>Return type:</b> <codeph>int</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="lower">
+
+ <dt>
+ <codeph>lower(string a), <ph id="lcase">lcase(string a)</ph> </codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">lower() function</indexterm>
+ <b>Purpose:</b> Returns the argument string converted to all-lowercase.
+ <p>
+ <b>Return type:</b> <codeph>string</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="lpad">
+
+ <dt>
+ <codeph>lpad(string str, int len, string pad)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">lpad() function</indexterm>
+ <b>Purpose:</b> Returns a string of a specified length, based on the first argument string. If the
+ specified string is too short, it is padded on the left with a repeating sequence of the characters from
+ the pad string. If the specified string is too long, it is truncated on the right.
+ <p>
+ <b>Return type:</b> <codeph>string</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="ltrim">
+
+ <dt>
+ <codeph>ltrim(string a)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">ltrim() function</indexterm>
+ <b>Purpose:</b> Returns the argument string with any leading spaces removed from the left side.
+ <p>
+ <b>Return type:</b> <codeph>string</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="parse_url">
+
+ <dt>
+ <codeph>parse_url(string urlString, string partToExtract [, string keyToExtract])</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">parse_url() function</indexterm>
+ <b>Purpose:</b> Returns the portion of a URL corresponding to a specified part. The part argument can be
+ <codeph>'PROTOCOL'</codeph>, <codeph>'HOST'</codeph>, <codeph>'PATH'</codeph>, <codeph>'REF'</codeph>,
+ <codeph>'AUTHORITY'</codeph>, <codeph>'FILE'</codeph>, <codeph>'USERINFO'</codeph>, or
+ <codeph>'QUERY'</codeph>. Uppercase is required for these literal values. When requesting the
+ <codeph>QUERY</codeph> portion of the URL, you can optionally specify a key to retrieve just the
+ associated value from the key-value pairs in the query string.
+ <p>
+ <b>Return type:</b> <codeph>string</codeph>
+ </p>
+ <p>
+ <b>Usage notes:</b> This function is important for the traditional Hadoop use case of interpreting web
+ logs. For example, if the web traffic data features raw URLs not divided into separate table columns,
+ you can count visitors to a particular page by extracting the <codeph>'PATH'</codeph> or
+ <codeph>'FILE'</codeph> field, or analyze search terms by extracting the corresponding key from the
+ <codeph>'QUERY'</codeph> field.
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="regexp_extract">
+
+ <dt>
+ <codeph>regexp_extract(string subject, string pattern, int index)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">regexp_extract() function</indexterm>
+ <b>Purpose:</b> Returns the specified () group from a string based on a regular expression pattern. Group
+ 0 refers to the entire extracted string, while group 1, 2, and so on refers to the first, second, and so
+ on <codeph>(...)</codeph> portion.
+ <p>
+ <b>Return type:</b> <codeph>string</codeph>
+ </p>
+ <p conref="../shared/impala_common.xml#common/regexp_re2"/>
+ <p conref="../shared/impala_common.xml#common/regexp_re2_warning"/>
+ <p conref="../shared/impala_common.xml#common/regexp_escapes"/>
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+ <p>
+ This example shows how group 0 matches the full pattern string, including the portion outside any
+ <codeph>()</codeph> group:
+ </p>
+<codeblock>[localhost:21000] > select regexp_extract('abcdef123ghi456jkl','.*?(\\d+)',0);
++------------------------------------------------------+
+| regexp_extract('abcdef123ghi456jkl', '.*?(\\d+)', 0) |
++------------------------------------------------------+
+| abcdef123ghi456 |
++------------------------------------------------------+
+Returned 1 row(s) in 0.11s</codeblock>
+ <p>
+ This example shows how group 1 matches just the contents inside the first <codeph>()</codeph> group in
+ the pattern string:
+ </p>
+<codeblock>[localhost:21000] > select regexp_extract('abcdef123ghi456jkl','.*?(\\d+)',1);
++------------------------------------------------------+
+| regexp_extract('abcdef123ghi456jkl', '.*?(\\d+)', 1) |
++------------------------------------------------------+
+| 456 |
++------------------------------------------------------+
+Returned 1 row(s) in 0.11s</codeblock>
+ <p rev="2.0.0">
+ Unlike in earlier Impala releases, the regular expression library used in Impala 2.0 and later supports
+ the <codeph>.*?</codeph> idiom for non-greedy matches. This example shows how a pattern string starting
+ with <codeph>.*?</codeph> matches the shortest possible portion of the source string, returning the
+ rightmost set of lowercase letters. A pattern string both starting and ending with <codeph>.*?</codeph>
+ finds two potential matches of equal length, and returns the first one found (the leftmost set of
+ lowercase letters).
+ </p>
+<codeblock>[localhost:21000] > select regexp_extract('AbcdBCdefGHI','.*?([[:lower:]]+)',1);
++--------------------------------------------------------+
+| regexp_extract('abcdbcdefghi', '.*?([[:lower:]]+)', 1) |
++--------------------------------------------------------+
+| def |
++--------------------------------------------------------+
+[localhost:21000] > select regexp_extract('AbcdBCdefGHI','.*?([[:lower:]]+).*?',1);
++-----------------------------------------------------------+
+| regexp_extract('abcdbcdefghi', '.*?([[:lower:]]+).*?', 1) |
++-----------------------------------------------------------+
+| bcd |
++-----------------------------------------------------------+
+</codeblock>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="regexp_replace">
+
+ <dt>
+ <codeph>regexp_replace(string initial, string pattern, string replacement)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">regexp_replace() function</indexterm>
+ <b>Purpose:</b> Returns the initial argument with the regular expression pattern replaced by the final
+ argument string.
+ <p>
+ <b>Return type:</b> <codeph>string</codeph>
+ </p>
+ <p conref="../shared/impala_common.xml#common/regexp_re2"/>
+ <p conref="../shared/impala_common.xml#common/regexp_re2_warning"/>
+ <p conref="../shared/impala_common.xml#common/regexp_escapes"/>
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+ <p>
+ These examples show how you can replace parts of a string matching a pattern with replacement text,
+ which can include backreferences to any <codeph>()</codeph> groups in the pattern string. The
+ backreference numbers start at 1, and any <codeph>\</codeph> characters must be escaped as
+ <codeph>\\</codeph>.
+ </p>
+ <p>
+ Replace a character pattern with new text:
+ </p>
+<codeblock>[localhost:21000] > select regexp_replace('aaabbbaaa','b+','xyz');
++------------------------------------------+
+| regexp_replace('aaabbbaaa', 'b+', 'xyz') |
++------------------------------------------+
+| aaaxyzaaa |
++------------------------------------------+
+Returned 1 row(s) in 0.11s</codeblock>
+ <p>
+ Replace a character pattern with substitution text that includes the original matching text:
+ </p>
+<codeblock>[localhost:21000] > select regexp_replace('aaabbbaaa','(b+)','&lt;\\1&gt;');
++----------------------------------------------+
+| regexp_replace('aaabbbaaa', '(b+)', '&lt;\\1&gt;') |
++----------------------------------------------+
+| aaa&lt;bbb&gt;aaa |
++----------------------------------------------+
+Returned 1 row(s) in 0.11s</codeblock>
+ <p>
+ Remove all characters that are not digits:
+ </p>
+<codeblock>[localhost:21000] > select regexp_replace('123-456-789','[^[:digit:]]','');
++---------------------------------------------------+
+| regexp_replace('123-456-789', '[^[:digit:]]', '') |
++---------------------------------------------------+
+| 123456789 |
++---------------------------------------------------+
+Returned 1 row(s) in 0.12s</codeblock>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="repeat">
+
+ <dt>
+ <codeph>repeat(string str, int n)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">repeat() function</indexterm>
+ <b>Purpose:</b> Returns the argument string repeated a specified number of times.
+ <p>
+ <b>Return type:</b> <codeph>string</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="reverse">
+
+ <dt>
+ <codeph>reverse(string a)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">reverse() function</indexterm>
+ <b>Purpose:</b> Returns the argument string with characters in reversed order.
+ <p>
+ <b>Return type:</b> <codeph>string</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="rpad">
+
+ <dt>
+ <codeph>rpad(string str, int len, string pad)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">rpad() function</indexterm>
+ <b>Purpose:</b> Returns a string of a specified length, based on the first argument string. If the
+ specified string is too short, it is padded on the right with a repeating sequence of the characters from
+ the pad string. If the specified string is too long, it is truncated on the right.
+ <p>
+ <b>Return type:</b> <codeph>string</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="rtrim">
+
+ <dt>
+ <codeph>rtrim(string a)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">rtrim() function</indexterm>
+ <b>Purpose:</b> Returns the argument string with any trailing spaces removed from the right side.
+ <p>
+ <b>Return type:</b> <codeph>string</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="space">
+
+ <dt>
+ <codeph>space(int n)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">space() function</indexterm>
+ <b>Purpose:</b> Returns a concatenated string of the specified number of spaces. Shorthand for
+ <codeph>repeat(' ',<varname>n</varname>)</codeph>.
+ <p>
+ <b>Return type:</b> <codeph>string</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="strleft">
+
+ <dt>
+ <codeph>strleft(string a, int num_chars)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">strleft() function</indexterm>
+ <b>Purpose:</b> Returns the leftmost characters of the string. Shorthand for a call to
+ <codeph>substr()</codeph> with 2 arguments.
+ <p>
+ <b>Return type:</b> <codeph>string</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="strright">
+
+ <dt>
+ <codeph>strright(string a, int num_chars)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">strright() function</indexterm>
+ <b>Purpose:</b> Returns the rightmost characters of the string. Shorthand for a call to
+ <codeph>substr()</codeph> with 2 arguments.
+ <p>
+ <b>Return type:</b> <codeph>string</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="substr">
+
+ <dt>
+ <codeph>substr(string a, int start [, int len]), <ph id="substring">substring(string a, int start [, int
+ len])</ph></codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">substr() function</indexterm>
+ <b>Purpose:</b> Returns the portion of the string starting at a specified point, optionally with a
+ specified maximum length. The characters in the string are indexed starting at 1.
+ <p>
+ <b>Return type:</b> <codeph>string</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="translate">
+
+ <dt>
+ <codeph>translate(string input, string from, string to)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">translate() function</indexterm>
+ <b>Purpose:</b> Returns the input string with a set of characters replaced by another set of characters.
+ <p>
+ <b>Return type:</b> <codeph>string</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="trim">
+
+ <dt>
+ <codeph>trim(string a)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">trim() function</indexterm>
+ <b>Purpose:</b> Returns the input string with both leading and trailing spaces removed. The same as
+ passing the string through both <codeph>ltrim()</codeph> and <codeph>rtrim()</codeph>.
+ <p>
+ <b>Usage notes:</b> Often used during data cleansing operations during the ETL cycle, if input values might still have surrounding spaces.
+ For a more general-purpose function that can remove other leading and trailing characters besides spaces, see <codeph>btrim()</codeph>.
+ </p>
+ <p>
+ <b>Return type:</b> <codeph>string</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="upper">
+
+ <dt>
+ <codeph>upper(string a), <ph id="ucase">ucase(string a)</ph></codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">upper() function</indexterm>
+ <indexterm audience="Cloudera">ucase() function</indexterm>
+ <b>Purpose:</b> Returns the argument string converted to all-uppercase.
+ <p>
+ <b>Return type:</b> <codeph>string</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+ </dl>
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_struct.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_struct.xml b/docs/topics/impala_struct.xml
new file mode 100644
index 0000000..1e440fc
--- /dev/null
+++ b/docs/topics/impala_struct.xml
@@ -0,0 +1,406 @@
+<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+
+ <concept id="struct">
+
+ <title>STRUCT Complex Type (CDH 5.5 or higher only)</title>
+
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="Impala Data Types"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ A complex data type, representing multiple fields of a single item.
+ Frequently used as the element type of an <codeph>ARRAY</codeph>
+ or the <codeph>VALUE</codeph> part of a <codeph>MAP</codeph>.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock><varname>column_name</varname> STRUCT &lt; <varname>name</varname> : <varname>type</varname> [COMMENT '<varname>comment_string</varname>'], ... &gt;
+
+type ::= <varname>primitive_type</varname> | <varname>complex_type</varname>
+</codeblock>
+
+ <p>
+ The names and number of fields within the <codeph>STRUCT</codeph> are fixed. Each field can be a different type.
+ A field within a <codeph>STRUCT</codeph> can also be another <codeph>STRUCT</codeph>, or an <codeph>ARRAY</codeph>
+ or a <codeph>MAP</codeph>, allowing you to create nested data structures with a maximum nesting depth of 100.
+ </p>
+
+ <p>
+ A <codeph>STRUCT</codeph> can be the top-level type for a column, or can itself be an item within an <codeph>ARRAY</codeph>
+ or the value part of the key-value pair in a <codeph>MAP</codeph>.
+ </p>
+
+ <p>
+ When a <codeph>STRUCT</codeph> is used as an <codeph>ARRAY</codeph> element or a <codeph>MAP</codeph> value,
+ you use a join clause to bring the <codeph>ARRAY</codeph> or <codeph>MAP</codeph> elements into the result set, and then refer
+ to <codeph><varname>array_name</varname>.ITEM.<varname>field</varname></codeph> or
+ <codeph><varname>map_name</varname>.VALUE.<varname>field</varname></codeph>.
+ In the case of a <codeph>STRUCT</codeph> directly inside an <codeph>ARRAY</codeph> or <codeph>MAP</codeph>,
+ you can omit the <codeph>.ITEM</codeph> and <codeph>.VALUE</codeph> pseudocolumns and refer directly to
+ <codeph><varname>array_name</varname>.<varname>field</varname></codeph> or
+ <codeph><varname>map_name</varname>.<varname>field</varname></codeph>.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/complex_types_combo"/>
+
+ <p>
+ A <codeph>STRUCT</codeph> is similar conceptually to a table row: it contains a fixed number of named fields,
+ each with a predefined type. To combine two related tables, while using complex types to
+ minimize repetition, the typical way to represent that data is as an <codeph>ARRAY</codeph> of <codeph>STRUCT</codeph> elements.
+ </p>
+
+ <p>
+ Because a <codeph>STRUCT</codeph> has a fixed number of named fields, it typically does not make sense
+ to have a <codeph>STRUCT</codeph> as the type of a table column. In such a case, you could just make each field of the
+ <codeph>STRUCT</codeph> into a separate column of the table. The <codeph>STRUCT</codeph> type is most
+ useful as an item of an <codeph>ARRAY</codeph> or the value part of the key-value pair in a <codeph>MAP</codeph>.
+ A nested type column with a <codeph>STRUCT</codeph> at the lowest level lets you associate a variable
+ number of row-like objects with each row of the table.
+ </p>
+
+ <p>
+ The <codeph>STRUCT</codeph> type is straightforward to reference within a query. You do not need to
+ include the <codeph>STRUCT</codeph> column in a join clause or give it a table alias, as is
+ required for the <codeph>ARRAY</codeph> and <codeph>MAP</codeph> types. You refer to the individual
+ fields using dot notation, such as <codeph><varname>struct_column_name</varname>.<varname>field_name</varname></codeph>,
+ without any pseudocolumn such as <codeph>ITEM</codeph> or <codeph>VALUE</codeph>.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/complex_types_describe"/>
+
+ <p conref="../shared/impala_common.xml#common/internals_blurb"/>
+
+ <p>
+ Within the Parquet data file, the values for each <codeph>STRUCT</codeph> field are stored adjacent to each other,
+ so that they can be encoded and compressed using all the Parquet techniques for storing sets of similar or
+ repeated values. The adjacency applies even when the <codeph>STRUCT</codeph> values are part of an
+ <codeph>ARRAY</codeph> or <codeph>MAP</codeph>. During a query, Impala avoids unnecessary I/O by reading only the portions
+ of the Parquet data file containing the requested <codeph>STRUCT</codeph> fields.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/added_in_230"/>
+
+ <p conref="../shared/impala_common.xml#common/restrictions_blurb"/>
+
+ <ul conref="../shared/impala_common.xml#common/complex_types_restrictions">
+ <li/>
+ </ul>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+ <note conref="../shared/impala_common.xml#common/complex_type_schema_pointer"/>
+
+ <p>
+ The following example shows a table with various kinds of <codeph>STRUCT</codeph> columns,
+ both at the top level and nested within other complex types.
+ Practice the <codeph>CREATE TABLE</codeph> and query notation for complex type columns
+ using empty tables, until you can visualize a complex data structure and construct corresponding SQL statements reliably.
+ </p>
+
+<codeblock><![CDATA[CREATE TABLE struct_demo
+(
+ id BIGINT,
+ name STRING,
+
+-- A STRUCT as a top-level column. Demonstrates how the table ID column
+-- and the ID field within the STRUCT can coexist without a name conflict.
+ employee_info STRUCT < employer: STRING, id: BIGINT, address: STRING >,
+
+-- A STRUCT as the element type of an ARRAY.
+ places_lived ARRAY < STRUCT <street: STRING, city: STRING, country: STRING >>,
+
+-- A STRUCT as the value portion of the key-value pairs in a MAP.
+ memorable_moments MAP < STRING, STRUCT < year: INT, place: STRING, details: STRING >>,
+
+-- A STRUCT where one of the fields is another STRUCT.
+ current_address STRUCT < street_address: STRUCT <street_number: INT, street_name: STRING, street_type: STRING>, country: STRING, postal_code: STRING >
+)
+STORED AS PARQUET;
+]]>
+</codeblock>
+
+ <p>
+ The following example shows how to examine the structure of a table containing one or more
+ <codeph>STRUCT</codeph> columns by using the <codeph>DESCRIBE</codeph> statement. You can
+ visualize each <codeph>STRUCT</codeph> as its own table, with columns
+ named the same as each field of the <codeph>STRUCT</codeph>.
+ If the <codeph>STRUCT</codeph> is nested inside another complex type, such as <codeph>ARRAY</codeph>,
+ you can extend the qualified name passed to <codeph>DESCRIBE</codeph> until the output
+ shows just the <codeph>STRUCT</codeph> fields.
+ </p>
+
+<codeblock><![CDATA[DESCRIBE struct_demo;
++-------------------+--------------------------+
+| name | type |
++-------------------+--------------------------+
+| id | bigint |
+| name | string |
+| employee_info | struct< |
+| | employer:string, |
+| | id:bigint, |
+| | address:string |
+| | > |
+| places_lived | array<struct< |
+| | street:string, |
+| | city:string, |
+| | country:string |
+| | >> |
+| memorable_moments | map<string,struct< |
+| | year:int, |
+| | place:string, |
+| | details:string |
+| | >> |
+| current_address | struct< |
+| | street_address:struct< |
+| | street_number:int, |
+| | street_name:string, |
+| | street_type:string |
+| | >, |
+| | country:string, |
+| | postal_code:string |
+| | > |
++-------------------+--------------------------+
+
+DESCRIBE struct_demo.employee_info;
++----------+--------+
+| name | type |
++----------+--------+
+| employer | string |
+| id | bigint |
+| address | string |
++----------+--------+
+
+-- Because PLACES_LIVED is a STRUCT inside an ARRAY, the
+-- initial DESCRIBE shows the structure of the ARRAY.
+DESCRIBE struct_demo.places_lived;
++------+------------------+
+| name | type |
++------+------------------+
+| item | struct< |
+| | street:string, |
+| | city:string, |
+| | country:string |
+| | > |
+| pos | bigint |
++------+------------------+
+
+-- Ask for the details of the ITEM field of the ARRAY to see
+-- just the layout of the STRUCT.
+DESCRIBE struct_demo.places_lived.item;
++---------+--------+
+| name | type |
++---------+--------+
+| street | string |
+| city | string |
+| country | string |
++---------+--------+
+
+-- Likewise, MEMORABLE_MOMENTS has a STRUCT inside a MAP,
+-- which requires an extra level of qualified name to see
+-- just the STRUCT part.
+DESCRIBE struct_demo.memorable_moments;
++-------+------------------+
+| name | type |
++-------+------------------+
+| key | string |
+| value | struct< |
+| | year:int, |
+| | place:string, |
+| | details:string |
+| | > |
++-------+------------------+
+
+-- For a MAP, ask to see the VALUE field to see the
+-- corresponding STRUCT fields in a table-like structure.
+DESCRIBE struct_demo.memorable_moments.value;
++---------+--------+
+| name | type |
++---------+--------+
+| year | int |
+| place | string |
+| details | string |
++---------+--------+
+
+-- For a STRUCT inside a STRUCT, we can see the fields of the
+-- outer STRUCT...
+DESCRIBE struct_demo.current_address;
++----------------+-----------------------+
+| name | type |
++----------------+-----------------------+
+| street_address | struct< |
+| | street_number:int, |
+| | street_name:string, |
+| | street_type:string |
+| | > |
+| country | string |
+| postal_code | string |
++----------------+-----------------------+
+
+-- ...and then use a further qualified name to see just the
+-- fields of the inner STRUCT.
+DESCRIBE struct_demo.current_address.street_address;
++---------------+--------+
+| name | type |
++---------------+--------+
+| street_number | int |
+| street_name | string |
+| street_type | string |
++---------------+--------+
+]]>
+</codeblock>
+
+ <p>
+ The following example shows how to examine the structure of a table containing one or more
+ <codeph>STRUCT</codeph> columns by using the <codeph>DESCRIBE</codeph> statement. You can
+ visualize each <codeph>STRUCT</codeph> as its own table, with columns
+ named the same as each field of the <codeph>STRUCT</codeph>.
+ If the <codeph>STRUCT</codeph> is nested inside another complex type, such as <codeph>ARRAY</codeph>,
+ you can extend the qualified name passed to <codeph>DESCRIBE</codeph> until the output
+ shows just the <codeph>STRUCT</codeph> fields.
+ </p>
+
+<!-- To do: See why the most verbose query form gives an error. -->
+<codeblock><![CDATA[DESCRIBE struct_demo;
++-------------------+--------------------------+---------+
+| name | type | comment |
++-------------------+--------------------------+---------+
+| id | bigint | |
+| name | string | |
+| employee_info | struct< | |
+| | employer:string, | |
+| | id:bigint, | |
+| | address:string | |
+| | > | |
+| places_lived | array<struct< | |
+| | street:string, | |
+| | city:string, | |
+| | country:string | |
+| | >> | |
+| memorable_moments | map<string,struct< | |
+| | year:int, | |
+| | place:string, | |
+| | details:string | |
+| | >> | |
+| current_address | struct< | |
+| | street_address:struct< | |
+| | street_number:int, | |
+| | street_name:string, | |
+| | street_type:string | |
+| | >, | |
+| | country:string, | |
+| | postal_code:string | |
+| | > | |
++-------------------+--------------------------+---------+
+
+SELECT id, employee_info.id FROM struct_demo;
+
+SELECT id, employee_info.id AS employee_id FROM struct_demo;
+
+SELECT id, employee_info.id AS employee_id, employee_info.employer
+ FROM struct_demo;
+
+SELECT id, name, street, city, country
+ FROM struct_demo, struct_demo.places_lived;
+
+SELECT id, name, struct_demo.places_lived.pos, struct_demo.places_lived.street, struct_demo.places_lived.city, struct_demo.places_lived.country
+ FROM struct_demo, struct_demo.places_lived;
+ERROR: AnalysisException: Illegal column/field reference 'struct_demo.places_lived.pos' with intermediate collection 'places_lived' of type 'ARRAY<STRUCT<street:STRING,city:STRING,country:STRING>>'
+
+SELECT id, name, pl.pos, pl.street, pl.city, pl.country
+ FROM struct_demo, struct_demo.places_lived AS pl;
+
+SELECT id, name, places_lived.pos, places_lived.street, places_lived.city, places_lived.country
+ FROM struct_demo, struct_demo.places_lived;
+
+SELECT id, name, pos, street, city, country
+ FROM struct_demo, struct_demo.places_lived;
+
+SELECT id, name, struct_demo.memorable_moments.key,
+ struct_demo.memorable_moments.value.year,
+ struct_demo.memorable_moments.value.place,
+ struct_demo.memorable_moments.value.details
+FROM struct_demo, struct_demo.memorable_moments
+WHERE struct_demo.memorable_moments.key IN ('Birthday','Anniversary','Graduation');
+ERROR: AnalysisException: Illegal column/field reference 'struct_demo.memorable_moments.key' with intermediate collection 'memorable_moments' of type 'MAP<STRING,STRUCT<year:INT,place:STRING,details:STRING>>'
+
+SELECT id, name, mm.key, mm.value.year, mm.value.place, mm.value.details
+ FROM struct_demo, struct_demo.memorable_moments AS mm
+WHERE mm.key IN ('Birthday','Anniversary','Graduation');
+
+SELECT id, name, memorable_moments.key, memorable_moments.value.year,
+ memorable_moments.value.place, memorable_moments.value.details
+FROM struct_demo, struct_demo.memorable_moments
+WHERE key IN ('Birthday','Anniversary','Graduation');
+
+SELECT id, name, key, value.year, value.place, value.details
+ FROM struct_demo, struct_demo.memorable_moments
+WHERE key IN ('Birthday','Anniversary','Graduation');
+
+SELECT id, name, key, year, place, details
+ FROM struct_demo, struct_demo.memorable_moments
+WHERE key IN ('Birthday','Anniversary','Graduation');
+
+SELECT id, name,
+ current_address.street_address.street_number,
+ current_address.street_address.street_name,
+ current_address.street_address.street_type,
+ current_address.country,
+ current_address.postal_code
+FROM struct_demo;
+]]>
+</codeblock>
+
+ <p>
+ For example, this table uses a struct that encodes several data values for each phone number associated
+ with a person. Each person can have a variable-length array of associated phone numbers, and queries can
+ refer to the category field to locate specific home, work, mobile, and so on kinds of phone numbers.
+ </p>
+
+<codeblock>CREATE TABLE contact_info_many_structs
+(
+ id BIGINT, name STRING,
+  phone_numbers ARRAY &lt; STRUCT &lt;category:STRING, country_code:STRING, area_code:SMALLINT, full_number:STRING, mobile:BOOLEAN, carrier:STRING &gt; &gt;
+) STORED AS PARQUET;
+</codeblock>
+
+ <p>
+ Because structs are naturally suited to composite values where the fields have different data types, you might use them
+ to decompose things such as addresses:
+ </p>
+
+<codeblock>CREATE TABLE contact_info_detailed_address
+(
+ id BIGINT, name STRING,
+  address STRUCT &lt; house_number:INT, street:STRING, street_type:STRING, apartment:STRING, city:STRING, region:STRING, country:STRING &gt;
+);
+</codeblock>
+
+ <p>
+ In a big data context, splitting out data fields such as the number part of the address and the street name
+ could let you do analysis on each field independently. For example, which streets have the largest number
+ range of addresses, what are the statistical properties of the street names, which areas have a higher
+ proportion of <q>Roads</q>, <q>Courts</q> or <q>Boulevards</q>, and so on.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ <xref href="impala_complex_types.xml#complex_types"/>,
+ <xref href="impala_array.xml#array"/>,
+ <!-- <xref href="impala_struct.xml#struct"/>, -->
+ <xref href="impala_map.xml#map"/>
+ </p>
+
+ </conbody>
+
+ </concept>
+
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_subqueries.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_subqueries.xml b/docs/topics/impala_subqueries.xml
new file mode 100644
index 0000000..ed99f3a
--- /dev/null
+++ b/docs/topics/impala_subqueries.xml
@@ -0,0 +1,318 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept rev="2.0.0" id="subqueries">
+
+ <title>Subqueries in Impala SELECT Statements</title>
+ <titlealts><navtitle>Subqueries</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Querying"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">subqueries</indexterm>
+ A <term>subquery</term> is a query that is nested within another query. Subqueries let queries on one table
+ dynamically adapt based on the contents of another table. This technique provides great flexibility and
+ expressive power for SQL queries.
+ </p>
+
+ <p>
+ A subquery can return a result set for use in the <codeph>FROM</codeph> or <codeph>WITH</codeph> clauses, or
+ with operators such as <codeph>IN</codeph> or <codeph>EXISTS</codeph>.
+ </p>
+
+ <p>
+ A <term>scalar subquery</term> produces a result set with a single row containing a single column, typically
+ produced by an aggregation function such as <codeph>MAX()</codeph> or <codeph>SUM()</codeph>. This single
+ result value can be substituted in scalar contexts such as arguments to comparison operators. If the result
+ set is empty, the value of the scalar subquery is <codeph>NULL</codeph>. For example, the following query
+ finds the maximum value of <codeph>T2.Y</codeph> and then substitutes that value into the
+ <codeph>WHERE</codeph> clause of the outer block that queries <codeph>T1</codeph>:
+ </p>
+
+<codeblock>SELECT x FROM t1 WHERE x > (SELECT MAX(y) FROM t2);
+</codeblock>
+
+ <p>
+ <term>Uncorrelated subqueries</term> do not refer to any tables from the outer block of the query. The same
+ value or set of values produced by the subquery is used when evaluating each row from the outer query block.
+ In this example, the subquery returns an arbitrary number of values from <codeph>T2.Y</codeph>, and each
+ value of <codeph>T1.X</codeph> is tested for membership in that same set of values:
+ </p>
+
+<codeblock>SELECT x FROM t1 WHERE x IN (SELECT y FROM t2);
+</codeblock>
+
+ <p>
+ <term>Correlated subqueries</term> compare one or more values from the outer query block to values referenced
+ in the <codeph>WHERE</codeph> clause of the subquery. Each row evaluated by the outer <codeph>WHERE</codeph>
+ clause can be evaluated using a different set of values. These kinds of subqueries are restricted in the
+ kinds of comparisons they can do between columns of the inner and outer tables. (See the following
+ <b>Restrictions</b> item.)
+ </p>
+
+ <p>
+ For example, the following query finds all the employees with salaries that are higher than average for their
+ department. The subquery potentially computes a different <codeph>AVG()</codeph> value for each employee.
+ </p>
+
+<!-- TK: Construct an EMPLOYEES schema to try out examples like these. -->
+
+<codeblock>SELECT employee_name, employee_id FROM employees one WHERE
+ salary > (SELECT avg(salary) FROM employees two WHERE one.dept_id = two.dept_id);
+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+ <p>
+ <b>Subquery in the <codeph>FROM</codeph> clause:</b>
+ </p>
+
+<codeblock>SELECT <varname>select_list</varname> FROM <varname>table_ref</varname> [, <varname>table_ref</varname> ...]
+
+<varname>table_ref</varname> ::= <varname>table_name</varname> | (<varname>select_statement</varname>)
+</codeblock>
+
+ <p>
+ <b>Subqueries in <codeph>WHERE</codeph> clause:</b>
+ </p>
+
+<codeblock>WHERE <varname>value</varname> <varname>comparison_operator</varname> (<varname>scalar_select_statement</varname>)
+WHERE <varname>value</varname> [NOT] IN (<varname>select_statement</varname>)
+WHERE [NOT] EXISTS (<varname>correlated_select_statement</varname>)
+WHERE NOT EXISTS (<varname>correlated_select_statement</varname>)
+</codeblock>
+
+ <p>
+ <codeph>comparison_operator</codeph> is a numeric comparison such as <codeph>=</codeph>,
+      <codeph>&lt;=</codeph>, <codeph>!=</codeph>, and so on, or a string comparison operator such as
+ <codeph>LIKE</codeph> or <codeph>REGEXP</codeph>.
+ </p>
+
+ <p rev="2.0.0">
+      Although you can use non-equality comparison operators such as <codeph>&lt;</codeph> or
+ <codeph>>=</codeph>, the subquery must include at least one equality comparison between the columns of the
+ inner and outer query blocks.
+ </p>
+
+ <p>
+ All syntax is available for both correlated and uncorrelated queries, except that the <codeph>NOT
+ EXISTS</codeph> clause cannot be used with an uncorrelated subquery.
+ </p>
+
+ <p>
+ Impala subqueries can be nested arbitrarily deep.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/sql1999"/>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+ <p>
+ This example illustrates how subqueries can be used in the <codeph>FROM</codeph> clause to organize the table
+ names, column names, and column values by producing intermediate result sets, especially for join queries.
+ </p>
+
+<codeblock>SELECT avg(t1.x), max(t2.y) FROM
+ (SELECT id, cast(a AS DECIMAL(10,5)) AS x FROM raw_data WHERE a BETWEEN 0 AND 100) AS t1
+ JOIN
+  (SELECT id, length(s) AS y FROM raw_data WHERE s LIKE 'A%') AS t2
+  USING (id);
+</codeblock>
+
+ <p rev="2.0.0">
+ These examples show how a query can test for the existence of values in a separate table using the
+ <codeph>EXISTS()</codeph> operator with a subquery.
+<!--
+Internally, these queries are processed in a way similar to join queries.
+Because the values from the second table are not part of the result set, the subquery
+is more efficient than the equivalent join query.
+-->
+ </p>
+
+ <p>
+ The following examples show how a value can be compared against a set of values returned by a subquery.
+ </p>
+
+<codeblock rev="2.0.0">SELECT count(x) FROM t1 WHERE EXISTS(SELECT 1 FROM t2 WHERE t1.x = t2.y * 10);
+
+SELECT x FROM t1 WHERE x IN (SELECT y FROM t2 WHERE state = 'CA');
+</codeblock>
+
+ <p>
+ The following examples demonstrate scalar subqueries. When a subquery is known to return a single value, you
+ can substitute it where you would normally put a constant value.
+ </p>
+
+<codeblock>SELECT x FROM t1 WHERE y = (SELECT max(z) FROM t2);
+SELECT x FROM t1 WHERE y > (SELECT count(z) FROM t2);
+</codeblock>
+
+<!-- <p conref="/Content/impala_common_xi44078.xml#common/partitioning_blurb"/> -->
+
+<!--
+<p conref="/Content/impala_common_xi44078.xml#common/hbase_blurb"/>
+<p>
+Currently, the <codeph>IN (<varname>subquery</varname>)</codeph> operator results in a full table scan
+of an HBase table, rather than being translated into a series of single-row lookups.
+Therefore, this is not an efficient construct to use with Impala queries for HBase tables.
+</p>
+-->
+
+<!--
+<p conref="/Content/impala_common_xi44078.xml#common/parquet_blurb"/>
+<p conref="/Content/impala_common_xi44078.xml#common/text_blurb"/>
+<p conref="/Content/impala_common_xi44078.xml#common/compatibility_blurb"/>
+-->
+
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+
+ <p>
+ If the same table is referenced in both the outer and inner query blocks, construct a table alias in the
+ outer query block and use a fully qualified name to distinguish the inner and outer table references:
+ </p>
+
+<!-- TK: verify the logic of this example. Probably have other similar ones that could be reused here. -->
+
+<codeblock>SELECT * FROM t1 one WHERE id IN (SELECT parent FROM t1 two WHERE one.parent = two.id);
+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/internals_blurb"/>
+
+ <p>
+ Internally, subqueries involving <codeph>IN</codeph>, <codeph>NOT IN</codeph>, <codeph>EXISTS</codeph>, or
+ <codeph>NOT EXISTS</codeph> clauses are rewritten into join queries. Depending on the syntax, the subquery
+ might be rewritten to an outer join, semi join, cross join, or anti join.
+ </p>
+
+ <p>
+ A query is processed differently depending on whether the subquery calls any aggregation functions. There are
+ correlated and uncorrelated forms, with and without calls to aggregation functions. Each of these four
+ categories is rewritten differently.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/column_stats_blurb"/>
+
+ <p>
+ Because queries that include correlated and uncorrelated subqueries in the <codeph>WHERE</codeph> clause are
+ written into join queries, to achieve best performance, follow the same guidelines for running the
+ <codeph>COMPUTE STATS</codeph> statement as you do for tables involved in regular join queries. Run the
+      <codeph>COMPUTE STATS</codeph> statement for each of the associated tables after loading or substantially changing
+ the data in that table. See <xref href="impala_perf_stats.xml#perf_stats"/> for details.
+ </p>
+
+ <p>
+ <b>Added in:</b> Subqueries are substantially enhanced starting in Impala 2.0 for CDH 4, and CDH 5.2.0. Now,
+ they can be used in the <codeph>WHERE</codeph> clause, in combination with clauses such as
+ <codeph>EXISTS</codeph> and <codeph>IN</codeph>, rather than just in the <codeph>FROM</codeph> clause.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/restrictions_blurb"/>
+
+ <p>
+ The initial Impala support for nested subqueries addresses the most common use cases. Some restrictions
+ remain:
+ </p>
+
+ <ul>
+ <li>
+ <p>
+ Although you can use subqueries in a query involving <codeph>UNION</codeph> or <codeph>UNION ALL</codeph>
+ in Impala 2.1.0 and higher, currently you cannot construct a union of two subqueries (for example, in the
+ argument of an <codeph>IN</codeph> or <codeph>EXISTS</codeph> operator).
+ </p>
+ </li>
+
+ <li>
+ <p>
+ Subqueries returning scalar values cannot be used with the operators <codeph>ANY</codeph> or
+ <codeph>ALL</codeph>. (Impala does not currently have a <codeph>SOME</codeph> operator, but if it did,
+ the same restriction would apply.)
+ </p>
+ </li>
+
+ <li>
+ <p>
+ For the <codeph>EXISTS</codeph> and <codeph>NOT EXISTS</codeph> clauses, any subquery comparing values
+ from the outer query block to another table must use at least one equality comparison, not exclusively
+ other kinds of comparisons such as less than, greater than, <codeph>BETWEEN</codeph>, or
+ <codeph>!=</codeph>.
+ </p>
+ </li>
+
+ <li>
+<!-- TK: think this is no longer true. -->
+ <p>
+ Currently, a scalar subquery cannot be used as the first or second argument to the
+ <codeph>BETWEEN</codeph> operator.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ A subquery cannot be used inside an <codeph>OR</codeph> conjunction. Expressions inside a subquery, for
+ example in the <codeph>WHERE</codeph> clause, can use <codeph>OR</codeph> conjunctions; the restriction
+ only applies to parts of the query <q>above</q> the subquery.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ Scalar subqueries are only supported in numeric contexts. You cannot use a scalar subquery as an argument
+ to the <codeph>LIKE</codeph>, <codeph>REGEXP</codeph>, or <codeph>RLIKE</codeph> operators, or compare it
+ to a value of a non-numeric type such as <codeph>TIMESTAMP</codeph> or <codeph>BOOLEAN</codeph>.
+ </p>
+ </li>
+
+ <li>
+ <p>
+<!-- A subquery cannot be used to generate a scalar value for a function call. -->
+ You cannot use subqueries with the <codeph>CASE</codeph> function to generate the comparison value, the
+ values to be compared against, or the return value.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ A subquery is not allowed in the filter condition for the <codeph>HAVING</codeph> clause. (Strictly
+ speaking, a subquery cannot appear anywhere outside the <codeph>WITH</codeph>, <codeph>FROM</codeph>, and
+ <codeph>WHERE</codeph> clauses.)
+ </p>
+ </li>
+
+ <li>
+ <p>
+ You must use a fully qualified name
+ (<codeph><varname>table_name</varname>.<varname>column_name</varname></codeph> or
+ <codeph><varname>database_name</varname>.<varname>table_name</varname>.<varname>column_name</varname></codeph>)
+ when referring to any column from the outer query block within a subquery.
+ </p>
+ </li>
+ </ul>
+
+ <p conref="../shared/impala_common.xml#common/complex_types_blurb"/>
+
+ <p rev="2.3.0">
+ For the complex types (<codeph>ARRAY</codeph>, <codeph>STRUCT</codeph>, and <codeph>MAP</codeph>)
+ available in CDH 5.5 / Impala 2.3 and higher, the join queries that <q>unpack</q> complex type
+ columns often use correlated subqueries in the <codeph>FROM</codeph> clause.
+ For example, if the first table in the join clause is <codeph>CUSTOMER</codeph>, the second
+ join clause might have a subquery that selects from the column <codeph>CUSTOMER.C_ORDERS</codeph>,
+ which is an <codeph>ARRAY</codeph>. The subquery re-evaluates the <codeph>ARRAY</codeph> elements
+ corresponding to each row from the <codeph>CUSTOMER</codeph> table.
+ See <xref href="impala_complex_types.xml#complex_types"/> for details and examples of
+ using subqueries with complex types.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ <xref href="impala_operators.xml#exists"/>, <xref href="impala_operators.xml#in"/>
+ </p>
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_sum.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_sum.xml b/docs/topics/impala_sum.xml
new file mode 100644
index 0000000..6d25f1c
--- /dev/null
+++ b/docs/topics/impala_sum.xml
@@ -0,0 +1,236 @@
+<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="sum">
+
+ <title>SUM Function</title>
+ <titlealts><navtitle>SUM</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Impala Functions"/>
+ <data name="Category" value="Analytic Functions"/>
+ <data name="Category" value="Aggregate Functions"/>
+ <data name="Category" value="Querying"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">sum() function</indexterm>
+      An aggregate function that returns the sum of a set of numbers. Its single argument can be a numeric column, or
+      the numeric result of a function or expression applied to the column value. Rows with a <codeph>NULL</codeph>
+      value for the specified column are ignored. If the table is empty, or all the values supplied to
+      <codeph>SUM</codeph> are <codeph>NULL</codeph>, <codeph>SUM</codeph> returns <codeph>NULL</codeph>.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock>SUM([DISTINCT | ALL] <varname>expression</varname>) [OVER (<varname>analytic_clause</varname>)]</codeblock>
+
+ <p>
+ When the query contains a <codeph>GROUP BY</codeph> clause, returns one value for each combination of
+ grouping values.
+ </p>
+
+ <p>
+ <b>Return type:</b> <codeph>BIGINT</codeph> for integer arguments, <codeph>DOUBLE</codeph> for floating-point
+ arguments
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/complex_types_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/complex_types_aggregation_explanation"/>
+
+ <p conref="../shared/impala_common.xml#common/complex_types_aggregation_example"/>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+ <p>
+ The following example shows how to use <codeph>SUM()</codeph> to compute the total for all the values in the
+ table, a subset of values, or the sum for each combination of values in the <codeph>GROUP BY</codeph> clause:
+ </p>
+
+<codeblock>-- Total all the values for this column in the table.
+select sum(c1) from t1;
+-- Find the total for this column from a subset of the table.
+select sum(c1) from t1 where month = 'January' and year = '2013';
+-- Find the total from a set of numeric function results.
+select sum(length(s)) from t1;
+-- Often used with functions that return predefined values to compute a score.
+select sum(case when grade = 'A' then 1.0 when grade = 'B' then 0.75 else 0) as class_honors from test_scores;
+-- Can also be used in combination with DISTINCT and/or GROUP BY.
+-- Return more than one result.
+select month, year, sum(purchase_price) from store_stats group by month, year;
+-- Filter the input to eliminate duplicates before performing the calculation.
+select sum(distinct x) from t1;
+</codeblock>
+
+ <p rev="2.0.0">
+ The following examples show how to use <codeph>SUM()</codeph> in an analytic context. They use a table
+ containing integers from 1 to 10. Notice how the <codeph>SUM()</codeph> is reported for each input value, as
+ opposed to the <codeph>GROUP BY</codeph> clause which condenses the result set.
+<codeblock>select x, property, sum(x) <b>over (partition by property)</b> as sum from int_t where property in ('odd','even');
++----+----------+-----+
+| x | property | sum |
++----+----------+-----+
+| 2 | even | 30 |
+| 4 | even | 30 |
+| 6 | even | 30 |
+| 8 | even | 30 |
+| 10 | even | 30 |
+| 1 | odd | 25 |
+| 3 | odd | 25 |
+| 5 | odd | 25 |
+| 7 | odd | 25 |
+| 9 | odd | 25 |
++----+----------+-----+
+</codeblock>
+
+Adding an <codeph>ORDER BY</codeph> clause lets you experiment with results that are cumulative or apply to a moving
+set of rows (the <q>window</q>). The following examples use <codeph>SUM()</codeph> in an analytic context
+(that is, with an <codeph>OVER()</codeph> clause) to produce a running total of all the even values,
+then a running total of all the odd values. The basic <codeph>ORDER BY x</codeph> clause implicitly
+activates a window clause of <codeph>RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW</codeph>,
+which is effectively the same as <codeph>ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW</codeph>,
+therefore all of these examples produce the same results:
+<codeblock>select x, property,
+ sum(x) over (partition by property <b>order by x</b>) as 'cumulative total'
+ from int_t where property in ('odd','even');
++----+----------+------------------+
+| x | property | cumulative total |
++----+----------+------------------+
+| 2 | even | 2 |
+| 4 | even | 6 |
+| 6 | even | 12 |
+| 8 | even | 20 |
+| 10 | even | 30 |
+| 1 | odd | 1 |
+| 3 | odd | 4 |
+| 5 | odd | 9 |
+| 7 | odd | 16 |
+| 9 | odd | 25 |
++----+----------+------------------+
+
+select x, property,
+ sum(x) over
+ (
+ partition by property
+ <b>order by x</b>
+ <b>range between unbounded preceding and current row</b>
+ ) as 'cumulative total'
+from int_t where property in ('odd','even');
++----+----------+------------------+
+| x | property | cumulative total |
++----+----------+------------------+
+| 2 | even | 2 |
+| 4 | even | 6 |
+| 6 | even | 12 |
+| 8 | even | 20 |
+| 10 | even | 30 |
+| 1 | odd | 1 |
+| 3 | odd | 4 |
+| 5 | odd | 9 |
+| 7 | odd | 16 |
+| 9 | odd | 25 |
++----+----------+------------------+
+
+select x, property,
+ sum(x) over
+ (
+ partition by property
+ <b>order by x</b>
+ <b>rows between unbounded preceding and current row</b>
+ ) as 'cumulative total'
+ from int_t where property in ('odd','even');
++----+----------+------------------+
+| x | property | cumulative total |
++----+----------+------------------+
+| 2 | even | 2 |
+| 4 | even | 6 |
+| 6 | even | 12 |
+| 8 | even | 20 |
+| 10 | even | 30 |
+| 1 | odd | 1 |
+| 3 | odd | 4 |
+| 5 | odd | 9 |
+| 7 | odd | 16 |
+| 9 | odd | 25 |
++----+----------+------------------+
+</codeblock>
+
+Changing the direction of the <codeph>ORDER BY</codeph> clause causes the intermediate
+results of the cumulative total to be calculated in a different order:
+
+<codeblock>select x, property, sum(x) over (partition by property <b>order by x desc</b>) as 'cumulative total'
+ from int_t where property in ('odd','even');
++----+----------+------------------+
+| x | property | cumulative total |
++----+----------+------------------+
+| 10 | even | 10 |
+| 8 | even | 18 |
+| 6 | even | 24 |
+| 4 | even | 28 |
+| 2 | even | 30 |
+| 9 | odd | 9 |
+| 7 | odd | 16 |
+| 5 | odd | 21 |
+| 3 | odd | 24 |
+| 1 | odd | 25 |
++----+----------+------------------+
+</codeblock>
+
+The following examples show how to construct a moving window, with a running total taking into account 1 row before
+and 1 row after the current row, within the same partition (all the even values or all the odd values).
+Because of a restriction in the Impala <codeph>RANGE</codeph> syntax, this type of
+moving window is possible with the <codeph>ROWS BETWEEN</codeph> clause but not the <codeph>RANGE BETWEEN</codeph>
+clause:
+<codeblock>select x, property,
+ sum(x) over
+ (
+ partition by property
+ <b>order by x</b>
+ <b>rows between 1 preceding and 1 following</b>
+ ) as 'moving total'
+ from int_t where property in ('odd','even');
++----+----------+--------------+
+| x | property | moving total |
++----+----------+--------------+
+| 2 | even | 6 |
+| 4 | even | 12 |
+| 6 | even | 18 |
+| 8 | even | 24 |
+| 10 | even | 18 |
+| 1 | odd | 4 |
+| 3 | odd | 9 |
+| 5 | odd | 15 |
+| 7 | odd | 21 |
+| 9 | odd | 16 |
++----+----------+--------------+
+
+-- Doesn't work because of syntax restriction on RANGE clause.
+select x, property,
+ sum(x) over
+ (
+ partition by property
+ <b>order by x</b>
+ <b>range between 1 preceding and 1 following</b>
+ ) as 'moving total'
+from int_t where property in ('odd','even');
+ERROR: AnalysisException: RANGE is only supported with both the lower and upper bounds UNBOUNDED or one UNBOUNDED and the other CURRENT ROW.
+</codeblock>
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ <xref href="impala_analytic_functions.xml#analytic_functions"/>
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/restrictions_blurb"/>
+
+<!-- This conref appears under SUM(), AVG(), FLOAT, and DOUBLE topics. -->
+
+ <p conref="../shared/impala_common.xml#common/sum_double"/>
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_support_start_over.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_support_start_over.xml b/docs/topics/impala_support_start_over.xml
new file mode 100644
index 0000000..2c17b5d
--- /dev/null
+++ b/docs/topics/impala_support_start_over.xml
@@ -0,0 +1,29 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="support_start_over">
+
+ <title>SUPPORT_START_OVER Query Option</title>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="Impala Query Options"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">SUPPORT_START_OVER query option</indexterm>
+ Leave this setting at its default value.
+ It is a read-only setting, tested by some client applications such as Hue.
+ </p>
+ <p>
+ If you accidentally change it through <cmdname>impala-shell</cmdname>,
+ subsequent queries encounter errors until you undo the change
+ by issuing <codeph>UNSET support_start_over</codeph>.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/type_boolean"/>
+ <p conref="../shared/impala_common.xml#common/default_false"/>
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_sync_ddl.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_sync_ddl.xml b/docs/topics/impala_sync_ddl.xml
new file mode 100644
index 0000000..b217f67
--- /dev/null
+++ b/docs/topics/impala_sync_ddl.xml
@@ -0,0 +1,56 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept rev="1.2.1" id="sync_ddl">
+
+ <title>SYNC_DDL Query Option</title>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="Impala Query Options"/>
+ <data name="Category" value="DDL"/>
+ <data name="Category" value="SQL"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">SYNC_DDL query option</indexterm>
+ When enabled, causes any DDL operation such as <codeph>CREATE TABLE</codeph> or <codeph>ALTER TABLE</codeph>
+ to return only when the changes have been propagated to all other Impala nodes in the cluster by the Impala
+ catalog service. That way, if you issue a subsequent <codeph>CONNECT</codeph> statement in
+      <cmdname>impala-shell</cmdname> to connect to a different node in the cluster, you can be sure that the other
+      node will already recognize any added or changed tables. (The catalog service automatically broadcasts the
+      DDL changes to all nodes, but without this option there could be a period of inconsistency if
+ you quickly switched to another node, such as by issuing a subsequent query through a load-balancing proxy.)
+ </p>
+
+ <p>
+ Although <codeph>INSERT</codeph> is classified as a DML statement, when the <codeph>SYNC_DDL</codeph> option
+ is enabled, <codeph>INSERT</codeph> statements also delay their completion until all the underlying data and
+ metadata changes are propagated to all Impala nodes. Internally, Impala inserts have similarities with DDL
+ statements in traditional database systems, because they create metadata needed to track HDFS block locations
+ for new files and they potentially add new partitions to partitioned tables.
+ </p>
+
+ <note>
+ Because this option can introduce a delay after each write operation, if you are running a sequence of
+ <codeph>CREATE DATABASE</codeph>, <codeph>CREATE TABLE</codeph>, <codeph>ALTER TABLE</codeph>,
+ <codeph>INSERT</codeph>, and similar statements within a setup script, to minimize the overall delay you can
+ enable the <codeph>SYNC_DDL</codeph> query option only near the end, before the final DDL statement.
+ </note>
+
+ <p conref="../shared/impala_common.xml#common/type_boolean"/>
+ <p conref="../shared/impala_common.xml#common/default_false_0"/>
+
+ <draft-comment translate="no">
+Example could be useful here.
+</draft-comment>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+ <p>
+ <xref href="impala_ddl.xml#ddl"/>
+ </p>
+
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_tables.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_tables.xml b/docs/topics/impala_tables.xml
new file mode 100644
index 0000000..30e3737
--- /dev/null
+++ b/docs/topics/impala_tables.xml
@@ -0,0 +1,258 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="tables">
+
+ <title>Overview of Impala Tables</title>
+ <titlealts><navtitle>Tables</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="Databases"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Data Analysts"/>
+ <data name="Category" value="Developers"/>
+ <data name="Category" value="Querying"/>
+ <data name="Category" value="Tables"/>
+ <data name="Category" value="Schemas"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p/>
+
+ <p>
+ Tables are the primary containers for data in Impala. They have the familiar row and column layout similar to
+ other database systems, plus some features such as partitioning often associated with higher-end data
+ warehouse systems.
+ </p>
+
+ <p>
+ Logically, each table has a structure based on the definition of its columns, partitions, and other
+ properties.
+ </p>
+
+ <p>
+ Physically, each table that uses HDFS storage is associated with a directory in HDFS. The table data consists of all the data files
+ underneath that directory:
+ </p>
+
+ <ul>
+ <li>
+ <xref href="impala_tables.xml#internal_tables">Internal tables</xref> are managed by Impala, and use directories
+ inside the designated Impala work area.
+ </li>
+
+ <li>
+ <xref href="impala_tables.xml#external_tables">External tables</xref> use arbitrary HDFS directories, where
+ the data files are typically shared between different Hadoop components.
+ </li>
+
+ <li>
+ Large-scale data is usually handled by partitioned tables, where the data files are divided among different
+ HDFS subdirectories.
+ </li>
+ </ul>
+
+ <p rev="2.2.0">
+ Impala tables can also represent data that is stored in HBase, or in the Amazon S3 filesystem (CDH 5.4.0 or higher),
+ or on Isilon storage devices (CDH 5.4.3 or higher). See <xref href="impala_hbase.xml#impala_hbase"/>,
+ <xref href="impala_s3.xml#s3"/>, and <xref href="impala_isilon.xml#impala_isilon"/>
+ for details about those special kinds of tables.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/ignore_file_extensions"/>
+
+ <p>
+ <b>Related statements:</b> <xref href="impala_create_table.xml#create_table"/>,
+      <xref href="impala_drop_table.xml#drop_table"/>, <xref href="impala_alter_table.xml#alter_table"/>,
+ <xref href="impala_insert.xml#insert"/>, <xref href="impala_load_data.xml#load_data"/>,
+ <xref href="impala_select.xml#select"/>
+ </p>
+ </conbody>
+
+ <concept id="internal_tables">
+
+ <title>Internal Tables</title>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">internal tables</indexterm>
+ The default kind of table produced by the <codeph>CREATE TABLE</codeph> statement is known as an internal
+ table. (Its counterpart is the external table, produced by the <codeph>CREATE EXTERNAL TABLE</codeph>
+ syntax.)
+ </p>
+
+ <ul>
+ <li>
+ <p>
+ Impala creates a directory in HDFS to hold the data files.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ You can create data in internal tables by issuing <codeph>INSERT</codeph> or <codeph>LOAD DATA</codeph>
+ statements.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ If you add or replace data using HDFS operations, issue the <codeph>REFRESH</codeph> command in
+ <cmdname>impala-shell</cmdname> so that Impala recognizes the changes in data files, block locations,
+ and so on.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ When you issue a <codeph>DROP TABLE</codeph> statement, Impala physically removes all the data files
+ from the directory.
+ </p>
+ </li>
+
+ <li>
+ <p conref="../shared/impala_common.xml#common/check_internal_external_table"/>
+ </li>
+
+ <li>
+ <p>
+ When you issue an <codeph>ALTER TABLE</codeph> statement to rename an internal table, all data files
+ are moved into the new HDFS directory for the table. The files are moved even if they were formerly in
+ a directory outside the Impala data directory, for example in an internal table with a
+ <codeph>LOCATION</codeph> attribute pointing to an outside HDFS directory.
+ </p>
+ </li>
+ </ul>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/switch_internal_external_table"/>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ <xref href="impala_tables.xml#external_tables"/>, <xref href="impala_create_table.xml#create_table"/>,
+ <xref href="impala_drop_table.xml#drop_table"/>, <xref href="impala_alter_table.xml#alter_table"/>,
+ <xref href="impala_describe.xml#describe"/>
+ </p>
+ </conbody>
+ </concept>
+
+ <concept id="external_tables">
+
+ <title>External Tables</title>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">external tables</indexterm>
+ The syntax <codeph>CREATE EXTERNAL TABLE</codeph> sets up an Impala table that points at existing data
+        files, potentially in HDFS locations outside the normal Impala data directories. This operation saves the
+ expense of importing the data into a new table when you already have the data files in a known location in
+ HDFS, in the desired file format.
+ </p>
+
+ <ul>
+ <li>
+ <p>
+ You can use Impala to query the data in this table.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ You can create data in external tables by issuing <codeph>INSERT</codeph> or <codeph>LOAD DATA</codeph>
+ statements.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ If you add or replace data using HDFS operations, issue the <codeph>REFRESH</codeph> command in
+ <cmdname>impala-shell</cmdname> so that Impala recognizes the changes in data files, block locations,
+ and so on.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ When you issue a <codeph>DROP TABLE</codeph> statement in Impala, that removes the connection that
+ Impala has with the associated data files, but does not physically remove the underlying data. You can
+ continue to use the data files with other Hadoop components and HDFS operations.
+ </p>
+ </li>
+
+ <li>
+ <p conref="../shared/impala_common.xml#common/check_internal_external_table"/>
+ </li>
+
+ <li>
+ <p>
+ When you issue an <codeph>ALTER TABLE</codeph> statement to rename an external table, all data files
+ are left in their original locations.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ You can point multiple external tables at the same HDFS directory by using the same
+ <codeph>LOCATION</codeph> attribute for each one. The tables could have different column definitions,
+ as long as the number and types of columns are compatible with the schema evolution considerations for
+ the underlying file type. For example, for text data files, one table might define a certain column as
+ a <codeph>STRING</codeph> while another defines the same column as a <codeph>BIGINT</codeph>.
+ </p>
+ </li>
+ </ul>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/switch_internal_external_table"/>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ <xref href="impala_tables.xml#internal_tables"/>, <xref href="impala_create_table.xml#create_table"/>,
+ <xref href="impala_drop_table.xml#drop_table"/>, <xref href="impala_alter_table.xml#alter_table"/>,
+ <xref href="impala_describe.xml#describe"/>
+ </p>
+ </conbody>
+ </concept>
+
+ <concept id="table_file_formats">
+ <title>File Formats</title>
+ <conbody>
+ <p>
+ Each table has an associated file format, which determines how Impala interprets the
+ associated data files. See <xref href="impala_file_formats.xml#file_formats"/> for details.
+ </p>
+ <p>
+ You set the file format during the <codeph>CREATE TABLE</codeph> statement,
+ or change it later using the <codeph>ALTER TABLE</codeph> statement.
+ Partitioned tables can have a different file format for individual partitions,
+ allowing you to change the file format used in your ETL process for new data
+ without going back and reconverting all the existing data in the same table.
+ </p>
+ <p>
+ Any <codeph>INSERT</codeph> statements produce new data files with the current file format of the table.
+ For existing data files, changing the file format of the table does not automatically do any data conversion.
+ You must use <codeph>TRUNCATE TABLE</codeph> or <codeph>INSERT OVERWRITE</codeph> to remove any previous data
+ files that use the old file format.
+ Then you use the <codeph>LOAD DATA</codeph> statement, <codeph>INSERT ... SELECT</codeph>, or other mechanism
+ to put data files of the correct format into the table.
+ </p>
+ <p>
+ The default file format, text, is the most flexible and easy to produce when you are just getting started with
+ Impala. The Parquet file format offers the highest query performance and uses compression to reduce storage
+ requirements; therefore, Cloudera recommends using Parquet for Impala tables with substantial amounts of data.
+ <ph rev="2.3.0">Also, the complex types (<codeph>ARRAY</codeph>, <codeph>STRUCT</codeph>, and <codeph>MAP</codeph>)
+ available in CDH 5.5 / Impala 2.3 and higher are currently only supported with the Parquet file type.</ph>
+ Based on your existing ETL workflow, you might use other file formats such as Avro, possibly doing a final
+ conversion step to Parquet to take advantage of its performance for analytic queries.
+ </p>
+ </conbody>
+ </concept>
+
+</concept>
[19/22] incubator-impala git commit: First try at porting over the
source files necessary for the Impala SQL Reference.
Posted by jr...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_analytic_functions.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_analytic_functions.xml b/docs/topics/impala_analytic_functions.xml
new file mode 100644
index 0000000..293a512
--- /dev/null
+++ b/docs/topics/impala_analytic_functions.xml
@@ -0,0 +1,1742 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept rev="2.0.0" id="analytic_functions">
+
+ <title>Impala Analytic Functions</title>
+
+ <titlealts>
+
+ <navtitle>Analytic Functions</navtitle>
+
+ </titlealts>
+
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Impala Functions"/>
+ <data name="Category" value="Aggregate Functions"/>
+ <data name="Category" value="Analytic Functions"/>
+ <data name="Category" value="Data Analysts"/>
+ <data name="Category" value="Developers"/>
+ <data name="Category" value="Querying"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">analytic functions</indexterm>
+
+ <indexterm audience="Cloudera">window functions</indexterm>
+ Analytic functions (also known as window functions) are a special category of built-in functions. Like
+ aggregate functions, they examine the contents of multiple input rows to compute each output value. However,
+ rather than being limited to one result value per <codeph>GROUP BY</codeph> group, they operate on
+ <term>windows</term> where the input rows are ordered and grouped using flexible conditions expressed through
+ an <codeph>OVER()</codeph> clause.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/added_in_20"/>
+
+<!--
+ <p>
+ Analytic functions produce one output value for each input row, like scalar functions such as
+ <codeph>length()</codeph> or
+ <codeph>substr()</codeph>.
+ </p>
+-->
+
+ <p>
+ Some functions, such as <codeph>LAG()</codeph> and <codeph>RANK()</codeph>, can only be used in this analytic
+ context. Some aggregate functions do double duty: when you call the aggregation functions such as
+ <codeph>MAX()</codeph>, <codeph>SUM()</codeph>, <codeph>AVG()</codeph>, and so on with an
+ <codeph>OVER()</codeph> clause, they produce an output value for each row, based on computations across other
+ rows in the window.
+ </p>
+
+ <p>
+ Although analytic functions often compute the same value you would see from an aggregate function in a
+ <codeph>GROUP BY</codeph> query, the analytic functions produce a value for each row in the result set rather
+ than a single value for each group. This flexibility lets you include additional columns in the
+ <codeph>SELECT</codeph> list, offering more opportunities for organizing and filtering the result set.
+ </p>
+
+ <p>
+ Analytic function calls are only allowed in the <codeph>SELECT</codeph> list and in the outermost
+ <codeph>ORDER BY</codeph> clause of the query. During query processing, analytic functions are evaluated
+ after other query stages such as joins, <codeph>WHERE</codeph>, and <codeph>GROUP BY</codeph>,
+ </p>
+
+<!-- Oracle doesn't show examples until it gets to the actual functions, so let's follow that lead.
+ <p>
+ The following example shows a very simple call to <codeph>MAX()</codeph> in
+ an analytic context, and a similar query using a <codeph>GROUP BY</codeph> clause.
+ </p>
+-->
+
+<!--
+This basic query could be represented either as an analytic
+function call or an aggregation function call in a <codeph>GROUP BY</codeph> query.
+For more elaborate kinds of computations, the flexibility of the analytic window
+makes that the preferred option.
+-->
+
+<!-- TK: construct sample data and fill in query results. -->
+
+<!-- Other DB docs don't necessarily include examples up at this level, only for the individual functions.
+ So maybe take these placeholders out entirely.
+
+<codeblock>SELECT year, month, max(degrees_c) OVER (PARTITION BY year) FROM historical_temps;
+SELECT year, month, max(degrees_c) FROM historical_temps GROUP BY year;
+</codeblock>
+-->
+
+ <p>
+ The rows that are part of each partition are analyzed by computations across an ordered or unordered set of
+ rows. For example, <codeph>COUNT()</codeph> and <codeph>SUM()</codeph> might be applied to all the rows in
+ the partition, in which case the order of analysis does not matter. The <codeph>ORDER BY</codeph> clause
+        might be used inside the <codeph>OVER()</codeph> clause to define the ordering that applies to functions
+ such as <codeph>LAG()</codeph> and <codeph>FIRST_VALUE()</codeph>.
+ </p>
+
+<!-- TK: output needed here also. -->
+
+<!--
+<codeblock>SELECT year, month, max(degrees_c) OVER (PARTITION BY year ORDER BY MONTH DESC) FROM historical_temps;
+</codeblock>
+-->
+
+ <p>
+ Analytic functions are frequently used in fields such as finance and science to provide trend, outlier, and
+ bucketed analysis for large data sets. You might also see the term <q>window functions</q> in database
+ literature, referring to the sequence of rows (the <q>window</q>) that the function call applies to,
+ particularly when the <codeph>OVER</codeph> clause includes a <codeph>ROWS</codeph> or <codeph>RANGE</codeph>
+ keyword.
+ </p>
+
+ <p>
+ The following sections describe the analytic query clauses and the pure analytic functions provided by
+ Impala. For usage information about aggregate functions in an analytic context, see
+ <xref href="impala_aggregate_functions.xml#aggregate_functions"/>.
+ </p>
+
+ <p outputclass="toc inpage"/>
+
+ </conbody>
+
+ <concept id="over">
+
+ <title>OVER Clause</title>
+
+ <conbody>
+
+ <p>
+ The <codeph>OVER</codeph> clause is required for calls to pure analytic functions such as
+ <codeph>LEAD()</codeph>, <codeph>RANK()</codeph>, and <codeph>FIRST_VALUE()</codeph>. When you include an
+ <codeph>OVER</codeph> clause with calls to aggregate functions such as <codeph>MAX()</codeph>,
+ <codeph>COUNT()</codeph>, or <codeph>SUM()</codeph>, they operate as analytic functions.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock>function(<varname>args</varname>) OVER([<varname>partition_by_clause</varname>] [<varname>order_by_clause</varname> [<varname>window_clause</varname>]])
+
+partition_by_clause ::= PARTITION BY <varname>expr</varname> [, <varname>expr</varname> ...]
+order_by_clause ::= ORDER BY <varname>expr</varname> [ASC | DESC] [NULLS FIRST | NULLS LAST] [, <varname>expr</varname> [ASC | DESC] [NULLS FIRST | NULLS LAST] ...]
+window_clause: See <xref href="#window_clause">Window Clause</xref>
+</codeblock>
+
+ <p>
+ <b>PARTITION BY clause:</b>
+ </p>
+
+ <p>
+ The <codeph>PARTITION BY</codeph> clause acts much like the <codeph>GROUP BY</codeph> clause in the
+ outermost block of a query. It divides the rows into groups containing identical values in one or more
+ columns. These logical groups are known as <term>partitions</term>. Throughout the discussion of analytic
+ functions, <q>partitions</q> refers to the groups produced by the <codeph>PARTITION BY</codeph> clause, not
+ to partitioned tables.
+ </p>
+
+ <p>
+ The sequence of results from an analytic function <q>resets</q> for each new partition in the result set.
+ That is, the set of preceding or following rows considered by the analytic function always come from a
+ single partition. Any <codeph>MAX()</codeph>, <codeph>SUM()</codeph>, <codeph>ROW_NUMBER()</codeph>, and so
+ on apply to each partition independently. Omit the <codeph>PARTITION BY</codeph> clause to apply the
+ analytic operation to all the rows in the table.
+ </p>
+
+ <p>
+ <b>ORDER BY clause:</b>
+ </p>
+
+ <p>
+ The <codeph>ORDER BY</codeph> clause works much like the <codeph>ORDER BY</codeph> clause in the outermost
+ block of a query. It defines the order in which rows are evaluated for the entire input set, or for each
+ group produced by a <codeph>PARTITION BY</codeph> clause. You can order by one or multiple expressions, and
+ for each expression optionally choose ascending or descending order and whether nulls come first or last in
+ the sort order. Because this <codeph>ORDER BY</codeph> clause only defines the order in which rows are
+ evaluated, if you want the results to be output in a specific order, also include an <codeph>ORDER
+ BY</codeph> clause in the outer block of the query.
+ </p>
+
+ <p>
+ When the <codeph>ORDER BY</codeph> clause is omitted, the analytic function applies to all items in the
+ group produced by the <codeph>PARTITION BY</codeph> clause. When the <codeph>ORDER BY</codeph> clause is
+ included, the analysis can apply to all or a subset of the items in the group, depending on the optional
+ window clause.
+ </p>
+
+ <p>
+ The order in which the rows are analyzed is only defined for those columns specified in <codeph>ORDER
+ BY</codeph> clauses.
+ </p>
+
+ <p>
+ One difference between the analytic and outer uses of the <codeph>ORDER BY</codeph> clause: inside the
+ <codeph>OVER</codeph> clause, <codeph>ORDER BY 1</codeph> or other integer value is interpreted as a
+ constant sort value (effectively a no-op) rather than referring to column 1.
+ </p>
+
+ <p>
+ <b>Window clause:</b>
+ </p>
+
+ <p>
+ The window clause is only allowed in combination with an <codeph>ORDER BY</codeph> clause. If the
+ <codeph>ORDER BY</codeph> clause is specified but the window clause is not, the default window is
+ <codeph>RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW</codeph>. See
+ <xref href="impala_analytic_functions.xml#window_clause"/> for full details.
+ </p>
+
+<!--
+ <p conref="/Content/impala_common_xi44078.xml#common/usage_notes_blurb"/>
+-->
+
+ <p conref="../shared/impala_common.xml#common/hbase_blurb"/>
+
+ <p>
+ Because HBase tables are optimized for single-row lookups rather than full scans, analytic functions using
+ the <codeph>OVER()</codeph> clause are not recommended for HBase tables. Although such queries work, their
+ performance is lower than on comparable tables using HDFS data files.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/parquet_blurb"/>
+
+ <p>
+ Analytic functions are very efficient for Parquet tables. The data that is examined during evaluation of
+ the <codeph>OVER()</codeph> clause comes from a specified set of columns, and the values for each column
+ are arranged sequentially within each data file.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/text_blurb"/>
+
+ <p>
+ Analytic functions are convenient to use with text tables for exploratory business intelligence. When the
+ volume of data is substantial, prefer to use Parquet tables for performance-critical analytic queries.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/added_in_20"/>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+ <p>
+ The following example shows how to synthesize a numeric sequence corresponding to all the rows in a table.
+ The new table has the same columns as the old one, plus an additional column <codeph>ID</codeph> containing
+ the integers 1, 2, 3, and so on, corresponding to the order of a <codeph>TIMESTAMP</codeph> column in the
+ original table.
+ </p>
+
+<!-- TK: synthesize some data and fill in output here. -->
+
+<codeblock>CREATE TABLE events_with_id AS
+ SELECT
+ row_number() OVER (ORDER BY date_and_time) AS id,
+ c1, c2, c3, c4
+ FROM events;
+</codeblock>
+
+ <p>
+ The following example shows how to determine the number of rows containing each value for a column. Unlike
+ a corresponding <codeph>GROUP BY</codeph> query, this one can analyze a single column and still return all
+ values (not just the distinct ones) from the other columns.
+ </p>
+
+<!-- TK: verify the 'unbounded' shortcut syntax. -->
+
+<codeblock>SELECT x, y, z,
+ count(x) OVER (PARTITION BY x) AS how_many_x
+FROM t1;
+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/restrictions_blurb"/>
+
+ <p>
+ You cannot directly combine the <codeph>DISTINCT</codeph> operator with analytic function calls. You can
+ put the analytic function call in a <codeph>WITH</codeph> clause or an inline view, and apply the
+ <codeph>DISTINCT</codeph> operator to its result set.
+ </p>
+
+<codeblock>WITH t1 AS (SELECT x, sum(x) OVER (PARTITION BY x) AS total FROM t1)
+ SELECT DISTINCT x, total FROM t1;
+</codeblock>
+
+ </conbody>
+
+ </concept>
+
+ <concept id="window_clause">
+
+ <title>Window Clause</title>
+
+ <conbody>
+
+ <p>
+ Certain analytic functions accept an optional <term>window clause</term>, which makes the function analyze
+ only certain rows <q>around</q> the current row rather than all rows in the partition. For example, you can
+ get a moving average by specifying some number of preceding and following rows, or a running count or
+ running total by specifying all rows up to the current position. This clause can result in different
+ analytic results for rows within the same partition.
+ </p>
+
+ <p>
+ The window clause is supported with the <codeph>AVG()</codeph>, <codeph>COUNT()</codeph>,
+ <codeph>FIRST_VALUE()</codeph>, <codeph>LAST_VALUE()</codeph>, and <codeph>SUM()</codeph> functions.
+<!-- To do: fill in this factoid under MAX and MIN also. -->
+ For <codeph>MAX()</codeph> and <codeph>MIN()</codeph>, the window clause is only allowed if the start bound is
+ <codeph>UNBOUNDED PRECEDING</codeph>.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock>ROWS BETWEEN [ { <varname>m</varname> | UNBOUNDED } PRECEDING | CURRENT ROW] [ AND [CURRENT ROW | { UNBOUNDED | <varname>n</varname> } FOLLOWING] ]
+RANGE BETWEEN [ {<varname>m</varname> | UNBOUNDED } PRECEDING | CURRENT ROW] [ AND [CURRENT ROW | { UNBOUNDED | <varname>n</varname> } FOLLOWING] ]</codeblock>
+
+ <p>
+ <codeph>ROWS BETWEEN</codeph> defines the size of the window in terms of the indexes of the rows in the
+ result set. The size of the window is predictable based on the clauses and the position within the result set.
+ </p>
+
+ <p>
+ <codeph>RANGE BETWEEN</codeph> does not currently support numeric arguments to define a variable-size
+ sliding window.
+<!--
+Currently, it effectively works the same as the
+equivalent <codeph>ROWS BETWEEN</codeph> clause.
+-->
+ </p>
+
+<!--
+<p>
+<codeph>RANGE BETWEEN</codeph> defines the size of the window based on arithmetic comparisons
+of the values in the result set.
+The size of the window varies depending on the order and distribution of values.
+</p>
+-->
+
+<!-- <p conref="/Content/impala_common_xi44078.xml#common/restrictions_blurb"/> -->
+
+ <p>
+ Currently, Impala supports only some combinations of arguments to the <codeph>RANGE</codeph> clause:
+ </p>
+
+ <ul>
+ <li>
+ <codeph>RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW</codeph> (the default when <codeph>ORDER
+ BY</codeph> is specified and the window clause is omitted)
+ </li>
+
+ <li>
+ <codeph>RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING</codeph>
+ </li>
+
+ <li>
+ <codeph>RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING</codeph>
+ </li>
+ </ul>
+
+ <p>
+ When <codeph>RANGE</codeph> is used, <codeph>CURRENT ROW</codeph> includes not just the current row but all
+ rows that are tied with the current row based on the <codeph>ORDER BY</codeph> expressions.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/added_in_20"/>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+ <p>
+ The following examples show financial data for a fictional stock symbol <codeph>JDR</codeph>. The closing
+ price moves up and down each day.
+ </p>
+
+<codeblock>create table stock_ticker (stock_symbol string, closing_price decimal(8,2), closing_date timestamp);
+...load some data...
+select * from stock_ticker order by stock_symbol, closing_date
++--------------+---------------+---------------------+
+| stock_symbol | closing_price | closing_date |
++--------------+---------------+---------------------+
+| JDR | 12.86 | 2014-10-02 00:00:00 |
+| JDR | 12.89 | 2014-10-03 00:00:00 |
+| JDR | 12.94 | 2014-10-04 00:00:00 |
+| JDR | 12.55 | 2014-10-05 00:00:00 |
+| JDR | 14.03 | 2014-10-06 00:00:00 |
+| JDR | 14.75 | 2014-10-07 00:00:00 |
+| JDR | 13.98 | 2014-10-08 00:00:00 |
++--------------+---------------+---------------------+
+</codeblock>
+
+ <p>
+ The queries use analytic functions with window clauses to compute moving averages of the closing price. For
+ example, <codeph>ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING</codeph> produces an average of the value from a
+ 3-day span, producing a different value for each row. The first row, which has no preceding row, only gets
+ averaged with the row following it. If the table contained more than one stock symbol, the
+ <codeph>PARTITION BY</codeph> clause would limit the window for the moving average to only consider the
+ prices for a single stock.
+ </p>
+
+<codeblock>select stock_symbol, closing_date, closing_price,
+ avg(closing_price) over (partition by stock_symbol order by closing_date
+ rows between 1 preceding and 1 following) as moving_average
+ from stock_ticker;
++--------------+---------------------+---------------+----------------+
+| stock_symbol | closing_date | closing_price | moving_average |
++--------------+---------------------+---------------+----------------+
+| JDR | 2014-10-02 00:00:00 | 12.86 | 12.87 |
+| JDR | 2014-10-03 00:00:00 | 12.89 | 12.89 |
+| JDR | 2014-10-04 00:00:00 | 12.94 | 12.79 |
+| JDR | 2014-10-05 00:00:00 | 12.55 | 13.17 |
+| JDR | 2014-10-06 00:00:00 | 14.03 | 13.77 |
+| JDR | 2014-10-07 00:00:00 | 14.75 | 14.25 |
+| JDR | 2014-10-08 00:00:00 | 13.98 | 14.36 |
++--------------+---------------------+---------------+----------------+
+</codeblock>
+
+ <p>
+ The clause <codeph>ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW</codeph> produces a cumulative moving
+ average, from the earliest data up to the value for each day.
+ </p>
+
+<codeblock>select stock_symbol, closing_date, closing_price,
+ avg(closing_price) over (partition by stock_symbol order by closing_date
+ rows between unbounded preceding and current row) as moving_average
+ from stock_ticker;
++--------------+---------------------+---------------+----------------+
+| stock_symbol | closing_date | closing_price | moving_average |
++--------------+---------------------+---------------+----------------+
+| JDR | 2014-10-02 00:00:00 | 12.86 | 12.86 |
+| JDR | 2014-10-03 00:00:00 | 12.89 | 12.87 |
+| JDR | 2014-10-04 00:00:00 | 12.94 | 12.89 |
+| JDR | 2014-10-05 00:00:00 | 12.55 | 12.81 |
+| JDR | 2014-10-06 00:00:00 | 14.03 | 13.05 |
+| JDR | 2014-10-07 00:00:00 | 14.75 | 13.33 |
+| JDR | 2014-10-08 00:00:00 | 13.98 | 13.42 |
++--------------+---------------------+---------------+----------------+
+</codeblock>
+
+<!-- Matt suggests not always true depending on data. Hiding until I can try myself.
+<p>
+The clause <codeph>RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW</codeph> would produce the same
+output as above. Because <codeph>RANGE</codeph> currently does not support numeric offsets while
+<codeph>ROWS</codeph> does, currently the <codeph>ROWS</codeph> syntax is more flexible.
+</p>
+-->
+
+ </conbody>
+
+ </concept>
+
+ <concept id="avg_analytic">
+
+ <title>AVG() Function - Analytic Context</title>
+
+ <conbody>
+
+ <p>
+ You can include an <codeph>OVER</codeph> clause with a call to this function to use it as an analytic
+ function. See <xref href="impala_avg.xml#avg"/> for details and examples.
+ </p>
+
+ </conbody>
+
+ </concept>
+
+ <concept id="count_analytic">
+
+ <title>COUNT() Function - Analytic Context</title>
+
+ <conbody>
+
+ <p>
+ You can include an <codeph>OVER</codeph> clause with a call to this function to use it as an analytic
+ function. See <xref href="impala_count.xml#count"/> for details and examples.
+ </p>
+
+ </conbody>
+
+ </concept>
+
+ <concept rev="2.3.0" id="cume_dist">
+
+ <title>CUME_DIST() Function (CDH 5.5 or higher only)</title>
+
+ <conbody>
+
+ <p>
+ Returns the cumulative distribution of a value. The value for each row in the result set is greater than 0
+ and less than or equal to 1.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock>CUME_DIST (<varname>expr</varname>)
+ OVER ([<varname>partition_by_clause</varname>] <varname>order_by_clause</varname>)
+</codeblock>
+
+ <p>
+ The <codeph>ORDER BY</codeph> clause is required. The <codeph>PARTITION BY</codeph> clause is optional. The
+ window clause is not allowed.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+
+ <p>
+ Within each partition of the result set, the <codeph>CUME_DIST()</codeph> value represents an ascending
+ sequence that ends at 1. Each value represents the proportion of rows in the partition whose values are
+ less than or equal to the value in the current row.
+ </p>
+
+ <p>
+ If the sequence of input values contains ties, the <codeph>CUME_DIST()</codeph> results are identical for the
+ tied values.
+ </p>
+
+ <p>
+ Impala only supports the <codeph>CUME_DIST()</codeph> function in an analytic context, not as a regular
+ aggregate function.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+ <p>
+ This example uses a table with 9 rows. The <codeph>CUME_DIST()</codeph>
+ function evaluates the entire table because there is no <codeph>PARTITION BY</codeph> clause,
+ with the rows ordered by the weight of the animal.
+ The sequence of values shows that 1/9 of the values are less than or equal to the lightest
+ animal (mouse), 2/9 of the values are less than or equal to the second-lightest animal,
+ and so on up to the heaviest animal (elephant), where 9/9 of the rows are less than or
+ equal to its weight.
+ </p>
+
+<codeblock>create table animals (name string, kind string, kilos decimal(9,3));
+insert into animals values
+ ('Elephant', 'Mammal', 4000), ('Giraffe', 'Mammal', 1200), ('Mouse', 'Mammal', 0.020),
+ ('Condor', 'Bird', 15), ('Horse', 'Mammal', 500), ('Owl', 'Bird', 2.5),
+ ('Ostrich', 'Bird', 145), ('Polar bear', 'Mammal', 700), ('Housecat', 'Mammal', 5);
+
+select name, cume_dist() over (order by kilos) from animals;
++------------+-----------------------+
+| name | cume_dist() OVER(...) |
++------------+-----------------------+
+| Elephant | 1 |
+| Giraffe | 0.8888888888888888 |
+| Polar bear | 0.7777777777777778 |
+| Horse | 0.6666666666666666 |
+| Ostrich | 0.5555555555555556 |
+| Condor | 0.4444444444444444 |
+| Housecat | 0.3333333333333333 |
+| Owl | 0.2222222222222222 |
+| Mouse | 0.1111111111111111 |
++------------+-----------------------+
+</codeblock>
+
+ <p>
+ Using a <codeph>PARTITION BY</codeph> clause produces a separate sequence for each partition
+ group, in this case one for mammals and one for birds. Because there are 3 birds and 6 mammals,
+ the sequence illustrates how 1/3 of the <q>Bird</q> rows have a <codeph>kilos</codeph> value that is less than or equal to
+ the lightest bird, 1/6 of the <q>Mammal</q> rows have a <codeph>kilos</codeph> value that is less than or equal to
+ the lightest mammal, and so on until both the heaviest bird and heaviest mammal have a <codeph>CUME_DIST()</codeph>
+ value of 1.
+ </p>
+
+<codeblock>select name, kind, cume_dist() over (partition by kind order by kilos) from animals
++------------+--------+-----------------------+
+| name | kind | cume_dist() OVER(...) |
++------------+--------+-----------------------+
+| Ostrich | Bird | 1 |
+| Condor | Bird | 0.6666666666666666 |
+| Owl | Bird | 0.3333333333333333 |
+| Elephant | Mammal | 1 |
+| Giraffe | Mammal | 0.8333333333333334 |
+| Polar bear | Mammal | 0.6666666666666666 |
+| Horse | Mammal | 0.5 |
+| Housecat | Mammal | 0.3333333333333333 |
+| Mouse | Mammal | 0.1666666666666667 |
++------------+--------+-----------------------+
+</codeblock>
+
+ <p>
+ We can reverse the ordering within each partition group by using an <codeph>ORDER BY ... DESC</codeph>
+ clause within the <codeph>OVER()</codeph> clause. Now the lightest (smallest value of <codeph>kilos</codeph>)
+ animal of each kind has a <codeph>CUME_DIST()</codeph> value of 1.
+ </p>
+
+<codeblock>select name, kind, cume_dist() over (partition by kind order by kilos desc) from animals
++------------+--------+-----------------------+
+| name | kind | cume_dist() OVER(...) |
++------------+--------+-----------------------+
+| Owl | Bird | 1 |
+| Condor | Bird | 0.6666666666666666 |
+| Ostrich | Bird | 0.3333333333333333 |
+| Mouse | Mammal | 1 |
+| Housecat | Mammal | 0.8333333333333334 |
+| Horse | Mammal | 0.6666666666666666 |
+| Polar bear | Mammal | 0.5 |
+| Giraffe | Mammal | 0.3333333333333333 |
+| Elephant | Mammal | 0.1666666666666667 |
++------------+--------+-----------------------+
+</codeblock>
+
+ <p>
+ The following example manufactures some rows with identical values in the <codeph>kilos</codeph> column,
+ to demonstrate how the results look in case of tie values. For simplicity, it only shows the <codeph>CUME_DIST()</codeph>
+ sequence for the <q>Bird</q> rows. Now with 3 rows all with a value of 15, all of those rows have the same
+ <codeph>CUME_DIST()</codeph> value. 4/5 of the rows have a value for <codeph>kilos</codeph> that is less than or
+ equal to 15.
+ </p>
+
+<codeblock>insert into animals values ('California Condor', 'Bird', 15), ('Andean Condor', 'Bird', 15)
+
+select name, kind, cume_dist() over (order by kilos) from animals where kind = 'Bird';
++-------------------+------+-----------------------+
+| name | kind | cume_dist() OVER(...) |
++-------------------+------+-----------------------+
+| Ostrich | Bird | 1 |
+| Condor | Bird | 0.8 |
+| California Condor | Bird | 0.8 |
+| Andean Condor | Bird | 0.8 |
+| Owl | Bird | 0.2 |
++-------------------+------+-----------------------+
+</codeblock>
+
+ <p>
+ The following example shows how to use an <codeph>ORDER BY</codeph> clause in the outer block
+ to order the result set in case of ties. Here, all the <q>Bird</q> rows are together, then in descending order
+ by the result of the <codeph>CUME_DIST()</codeph> function, and all tied <codeph>CUME_DIST()</codeph>
+ values are ordered by the animal name.
+ </p>
+
+<codeblock>select name, kind, cume_dist() over (partition by kind order by kilos) as ordering
+ from animals
+where
+ kind = 'Bird'
+order by kind, ordering desc, name;
++-------------------+------+----------+
+| name | kind | ordering |
++-------------------+------+----------+
+| Ostrich | Bird | 1 |
+| Andean Condor | Bird | 0.8 |
+| California Condor | Bird | 0.8 |
+| Condor | Bird | 0.8 |
+| Owl | Bird | 0.2 |
++-------------------+------+----------+
+</codeblock>
+
+ </conbody>
+
+ </concept>
+
+ <concept rev="2.0.0" id="dense_rank">
+
+ <title>DENSE_RANK() Function</title>
+
+ <conbody>
+
+ <p>
+ Returns an ascending sequence of integers, starting with 1. The output sequence produces duplicate integers
+ for duplicate values of the <codeph>ORDER BY</codeph> expressions. After generating duplicate output values
+ for the <q>tied</q> input values, the function continues the sequence with the next higher integer.
+ Therefore, the sequence contains duplicates but no gaps when the input contains duplicates. The sequence
+ starts over for each group produced by the <codeph>PARTITION BY</codeph> clause.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock>DENSE_RANK() OVER([<varname>partition_by_clause</varname>] <varname>order_by_clause</varname>)</codeblock>
+
+ <p>
+ The <codeph>PARTITION BY</codeph> clause is optional. The <codeph>ORDER BY</codeph> clause is required. The
+ window clause is not allowed.
+ </p>
+
+<!-- Can make the text for ROW_NUMBER, RANK, and DENSE_RANK identical
+ so it can be conref'ed in all 3 places. -->
+
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+
+ <p>
+ Often used for top-N and bottom-N queries. For example, it could produce a <q>top 10</q> report including
+ all the items with the 10 highest values, even if several items tied for 1st place.
+ </p>
+
+ <p>
+ Similar to <codeph>ROW_NUMBER</codeph> and <codeph>RANK</codeph>. These functions differ in how they treat
+ duplicate combinations of values.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/added_in_20"/>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+ <p>
+ The following example demonstrates how the <codeph>DENSE_RANK()</codeph> function identifies where each
+ value <q>places</q> in the result set, producing the same result for duplicate values, but with a strict
+ sequence from 1 to the number of groups. For example, when results are ordered by the <codeph>X</codeph>
+ column, both <codeph>1</codeph> values are tied for first; both <codeph>2</codeph> values are tied for
+ second; and so on.
+ </p>
+
+<codeblock>select x, dense_rank() over(order by x) as rank, property from int_t;
++----+------+----------+
+| x | rank | property |
++----+------+----------+
+| 1 | 1 | square |
+| 1 | 1 | odd |
+| 2 | 2 | even |
+| 2 | 2 | prime |
+| 3 | 3 | prime |
+| 3 | 3 | odd |
+| 4 | 4 | even |
+| 4 | 4 | square |
+| 5 | 5 | odd |
+| 5 | 5 | prime |
+| 6 | 6 | even |
+| 6 | 6 | perfect |
+| 7 | 7 | lucky |
+| 7 | 7 | lucky |
+| 7 | 7 | lucky |
+| 7 | 7 | odd |
+| 7 | 7 | prime |
+| 8 | 8 | even |
+| 9 | 9 | square |
+| 9 | 9 | odd |
+| 10 | 10 | round |
+| 10 | 10 | even |
++----+------+----------+
+</codeblock>
+
+ <p>
+ The following examples show how the <codeph>DENSE_RANK()</codeph> function is affected by the
+ <codeph>PARTITION BY</codeph> clause within the <codeph>OVER()</codeph> clause.
+ </p>
+
+ <p>
+ Partitioning by the <codeph>PROPERTY</codeph> column groups all the even, odd, and so on values together,
+ and <codeph>DENSE_RANK()</codeph> returns the place of each value within the group, producing several
+ ascending sequences.
+ </p>
+
+<codeblock>select x, dense_rank() over(partition by property order by x) as rank, property from int_t;
++----+------+----------+
+| x | rank | property |
++----+------+----------+
+| 2 | 1 | even |
+| 4 | 2 | even |
+| 6 | 3 | even |
+| 8 | 4 | even |
+| 10 | 5 | even |
+| 7 | 1 | lucky |
+| 7 | 1 | lucky |
+| 7 | 1 | lucky |
+| 1 | 1 | odd |
+| 3 | 2 | odd |
+| 5 | 3 | odd |
+| 7 | 4 | odd |
+| 9 | 5 | odd |
+| 6 | 1 | perfect |
+| 2 | 1 | prime |
+| 3 | 2 | prime |
+| 5 | 3 | prime |
+| 7 | 4 | prime |
+| 10 | 1 | round |
+| 1 | 1 | square |
+| 4 | 2 | square |
+| 9 | 3 | square |
++----+------+----------+
+</codeblock>
+
+ <p>
+ Partitioning by the <codeph>X</codeph> column groups all the duplicate numbers together and returns the
+ place of each value within the group; because each value occurs only 1 or 2 times,
+ <codeph>DENSE_RANK()</codeph> designates each <codeph>X</codeph> value as either first or second within its
+ group.
+ </p>
+
+<codeblock>select x, dense_rank() over(partition by x order by property) as rank, property from int_t;
++----+------+----------+
+| x | rank | property |
++----+------+----------+
+| 1 | 1 | odd |
+| 1 | 2 | square |
+| 2 | 1 | even |
+| 2 | 2 | prime |
+| 3 | 1 | odd |
+| 3 | 2 | prime |
+| 4 | 1 | even |
+| 4 | 2 | square |
+| 5 | 1 | odd |
+| 5 | 2 | prime |
+| 6 | 1 | even |
+| 6 | 2 | perfect |
+| 7 | 1 | lucky |
+| 7 | 1 | lucky |
+| 7 | 1 | lucky |
+| 7 | 2 | odd |
+| 7 | 3 | prime |
+| 8 | 1 | even |
+| 9 | 1 | odd |
+| 9 | 2 | square |
+| 10 | 1 | even |
+| 10 | 2 | round |
++----+------+----------+
+</codeblock>
+
+ <p>
+ The following example shows how <codeph>DENSE_RANK()</codeph> produces a continuous sequence while still
+ allowing for ties. In this case, Croesus and Midas both have the second largest fortune, while Crassus has
+ the third largest. (In <xref href="impala_analytic_functions.xml#rank"/>, you see a similar query with the
+ <codeph>RANK()</codeph> function that shows that while Crassus has the third largest fortune, he is the
+ fourth richest person.)
+ </p>
+
+<codeblock>select dense_rank() over (order by net_worth desc) as placement, name, net_worth from wealth order by placement, name;
++-----------+---------+---------------+
+| placement | name | net_worth |
++-----------+---------+---------------+
+| 1 | Solomon | 2000000000.00 |
+| 2 | Croesus | 1000000000.00 |
+| 2 | Midas | 1000000000.00 |
+| 3 | Crassus | 500000000.00 |
+| 4 | Scrooge | 80000000.00 |
++-----------+---------+---------------+
+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ <xref href="impala_analytic_functions.xml#rank"/>, <xref href="impala_analytic_functions.xml#row_number"/>
+ </p>
+
+ </conbody>
+
+ </concept>
+
+ <concept rev="2.0.0" id="first_value">
+
+ <title>FIRST_VALUE() Function</title>
+
+ <conbody>
+
+ <p>
+ Returns the expression value from the first row in the window. The return value is <codeph>NULL</codeph> if
+ the input expression is <codeph>NULL</codeph>.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock>FIRST_VALUE(<varname>expr</varname>) OVER([<varname>partition_by_clause</varname>] <varname>order_by_clause</varname> [<varname>window_clause</varname>])</codeblock>
+
+ <p>
+ The <codeph>PARTITION BY</codeph> clause is optional. The <codeph>ORDER BY</codeph> clause is required. The
+ window clause is optional.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+
+ <p>
+ If any duplicate values occur in the tuples evaluated by the <codeph>ORDER BY</codeph> clause, the result
+ of this function is not deterministic. Consider adding additional <codeph>ORDER BY</codeph> columns to
+ ensure consistent ordering.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/added_in_20"/>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+ <p>
+ The following example shows a table with a wide variety of country-appropriate greetings. For consistency,
+ we want to standardize on a single greeting for each country. The <codeph>FIRST_VALUE()</codeph> function
+ helps to produce a mail merge report where every person from the same country is addressed with the same
+ greeting.
+ </p>
+
+<codeblock>select name, country, greeting from mail_merge
++---------+---------+--------------+
+| name | country | greeting |
++---------+---------+--------------+
+| Pete | USA | Hello |
+| John | USA | Hi |
+| Boris | Germany | Guten tag |
+| Michael | Germany | Guten morgen |
+| Bjorn | Sweden | Hej |
+| Mats | Sweden | Tja |
++---------+---------+--------------+
+
+select country, name,
+ first_value(greeting)
+ over (partition by country order by name, greeting) as greeting
+ from mail_merge;
++---------+---------+-----------+
+| country | name | greeting |
++---------+---------+-----------+
+| Germany | Boris | Guten tag |
+| Germany | Michael | Guten tag |
+| Sweden | Bjorn | Hej |
+| Sweden | Mats | Hej |
+| USA | John | Hi |
+| USA | Pete | Hi |
++---------+---------+-----------+
+</codeblock>
+
+ <p>
+ Changing the order in which the names are evaluated changes which greeting is applied to each group.
+ </p>
+
+<codeblock>select country, name,
+ first_value(greeting)
+ over (partition by country order by name desc, greeting) as greeting
+ from mail_merge;
++---------+---------+--------------+
+| country | name | greeting |
++---------+---------+--------------+
+| Germany | Michael | Guten morgen |
+| Germany | Boris | Guten morgen |
+| Sweden | Mats | Tja |
+| Sweden | Bjorn | Tja |
+| USA | Pete | Hello |
+| USA | John | Hello |
++---------+---------+--------------+
+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ <xref href="impala_analytic_functions.xml#last_value"/>
+ </p>
+
+ </conbody>
+
+ </concept>
+
+ <concept rev="2.0.0" id="lag">
+
+ <title>LAG() Function</title>
+
+ <conbody>
+
+ <p>
+ This function returns the value of an expression using column values from a preceding row. You specify an
+ integer offset, which designates a row position some number of rows previous to the current row. Any column
+ references in the expression argument refer to column values from that prior row. Typically, the table
+ contains a time sequence or numeric sequence column that clearly distinguishes the ordering of the rows.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock>LAG (<varname>expr</varname> [, <varname>offset</varname>] [, <varname>default</varname>])
+ OVER ([<varname>partition_by_clause</varname>] <varname>order_by_clause</varname>)</codeblock>
+
+ <p>
+ The <codeph>ORDER BY</codeph> clause is required. The <codeph>PARTITION BY</codeph> clause is optional. The
+ window clause is not allowed.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+
+ <p>
+ Sometimes used as an alternative to doing a self-join.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/added_in_20"/>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+ <p>
+ The following example uses the same stock data created in <xref href="#window_clause"/>. For each day, the
+ query prints the closing price alongside the previous day's closing price. The first row for each stock
+ symbol has no previous row, so that <codeph>LAG()</codeph> value is <codeph>NULL</codeph>.
+ </p>
+
+<codeblock>select stock_symbol, closing_date, closing_price,
+ lag(closing_price,1) over (partition by stock_symbol order by closing_date) as "yesterday closing"
+ from stock_ticker
+ order by closing_date;
++--------------+---------------------+---------------+-------------------+
+| stock_symbol | closing_date | closing_price | yesterday closing |
++--------------+---------------------+---------------+-------------------+
+| JDR | 2014-09-13 00:00:00 | 12.86 | NULL |
+| JDR | 2014-09-14 00:00:00 | 12.89 | 12.86 |
+| JDR | 2014-09-15 00:00:00 | 12.94 | 12.89 |
+| JDR | 2014-09-16 00:00:00 | 12.55 | 12.94 |
+| JDR | 2014-09-17 00:00:00 | 14.03 | 12.55 |
+| JDR | 2014-09-18 00:00:00 | 14.75 | 14.03 |
+| JDR | 2014-09-19 00:00:00 | 13.98 | 14.75 |
++--------------+---------------------+---------------+-------------------+
+</codeblock>
+
+ <p>
+ The following example does an arithmetic operation between the current row and a value from the previous
+ row, to produce a delta value for each day. This example also demonstrates how <codeph>ORDER BY</codeph>
+ works independently in the different parts of the query. The <codeph>ORDER BY closing_date</codeph> in the
+ <codeph>OVER</codeph> clause makes the query analyze the rows in chronological order. Then the outer query
+ block uses <codeph>ORDER BY closing_date DESC</codeph> to present the results with the most recent date
+ first.
+ </p>
+
+<codeblock>select stock_symbol, closing_date, closing_price,
+ cast(
+ closing_price - lag(closing_price,1) over
+ (partition by stock_symbol order by closing_date)
+ as decimal(8,2)
+ )
+ as "change from yesterday"
+ from stock_ticker
+ order by closing_date desc;
++--------------+---------------------+---------------+-----------------------+
+| stock_symbol | closing_date | closing_price | change from yesterday |
++--------------+---------------------+---------------+-----------------------+
+| JDR | 2014-09-19 00:00:00 | 13.98 | -0.76 |
+| JDR | 2014-09-18 00:00:00 | 14.75 | 0.72 |
+| JDR | 2014-09-17 00:00:00 | 14.03 | 1.47 |
+| JDR | 2014-09-16 00:00:00 | 12.55 | -0.38 |
+| JDR | 2014-09-15 00:00:00 | 12.94 | 0.04 |
+| JDR | 2014-09-14 00:00:00 | 12.89 | 0.03 |
+| JDR | 2014-09-13 00:00:00 | 12.86 | NULL |
++--------------+---------------------+---------------+-----------------------+
+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ This function is the converse of <xref href="impala_analytic_functions.xml#lead"/>.
+ </p>
+
+ </conbody>
+
+ </concept>
+
+ <concept rev="2.0.0" id="last_value">
+
+ <title>LAST_VALUE() Function</title>
+
+ <conbody>
+
+ <p>
+ Returns the expression value from the last row in the window. This same value is repeated for all result
+ rows for the group. The return value is <codeph>NULL</codeph> if the input expression is
+ <codeph>NULL</codeph>.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock>LAST_VALUE(<varname>expr</varname>) OVER([<varname>partition_by_clause</varname>] <varname>order_by_clause</varname> [<varname>window_clause</varname>])</codeblock>
+
+ <p>
+ The <codeph>PARTITION BY</codeph> clause is optional. The <codeph>ORDER BY</codeph> clause is required. The
+ window clause is optional.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+
+ <p>
+ If any duplicate values occur in the tuples evaluated by the <codeph>ORDER BY</codeph> clause, the result
+ of this function is not deterministic. Consider adding additional <codeph>ORDER BY</codeph> columns to
+ ensure consistent ordering.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/added_in_20"/>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+ <p>
+ The following example uses the same <codeph>MAIL_MERGE</codeph> table as in the example for
+ <xref href="impala_analytic_functions.xml#first_value"/>. Because the default window when <codeph>ORDER
+ BY</codeph> is used is <codeph>RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW</codeph>, the query requires the
+ <codeph>UNBOUNDED FOLLOWING</codeph> to look ahead to subsequent rows and find the last value for each
+ country.
+ </p>
+
+<codeblock>select country, name,
+ last_value(greeting) over (
+ partition by country order by name, greeting
+ rows between unbounded preceding and unbounded following
+ ) as greeting
+ from mail_merge
++---------+---------+--------------+
+| country | name | greeting |
++---------+---------+--------------+
+| Germany | Boris | Guten morgen |
+| Germany | Michael | Guten morgen |
+| Sweden | Bjorn | Tja |
+| Sweden | Mats | Tja |
+| USA | John | Hello |
+| USA | Pete | Hello |
++---------+---------+--------------+
+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ <xref href="impala_analytic_functions.xml#first_value"/>
+ </p>
+
+ </conbody>
+
+ </concept>
+
+ <concept rev="2.0.0" id="lead">
+
+ <title>LEAD() Function</title>
+
+ <conbody>
+
+ <p>
+ This function returns the value of an expression using column values from a following row. You specify an
+ integer offset, which designates a row position some number of rows after the current row. Any column
+ references in the expression argument refer to column values from that later row. Typically, the table
+ contains a time sequence or numeric sequence column that clearly distinguishes the ordering of the rows.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock>LEAD (<varname>expr</varname> [, <varname>offset</varname>] [, <varname>default</varname>])
+ OVER ([<varname>partition_by_clause</varname>] <varname>order_by_clause</varname>)</codeblock>
+
+ <p>
+ The <codeph>ORDER BY</codeph> clause is required. The <codeph>PARTITION BY</codeph> clause is optional. The
+ window clause is not allowed.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+
+ <p>
+ Sometimes used as an alternative to doing a self-join.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/added_in_20"/>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+ <p>
+ The following example uses the same stock data created in <xref href="impala_analytic_functions.xml#window_clause"/>. The query analyzes
+ the closing price for a stock symbol, and for each day evaluates if the closing price for the following day
+ is higher or lower.
+ </p>
+
+<codeblock>select stock_symbol, closing_date, closing_price,
+ case
+ (lead(closing_price,1)
+ over (partition by stock_symbol order by closing_date)
+ - closing_price) > 0
+ when true then "higher"
+ when false then "flat or lower"
+ end as "trending"
+from stock_ticker
+ order by closing_date;
++--------------+---------------------+---------------+---------------+
+| stock_symbol | closing_date | closing_price | trending |
++--------------+---------------------+---------------+---------------+
+| JDR | 2014-09-13 00:00:00 | 12.86 | higher |
+| JDR | 2014-09-14 00:00:00 | 12.89 | higher |
+| JDR | 2014-09-15 00:00:00 | 12.94 | flat or lower |
+| JDR | 2014-09-16 00:00:00 | 12.55 | higher |
+| JDR | 2014-09-17 00:00:00 | 14.03 | higher |
+| JDR | 2014-09-18 00:00:00 | 14.75 | flat or lower |
+| JDR | 2014-09-19 00:00:00 | 13.98 | NULL |
++--------------+---------------------+---------------+---------------+
+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ This function is the converse of <xref href="impala_analytic_functions.xml#lag"/>.
+ </p>
+
+ </conbody>
+
+ </concept>
+
+ <concept id="max_analytic">
+
+ <title>MAX() Function - Analytic Context</title>
+
+ <conbody>
+
+ <p>
+ You can include an <codeph>OVER</codeph> clause with a call to this function to use it as an analytic
+ function. See <xref href="impala_max.xml#max"/> for details and examples.
+ </p>
+
+ </conbody>
+
+ </concept>
+
+ <concept id="min_analytic">
+
+ <title>MIN() Function - Analytic Context</title>
+
+ <conbody>
+
+ <p>
+ You can include an <codeph>OVER</codeph> clause with a call to this function to use it as an analytic
+ function. See <xref href="impala_min.xml#min"/> for details and examples.
+ </p>
+
+ </conbody>
+
+ </concept>
+
+ <concept audience="Cloudera" rev="2.x.x" id="nth_value">
+
+ <title>NTH_VALUE() Function</title>
+
+ <conbody>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+ </conbody>
+
+ </concept>
+
+ <concept rev="2.3.0" id="ntile">
+
+ <title>NTILE() Function (CDH 5.5 or higher only)</title>
+
+ <conbody>
+
+ <p>
+ Returns the <q>bucket number</q> associated with each row, between 1 and the value of an expression. For
+ example, creating 100 buckets puts the lowest 1% of values in the first bucket, while creating 10 buckets
+ puts the lowest 10% of values in the first bucket. Each partition can have a different number of buckets.
+<!-- What's the syntax or data distribution that would create a different number of buckets per partition? -->
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock>NTILE (<varname>expr</varname> [, <varname>offset</varname> ...])
+ OVER ([<varname>partition_by_clause</varname>] <varname>order_by_clause</varname>)</codeblock>
+
+ <p>
+ The <codeph>ORDER BY</codeph> clause is required. The <codeph>PARTITION BY</codeph> clause is optional. The
+ window clause is not allowed.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+
+ <p>
+ The <q>ntile</q> name is derived from the practice of dividing result sets into fourths (quartile), tenths
+ (decile), and so on. The <codeph>NTILE()</codeph> function divides the result set based on an arbitrary
+ percentile value.
+ </p>
+
+ <p>
+ The number of buckets must be a positive integer.
+ </p>
+
+ <p>
+ The number of items in each bucket is identical or almost so, varying by at most 1. If the number of items
+ does not divide evenly between the buckets, the remaining N items are divided evenly among the first N
+ buckets.
+ </p>
+
+ <p>
+ If the number of buckets N is greater than the number of input rows in the partition, then the first N
+ buckets each contain one item, and the remaining buckets are empty.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+ <p>
+ The following example divides groups of animals into 4 buckets based on their weight. The
+ <codeph>ORDER BY ... DESC</codeph> clause in the <codeph>OVER()</codeph> clause means that the heaviest 25%
+ are in the first group, and the lightest 25% are in the fourth group. (The <codeph>ORDER BY</codeph> in the
+ outermost part of the query shows how you can order the final result set independently from the order in
+ which the rows are evaluated by the <codeph>OVER()</codeph> clause.) Because there are 9 rows in the group,
+ divided into 4 buckets, the first bucket receives the extra item.
+ </p>
+
+<codeblock>create table animals (name string, kind string, kilos decimal(9,3));
+
+insert into animals values
+ ('Elephant', 'Mammal', 4000), ('Giraffe', 'Mammal', 1200), ('Mouse', 'Mammal', 0.020),
+ ('Condor', 'Bird', 15), ('Horse', 'Mammal', 500), ('Owl', 'Bird', 2.5),
+ ('Ostrich', 'Bird', 145), ('Polar bear', 'Mammal', 700), ('Housecat', 'Mammal', 5);
+
+select name, ntile(4) over (order by kilos desc) as quarter
+ from animals
+order by quarter desc;
++------------+---------+
+| name | quarter |
++------------+---------+
+| Owl | 4 |
+| Mouse | 4 |
+| Condor | 3 |
+| Housecat | 3 |
+| Horse | 2 |
+| Ostrich | 2 |
+| Elephant | 1 |
+| Giraffe | 1 |
+| Polar bear | 1 |
++------------+---------+
+</codeblock>
+
+ <p>
+ The following examples show how the <codeph>PARTITION BY</codeph> clause works for the
+ <codeph>NTILE()</codeph> function. Here, we divide each kind of animal (mammal or bird) into 2 buckets,
+ the heavier half and the lighter half.
+ </p>
+
+<codeblock>select name, kind, ntile(2) over (partition by kind order by kilos desc) as half
+ from animals
+order by kind;
++------------+--------+------+
+| name | kind | half |
++------------+--------+------+
+| Ostrich | Bird | 1 |
+| Condor | Bird | 1 |
+| Owl | Bird | 2 |
+| Elephant | Mammal | 1 |
+| Giraffe | Mammal | 1 |
+| Polar bear | Mammal | 1 |
+| Horse | Mammal | 2 |
+| Housecat | Mammal | 2 |
+| Mouse | Mammal | 2 |
++------------+--------+------+
+</codeblock>
+
+ <p>
+ Again, the result set can be ordered independently
+ from the analytic evaluation. This next example lists all the animals heaviest to lightest,
+ showing that elephant and giraffe are in the <q>top half</q> of mammals by weight, while
+ housecat and mouse are in the <q>bottom half</q>.
+ </p>
+
+<codeblock>select name, kind, ntile(2) over (partition by kind order by kilos desc) as half
+ from animals
+order by kilos desc;
++------------+--------+------+
+| name | kind | half |
++------------+--------+------+
+| Elephant | Mammal | 1 |
+| Giraffe | Mammal | 1 |
+| Polar bear | Mammal | 1 |
+| Horse | Mammal | 2 |
+| Ostrich | Bird | 1 |
+| Condor | Bird | 1 |
+| Housecat | Mammal | 2 |
+| Owl | Bird | 2 |
+| Mouse | Mammal | 2 |
++------------+--------+------+
+</codeblock>
+
+ </conbody>
+
+ </concept>
+
+ <concept rev="2.3.0" id="percent_rank">
+
+ <title>PERCENT_RANK() Function (CDH 5.5 or higher only)</title>
+
+ <conbody>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock>PERCENT_RANK (<varname>expr</varname>)
+ OVER ([<varname>partition_by_clause</varname>] <varname>order_by_clause</varname>)
+</codeblock>
+
+ <p>
+ Calculates the rank, expressed as a percentage, of each row within a group of rows.
+ If <codeph>rank</codeph> is the value for that same row from the <codeph>RANK()</codeph> function (from 1 to the total number of rows in the partition group),
+ then the <codeph>PERCENT_RANK()</codeph> value is calculated as <codeph>(<varname>rank</varname> - 1) / (<varname>rows_in_group</varname> - 1)</codeph> .
+ If there is only a single item in the partition group, its <codeph>PERCENT_RANK()</codeph> value is 0.
+ </p>
+
+ <p>
+ The <codeph>ORDER BY</codeph> clause is required. The <codeph>PARTITION BY</codeph> clause is optional. The
+ window clause is not allowed.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+
+ <p>
+ This function is similar to the <codeph>RANK</codeph> and <codeph>CUME_DIST()</codeph> functions: it returns an ascending sequence representing the position of each
+ row within the rows of the same partition group. The actual numeric sequence is calculated differently,
+ and the handling of duplicate (tied) values is different.
+ </p>
+
+ <p>
+ The return values range from 0 to 1 inclusive.
+ The first row in each partition group always has the value 0.
+ A <codeph>NULL</codeph> value is considered the lowest possible value.
+ In the case of duplicate input values, all the corresponding rows in the result set
+ have an identical value: the lowest <codeph>PERCENT_RANK()</codeph> value of those
+ tied rows. (In contrast to <codeph>CUME_DIST()</codeph>, where all tied rows have
+ the highest <codeph>CUME_DIST()</codeph> value.)
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+ <p>
+ The following example uses the same <codeph>ANIMALS</codeph> table as the examples for <codeph>CUME_DIST()</codeph>
+ and <codeph>NTILE()</codeph>, with a few additional rows to illustrate the results where some values are
+ <codeph>NULL</codeph> or there is only a single row in a partition group.
+ </p>
+
+<codeblock>insert into animals values ('Komodo dragon', 'Reptile', 70);
+insert into animals values ('Unicorn', 'Mythical', NULL);
+insert into animals values ('Fire-breathing dragon', 'Mythical', NULL);
+</codeblock>
+
+ <p>
+ As with <codeph>CUME_DIST()</codeph>, there is an ascending sequence for each kind of animal.
+ For example, the <q>Birds</q> and <q>Mammals</q> rows each have a <codeph>PERCENT_RANK()</codeph> sequence
+ that ranges from 0 to 1.
+ The <q>Reptile</q> row has a <codeph>PERCENT_RANK()</codeph> of 0 because that partition group contains only a single item.
+ Both <q>Mythical</q> animals have a <codeph>PERCENT_RANK()</codeph> of 0 because
+ a <codeph>NULL</codeph> is considered the lowest value within its partition group.
+ </p>
+
+<codeblock>select name, kind, percent_rank() over (partition by kind order by kilos) from animals;
++-----------------------+----------+--------------------------+
+| name | kind | percent_rank() OVER(...) |
++-----------------------+----------+--------------------------+
+| Mouse | Mammal | 0 |
+| Housecat | Mammal | 0.2 |
+| Horse | Mammal | 0.4 |
+| Polar bear | Mammal | 0.6 |
+| Giraffe | Mammal | 0.8 |
+| Elephant | Mammal | 1 |
+| Komodo dragon | Reptile | 0 |
+| Owl | Bird | 0 |
+| California Condor | Bird | 0.25 |
+| Andean Condor | Bird | 0.25 |
+| Condor | Bird | 0.25 |
+| Ostrich | Bird | 1 |
+| Fire-breathing dragon | Mythical | 0 |
+| Unicorn | Mythical | 0 |
++-----------------------+----------+--------------------------+
+</codeblock>
+ </conbody>
+
+ </concept>
+
+ <concept rev="2.0.0" id="rank">
+
+ <title>RANK() Function</title>
+
+ <conbody>
+
+ <p>
+ Returns an ascending sequence of integers, starting with 1. The output sequence produces duplicate integers
+ for duplicate values of the <codeph>ORDER BY</codeph> expressions. After generating duplicate output values
+ for the <q>tied</q> input values, the function increments the sequence by the number of tied values.
+ Therefore, the sequence contains both duplicates and gaps when the input contains duplicates. Starts the
+ sequence over for each group produced by the <codeph>PARTITION BY</codeph> clause.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock>RANK() OVER([<varname>partition_by_clause</varname>] <varname>order_by_clause</varname>)</codeblock>
+
+ <p>
+ The <codeph>PARTITION BY</codeph> clause is optional. The <codeph>ORDER BY</codeph> clause is required. The
+ window clause is not allowed.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+
+<!-- Make a little tutorial to show these 3 functions side-by-side and illustrate their difference. -->
+
+ <p>
+ Often used for top-N and bottom-N queries. For example, it could produce a <q>top 10</q> report including
+ several items that were tied for 10th place.
+ </p>
+
+ <p>
+ Similar to <codeph>ROW_NUMBER</codeph> and <codeph>DENSE_RANK</codeph>. These functions differ in how they
+ treat duplicate combinations of values.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/added_in_20"/>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+ <p>
+ The following example demonstrates how the <codeph>RANK()</codeph> function identifies where each value
+ <q>places</q> in the result set, producing the same result for duplicate values, and skipping values in the
+ sequence to account for the number of duplicates. For example, when results are ordered by the
+ <codeph>X</codeph> column, both <codeph>1</codeph> values are tied for first; both <codeph>2</codeph>
+ values are tied for third; and so on.
+ </p>
+
+<codeblock>select x, rank() over(order by x) as rank, property from int_t;
++----+------+----------+
+| x | rank | property |
++----+------+----------+
+| 1 | 1 | square |
+| 1 | 1 | odd |
+| 2 | 3 | even |
+| 2 | 3 | prime |
+| 3 | 5 | prime |
+| 3 | 5 | odd |
+| 4 | 7 | even |
+| 4 | 7 | square |
+| 5 | 9 | odd |
+| 5 | 9 | prime |
+| 6 | 11 | even |
+| 6 | 11 | perfect |
+| 7 | 13 | lucky |
+| 7 | 13 | lucky |
+| 7 | 13 | lucky |
+| 7 | 13 | odd |
+| 7 | 13 | prime |
+| 8 | 18 | even |
+| 9 | 19 | square |
+| 9 | 19 | odd |
+| 10 | 21 | round |
+| 10 | 21 | even |
++----+------+----------+
+</codeblock>
+
+ <p>
+ The following examples show how the <codeph>RANK()</codeph> function is affected by the
+ <codeph>PARTITION BY</codeph> clause within the <codeph>OVER()</codeph> clause.
+ </p>
+
+ <p>
+ Partitioning by the <codeph>PROPERTY</codeph> column groups all the even, odd, and so on values together,
+ and <codeph>RANK()</codeph> returns the place of each value within the group, producing several ascending
+ sequences.
+ </p>
+
+<codeblock>select x, rank() over(partition by property order by x) as rank, property from int_t;
++----+------+----------+
+| x | rank | property |
++----+------+----------+
+| 2 | 1 | even |
+| 4 | 2 | even |
+| 6 | 3 | even |
+| 8 | 4 | even |
+| 10 | 5 | even |
+| 7 | 1 | lucky |
+| 7 | 1 | lucky |
+| 7 | 1 | lucky |
+| 1 | 1 | odd |
+| 3 | 2 | odd |
+| 5 | 3 | odd |
+| 7 | 4 | odd |
+| 9 | 5 | odd |
+| 6 | 1 | perfect |
+| 2 | 1 | prime |
+| 3 | 2 | prime |
+| 5 | 3 | prime |
+| 7 | 4 | prime |
+| 10 | 1 | round |
+| 1 | 1 | square |
+| 4 | 2 | square |
+| 9 | 3 | square |
++----+------+----------+
+</codeblock>
+
+ <p>
+ Partitioning by the <codeph>X</codeph> column groups all the duplicate numbers together and returns the
+ place of each value within the group; because each value occurs only 1 or 2 times,
+ <codeph>RANK()</codeph> designates each <codeph>X</codeph> value as either first or second within its
+ group.
+ </p>
+
+<codeblock>select x, rank() over(partition by x order by property) as rank, property from int_t;
++----+------+----------+
+| x | rank | property |
++----+------+----------+
+| 1 | 1 | odd |
+| 1 | 2 | square |
+| 2 | 1 | even |
+| 2 | 2 | prime |
+| 3 | 1 | odd |
+| 3 | 2 | prime |
+| 4 | 1 | even |
+| 4 | 2 | square |
+| 5 | 1 | odd |
+| 5 | 2 | prime |
+| 6 | 1 | even |
+| 6 | 2 | perfect |
+| 7 | 1 | lucky |
+| 7 | 1 | lucky |
+| 7 | 1 | lucky |
+| 7 | 4 | odd |
+| 7 | 5 | prime |
+| 8 | 1 | even |
+| 9 | 1 | odd |
+| 9 | 2 | square |
+| 10 | 1 | even |
+| 10 | 2 | round |
++----+------+----------+
+</codeblock>
+
+ <p>
+ The following example shows how a magazine might prepare a list of history's wealthiest people. Croesus and
+ Midas are tied for second, then Crassus is fourth.
+ </p>
+
+<codeblock>select rank() over (order by net_worth desc) as rank, name, net_worth from wealth order by rank, name;
++------+---------+---------------+
+| rank | name | net_worth |
++------+---------+---------------+
+| 1 | Solomon | 2000000000.00 |
+| 2 | Croesus | 1000000000.00 |
+| 2 | Midas | 1000000000.00 |
+| 4 | Crassus | 500000000.00 |
+| 5 | Scrooge | 80000000.00 |
++------+---------+---------------+
+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ <xref href="impala_analytic_functions.xml#dense_rank"/>,
+ <xref href="impala_analytic_functions.xml#row_number"/>
+ </p>
+
+ </conbody>
+
+ </concept>
+
+ <concept rev="2.0.0" id="row_number">
+
+ <title>ROW_NUMBER() Function</title>
+
+ <conbody>
+
+ <p>
+ Returns an ascending sequence of integers, starting with 1. Starts the sequence over for each group
+ produced by the <codeph>PARTITION BY</codeph> clause. The output sequence includes different values for
+ duplicate input values. Therefore, the sequence never contains any duplicates or gaps, regardless of
+ duplicate input values.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock>ROW_NUMBER() OVER([<varname>partition_by_clause</varname>] <varname>order_by_clause</varname>)</codeblock>
+
+ <p>
+ The <codeph>ORDER BY</codeph> clause is required. The <codeph>PARTITION BY</codeph> clause is optional. The
+ window clause is not allowed.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+
+ <p>
+ Often used for top-N and bottom-N queries where the input values are known to be unique, or precisely N
+ rows are needed regardless of duplicate values.
+ </p>
+
+ <p>
+ Because its result value is different for each row in the result set (when used without a <codeph>PARTITION
+ BY</codeph> clause), <codeph>ROW_NUMBER()</codeph> can be used to synthesize unique numeric ID values, for
+ example for result sets involving unique values or tuples.
+ </p>
+
+ <p>
+ Similar to <codeph>RANK</codeph> and <codeph>DENSE_RANK</codeph>. These functions differ in how they treat
+ duplicate combinations of values.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/added_in_20"/>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+ <p>
+ The following example demonstrates how <codeph>ROW_NUMBER()</codeph> produces a continuous numeric
+ sequence, even though some values of <codeph>X</codeph> are repeated.
+ </p>
+
+<codeblock>select x, row_number() over(order by x, property) as row_number, property from int_t;
++----+------------+----------+
+| x | row_number | property |
++----+------------+----------+
+| 1 | 1 | odd |
+| 1 | 2 | square |
+| 2 | 3 | even |
+| 2 | 4 | prime |
+| 3 | 5 | odd |
+| 3 | 6 | prime |
+| 4 | 7 | even |
+| 4 | 8 | square |
+| 5 | 9 | odd |
+| 5 | 10 | prime |
+| 6 | 11 | even |
+| 6 | 12 | perfect |
+| 7 | 13 | lucky |
+| 7 | 14 | lucky |
+| 7 | 15 | lucky |
+| 7 | 16 | odd |
+| 7 | 17 | prime |
+| 8 | 18 | even |
+| 9 | 19 | odd |
+| 9 | 20 | square |
+| 10 | 21 | even |
+| 10 | 22 | round |
++----+------------+----------+
+</codeblock>
+
+ <p>
+ The following example shows how a financial institution might assign customer IDs to some of history's
+ wealthiest figures. Although two of the people have identical net worth figures, unique IDs are required
+ for this purpose. <codeph>ROW_NUMBER()</codeph> produces a sequence of five different values for the five
+ input rows.
+ </p>
+
+<codeblock>select row_number() over (order by net_worth desc) as account_id, name, net_worth
+ from wealth order by account_id, name;
++------------+---------+---------------+
+| account_id | name | net_worth |
++------------+---------+---------------+
+| 1 | Solomon | 2000000000.00 |
+| 2 | Croesus | 1000000000.00 |
+| 3 | Midas | 1000000000.00 |
+| 4 | Crassus | 500000000.00 |
+| 5 | Scrooge | 80000000.00 |
++------------+---------+---------------+
+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ <xref href="impala_analytic_functions.xml#rank"/>, <xref href="impala_analytic_functions.xml#dense_rank"/>
+ </p>
+
+ </conbody>
+
+ </concept>
+
+ <concept id="sum_analytic">
+
+ <title>SUM() Function - Analytic Context</title>
+
+ <conbody>
+
+ <p>
+ You can include an <codeph>OVER</codeph> clause with a call to this function to use it as an analytic
+ function. See <xref href="impala_sum.xml#sum"/> for details and examples.
+ </p>
+
+ </conbody>
+
+ </concept>
+
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_appx_count_distinct.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_appx_count_distinct.xml b/docs/topics/impala_appx_count_distinct.xml
new file mode 100644
index 0000000..31a9679
--- /dev/null
+++ b/docs/topics/impala_appx_count_distinct.xml
@@ -0,0 +1,77 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept rev="2.0.0" id="appx_count_distinct">
+
+ <title>APPX_COUNT_DISTINCT Query Option</title>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="Impala Query Options"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">APPX_COUNT_DISTINCT query option</indexterm>
+ Allows multiple <codeph>COUNT(DISTINCT)</codeph> operations within a single query, by internally rewriting
+ each <codeph>COUNT(DISTINCT)</codeph> to use the <codeph>NDV()</codeph> function. The resulting count is
+ approximate rather than precise.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/type_boolean"/>
+
+ <p conref="../shared/impala_common.xml#common/default_false_0"/>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+ <p>
+ The following examples show how the <codeph>APPX_COUNT_DISTINCT</codeph> lets you work around the restriction
+ where a query can only evaluate <codeph>COUNT(DISTINCT <varname>col_name</varname>)</codeph> for a single
+ column. By default, you can count the distinct values of one column or another, but not both in a single
+ query:
+ </p>
+
+<codeblock>[localhost:21000] > select count(distinct x) from int_t;
++-------------------+
+| count(distinct x) |
++-------------------+
+| 10 |
++-------------------+
+[localhost:21000] > select count(distinct property) from int_t;
++--------------------------+
+| count(distinct property) |
++--------------------------+
+| 7 |
++--------------------------+
+[localhost:21000] > select count(distinct x), count(distinct property) from int_t;
+ERROR: AnalysisException: all DISTINCT aggregate functions need to have the same set of parameters
+as count(DISTINCT x); deviating function: count(DISTINCT property)
+</codeblock>
+
+ <p>
+ When you enable the <codeph>APPX_COUNT_DISTINCT</codeph> query option, now the query with multiple
+ <codeph>COUNT(DISTINCT)</codeph> works. The reason this behavior requires a query option is that each
+ <codeph>COUNT(DISTINCT)</codeph> is rewritten internally to use the <codeph>NDV()</codeph> function instead,
+ which provides an approximate result rather than a precise count.
+ </p>
+
+<codeblock>[localhost:21000] > set APPX_COUNT_DISTINCT=true;
+[localhost:21000] > select count(distinct x), count(distinct property) from int_t;
++-------------------+--------------------------+
+| count(distinct x) | count(distinct property) |
++-------------------+--------------------------+
+| 10 | 7 |
++-------------------+--------------------------+
+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ <xref href="impala_count.xml#count"/>,
+ <xref href="impala_distinct.xml#distinct"/>,
+ <xref href="impala_ndv.xml#ndv"/>
+ </p>
+
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_appx_median.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_appx_median.xml b/docs/topics/impala_appx_median.xml
new file mode 100644
index 0000000..d874ead
--- /dev/null
+++ b/docs/topics/impala_appx_median.xml
@@ -0,0 +1,122 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept rev="1.2.1" id="appx_median">
+
+ <title>APPX_MEDIAN Function</title>
+ <titlealts><navtitle>APPX_MEDIAN</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Impala Functions"/>
+ <data name="Category" value="Aggregate Functions"/>
+ <data name="Category" value="Querying"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">appx_median() function</indexterm>
+ An aggregate function that returns a value that is approximately the median (midpoint) of values in the set
+ of input values.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock>APPX_MEDIAN([DISTINCT | ALL] <varname>expression</varname>)
+</codeblock>
+
+ <p>
+ This function works with any input type, because the only requirement is that the type supports less-than and
+ greater-than comparison operators.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+
+ <p>
+ Because the return value represents the estimated midpoint, it might not reflect the precise midpoint value,
+ especially if the cardinality of the input values is very high. If the cardinality is low (up to
+ approximately 20,000), the result is more accurate because the sampling considers all or almost all of the
+ different values.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/return_type_same_except_string"/>
+
+ <p>
+ The return value is always the same as one of the input values, not an <q>in-between</q> value produced by
+ averaging.
+ </p>
+
+<!-- <p conref="/Content/impala_common_xi44078.xml#common/restrictions_sliding_window"/> -->
+
+ <p conref="../shared/impala_common.xml#common/restrictions_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/analytic_not_allowed_caveat"/>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+ <p>
+ The following example uses a table of a million random floating-point numbers ranging up to approximately
+ 50,000. The average is approximately 25,000. Because of the random distribution, we would expect the median
+ to be close to this same number. Computing the precise median is a more intensive operation than computing
+ the average, because it requires keeping track of every distinct value and how many times each occurs. The
+ <codeph>APPX_MEDIAN()</codeph> function uses a sampling algorithm to return an approximate result, which in
+ this case is close to the expected value. To make sure that the value is not substantially out of range due
+ to a skewed distribution, subsequent queries confirm that there are approximately 500,000 values higher than
+ the <codeph>APPX_MEDIAN()</codeph> value, and approximately 500,000 values lower than the
+ <codeph>APPX_MEDIAN()</codeph> value.
+ </p>
+
+<codeblock>[localhost:21000] > select min(x), max(x), avg(x) from million_numbers;
++-------------------+-------------------+-------------------+
+| min(x) | max(x) | avg(x) |
++-------------------+-------------------+-------------------+
+| 4.725693727250069 | 49994.56852674231 | 24945.38563793553 |
++-------------------+-------------------+-------------------+
+[localhost:21000] > select appx_median(x) from million_numbers;
++----------------+
+| appx_median(x) |
++----------------+
+| 24721.6 |
++----------------+
+[localhost:21000] > select count(x) as higher from million_numbers where x > (select appx_median(x) from million_numbers);
++--------+
+| higher |
++--------+
+| 502013 |
++--------+
+[localhost:21000] > select count(x) as lower from million_numbers where x < (select appx_median(x) from million_numbers);
++--------+
+| lower |
++--------+
+| 497987 |
++--------+
+</codeblock>
+
+ <p>
+ The following example computes the approximate median using a subset of the values from the table, and then
+ confirms that the result is a reasonable estimate for the midpoint.
+ </p>
+
+<codeblock>[localhost:21000] > select appx_median(x) from million_numbers where x between 1000 and 5000;
++-------------------+
+| appx_median(x) |
++-------------------+
+| 3013.107787358159 |
++-------------------+
+[localhost:21000] > select count(x) as higher from million_numbers where x between 1000 and 5000 and x > 3013.107787358159;
++--------+
+| higher |
++--------+
+| 37692 |
++--------+
+[localhost:21000] > select count(x) as lower from million_numbers where x between 1000 and 5000 and x < 3013.107787358159;
++-------+
+| lower |
++-------+
+| 37089 |
++-------+
+</codeblock>
+ </conbody>
+</concept>
[17/22] incubator-impala git commit: First try at porting over the
source files necessary for the Impala SQL Reference.
Posted by jr...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_complex_types.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_complex_types.xml b/docs/topics/impala_complex_types.xml
new file mode 100644
index 0000000..9fe7362
--- /dev/null
+++ b/docs/topics/impala_complex_types.xml
@@ -0,0 +1,2725 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept rev="2.3.0" id="complex_types">
+
+ <title id="nested_types">Complex Types (CDH 5.5 and higher only)</title>
+
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="Impala Data Types"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">complex types</indexterm>
+
+ <indexterm audience="Cloudera">nested types</indexterm>
+ <term>Complex types</term> (also referred to as <term>nested types</term>) let you represent multiple data values within a single
+ row/column position. They differ from the familiar column types such as <codeph>BIGINT</codeph> and <codeph>STRING</codeph>, known as
+ <term>scalar types</term> or <term>primitive types</term>, which represent a single data value within a given row/column position.
+ Impala supports the complex types <codeph>ARRAY</codeph>, <codeph>MAP</codeph>, and <codeph>STRUCT</codeph> in Impala 2.3 / CDH 5.5
+ and higher. The Hive <codeph>UNION</codeph> type is not currently supported.
+ </p>
+
+ <p outputclass="toc inpage"/>
+
+ <p>
+ Once you understand the basics of complex types, refer to the individual type topics when you need to refresh your memory about syntax
+ and examples:
+ </p>
+
+ <ul>
+ <li>
+ <xref href="impala_array.xml#array"/>
+ </li>
+
+ <li>
+ <xref href="impala_struct.xml#struct"/>
+ </li>
+
+ <li>
+ <xref href="impala_map.xml#map"/>
+ </li>
+ </ul>
+
+ </conbody>
+
+ <concept id="complex_types_benefits">
+
+ <title>Benefits of Impala Complex Types</title>
+
+ <conbody>
+
+ <p>
+ The reasons for using Impala complex types include the following:
+ </p>
+
+ <ul>
+ <li>
+ <p>
+            You already have data produced by Hive or another non-Impala component that uses the complex type column names. You might need to
+ convert the underlying data to Parquet to use it with Impala.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ Your data model originates with a non-SQL programming language or a NoSQL data management system. For example, if you are
+ representing Python data expressed as nested lists, dictionaries, and tuples, those data structures correspond closely to Impala
+ <codeph>ARRAY</codeph>, <codeph>MAP</codeph>, and <codeph>STRUCT</codeph> types.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ Your analytic queries involving multiple tables could benefit from greater locality during join processing. By packing more
+ related data items within each HDFS data block, complex types let join queries avoid the network overhead of the traditional
+ Hadoop shuffle or broadcast join techniques.
+ </p>
+ </li>
+ </ul>
+
+ <p>
+ The Impala complex type support produces result sets with all scalar values, and the scalar components of complex types can be used
+ with all SQL clauses, such as <codeph>GROUP BY</codeph>, <codeph>ORDER BY</codeph>, all kinds of joins, subqueries, and inline
+ views. The ability to process complex type data entirely in SQL reduces the need to write application-specific code in Java or other
+ programming languages to deconstruct the underlying data structures.
+ </p>
+
+ </conbody>
+
+ </concept>
+
+ <concept id="complex_types_overview">
+
+ <title>Overview of Impala Complex Types</title>
+
+ <conbody>
+
+ <p>
+<!--
+ Each <codeph>ARRAY</codeph>, <codeph>MAP</codeph>, or <codeph>STRUCT</codeph> column can include multiple instances of scalar types
+ such as <codeph>BIGINT</codeph> and <codeph>STRING</codeph>.
+-->
+ The <codeph>ARRAY</codeph> and <codeph>MAP</codeph> types are closely related: they represent collections with arbitrary numbers of
+ elements, where each element is the same type. In contrast, <codeph>STRUCT</codeph> groups together a fixed number of items into a
+ single element. The parts of a <codeph>STRUCT</codeph> element (the <term>fields</term>) can be of different types, and each field
+ has a name.
+ </p>
+
+ <p>
+ The elements of an <codeph>ARRAY</codeph> or <codeph>MAP</codeph>, or the fields of a <codeph>STRUCT</codeph>, can also be other
+ complex types. You can construct elaborate data structures with up to 100 levels of nesting. For example, you can make an
+ <codeph>ARRAY</codeph> whose elements are <codeph>STRUCT</codeph>s. Within each <codeph>STRUCT</codeph>, you can have some fields
+ that are <codeph>ARRAY</codeph>, <codeph>MAP</codeph>, or another kind of <codeph>STRUCT</codeph>. The Impala documentation uses the
+ terms complex and nested types interchangeably; for simplicity, it primarily uses the term complex types, to encompass all the
+ properties of these types.
+ </p>
+
+ <p>
+ When visualizing your data model in familiar SQL terms, you can think of each <codeph>ARRAY</codeph> or <codeph>MAP</codeph> as a
+ miniature table, and each <codeph>STRUCT</codeph> as a row within such a table. By default, the table represented by an
+ <codeph>ARRAY</codeph> has two columns, <codeph>POS</codeph> to represent ordering of elements, and <codeph>ITEM</codeph>
+ representing the value of each element. Likewise, by default, the table represented by a <codeph>MAP</codeph> encodes key-value
+ pairs, and therefore has two columns, <codeph>KEY</codeph> and <codeph>VALUE</codeph>.
+<!--
+ When you use a <codeph>STRUCT</codeph> as an
+ <codeph>ARRAY</codeph> element or the <codeph>VALUE</codeph> part of a <codeph>MAP</codeph>, the field names of the
+ <codeph>STRUCT</codeph> become additional columns in the result set.
+-->
+ </p>
+
+ <p>
+ The <codeph>ITEM</codeph> and <codeph>VALUE</codeph> names are only required for the very simplest kinds of <codeph>ARRAY</codeph>
+ and <codeph>MAP</codeph> columns, ones that hold only scalar values. When the elements within the <codeph>ARRAY</codeph> or
+ <codeph>MAP</codeph> are of type <codeph>STRUCT</codeph> rather than a scalar type, then the result set contains columns with names
+ corresponding to the <codeph>STRUCT</codeph> fields rather than <codeph>ITEM</codeph> or <codeph>VALUE</codeph>.
+ </p>
+
+<!--
+ <p>
+ <codeph>ARRAY</codeph> and <codeph>MAP</codeph> are both <term>collection</term> types, which can have a variable number of
+ elements; <codeph>ARRAY</codeph> and <codeph>MAP</codeph> are typically used as the top-level type of a table column.
+ <codeph>STRUCT</codeph> represents a single element and has a fixed number of fields; <codeph>STRUCT</codeph> is typically used as
+ the final, lowest level of a nested type definition.
+ </p>
+-->
+
+ <p>
+ You write most queries that process complex type columns using familiar join syntax, even though the data for both sides of the join
+ resides in a single table. The join notation brings together the scalar values from a row with the values from the complex type
+ columns for that same row. The final result set contains all scalar values, allowing you to do all the familiar filtering,
+ aggregation, ordering, and so on for the complex data entirely in SQL or using business intelligence tools that issue SQL queries.
+<!--
+ Instead of pulling together values from different tables, the join selects the specified values from both
+ the scalar columns, and from inside the complex type columns, producing a flattened result set consisting of all scalar values. When
+ doing a join query involving a complex type column, Impala derives the join key automatically, without the need to create additional
+ ID columns in the table.
+-->
+ </p>
+
+ <p>
+ Behind the scenes, Impala ensures that the processing for each row is done efficiently on a single host, without the network traffic
+ involved in broadcast or shuffle joins. The most common type of join query for tables with complex type columns is <codeph>INNER
+ JOIN</codeph>, which returns results only in those cases where the complex type contains some elements. Therefore, most query
+ examples in this section use either the <codeph>INNER JOIN</codeph> clause or the equivalent comma notation.
+ </p>
+
+ <note>
+ <p>
+ Although Impala can query complex types that are present in Parquet files, Impala currently cannot create new Parquet files
+ containing complex types. Therefore, the discussion and examples presume that you are working with existing Parquet data produced
+ through Hive, Spark, or some other source. See <xref href="#complex_types_ex_hive_etl"/> for examples of constructing Parquet data
+ files with complex type columns.
+ </p>
+
+ <p>
+ For learning purposes, you can create empty tables with complex type columns and practice query syntax, even if you do not have
+ sample data with the required structure.
+ </p>
+ </note>
+
+ </conbody>
+
+ </concept>
+
+ <concept id="complex_types_design">
+
+ <title>Design Considerations for Complex Types</title>
+
+ <conbody>
+
+ <p>
+ When planning to use Impala complex types, and designing the Impala schema, first learn how this kind of schema differs from
+ traditional table layouts from the relational database and data warehousing fields. Because you might have already encountered
+ complex types in a Hadoop context while using Hive for ETL, also learn how to write high-performance analytic queries for complex
+ type data using Impala SQL syntax.
+ </p>
+
+ <p outputclass="toc inpage"/>
+
+ </conbody>
+
+ <concept id="complex_types_vs_rdbms">
+
+ <title>How Complex Types Differ from Traditional Data Warehouse Schemas</title>
+
+ <conbody>
+
+ <p>
+ Complex types let you associate arbitrary data structures with a particular row. If you are familiar with schema design for
+ relational database management systems or data warehouses, a schema with complex types has the following differences:
+ </p>
+
+ <ul>
+ <li>
+ <p>
+ Logically, related values can now be grouped tightly together in the same table.
+ </p>
+
+ <p>
+ In traditional data warehousing, related values were typically arranged in one of two ways:
+ </p>
+ <ul>
+ <li>
+ <p>
+ Split across multiple normalized tables. Foreign key columns specified which rows from each table were associated with
+ each other. This arrangement avoided duplicate data and therefore the data was compact, but join queries could be
+ expensive because the related data had to be retrieved from separate locations. (In the case of distributed Hadoop
+ queries, the joined tables might even be transmitted between different hosts in a cluster.)
+ </p>
+ </li>
+
+ <li>
+ <p>
+ Flattened into a single denormalized table. Although this layout eliminated some potential performance issues by removing
+ the need for join queries, the table typically became larger because values were repeated. The extra data volume could
+ cause performance issues in other parts of the workflow, such as longer ETL cycles or more expensive full-table scans
+ during queries.
+ </p>
+ </li>
+ </ul>
+ <p>
+ Complex types represent a middle ground that addresses these performance and volume concerns. By physically locating related
+ data within the same data files, complex types increase locality and reduce the expense of join queries. By associating an
+ arbitrary amount of data with a single row, complex types avoid the need to repeat lengthy values such as strings. Because
+ Impala knows which complex type values are associated with each row, you can save storage by avoiding artificial foreign key
+ values that are only used for joins. The flexibility of the <codeph>STRUCT</codeph>, <codeph>ARRAY</codeph>, and
+ <codeph>MAP</codeph> types lets you model familiar constructs such as fact and dimension tables from a data warehouse, and
+            wide tables representing sparse matrices.
+ </p>
+ </li>
+ </ul>
+
+ </conbody>
+
+ </concept>
+
+ <concept id="complex_types_physical">
+
+ <title>Physical Storage for Complex Types</title>
+
+ <conbody>
+
+ <p>
+ Physically, the scalar and complex columns in each row are located adjacent to each other in the same Parquet data file, ensuring
+ that they are processed on the same host rather than being broadcast across the network when cross-referenced within a query. This
+ co-location simplifies the process of copying, converting, and backing all the columns up at once. Because of the column-oriented
+ layout of Parquet files, you can still query only the scalar columns of a table without imposing the I/O penalty of reading the
+ (possibly large) values of the composite columns.
+ </p>
+
+ <p>
+ Within each Parquet data file, the constituent parts of complex type columns are stored in column-oriented format:
+ </p>
+
+ <ul>
+ <li>
+ <p>
+ Each field of a <codeph>STRUCT</codeph> type is stored like a column, with all the scalar values adjacent to each other and
+ encoded, compressed, and so on using the Parquet space-saving techniques.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ For an <codeph>ARRAY</codeph> containing scalar values, all those values (represented by the <codeph>ITEM</codeph>
+ pseudocolumn) are stored adjacent to each other.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ For a <codeph>MAP</codeph>, the values of the <codeph>KEY</codeph> pseudocolumn are stored adjacent to each other. If the
+ <codeph>VALUE</codeph> pseudocolumn is a scalar type, its values are also stored adjacent to each other.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ If an <codeph>ARRAY</codeph> element, <codeph>STRUCT</codeph> field, or <codeph>MAP</codeph> <codeph>VALUE</codeph> part is
+ another complex type, the column-oriented storage applies to the next level down (or the next level after that, and so on for
+ deeply nested types) where the final elements, fields, or values are of scalar types.
+ </p>
+ </li>
+ </ul>
+
+ <p>
+ The numbers represented by the <codeph>POS</codeph> pseudocolumn of an <codeph>ARRAY</codeph> are not physically stored in the
+ data files. They are synthesized at query time based on the order of the <codeph>ARRAY</codeph> elements associated with each row.
+ </p>
+
+ </conbody>
+
+ </concept>
+
+ <concept id="complex_types_file_formats">
+
+ <title>File Format Support for Impala Complex Types</title>
+
+ <conbody>
+
+ <p>
+ Currently, Impala queries support complex type data only in the Parquet file format. See <xref href="impala_parquet.xml#parquet"/>
+ for details about the performance benefits and physical layout of this file format.
+ </p>
+
+ <p>
+ Each table, or each partition within a table, can have a separate file format, and you can change file format at the table or
+ partition level through an <codeph>ALTER TABLE</codeph> statement. Because this flexibility makes it difficult to guarantee ahead
+ of time that all the data files for a table or partition are in a compatible format, Impala does not throw any errors when you
+ change the file format for a table or partition using <codeph>ALTER TABLE</codeph>. Any errors come at runtime when Impala
+ actually processes a table or partition that contains nested types and is not in one of the supported formats. If a query on a
+ partitioned table only processes some partitions, and all those partitions are in one of the supported formats, the query
+ succeeds.
+ </p>
+
+ <p>
+ Because Impala does not parse the data structures containing nested types for unsupported formats such as text, Avro,
+ SequenceFile, or RCFile, you cannot use data files in these formats with Impala, even if the query does not refer to the nested
+ type columns. Also, if a table using an unsupported format originally contained nested type columns, and then those columns were
+ dropped from the table using <codeph>ALTER TABLE ... DROP COLUMN</codeph>, any existing data files in the table still contain the
+ nested type data and Impala queries on that table will generate errors.
+ </p>
+
+ <p>
+ You can perform DDL operations (even <codeph>CREATE TABLE</codeph>) for tables involving complex types in file formats other than
+ Parquet. The DDL support lets you set up intermediate tables in your ETL pipeline, to be populated by Hive, before the final stage
+ where the data resides in a Parquet table and is queryable by Impala. Also, you can have a partitioned table with complex type
+ columns that uses a non-Parquet format, and use <codeph>ALTER TABLE</codeph> to change the file format to Parquet for individual
+ partitions. When you put Parquet data files into those partitions, Impala can execute queries against that data as long as the
+ query does not involve any of the non-Parquet partitions.
+ </p>
+
+ <p>
+ If you use the <cmdname>parquet-tools</cmdname> command to examine the structure of a Parquet data file that includes complex
+ types, you see that both <codeph>ARRAY</codeph> and <codeph>MAP</codeph> are represented as a <codeph>Bag</codeph> in Parquet
+ terminology, with all fields marked <codeph>Optional</codeph> because Impala allows any column to be nullable.
+ </p>
+
+ <p>
+        Impala supports either 2-level or 3-level encoding within each Parquet data file. When constructing Parquet data files outside
+ Impala, use either encoding style but do not mix 2-level and 3-level encoding within the same data file.
+ </p>
+
+ </conbody>
+
+ </concept>
+
+ <concept id="complex_types_vs_normalization">
+
+ <title>Choosing Between Complex Types and Normalized Tables</title>
+
+ <conbody>
+
+ <p>
+ Choosing between multiple normalized fact and dimension tables, or a single table containing complex types, is an important design
+ decision.
+ </p>
+
+ <ul>
+ <li>
+ <p>
+ If you are coming from a traditional database or data warehousing background, you might be familiar with how to split up data
+ between tables. Your business intelligence tools might already be optimized for dealing with this kind of multi-table scenario
+ through join queries.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ If you are pulling data from Impala into an application written in a programming language that has data structures analogous
+ to the complex types, such as Python or Java, complex types in Impala could simplify data interchange and improve
+ understandability and reliability of your program logic.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ You might already be faced with existing infrastructure or receive high volumes of data that assume one layout or the other.
+ For example, complex types are popular with web-oriented applications, for example to keep information about an online user
+ all in one place for convenient lookup and analysis, or to deal with sparse or constantly evolving data fields.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ If some parts of the data change over time while related data remains constant, using multiple normalized tables lets you
+ replace certain parts of the data without reloading the entire data set. Conversely, if you receive related data all bundled
+ together, such as in JSON files, using complex types can save the overhead of splitting the related items across multiple
+ tables.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ From a performance perspective:
+ </p>
+ <ul>
+ <li>
+ <p>
+ In Parquet tables, Impala can skip columns that are not referenced in a query, avoiding the I/O penalty of reading the
+ embedded data. When complex types are nested within a column, the data is physically divided at a very granular level; for
+ example, a query referring to data nested multiple levels deep in a complex type column does not have to read all the data
+ from that column, only the data for the relevant parts of the column type hierarchy.
+<!-- Avro not supported in 5.5 / 2.3: Avro tables might experience some performance overhead due to
+ the need to skip past the complex type columns in each row when reading the data. -->
+ </p>
+ </li>
+
+ <li>
+ <p>
+ Complex types avoid the possibility of expensive join queries when data from fact and dimension tables is processed in
+            parallel across multiple hosts. All the information for a row containing complex types is typically in the same data
+ block, and therefore does not need to be transmitted across the network when joining fields that are all part of the same
+ row.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ The tradeoff with complex types is that fewer rows fit in each data block. Whether it is better to have more data blocks
+ with fewer rows, or fewer data blocks with many rows, depends on the distribution of your data and the characteristics of
+ your query workload. If the complex columns are rarely referenced, using them might lower efficiency. If you are seeing
+ low parallelism due to a small volume of data (relatively few data blocks) in each table partition, increasing the row
+ size by including complex columns might produce more data blocks and thus spread the work more evenly across the cluster.
+ See <xref href="impala_scalability.xml#scalability"/> for more on this advanced topic.
+ </p>
+ </li>
+ </ul>
+ </li>
+ </ul>
+
+ </conbody>
+
+ </concept>
+
+ <concept id="complex_types_hive">
+
+ <title>Differences Between Impala and Hive Complex Types</title>
+
+ <conbody>
+
+<!-- HiveQL functions like nested type constructors and posexplode(): https://cwiki.apache.org/confluence/display/Hive/LanguageManual+UDF -->
+
+<!-- HiveQL complex types: https://cwiki.apache.org/confluence/display/Hive/LanguageManual+Types#LanguageManualTypes-ComplexTypes -->
+
+<!-- HiveQL lateral views: https://cwiki.apache.org/confluence/display/Hive/LanguageManual+LateralView -->
+
+ <p>
+ Impala can query Parquet tables containing <codeph>ARRAY</codeph>, <codeph>STRUCT</codeph>, and <codeph>MAP</codeph> columns
+ produced by Hive. There are some differences to be aware of between the Impala SQL and HiveQL syntax for complex types, primarily
+ for queries.
+ </p>
+
+ <p>
+ The syntax for specifying <codeph>ARRAY</codeph>, <codeph>STRUCT</codeph>, and <codeph>MAP</codeph> types in a <codeph>CREATE
+ TABLE</codeph> statement is compatible between Impala and Hive.
+ </p>
+
+ <p>
+ Because Impala <codeph>STRUCT</codeph> columns include user-specified field names, you use the <codeph>NAMED_STRUCT()</codeph>
+ constructor in Hive rather than the <codeph>STRUCT()</codeph> constructor when you populate an Impala <codeph>STRUCT</codeph>
+ column using a Hive <codeph>INSERT</codeph> statement.
+ </p>
+
+ <p>
+ The Hive <codeph>UNION</codeph> type is not currently supported in Impala.
+ </p>
+
+ <p>
+ While Impala usually aims for a high degree of compatibility with HiveQL query syntax, Impala syntax differs from Hive for queries
+ involving complex types. The differences are intended to provide extra flexibility for queries involving these kinds of tables.
+ </p>
+
+ <ul>
+ <li>
+ Impala uses dot notation for referring to element names or elements within complex types, and join notation for
+ cross-referencing scalar columns with the elements of complex types within the same row, rather than the <codeph>LATERAL
+ VIEW</codeph> clause and <codeph>EXPLODE()</codeph> function of HiveQL.
+ </li>
+
+ <li>
+ Using join notation lets you use all the kinds of join queries with complex type columns. For example, you can use a
+ <codeph>LEFT OUTER JOIN</codeph>, <codeph>LEFT ANTI JOIN</codeph>, or <codeph>LEFT SEMI JOIN</codeph> query to evaluate
+ different scenarios where the complex columns do or do not contain any elements.
+ </li>
+
+ <li>
+ You can include references to collection types inside subqueries and inline views. For example, you can construct a
+ <codeph>FROM</codeph> clause where one of the <q>tables</q> is a subquery against a complex type column, or use a subquery
+ against a complex type column as the argument to an <codeph>IN</codeph> or <codeph>EXISTS</codeph> clause.
+ </li>
+
+ <li>
+ The Impala pseudocolumn <codeph>POS</codeph> lets you retrieve the position of elements in an array along with the elements
+ themselves, equivalent to the <codeph>POSEXPLODE()</codeph> function of HiveQL. You do not use index notation to retrieve a
+ single array element in a query; the join query loops through the array elements and you use <codeph>WHERE</codeph> clauses to
+ specify which elements to return.
+ </li>
+
+ <li>
+ <p>
+ Join clauses involving complex type columns do not require an <codeph>ON</codeph> or <codeph>USING</codeph> clause. Impala
+ implicitly applies the join key so that the correct array entries or map elements are associated with the correct row from the
+ table.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ Impala does not currently support the <codeph>UNION</codeph> complex type.
+ </p>
+ </li>
+ </ul>
+
+ </conbody>
+
+ </concept>
+
+ <concept id="complex_types_limits">
+
+ <title>Limitations and Restrictions for Complex Types</title>
+
+ <conbody>
+
+ <p>
+ Complex type columns can only be used in tables or partitions with the Parquet file format.
+ </p>
+
+ <p>
+ Complex type columns cannot be used as partition key columns in a partitioned table.
+ </p>
+
+ <p>
+ When you use complex types with the <codeph>ORDER BY</codeph>, <codeph>GROUP BY</codeph>, <codeph>HAVING</codeph>, or
+ <codeph>WHERE</codeph> clauses, you cannot refer to the column name by itself. Instead, you refer to the names of the scalar
+ values within the complex type, such as the <codeph>ITEM</codeph>, <codeph>POS</codeph>, <codeph>KEY</codeph>, or
+ <codeph>VALUE</codeph> pseudocolumns, or the field names from a <codeph>STRUCT</codeph>.
+ </p>
+
+ <p>
+ The maximum depth of nesting for complex types is 100 levels.
+ </p>
+
+ <p>
+ For ideal performance and scalability, use small or medium-sized collections, where all the complex columns contain at most a few
+ hundred megabytes per row. Remember, all the columns of a row are stored in the same HDFS data block, whose size in Parquet files
+ typically ranges from 256 MB to 1 GB.
+ </p>
+
+ <p>
+ Including complex type columns in a table introduces some overhead that might make queries that do not reference those columns
+ somewhat slower than Impala queries against tables without any complex type columns. Expect at most a 2x slowdown compared to
+ tables that do not have any complex type columns.
+ </p>
+
+ <p>
+ Currently, the <codeph>COMPUTE STATS</codeph> statement does not collect any statistics for columns containing complex types.
+ Impala uses heuristics to construct execution plans involving complex type columns.
+ </p>
+
+ <p>
+ Currently, Impala built-in functions and user-defined functions cannot accept complex types as parameters or produce them as
+ function return values. (When the complex type values are materialized in an Impala result set, the result set contains the scalar
+ components of the values, such as the <codeph>POS</codeph> or <codeph>ITEM</codeph> for an <codeph>ARRAY</codeph>, the
+ <codeph>KEY</codeph> or <codeph>VALUE</codeph> for a <codeph>MAP</codeph>, or the fields of a <codeph>STRUCT</codeph>; these
+ scalar data items <i>can</i> be used with built-in functions and UDFs as usual.)
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/complex_types_read_only"/>
+
+ <p>
+ Currently, Impala can query complex type columns only from Parquet tables or Parquet partitions within partitioned tables.
+ Although you can use complex types in tables with Avro, text, and other file formats as part of your ETL pipeline, for example as
+ intermediate tables populated through Hive, doing analytics through Impala requires that the data eventually ends up in a Parquet
+ table. The requirement for Parquet data files means that you can use complex types with Impala tables hosted on other kinds of
+ file storage systems such as Isilon and Amazon S3, but you cannot use Impala to query complex types from HBase tables. See
+ <xref href="impala_complex_types.xml#complex_types_file_formats"/> for more details.
+ </p>
+
+ </conbody>
+
+ </concept>
+
+ </concept>
+
+ <concept id="complex_types_using">
+
+ <title>Using Complex Types from SQL</title>
+
+ <conbody>
+
+ <p>
+      When using complex types through SQL in Impala, you learn the notation for <codeph>&lt; &gt;</codeph> delimiters for the complex
+ type columns in <codeph>CREATE TABLE</codeph> statements, and how to construct join queries to <q>unpack</q> the scalar values
+ nested inside the complex data structures. You might need to condense a traditional RDBMS or data warehouse schema into a smaller
+ number of Parquet tables, and use Hive, Spark, Pig, or other mechanism outside Impala to populate the tables with data.
+ </p>
+
+ <p outputclass="toc inpage"/>
+
+ </conbody>
+
+ <concept id="nested_types_ddl">
+
+ <title>Complex Type Syntax for DDL Statements</title>
+
+ <conbody>
+
+ <p>
+ The definition of <varname>data_type</varname>, as seen in the <codeph>CREATE TABLE</codeph> and <codeph>ALTER TABLE</codeph>
+ statements, now includes complex types in addition to primitive types:
+ </p>
+
+<codeblock> primitive_type
+| array_type
+| map_type
+| struct_type
+</codeblock>
+
+ <p>
+ Unions are not currently supported.
+ </p>
+
+ <p>
+ Array, struct, and map column type declarations are specified in the <codeph>CREATE TABLE</codeph> statement. You can also add or
+ change the type of complex columns through the <codeph>ALTER TABLE</codeph> statement.
+ </p>
+
+ <note>
+ <p>
+ Currently, Impala queries allow complex types only in tables that use the Parquet format. If an Impala query encounters complex
+ types in a table or partition using another file format, the query returns a runtime error.
+ </p>
+
+ <p>
+ The Impala DDL support for complex types works for all file formats, so that you can create tables using text or other
+ non-Parquet formats for Hive to use as staging tables in an ETL cycle that ends with the data in a Parquet table. You can also
+ use <codeph>ALTER TABLE ... SET FILEFORMAT PARQUET</codeph> to change the file format of an existing table containing complex
+ types to Parquet, after which Impala can query it. Make sure to load Parquet files into the table after changing the file
+ format, because the <codeph>ALTER TABLE ... SET FILEFORMAT</codeph> statement does not convert existing data to the new file
+ format.
+ </p>
+ </note>
+
+ <p conref="../shared/impala_common.xml#common/complex_types_partitioning"/>
+
+ <p>
+ Because use cases for Impala complex types require that you already have Parquet data files produced outside of Impala, you can
+ use the Impala <codeph>CREATE TABLE LIKE PARQUET</codeph> syntax to produce a table with columns that match the structure of an
+ existing Parquet file, including complex type columns for nested data structures. Remember to include the <codeph>STORED AS
+ PARQUET</codeph> clause in this case, because even with <codeph>CREATE TABLE LIKE PARQUET</codeph>, the default file format of the
+ resulting table is still text.
+ </p>
+
+ <p>
+ Because the complex columns are omitted from the result set of an Impala <codeph>SELECT *</codeph> or <codeph>SELECT
+ <varname>col_name</varname></codeph> query, and because Impala currently does not support writing Parquet files with complex type
+ columns, you cannot use the <codeph>CREATE TABLE AS SELECT</codeph> syntax to create a table with nested type columns.
+ </p>
+
+ <note>
+ <p>
+ Once you have a table set up with complex type columns, use the <codeph>DESCRIBE</codeph> and <codeph>SHOW CREATE TABLE</codeph>
+ statements to see the correct notation with <codeph><</codeph> and <codeph>></codeph> delimiters and comma and colon
+ separators within the complex type definitions. If you do not have existing data with the same layout as the table, you can
+ query the empty table to practice with the notation for the <codeph>SELECT</codeph> statement. In the <codeph>SELECT</codeph>
+ list, you use dot notation and pseudocolumns such as <codeph>ITEM</codeph>, <codeph>KEY</codeph>, and <codeph>VALUE</codeph> for
+ referring to items within the complex type columns. In the <codeph>FROM</codeph> clause, you use join notation to construct
+ table aliases for any referenced <codeph>ARRAY</codeph> and <codeph>MAP</codeph> columns.
+ </p>
+ </note>
+
+<!-- To do: show some simple CREATE TABLE statements for each of the complex types, without so much backstory for the schema. -->
+
+ <p>
+ For example, when defining a table that holds contact information, you might represent phone numbers differently depending on the
+ expected layout and relationships of the data, and how well you can predict those properties in advance.
+ </p>
+
+ <p>
+ Here are different ways that you might represent phone numbers in a traditional relational schema, with equivalent representations
+ using complex types.
+ </p>
+
+ <fig id="complex_types_phones_flat_fixed">
+
+ <title>Traditional Relational Representation of Phone Numbers: Single Table</title>
+
+ <p>
+ The traditional, simplest way to represent phone numbers in a relational table is to store all contact info in a single table,
+ with all columns having scalar types, and each potential phone number represented as a separate column. In this example, each
+ person can only have these 3 types of phone numbers. If the person does not have a particular kind of phone number, the
+ corresponding column is <codeph>NULL</codeph> for that row.
+ </p>
+
+<codeblock>
+CREATE TABLE contacts_fixed_phones
+(
+ id BIGINT
+ , name STRING
+ , address STRING
+ , home_phone STRING
+ , work_phone STRING
+ , mobile_phone STRING
+) STORED AS PARQUET;
+</codeblock>
+
+ </fig>
+
+ <fig id="complex_types_phones_array">
+
+ <title>An Array of Phone Numbers</title>
+
+ <p>
+ Using a complex type column to represent the phone numbers adds some extra flexibility. Now there could be an unlimited number
+ of phone numbers. Because the array elements have an order but not symbolic names, you could decide in advance that
+ phone_number[0] is the home number, [1] is the work number, [2] is the mobile number, and so on. (In subsequent examples, you
+ will see how to create a more flexible naming scheme using other complex type variations, such as a <codeph>MAP</codeph> or an
+ <codeph>ARRAY</codeph> where each element is a <codeph>STRUCT</codeph>.)
+ </p>
+
+<codeblock><![CDATA[
+CREATE TABLE contacts_array_of_phones
+(
+ id BIGINT
+ , name STRING
+ , address STRING
+ , phone_number ARRAY < STRING >
+) STORED AS PARQUET;
+]]>
+</codeblock>
+
+ </fig>
+
+ <fig id="complex_types_phones_map">
+
+ <title>A Map of Phone Numbers</title>
+
+ <p>
+ Another way to represent an arbitrary set of phone numbers is with a <codeph>MAP</codeph> column. With a <codeph>MAP</codeph>,
+ each element is associated with a key value that you specify, which could be a numeric, string, or other scalar type. This
+ example uses a <codeph>STRING</codeph> key to give each phone number a name, such as <codeph>'home'</codeph> or
+ <codeph>'mobile'</codeph>. A query could filter the data based on the key values, or display the key values in reports.
+ </p>
+
+<codeblock><![CDATA[
+CREATE TABLE contacts_unlimited_phones
+(
+ id BIGINT, name STRING, address STRING, phone_number MAP < STRING,STRING >
+) STORED AS PARQUET;
+]]>
+</codeblock>
+
+ </fig>
+
+ <fig id="complex_types_phones_flat_normalized">
+
+ <title>Traditional Relational Representation of Phone Numbers: Normalized Tables</title>
+
+ <p>
+ If you are an experienced database designer, you already know how to work around the limitations of the single-table schema from
+ <xref href="#nested_types_ddl/complex_types_phones_flat_fixed"/>. By normalizing the schema, with the phone numbers in their own
+ table, you can associate an arbitrary set of phone numbers with each person, and associate additional details with each phone
+ number, such as whether it is a home, work, or mobile phone.
+ </p>
+
+ <p>
+ The flexibility of this approach comes with some drawbacks. Reconstructing all the data for a particular person requires a join
+ query, which might require performance tuning on Hadoop because the data from each table might be transmitted from a different
+ host. Data management tasks such as backups and refreshing the data require dealing with multiple tables instead of a single
+ table.
+ </p>
+
+ <p>
+ This example illustrates a traditional database schema to store contact info normalized across 2 tables. The fact table
+ establishes the identity and basic information about each person. A dimension table stores information only about phone numbers,
+ using an ID value to associate each phone number with a person ID from the fact table. Each person can have 0, 1, or many
+ phones; the categories are not restricted to a few predefined ones; and the phone table can contain as many columns as desired,
+ to represent all sorts of details about each phone number.
+ </p>
+
+<codeblock>
+CREATE TABLE fact_contacts (id BIGINT, name STRING, address STRING) STORED AS PARQUET;
+CREATE TABLE dim_phones
+(
+ contact_id BIGINT
+ , category STRING
+ , international_code STRING
+ , area_code STRING
+ , exchange STRING
+ , extension STRING
+ , mobile BOOLEAN
+ , carrier STRING
+ , current BOOLEAN
+ , service_start_date TIMESTAMP
+ , service_end_date TIMESTAMP
+)
+STORED AS PARQUET;
+</codeblock>
+
+ </fig>
+
+ <fig id="complex_types_phones_array_struct">
+
+ <title>Phone Numbers Represented as an Array of Structs</title>
+
+ <p>
+ To represent a schema equivalent to the one from <xref href="#nested_types_ddl/complex_types_phones_flat_normalized"/> using
+ complex types, this example uses an <codeph>ARRAY</codeph> where each array element is a <codeph>STRUCT</codeph>. As with the
+ earlier complex type examples, each person can have an arbitrary set of associated phone numbers. Making each array element into
+ a <codeph>STRUCT</codeph> lets us associate multiple data items with each phone number, and give a separate name and type to
+ each data item. The <codeph>STRUCT</codeph> fields of the <codeph>ARRAY</codeph> elements reproduce the columns of the dimension
+ table from the previous example.
+ </p>
+
+ <p>
+ You can do all the same kinds of queries with the complex type schema as with the normalized schema from the previous example.
+ The advantages of the complex type design are in the areas of convenience and performance. Now your backup and ETL processes
+ only deal with a single table. When a query uses a join to cross-reference the information about a person with their associated
+ phone numbers, all the relevant data for each row resides in the same HDFS data block, meaning each row can be processed on a
+ single host without requiring network transmission.
+ </p>
+
+<codeblock><![CDATA[
+CREATE TABLE contacts_detailed_phones
+(
+ id BIGINT, name STRING, address STRING
+ , phone ARRAY < STRUCT <
+ category: STRING
+ , international_code: STRING
+ , area_code: STRING
+ , exchange: STRING
+ , extension: STRING
+ , mobile: BOOLEAN
+ , carrier: STRING
+ , current: BOOLEAN
+ , service_start_date: TIMESTAMP
+ , service_end_date: TIMESTAMP
+ >>
+) STORED AS PARQUET;
+]]>
+</codeblock>
+
+ </fig>
+
+ </conbody>
+
+ </concept>
+
+ <concept id="complex_types_sql">
+
+ <title>SQL Statements that Support Complex Types</title>
+
+ <conbody>
+
+ <p>
+ The Impala SQL statements that support complex types are currently
+ <codeph><xref href="impala_create_table.xml#create_table">CREATE TABLE</xref></codeph>,
+ <codeph><xref href="impala_alter_table.xml#alter_table">ALTER TABLE</xref></codeph>,
+ <codeph><xref href="impala_describe.xml#describe">DESCRIBE</xref></codeph>,
+ <codeph><xref href="impala_load_data.xml#load_data">LOAD DATA</xref></codeph>, and
+ <codeph><xref href="impala_select.xml#select">SELECT</xref></codeph>. That is, currently Impala can create or alter tables
+ containing complex type columns, examine the structure of a table containing complex type columns, import existing data files
+ containing complex type columns into a table, and query Parquet tables containing complex types.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/complex_types_read_only"/>
+
+ <p outputclass="toc inpage"/>
+
+ </conbody>
+
+ <concept id="complex_types_ddl">
+
+ <title>DDL Statements and Complex Types</title>
+
+ <conbody>
+
+ <p>
+ Column specifications for complex or nested types use <codeph><</codeph> and <codeph>></codeph> delimiters:
+ </p>
+
+<codeblock><![CDATA[-- What goes inside the < > for an ARRAY is a single type, either a scalar or another
+-- complex type (ARRAY, STRUCT, or MAP).
+CREATE TABLE array_t
+(
+ id BIGINT,
+ a1 ARRAY <STRING>,
+ a2 ARRAY <BIGINT>,
+ a3 ARRAY <TIMESTAMP>,
+ a4 ARRAY <STRUCT <f1: STRING, f2: INT, f3: BOOLEAN>>
+)
+STORED AS PARQUET;
+
+-- What goes inside the < > for a MAP is two comma-separated types specifying the types of the key-value pair:
+-- a scalar type representing the key, and a scalar or complex type representing the value.
+CREATE TABLE map_t
+(
+ id BIGINT,
+ m1 MAP <STRING, STRING>,
+ m2 MAP <STRING, BIGINT>,
+ m3 MAP <BIGINT, STRING>,
+ m4 MAP <BIGINT, BIGINT>,
+ m5 MAP <STRING, ARRAY <STRING>>
+)
+STORED AS PARQUET;
+
+-- What goes inside the < > for a STRUCT is a comma-separated list of fields, each field defined as
+-- name:type. The type can be a scalar or a complex type. The field names for each STRUCT do not clash
+-- with the names of table columns or fields in other STRUCTs. A STRUCT is most often used inside
+-- an ARRAY or a MAP rather than as a top-level column.
+CREATE TABLE struct_t
+(
+ id BIGINT,
+ s1 STRUCT <f1: STRING, f2: BIGINT>,
+ s2 ARRAY <STRUCT <f1: INT, f2: TIMESTAMP>>,
+ s3 MAP <BIGINT, STRUCT <name: STRING, birthday: TIMESTAMP>>
+)
+STORED AS PARQUET;
+]]>
+</codeblock>
+
+ </conbody>
+
+ </concept>
+
+ <concept id="complex_types_queries">
+
+ <title>Queries and Complex Types</title>
+
+ <conbody>
+
+<!-- Hive does the JSON output business: http://www.datascience-labs.com/hive/hiveql-data-manipulation/ -->
+
+<!-- SELECT * works but skips any nested type columns. -->
+
+ <p>
+ The result set of an Impala query always contains only scalar types; the elements and fields within any complex type columns must
+ be <q>unpacked</q> using join queries. A query cannot directly retrieve the entire value for a complex type column. Impala
+ returns an error in this case. Queries using <codeph>SELECT *</codeph> are allowed for tables with complex types, but the
+ columns with complex types are skipped.
+ </p>
+
+ <p>
+ The following example shows how referring directly to a complex type column returns an error, while <codeph>SELECT *</codeph> on
+ the same table succeeds, but only retrieves the scalar columns.
+ </p>
+
+ <note conref="../shared/impala_common.xml#common/complex_type_schema_pointer"/>
+
+<!-- Original error message:
+ERROR: AnalysisException: Expr 'c_orders' in select list returns a complex type 'ARRAY<STRUCT<o_orderkey:BIGINT,o_orderstatus:STRING,o_totalprice:DECIMAL(12,2),o_orderdate:STRING,o_orderpriority:STRING,o_clerk:STRING,o_shippriority:INT,o_comment:STRING,o_lineitems:ARRAY<STRUCT<l_partkey:BIGINT,l_suppkey:BIGINT,l_linenumber:INT,l_quantity:DECIMAL(12,2),l_extendedprice:DECIMAL(12,2),l_discount:DECIMAL(12,2),l_tax:DECIMAL(12,2),l_returnflag:STRING,l_linestatus:STRING,l_shipdate:STRING,l_commitdate:STRING,l_receiptdate:STRING,l_shipinstruct:STRING,l_shipmode:STRING,l_comment:STRING>>>>'.
+-->
+
+<codeblock><![CDATA[SELECT c_orders FROM customer LIMIT 1;
+ERROR: AnalysisException: Expr 'c_orders' in select list returns a complex type 'ARRAY<STRUCT<o_orderkey:BIGINT,o_orderstatus:STRING, ... l_receiptdate:STRING,l_shipinstruct:STRING,l_shipmode:STRING,l_comment:STRING>>>>'.
+Only scalar types are allowed in the select list.
+
+-- Original table has several scalar columns and one complex column.
+DESCRIBE customer;
++--------------+------------------------------------+
+| name | type |
++--------------+------------------------------------+
+| c_custkey | bigint |
+| c_name | string |
+...
+| c_orders | array<struct< |
+| | o_orderkey:bigint, |
+| | o_orderstatus:string, |
+| | o_totalprice:decimal(12,2), |
+...
+| | >> |
++--------------+------------------------------------+
+
+-- When we SELECT * from that table, only the scalar columns come back in the result set.
+CREATE TABLE select_star_customer STORED AS PARQUET AS SELECT * FROM customer;
++------------------------+
+| summary |
++------------------------+
+| Inserted 150000 row(s) |
++------------------------+
+
+-- The c_orders column, being of complex type, was not included in the SELECT * result set.
+DESC select_star_customer;
++--------------+---------------+
+| name | type |
++--------------+---------------+
+| c_custkey | bigint |
+| c_name | string |
+| c_address | string |
+| c_nationkey | smallint |
+| c_phone | string |
+| c_acctbal | decimal(12,2) |
+| c_mktsegment | string |
+| c_comment | string |
++--------------+---------------+
+]]>
+</codeblock>
+
+<!-- To do: These "references to..." bits could be promoted to their own 'expressions' subheads. -->
+
+ <p>
+ References to fields within <codeph>STRUCT</codeph> columns use dot notation. If the field name is unambiguous, you can omit
+ qualifiers such as table name, column name, or even the <codeph>ITEM</codeph> or <codeph>VALUE</codeph> pseudocolumn names for
+ <codeph>STRUCT</codeph> elements inside an <codeph>ARRAY</codeph> or a <codeph>MAP</codeph>.
+ </p>
+
+<!-- To do: rewrite example to use CUSTOMER table. -->
+
+<!-- Don't think TPC-H schema has a bare STRUCT to use in such a simple query though. -->
+
+<!-- Perhaps reuse the STRUCT_DEMO example here. -->
+
+<codeblock>SELECT id, address.city FROM customers WHERE address.zip = 94305;
+</codeblock>
+
+ <p>
+ References to elements within <codeph>ARRAY</codeph> columns use the <codeph>ITEM</codeph> pseudocolumn:
+ </p>
+
+<!-- To do: shorten qualified names. -->
+
+<codeblock>select r_name, r_nations.item.n_name from region, region.r_nations limit 7;
++--------+----------------+
+| r_name | item.n_name |
++--------+----------------+
+| EUROPE | UNITED KINGDOM |
+| EUROPE | RUSSIA |
+| EUROPE | ROMANIA |
+| EUROPE | GERMANY |
+| EUROPE | FRANCE |
+| ASIA | VIETNAM |
+| ASIA | CHINA |
++--------+----------------+
+</codeblock>
+
+ <p>
+ References to fields within <codeph>MAP</codeph> columns use the <codeph>KEY</codeph> and <codeph>VALUE</codeph> pseudocolumns.
+ In this example, once the query establishes the alias <codeph>MAP_FIELD</codeph> for a <codeph>MAP</codeph> column with a
+ <codeph>STRING</codeph> key and an <codeph>INT</codeph> value, the query can refer to <codeph>MAP_FIELD.KEY</codeph> and
+ <codeph>MAP_FIELD.VALUE</codeph>, which have zero, one, or many instances for each row from the containing table.
+ </p>
+
+<codeblock><![CDATA[DESCRIBE table_0;
++---------+-----------------------+
+| name | type |
++---------+-----------------------+
+| field_0 | string |
+| field_1 | map<string,int> |
+...
+
+SELECT field_0, map_field.key, map_field.value
+ FROM table_0, table_0.field_1 AS map_field
+WHERE length(field_0) = 1
+LIMIT 10;
++---------+-----------+-------+
+| field_0 | key | value |
++---------+-----------+-------+
+| b | gshsgkvd | NULL |
+| b | twrtcxj6 | 18 |
+| b | 2vp5 | 39 |
+| b | fh0s | 13 |
+| v | 2 | 41 |
+| v | 8b58mz | 20 |
+| v | hw | 16 |
+| v | 65l388pyt | 29 |
+| v | 03k68g91z | 30 |
+| v | r2hlg5b | NULL |
++---------+-----------+-------+
+]]>
+</codeblock>
+
+<!-- To do: refer to or reuse examples from the other subtopics that discuss pseudocolumns etc. -->
+
+ <p>
+ When complex types are nested inside each other, you use a combination of joins, pseudocolumn names, and dot notation to refer
+ to specific fields at the appropriate level. This is the most frequent form of query syntax for complex columns, because the
+ typical use case involves two levels of complex types, such as an <codeph>ARRAY</codeph> of <codeph>STRUCT</codeph> elements.
+ </p>
+
+<!-- To do: rewrite example to use CUSTOMER table. -->
+
+<!-- This is my own manufactured example so I have the table, and the query works, but I don't have sample data to show. -->
+
+<codeblock>SELECT id, phone_numbers.area_code FROM contact_info_many_structs INNER JOIN contact_info_many_structs.phone_numbers phone_numbers LIMIT 3;
+</codeblock>
+
+ <p>
+ You can express relationships between <codeph>ARRAY</codeph> and <codeph>MAP</codeph> columns at different levels as joins. You
+ include comparison operators between fields at the top level and within the nested type columns so that Impala can do the
+ appropriate join operation.
+ </p>
+
+<!-- Don't think TPC-H schema has any instances where outer field matches up with inner one though. -->
+
+<!-- But don't think this usage is important enough to call out at this early point. Hide the example for now. -->
+
+<!--
+<codeblock>SELECT o.txn_id FROM customers c, c.orders o WHERE o.cc = c.preferred_cc;
+SELECT c.id, o.txn_id FROM customers c, c.orders o;
+</codeblock>
+-->
+
+<!-- To do: move these examples down, to the examples subtopic at the end. -->
+
+ <note conref="../shared/impala_common.xml#common/complex_type_schema_pointer"/>
+
+ <p>
+ For example, the following queries work equivalently. They each return customer and order data for customers that have at least
+ one order.
+ </p>
+
+<codeblock>SELECT c.c_name, o.o_orderkey FROM customer c, c.c_orders o LIMIT 5;
++--------------------+------------+
+| c_name | o_orderkey |
++--------------------+------------+
+| Customer#000072578 | 558821 |
+| Customer#000072578 | 2079810 |
+| Customer#000072578 | 5768068 |
+| Customer#000072578 | 1805604 |
+| Customer#000072578 | 3436389 |
++--------------------+------------+
+
+SELECT c.c_name, o.o_orderkey FROM customer c INNER JOIN c.c_orders o LIMIT 5;
++--------------------+------------+
+| c_name | o_orderkey |
++--------------------+------------+
+| Customer#000072578 | 558821 |
+| Customer#000072578 | 2079810 |
+| Customer#000072578 | 5768068 |
+| Customer#000072578 | 1805604 |
+| Customer#000072578 | 3436389 |
++--------------------+------------+
+</codeblock>
+
+ <p>
+ The following query using an outer join returns customers that have orders, plus customers with no orders (no entries in the
+ <codeph>C_ORDERS</codeph> array):
+ </p>
+
+<codeblock><![CDATA[SELECT c.c_custkey, o.o_orderkey
+ FROM customer c LEFT OUTER JOIN c.c_orders o
+LIMIT 5;
++-----------+------------+
+| c_custkey | o_orderkey |
++-----------+------------+
+| 60210 | NULL |
+| 147873 | NULL |
+| 72578 | 558821 |
+| 72578 | 2079810 |
+| 72578 | 5768068 |
++-----------+------------+
+]]>
+</codeblock>
+
+ <p>
+ The following query returns <i>only</i> customers that have no orders. (With <codeph>LEFT ANTI JOIN</codeph> or <codeph>LEFT
+ SEMI JOIN</codeph>, the query can only refer to columns from the left-hand table, because by definition there is no matching
+ information in the right-hand table.)
+ </p>
+
+<codeblock><![CDATA[SELECT c.c_custkey, c.c_name
+ FROM customer c LEFT ANTI JOIN c.c_orders o
+LIMIT 5;
++-----------+--------------------+
+| c_custkey | c_name |
++-----------+--------------------+
+| 60210 | Customer#000060210 |
+| 147873 | Customer#000147873 |
+| 141576 | Customer#000141576 |
+| 85365 | Customer#000085365 |
+| 70998 | Customer#000070998 |
++-----------+--------------------+
+]]>
+</codeblock>
+
+<!-- To do: promote the correlated subquery aspect into its own subtopic. -->
+
+ <p>
+ You can also perform correlated subqueries to examine the properties of complex type columns for each row in the result set.
+ </p>
+
+ <p>
+ Count the number of orders per customer. Note the correlated reference to the table alias <codeph>C</codeph>. The
+ <codeph>COUNT(*)</codeph> operation applies to all the elements of the <codeph>C_ORDERS</codeph> array for the corresponding
+ row, avoiding the need for a <codeph>GROUP BY</codeph> clause.
+ </p>
+
+<codeblock>select c_name, howmany FROM customer c, (SELECT COUNT(*) howmany FROM c.c_orders) v limit 5;
++--------------------+---------+
+| c_name | howmany |
++--------------------+---------+
+| Customer#000030065 | 15 |
+| Customer#000065455 | 18 |
+| Customer#000113644 | 21 |
+| Customer#000111078 | 0 |
+| Customer#000024621 | 0 |
++--------------------+---------+
+</codeblock>
+
+ <p>
+ Count the number of orders per customer, ignoring any customers that have not placed any orders:
+ </p>
+
+<codeblock>SELECT c_name, howmany_orders
+FROM
+ customer c,
+ (SELECT COUNT(*) howmany_orders FROM c.c_orders) subq1
+WHERE howmany_orders > 0
+LIMIT 5;
++--------------------+----------------+
+| c_name | howmany_orders |
++--------------------+----------------+
+| Customer#000072578 | 7 |
+| Customer#000046378 | 26 |
+| Customer#000069815 | 11 |
+| Customer#000079058 | 12 |
+| Customer#000092239 | 26 |
++--------------------+----------------+
+</codeblock>
+
+ <p>
+ Count the number of line items in each order. The reference to <codeph>C.C_ORDERS</codeph> in the <codeph>FROM</codeph> clause
+ is needed because the <codeph>O_ORDERKEY</codeph> field is a member of the elements in the <codeph>C_ORDERS</codeph> array. The
+ subquery labelled <codeph>SUBQ1</codeph> is correlated: it is re-evaluated for the <codeph>C_ORDERS.O_LINEITEMS</codeph> array
+ from each row of the <codeph>CUSTOMER</codeph> table.
+ </p>
+
+<codeblock>SELECT c_name, o_orderkey, howmany_line_items
+FROM
+ customer c,
+ c.c_orders t2,
+ (SELECT COUNT(*) howmany_line_items FROM c.c_orders.o_lineitems) subq1
+WHERE howmany_line_items > 0
+LIMIT 5;
++--------------------+------------+--------------------+
+| c_name | o_orderkey | howmany_line_items |
++--------------------+------------+--------------------+
+| Customer#000020890 | 1884930 | 95 |
+| Customer#000020890 | 4570754 | 95 |
+| Customer#000020890 | 3771072 | 95 |
+| Customer#000020890 | 2555489 | 95 |
+| Customer#000020890 | 919171 | 95 |
++--------------------+------------+--------------------+
+</codeblock>
+
+ <p>
+ Get the number of orders, the average order price, and the maximum items in any order per customer. For this example, the
+ subqueries labelled <codeph>SUBQ1</codeph> and <codeph>SUBQ2</codeph> are correlated: they are re-evaluated for each row from
+ the original <codeph>CUSTOMER</codeph> table, and only apply to the complex columns associated with that row.
+ </p>
+
+<codeblock>SELECT c_name, howmany, average_price, most_items
+FROM
+ customer c,
+ (SELECT COUNT(*) howmany, AVG(o_totalprice) average_price FROM c.c_orders) subq1,
+ (SELECT MAX(l_quantity) most_items FROM c.c_orders.o_lineitems ) subq2
+LIMIT 5;
++--------------------+---------+---------------+------------+
+| c_name | howmany | average_price | most_items |
++--------------------+---------+---------------+------------+
+| Customer#000030065 | 15 | 128908.34 | 50.00 |
+| Customer#000088191 | 0 | NULL | NULL |
+| Customer#000101555 | 10 | 164250.31 | 50.00 |
+| Customer#000022092 | 0 | NULL | NULL |
+| Customer#000036277 | 27 | 166040.06 | 50.00 |
++--------------------+---------+---------------+------------+
+</codeblock>
+
+ <p>
+ For example, these queries show how to access information about the <codeph>ARRAY</codeph> elements within the
+ <codeph>CUSTOMER</codeph> table from the <q>nested TPC-H</q> schema, starting with the initial <codeph>ARRAY</codeph> elements
+ and progressing to examine the <codeph>STRUCT</codeph> fields of the <codeph>ARRAY</codeph>, and then the elements nested within
+ another <codeph>ARRAY</codeph> of <codeph>STRUCT</codeph>:
+ </p>
+
+<codeblock><![CDATA[-- How many orders does each customer have?
+-- The type of the ARRAY column doesn't matter, this is just counting the elements.
+SELECT c_custkey, count(*)
+ FROM customer, customer.c_orders
+GROUP BY c_custkey
+LIMIT 5;
++-----------+----------+
+| c_custkey | count(*) |
++-----------+----------+
+| 61081 | 21 |
+| 115987 | 15 |
+| 69685 | 19 |
+| 109124 | 15 |
+| 50491 | 12 |
++-----------+----------+
+
+-- How many line items are part of each customer order?
+-- Now we examine a field from a STRUCT nested inside the ARRAY.
+SELECT c_custkey, c_orders.o_orderkey, count(*)
+ FROM customer, customer.c_orders c_orders, c_orders.o_lineitems
+GROUP BY c_custkey, c_orders.o_orderkey
+LIMIT 5;
++-----------+------------+----------+
+| c_custkey | o_orderkey | count(*) |
++-----------+------------+----------+
+| 63367 | 4985959 | 7 |
+| 53989 | 1972230 | 2 |
+| 143513 | 5750498 | 5 |
+| 17849 | 4857989 | 1 |
+| 89881 | 1046437 | 1 |
++-----------+------------+----------+
+
+-- What are the line items in each customer order?
+-- One of the STRUCT fields inside the ARRAY is another
+-- ARRAY containing STRUCT elements. The join finds
+-- all the related items from both levels of ARRAY.
+SELECT c_custkey, o_orderkey, l_partkey
+ FROM customer, customer.c_orders, c_orders.o_lineitems
+LIMIT 5;
++-----------+------------+-----------+
+| c_custkey | o_orderkey | l_partkey |
++-----------+------------+-----------+
+| 113644 | 2738497 | 175846 |
+| 113644 | 2738497 | 27309 |
+| 113644 | 2738497 | 175873 |
+| 113644 | 2738497 | 88559 |
+| 113644 | 2738497 | 8032 |
++-----------+------------+-----------+
+]]>
+</codeblock>
+
+ </conbody>
+
+ </concept>
+
+ </concept>
+
+ <concept id="pseudocolumns">
+
+ <title>Pseudocolumns for ARRAY and MAP Types</title>
+
+ <conbody>
+
+ <p>
+ Each element in an <codeph>ARRAY</codeph> type has a position, indexed starting from zero, and a value. Each element in a
+ <codeph>MAP</codeph> type represents a key-value pair. Impala provides pseudocolumns that let you retrieve this metadata as part
+ of a query, or filter query results by including such things in a <codeph>WHERE</codeph> clause. You refer to the pseudocolumns as
+ part of qualified column names in queries:
+ </p>
+
+ <ul>
+ <li>
+ <codeph>ITEM</codeph>: The value of an array element. If the <codeph>ARRAY</codeph> contains <codeph>STRUCT</codeph> elements,
+ you can refer to either <codeph><varname>array_name</varname>.ITEM.<varname>field_name</varname></codeph> or use the shorthand
+ <codeph><varname>array_name</varname>.<varname>field_name</varname></codeph>.
+ </li>
+
+ <li>
+ <codeph>POS</codeph>: The position of an element within an array.
+ </li>
+
+ <li>
+ <codeph>KEY</codeph>: The value forming the first part of a key-value pair in a map. It is not necessarily unique.
+ </li>
+
+ <li>
+ <codeph>VALUE</codeph>: The data item forming the second part of a key-value pair in a map. If the <codeph>VALUE</codeph> part
+ of the <codeph>MAP</codeph> element is a <codeph>STRUCT</codeph>, you can refer to either
+ <codeph><varname>map_name</varname>.VALUE.<varname>field_name</varname></codeph> or use the shorthand
+ <codeph><varname>map_name</varname>.<varname>field_name</varname></codeph>.
+ </li>
+ </ul>
+
+<!-- To do: Consider whether to move the detailed subtopics underneath ARRAY and MAP instead of embedded here. -->
+
+ <p outputclass="toc inpage"/>
+
+ </conbody>
+
+ <concept id="item">
+
+ <title id="pos">ITEM and POS Pseudocolumns</title>
+
+ <conbody>
+
+ <p>
+ When an <codeph>ARRAY</codeph> column contains <codeph>STRUCT</codeph> elements, you can refer to a field within the
+ <codeph>STRUCT</codeph> using a qualified name of the form
+ <codeph><varname>array_column</varname>.<varname>field_name</varname></codeph>. If the <codeph>ARRAY</codeph> contains scalar
+ values, Impala recognizes the special name <codeph><varname>array_column</varname>.ITEM</codeph> to represent the value of each
+ scalar array element. For example, if a column contained an <codeph>ARRAY</codeph> where each element was a
+ <codeph>STRING</codeph>, you would use <codeph><varname>array_name</varname>.ITEM</codeph> to refer to each scalar value in the
+ <codeph>SELECT</codeph> list, or the <codeph>WHERE</codeph> or other clauses.
+ </p>
+
+ <p>
+ This example shows a table with two <codeph>ARRAY</codeph> columns whose elements are of the scalar type
+ <codeph>STRING</codeph>. When referring to the values of the array elements in the <codeph>SELECT</codeph> list,
+ <codeph>WHERE</codeph> clause, or <codeph>ORDER BY</codeph> clause, you use the <codeph>ITEM</codeph> pseudocolumn because
+ within the array, the individual elements have no defined names.
+ </p>
+
+<codeblock><![CDATA[create TABLE persons_of_interest
+(
+person_id BIGINT,
+aliases ARRAY <STRING>,
+associates ARRAY <STRING>,
+real_name STRING
+)
+STORED AS PARQUET;
+
+-- Get all the aliases of each person.
+SELECT real_name, aliases.ITEM
+ FROM persons_of_interest, persons_of_interest.aliases
+ORDER BY real_name, aliases.item;
+
+-- Search for particular associates of each person.
+SELECT real_name, associates.ITEM
+ FROM persons_of_interest, persons_of_interest.associates
+WHERE associates.item LIKE '% MacGuffin';
+]]>
+</codeblock>
+
+ <p>
+ Because an array is inherently an ordered data structure, Impala recognizes the special name
+ <codeph><varname>array_column</varname>.POS</codeph> to represent the numeric position of each element within the array. The
+ <codeph>POS</codeph> pseudocolumn lets you filter or reorder the result set based on the sequence of array elements.
+ </p>
+
+ <p>
+ The following example uses a table from a flattened version of the TPC-H schema. The <codeph>REGION</codeph> table only has a
+ few rows, such as one row for Europe and one for Asia. The row for each region represents all the countries in that region as an
+ <codeph>ARRAY</codeph> of <codeph>STRUCT</codeph> elements:
+ </p>
+
+<codeblock><![CDATA[[localhost:21000] > desc region;
++-------------+--------------------------------------------------------------------+
+| name | type |
++-------------+--------------------------------------------------------------------+
+| r_regionkey | smallint |
+| r_name | string |
+| r_comment | string |
+| r_nations | array<struct<n_nationkey:smallint,n_name:string,n_comment:string>> |
++-------------+--------------------------------------------------------------------+
+]]>
+</codeblock>
+
+ <p>
+ To find the countries within a specific region, you use a join query. To find out the order of elements in the array, you also
+ refer to the <codeph>POS</codeph> pseudocolumn in the select list:
+ </p>
+
+<codeblock>[localhost:21000] > SELECT r1.r_name, r2.n_name, <b>r2.POS</b>
+ > FROM region r1 INNER JOIN r1.r_nations r2
+ > WHERE r1.r_name = 'ASIA';
++--------+-----------+-----+
+| r_name | n_name | pos |
++--------+-----------+-----+
+| ASIA | VIETNAM | 0 |
+| ASIA | CHINA | 1 |
+| ASIA | JAPAN | 2 |
+| ASIA | INDONESIA | 3 |
+| ASIA | INDIA | 4 |
++--------+-----------+-----+
+</codeblock>
+
+ <p>
+ Once you know the positions of the elements, you can use that information in subsequent queries, for example to change the
+ ordering of results from the complex type column or to filter certain elements from the array:
+ </p>
+
+<codeblock>[localhost:21000] > SELECT r1.r_name, r2.n_name, r2.POS
+ > FROM region r1 INNER JOIN r1.r_nations r2
+ > WHERE r1.r_name = 'ASIA'
+ > <b>ORDER BY r2.POS DESC</b>;
++--------+-----------+-----+
+| r_name | n_name | pos |
++--------+-----------+-----+
+| ASIA | INDIA | 4 |
+| ASIA | INDONESIA | 3 |
+| ASIA | JAPAN | 2 |
+| ASIA | CHINA | 1 |
+| ASIA | VIETNAM | 0 |
++--------+-----------+-----+
+[localhost:21000] > SELECT r1.r_name, r2.n_name, r2.POS
+ > FROM region r1 INNER JOIN r1.r_nations r2
+ > WHERE r1.r_name = 'ASIA' AND <b>r2.POS BETWEEN 1 and 3</b>;
++--------+-----------+-----+
+| r_name | n_name | pos |
++--------+-----------+-----+
+| ASIA | CHINA | 1 |
+| ASIA | JAPAN | 2 |
+| ASIA | INDONESIA | 3 |
++--------+-----------+-----+
+</codeblock>
+
+ </conbody>
+
+ </concept>
+
+ <concept id="key">
+
+ <title id="value">KEY and VALUE Pseudocolumns</title>
+
+ <conbody>
+
+ <p>
+ The <codeph>MAP</codeph> data type is suitable for representing sparse or wide data structures, where each row might only have
+ entries for a small subset of named fields. Because the element names (the map keys) vary depending on the row, a query must be
+ able to refer to both the key and the value parts of each key-value pair. The <codeph>KEY</codeph> and <codeph>VALUE</codeph>
+ pseudocolumns let you refer to the parts of the key-value pair independently within the query, as
+ <codeph><varname>map_column</varname>.KEY</codeph> and <codeph><varname>map_column</varname>.VALUE</codeph>.
+ </p>
+
+ <p>
+ The <codeph>KEY</codeph> must always be a scalar type, such as <codeph>STRING</codeph>, <codeph>BIGINT</codeph>, or
+ <codeph>TIMESTAMP</codeph>. It can be <codeph>NULL</codeph>. Values of the <codeph>KEY</codeph> field are not necessarily unique
+ within the same <codeph>MAP</codeph>. You apply any required <codeph>DISTINCT</codeph>, <codeph>GROUP BY</codeph>, and other
+ clauses in the query, and loop through the result set to process all the values matching any specified keys.
+ </p>
+
+ <p>
+ The <codeph>VALUE</codeph> can be either a scalar type or another complex type. If the <codeph>VALUE</codeph> is a
+ <codeph>STRUCT</codeph>, you can construct a qualified name
+ <codeph><varname>map_column</varname>.VALUE.<varname>struct_field</varname></codeph> to refer to the individual fields inside
+ the value part. If the <codeph>VALUE</codeph> is an <codeph>ARRAY</codeph> or another <codeph>MAP</codeph>, you must include
+ another join condition that establishes a table alias for <codeph><varname>map_column</varname>.VALUE</codeph>, and then
+ construct another qualified name using that alias, for example <codeph><varname>table_alias</varname>.ITEM</codeph> or
+      <codeph><varname>table_alias</varname>.KEY</codeph> and <codeph><varname>table_alias</varname>.VALUE</codeph>.
+ </p>
+
+ <p>
+ The following example shows different ways to access a <codeph>MAP</codeph> column using the <codeph>KEY</codeph> and
+ <codeph>VALUE</codeph> pseudocolumns. The <codeph>DETAILS</codeph> column has a <codeph>STRING</codeph> first part with short,
+ standardized values such as <codeph>'Recurring'</codeph>, <codeph>'Lucid'</codeph>, or <codeph>'Anxiety'</codeph>. This is the
+ <q>key</q> that is used to look up particular kinds of elements from the <codeph>MAP</codeph>. The second part, also a
+ <codeph>STRING</codeph>, is a longer free-form explanation. Impala gives you the standard pseudocolumn names
+ <codeph>KEY</codeph> and <codeph>VALUE</codeph> for the two parts, and you apply your own conventions and interpretations to the
+ underlying values.
+ </p>
+
+ <note>
+ If you find that the single-item nature of the <codeph>VALUE</codeph> makes it difficult to model your data accurately, the
+ solution is typically to add some nesting to the complex type. For example, to have several sets of key-value pairs, make the
+ column an <codeph>ARRAY</codeph> whose elements are <codeph>MAP</codeph>. To make a set of key-value pairs that holds more
+ elaborate information, make a <codeph>MAP</codeph> column whose <codeph>VALUE</codeph> part contains an <codeph>ARRAY</codeph>
+ or a <codeph>STRUCT</codeph>.
+ </note>
+
+<codeblock><![CDATA[CREATE TABLE dream_journal
+(
+ dream_id BIGINT,
+ details MAP <STRING,STRING>
+)
+STORED AS PARQUET;
+]]>
+
+-- What are all the types of dreams that are recorded?
+SELECT DISTINCT details.KEY FROM dream_journal, dream_journal.details;
+
+-- How many lucid dreams were recorded?
+-- Because there is no GROUP BY, we count the 'Lucid' keys across all rows.
+SELECT <b>COUNT(details.KEY)</b>
+ FROM dream_journal, dream_journal.details
+WHERE <b>details.KEY = 'Lucid'</b>;
+
+-- Print a report of a subset of dreams, filtering based on both the lookup key
+-- and the detailed value.
+SELECT dream_id, <b>details.KEY AS "Dream Type"</b>, <b>details.VALUE AS "Dream Summary"</b>
+ FROM dream_journal, dream_journal.details
+WHERE
+ <b>details.KEY IN ('Happy', 'Pleasant', 'Joyous')</b>
+ AND <b>details.VALUE LIKE '%childhood%'</b>;
+</codeblock>
+
+ <p>
+ The following example shows a more elaborate version of the previous table, where the <codeph>VALUE</codeph> part of the
+ <codeph>MAP</codeph> entry is a <codeph>STRUCT</codeph> rather than a scalar type. Now instead of referring to the
+ <codeph>VALUE</codeph> pseudocolumn directly, you use dot notation to refer to the <codeph>STRUCT</codeph> fields inside it.
+ </p>
+
+<codeblock><![CDATA[CREATE TABLE better_dream_journal
+(
+ dream_id BIGINT,
+ details MAP <STRING,STRUCT <summary: STRING, when_happened: TIMESTAMP, duration: DECIMAL(5,2), woke_up: BOOLEAN> >
+)
+STORED AS PARQUET;
+]]>
+
+-- Do more elaborate reporting and filtering by examining multiple attributes within the same dream.
+SELECT dream_id, <b>details.KEY AS "Dream Type"</b>, <b>details.VALUE.summary AS "Dream Summary"</b>, <b>details.VALUE.duration AS "Duration"</b>
+ FROM better_dream_journal, better_dream_journal.details
+WHERE
+ <b>details.KEY IN ('Anxiety', 'Nightmare')</b>
+ AND <b>details.VALUE.duration > 60</b>
+ AND <b>details.VALUE.woke_up = TRUE</b>;
+
+-- Remember that if the ITEM or VALUE contains a STRUCT, you can reference
+-- the STRUCT fields directly without the .ITEM or .VALUE qualifier.
+SELECT dream_id, <b>details.KEY AS "Dream Type"</b>, <b>details.summary AS "Dream Summary"</b>, <b>details.duration AS "Duration"</b>
+ FROM better_dream_journal, better_dream_journal.details
+WHERE
+ <b>details.KEY IN ('Anxiety', 'Nightmare')</b>
+ AND <b>details.duration > 60</b>
+ AND <b>details.woke_up = TRUE</b>;
+</codeblock>
+
+ </conbody>
+
+ </concept>
+
+ </concept>
+
+ <concept id="complex_types_etl">
+
+<!-- This topic overlaps in many ways with the preceding one. See which theme resonates with users, and combine them under the better title. -->
+
+ <title>Loading Data Containing Complex Types</title>
+
+ <conbody>
+
+ <p>
+ Because the Impala <codeph>INSERT</codeph> statement does not currently support creating new data with complex type columns, or
+ copying existing complex type values from one table to another, you primarily use Impala to query Parquet tables with complex
+ types where the data was inserted through Hive, or create tables with complex types where you already have existing Parquet data
+ files.
+ </p>
+
+ <p>
+ If you have created a Hive table with the Parquet file format and containing complex types, use the same table for Impala queries
+ with no changes. If you have such a Hive table in some other format, use a Hive <codeph>CREATE TABLE AS SELECT ... STORED AS
+ PARQUET</codeph> or <codeph>INSERT ... SELECT</codeph> statement to produce an equivalent Parquet table that Impala can query.
+ </p>
+
+ <p>
+ If you have existing Parquet data files containing complex types, located outside of any Impala or Hive table, such as data files
+ created by Spark jobs, you can use an Impala <codeph>CREATE TABLE ... STORED AS PARQUET</codeph> statement, followed by an Impala
+ <codeph>LOAD DATA</codeph> statement to move the data files into the table. As an alternative, you can use an Impala
+ <codeph>CREATE EXTERNAL TABLE</codeph> statement to create a table pointing to the HDFS directory that already contains the data
+ files.
+ </p>
+
+ <p>
+ Perhaps the simplest way to get started with complex type data is to take a denormalized table containing duplicated values, and
+ use an <codeph>INSERT ... SELECT</codeph> statement to copy the data into a Parquet table and condense the repeated values into
+ complex types. With the Hive <codeph>INSERT</codeph> statement, you use the <codeph>COLLECT_LIST()</codeph>,
+ <codeph>NAMED_STRUCT()</codeph>, and <codeph>MAP()</codeph> constructor functions within a <codeph>GROUP BY</codeph> query to
+ produce the complex type values. <codeph>COLLECT_LIST()</codeph> turns a sequence of values into an <codeph>ARRAY</codeph>.
+ <codeph>NAMED_STRUCT()</codeph> uses the first, third, and so on arguments as the field names for a <codeph>STRUCT</codeph>, to
+ match the field names from the <codeph>CREATE TABLE</codeph> statement.
+ </p>
+
+ <note>
+ Because Hive currently cannot construct individual rows using complex types through the <codeph>INSERT ... VALUES</codeph> syntax,
+ you prepare the data in flat form in a separate table, then copy it to the table with complex columns using <codeph>INSERT ...
+ SELECT</codeph> and the complex type constructors. See <xref href="impala_complex_types.xml#complex_types_ex_hive_etl"/> for
+ examples.
+ </note>
+
+ </conbody>
+
+ </concept>
+
+ <concept id="complex_types_nesting">
+
+ <title>Using Complex Types as Nested Types</title>
+
+ <conbody>
+
+ <p>
+ The <codeph>ARRAY</codeph>, <codeph>STRUCT</codeph>, and <codeph>MAP</codeph> types can be the top-level types for <q>nested
+ type</q> columns. That is, each of these types can contain other complex or scalar types, with multiple levels of nesting to a
+ maximum depth of 100. For example, you can have an array of structures, a map containing other maps, a structure containing an
+ array of other structures, and so on. At the lowest level, there are always scalar types making up the fields of a
+ <codeph>STRUCT</codeph>, elements of an <codeph>ARRAY</codeph>, and keys and values of a <codeph>MAP</codeph>.
+ </p>
+
+ <p>
+ Schemas involving complex types typically use some level of nesting for the complex type columns.
+ </p>
+
+ <p>
+ For example, to model a relationship like a dimension table and a fact table, you typically use an <codeph>ARRAY</codeph> where
+ each array element is a <codeph>STRUCT</codeph>. The <codeph>STRUCT</codeph> fields represent what would traditionally be columns
+ in a separate joined table. It makes little sense to use a <codeph>STRUCT</codeph> as the top-level type for a column, because you
+ could just make the fields of the <codeph>STRUCT</codeph> into regular table columns.
+ </p>
+
+<!-- To do: this example might move somewhere else, under STRUCT itself or in a tips-and-tricks section. -->
+
+ <p>
+      Perhaps the only use case for a top-level <codeph>STRUCT</codeph> would be to allow <codeph>STRUCT</codeph> fields with the
+ same name as columns to coexist in the same table. The following example shows how a table could have a column named
+ <codeph>ID</codeph>, and two separate <codeph>STRUCT</codeph> fields also named <codeph>ID</codeph>. Because the
+ <codeph>STRUCT</codeph> fields are always referenced using qualified names, the identical <codeph>ID</codeph> names do not cause a
+ conflict.
+ </p>
+
+<codeblock><![CDATA[CREATE TABLE struct_namespaces
+(
+ id BIGINT
+ , s1 STRUCT < id: BIGINT, field1: STRING >
+ , s2 STRUCT < id: BIGINT, when_happened: TIMESTAMP >
+)
+STORED AS PARQUET;
+
+select id, s1.id, s2.id from struct_namespaces;
+]]>
+</codeblock>
+
+ <p>
+ It is common to make the value portion of each key-value pair in a <codeph>MAP</codeph> a <codeph>STRUCT</codeph>,
+ <codeph>ARRAY</codeph> of <codeph>STRUCT</codeph>, or other complex type variation. That way, each key in the <codeph>MAP</codeph>
+ can be associated with a flexible and extensible data structure. The key values are not predefined ahead of time (other than by
+      specifying their data type). Therefore, the <codeph>MAP</codeph> can accommodate a rapidly evolving schema, or sparse data
+ structures where each row contains only a few data values drawn from a large set of possible choices.
+ </p>
+
+ <p>
+ Although you can use an <codeph>ARRAY</codeph> of scalar values as the top-level column in a table, such a simple array is
+ typically of limited use for analytic queries. The only property of the array elements, aside from the element value, is the
+      ordering sequence available through the <codeph>POS</codeph> pseudocolumn. To record any additional information about each array element,
+ such as a <codeph>TIMESTAMP</codeph> or a symbolic name, you use an <codeph>ARRAY</codeph> of <codeph>STRUCT</codeph> rather than
+ of scalar values.
+ </p>
+
+ <p>
+ If you are considering having multiple <codeph>ARRAY</codeph> or <codeph>MAP</codeph> columns, with related items under the same
+ position in each <codeph>ARRAY</codeph> or the same key in each <codeph>MAP</codeph>, prefer to use a <codeph>STRUCT</codeph> to
+ group all the related items into a single <codeph>ARRAY</codeph> or <codeph>MAP</codeph>. Doing so avoids the additional storage
+ overhead and potential duplication of key values from having an extra complex type column. Also, because each
+ <codeph>ARRAY</codeph> or <codeph>MAP</codeph> that you reference in the query <codeph>SELECT</codeph> list requires an additional
+ join clause, minimizing the number of complex type columns also makes the query easier to read and maintain, relying more on dot
+ notation to refer to the relevant fields rather than a sequence of join clauses.
+ </p>
+
+ <p>
+ For example, here is a table with several complex type columns all at the top level and containing only scalar types. To retrieve
+ every data item for the row requires a separate join for each <codeph>ARRAY</codeph> or <codeph>MAP</codeph> column. The fields of
+ the <codeph>STRUCT</codeph> can be referenced using dot notation, but there is no real advantage to using the
+ <codeph>STRUCT</codeph> at the top level rather than just making separate columns <codeph>FIELD1</codeph> and
+ <codeph>FIELD2</codeph>.
+ </p>
+
+<codeblock><![CDATA[CREATE TABLE complex_types_top_level
+(
+ id BIGINT,
+ a1 ARRAY<INT>,
+ a2 ARRAY<STRING>,
+ s STRUCT<field1: INT, field2: STRING>,
+-- Numeric lookup key for a string value.
+ m1 MAP<INT,STRING>,
+-- String lookup key for a numeric value.
+ m2 MAP<STRING,INT>
+)
+STORED AS PARQUET;
+
+describe complex_types_top_level;
++------+-----------------+
+| name | type |
++------+-----------------+
+| id | bigint |
+| a1 | array<int> |
+| a2 | array<string> |
+| s | struct< |
+| | field1:int, |
+| | field2:string |
+| | > |
+| m1 | map<int,string> |
+| m2 | map<string,int> |
++------+-----------------+
+
+select
+ id,
+ a1.item,
+ a2.item,
+ s.field1,
+ s.field2,
+ m1.key,
+ m1.value,
+ m2.key,
+ m2.value
+from
+ complex_types_top_level,
+ complex_types_top_level.a1,
+ complex_types_top_level.a2,
+ complex_types_top_level.m1,
+ complex_types_top_level.m2;
+]]>
+</codeblock>
+
+ <p>
+      For example, here is a table with columns containing an <codeph>ARRAY</codeph> of <codeph>STRUCT</codeph>, a <codeph>MAP</codeph>
+      where the value part of each entry is a <codeph>STRUCT</codeph>, and a <codeph>MAP</codeph> where the value part of each entry is an <codeph>ARRAY</codeph> of
+      <codeph>STRUCT</codeph>.
+ </p>
+
+<codeblock><![CDATA[CREATE TABLE nesting_demo
+(
+ user_id BIGINT,
+ family_members ARRAY < STRUCT < name: STRING, email: STRING, date_joined: TIMESTAMP >>,
+ foo map < STRING, STRUCT < f1: INT, f2: INT, f3: TIMESTAMP, f4: BOOLEAN >>,
+ gameplay MAP < STRING , ARRAY < STRUCT <
+ name: STRING, highest: BIGINT, lives_used: INT, total_spent: DECIMAL(16,2)
+ >>>
+)
+STORED AS PARQUET;
+]]>
+</codeblock>
+
+ <p>
+ The <codeph>DESCRIBE</codeph> statement rearranges the <codeph><</codeph> and <codeph>></codeph> separators and the field
+ names within each <codeph>STRUCT</codeph> for easy readability:
+ </p>
+
+<codeblock><![CDATA[DESCRIBE nesting_demo;
++----------------+-----------------------------+
+| name | type |
++----------------+-----------------------------+
+| user_id | bigint |
+| family_members | array<struct< |
+| | name:string, |
+| | email:string, |
+| | date_joined:timestamp |
+| | >>
<TRUNCATED>
[05/22] incubator-impala git commit: First try at porting over the
source files necessary for the Impala SQL Reference.
Posted by jr...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_show.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_show.xml b/docs/topics/impala_show.xml
new file mode 100644
index 0000000..1e8c17d
--- /dev/null
+++ b/docs/topics/impala_show.xml
@@ -0,0 +1,1263 @@
+<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="show">
+
+ <title>SHOW Statement</title>
+ <titlealts><navtitle>SHOW</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Reports"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">SHOW statement</indexterm>
+ The <codeph>SHOW</codeph> statement is a flexible way to get information about different types of Impala
+ objects.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock>SHOW DATABASES [[LIKE] '<varname>pattern</varname>']
+SHOW SCHEMAS [[LIKE] '<varname>pattern</varname>'] - an alias for SHOW DATABASES
+SHOW TABLES [IN <varname>database_name</varname>] [[LIKE] '<varname>pattern</varname>']
+<ph rev="1.2.0">SHOW [AGGREGATE | ANALYTIC] FUNCTIONS [IN <varname>database_name</varname>] [[LIKE] '<varname>pattern</varname>']</ph>
+<ph rev="1.2.1">SHOW CREATE TABLE [<varname>database_name</varname>].<varname>table_name</varname></ph>
+<ph rev="1.2.1">SHOW TABLE STATS [<varname>database_name</varname>.]<varname>table_name</varname></ph>
+<ph rev="1.2.1">SHOW COLUMN STATS [<varname>database_name</varname>.]<varname>table_name</varname></ph>
+<ph rev="1.4.0">SHOW PARTITIONS [<varname>database_name</varname>.]<varname>table_name</varname></ph>
+<ph rev="2.2.0">SHOW FILES IN [<varname>database_name</varname>.]<varname>table_name</varname> [PARTITION (<varname>key_col</varname>=<varname>value</varname> [, <varname>key_col</varname>=<varname>value</varname>])]</ph>
+
+<ph rev="2.0.0">SHOW ROLES
+SHOW CURRENT ROLES
+SHOW ROLE GRANT GROUP <varname>group_name</varname>
+SHOW GRANT ROLE <varname>role_name</varname></ph>
+</codeblock>
+
+<!-- SHOW ROLE GRANT { USER <varname>user_name</varname> | GROUP <varname>group_name</varname> | ROLE <varname>role_name</varname> } -->
+
+<!-- Extracted from the previous codeblock because even hidden content produces blank lines.
+<ph audience="Cloudera" rev="1.4.0">SHOW DATA SOURCES [LIKE '<varname>source_name</varname>']</ph>
+-->
+
+<!-- Some suggestion there would be this syntax for 1.4, but it's not in the builds:
+<ph rev="1.4.0">SHOW [CACHED] TABLES [IN <varname>database_name</varname>] [[LIKE] '<varname>pattern</varname>']</ph>
+<ph rev="1.4.0">SHOW [CACHED] PARTITIONS [<varname>database_name</varname>.]<varname>table_name</varname></ph>
+-->
+
+ <p>
+ Issue a <codeph>SHOW <varname>object_type</varname></codeph> statement to see the appropriate objects in the
+ current database, or <codeph>SHOW <varname>object_type</varname> IN <varname>database_name</varname></codeph>
+ to see objects in a specific database.
+ </p>
+
+ <p>
+ The optional <varname>pattern</varname> argument is a quoted string literal, using Unix-style
+ <codeph>*</codeph> wildcards and allowing <codeph>|</codeph> for alternation. The preceding
+ <codeph>LIKE</codeph> keyword is also optional. All object names are stored in lowercase, so use all
+ lowercase letters in the pattern string. For example:
+ </p>
+
+<codeblock>show databases 'a*';
+show databases like 'a*';
+show tables in some_db like '*fact*';
+use some_db;
+show tables '*dim*|*fact*';</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/cancel_blurb_no"/>
+
+ <p outputclass="toc inpage"/>
+
+ </conbody>
+
+ <concept rev="2.2.0" id="show_files">
+
+ <title>SHOW FILES Statement</title>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Disk Storage"/>
+ <data name="Category" value="Tables"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ The <codeph>SHOW FILES</codeph> statement displays the files that constitute a specified table,
+ or a partition within a partitioned table. This syntax is available in CDH 5.4 and higher
+ only. The output includes the names of the files, the size of each file, and the applicable partition
+ for a partitioned table. The size includes a suffix of <codeph>B</codeph> for bytes,
+ <codeph>MB</codeph> for megabytes, and <codeph>GB</codeph> for gigabytes.
+ </p>
+
+ <note>
+      This statement applies to tables and partitions stored on HDFS, or in the Amazon Simple Storage Service (S3).
+ It does not apply to views.
+ It does not apply to tables mapped onto HBase, because HBase does not use the same file-based storage layout.
+ </note>
+
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+
+ <p>
+ You can use this statement to verify the results of your ETL process: that is, that
+ the expected files are present, with the expected sizes. You can examine the file information
+ to detect conditions such as empty files, missing files, or inefficient layouts due to
+ a large number of small files. When you use <codeph>INSERT</codeph> statements to copy
+ from one table to another, you can see how the file layout changes due to file format
+ conversions, compaction of small input files into large data blocks, and
+ multiple output files from parallel queries and partitioned inserts.
+ </p>
+
+ <p>
+ The output from this statement does not include files that Impala considers to be hidden
+ or invisible, such as those whose names start with a dot or an underscore, or that
+ end with the suffixes <codeph>.copying</codeph> or <codeph>.tmp</codeph>.
+ </p>
+
+ <p>
+ The information for partitioned tables complements the output of the <codeph>SHOW PARTITIONS</codeph>
+ statement, which summarizes information about each partition. <codeph>SHOW PARTITIONS</codeph>
+ produces some output for each partition, while <codeph>SHOW FILES</codeph> does not
+ produce any output for empty partitions because they do not include any data files.
+ </p>
+
+<!-- Extensive round of testing makes me pretty confident of these findings. -->
+ <p conref="../shared/impala_common.xml#common/permissions_blurb"/>
+ <p rev="CDH-19187">
+ The user ID that the <cmdname>impalad</cmdname> daemon runs under,
+ typically the <codeph>impala</codeph> user, must have read
+ permission for all the table files, read and execute permission for all the directories that make up the table,
+ and execute permission for the database directory and all its parent directories.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+ <p>
+ The following example constructs <codeph>SHOW FILES</codeph> statements
+      for an unpartitioned table using text format:
+ </p>
+
+<codeblock>[localhost:21000] > create table unpartitioned_text (x bigint, s string);
+[localhost:21000] > insert into unpartitioned_text (x, s) select id, name from oreilly.sample_data limit 20e6;
+[localhost:21000] > show files in unpartitioned_text;
++-------------------------------------------------------------------------------------+----------+-----------+
+| path | size | partition |
++-------------------------------------------------------------------------------------+----------+-----------+
+| hdfs://<varname>impala_data_dir</varname>/show_files.db/unpartitioned_text/35665776ef85cfaf_1012432410_data.0. | 448.31MB | |
++-------------------------------------------------------------------------------------+----------+-----------+
+[localhost:21000] > insert into unpartitioned_text (x, s) select id, name from oreilly.sample_data limit 100e6;
+[localhost:21000] > show files in unpartitioned_text;
++---------------------------------------------------------------------------------------------+----------+-----------+
+| path | size | partition |
++---------------------------------------------------------------------------------------------+----------+-----------+
+| hdfs://<varname>impala_data_dir</varname>/show_files.db/unpartitioned_text/35665776ef85cfaf_1012432410_data.0. | 448.31MB | |
+| hdfs://<varname>impala_data_dir</varname>/show_files.db/unpartitioned_text/ac3dba252a8952b8_1663177415_data.0. | 2.19GB | |
++---------------------------------------------------------------------------------------------+----------+-----------+
+</codeblock>
+
+ <p>
+ This example illustrates how, after issuing some <codeph>INSERT ... VALUES</codeph> statements,
+ the table now contains some tiny files of just a few bytes. Such small files could cause inefficient processing of
+ parallel queries that are expecting multi-megabyte input files. The example shows how you might compact the small files by doing
+ an <codeph>INSERT ... SELECT</codeph> into a different table, possibly converting the data to Parquet in the process:
+ </p>
+
+<codeblock>[localhost:21000] > insert into unpartitioned_text values (10,'hello'), (20, 'world');
+[localhost:21000] > insert into unpartitioned_text values (-1,'foo'), (-1000, 'bar');
+[localhost:21000] > show files in unpartitioned_text;
++---------------------------------------------------------------------------------------------+----------+-----------+
+| path | size | partition |
++---------------------------------------------------------------------------------------------+----------+-----------+
+| hdfs://<varname>impala_data_dir</varname>/show_files.db/unpartitioned_text/4f11b8bdf8b6aa92_238145083_data.0. | 18B | |
+| hdfs://<varname>impala_data_dir</varname>/show_files.db/unpartitioned_text/35665776ef85cfaf_1012432410_data.0. | 448.31MB | |
+| hdfs://<varname>impala_data_dir</varname>/show_files.db/unpartitioned_text/ac3dba252a8952b8_1663177415_data.0. | 2.19GB | |
+| hdfs://<varname>impala_data_dir</varname>/show_files.db/unpartitioned_text/cfb8252452445682_1868457216_data.0. | 17B | |
++---------------------------------------------------------------------------------------------+----------+-----------+
+[localhost:21000] > create table unpartitioned_parquet stored as parquet as select * from unpartitioned_text;
++---------------------------+
+| summary |
++---------------------------+
+| Inserted 120000002 row(s) |
++---------------------------+
+[localhost:21000] > show files in unpartitioned_parquet;
++----------------------------------------------------------------------------------------------------+----------+-----------+
+| path | size | partition |
++----------------------------------------------------------------------------------------------------+----------+-----------+
+| hdfs://<varname>impala_data_dir</varname>/show_files.db/unpartitioned_parquet/60798d96ba630184_549959007_data.0.parq | 255.36MB | |
+| hdfs://<varname>impala_data_dir</varname>/show_files.db/unpartitioned_parquet/60798d96ba630184_549959007_data.1.parq | 178.52MB | |
+| hdfs://<varname>impala_data_dir</varname>/show_files.db/unpartitioned_parquet/60798d96ba630185_549959007_data.0.parq | 255.37MB | |
+| hdfs://<varname>impala_data_dir</varname>/show_files.db/unpartitioned_parquet/60798d96ba630185_549959007_data.1.parq | 57.71MB | |
+| hdfs://<varname>impala_data_dir</varname>/show_files.db/unpartitioned_parquet/60798d96ba630186_2141167244_data.0.parq | 255.40MB | |
+| hdfs://<varname>impala_data_dir</varname>/show_files.db/unpartitioned_parquet/60798d96ba630186_2141167244_data.1.parq | 175.52MB | |
+| hdfs://<varname>impala_data_dir</varname>/show_files.db/unpartitioned_parquet/60798d96ba630187_1006832086_data.0.parq | 255.40MB | |
+| hdfs://<varname>impala_data_dir</varname>/show_files.db/unpartitioned_parquet/60798d96ba630187_1006832086_data.1.parq | 214.61MB | |
++----------------------------------------------------------------------------------------------------+----------+-----------+
+</codeblock>
+
+ <p>
+ The following example shows a <codeph>SHOW FILES</codeph> statement for a partitioned text table
+ with data in two different partitions, and two empty partitions.
+ The partitions with no data are not represented in the <codeph>SHOW FILES</codeph> output.
+ </p>
+<codeblock>[localhost:21000] > create table partitioned_text (x bigint, y int, s string) partitioned by (year bigint, month bigint, day bigint);
+[localhost:21000] > insert overwrite partitioned_text (x, y, s) partition (year=2014,month=1,day=1) select id, val, name from oreilly.normalized_parquet
+where id between 1 and 1000000;
+[localhost:21000] > insert overwrite partitioned_text (x, y, s) partition (year=2014,month=1,day=2) select id, val, name from oreilly.normalized_parquet
+where id between 1000001 and 2000000;
+[localhost:21000] > alter table partitioned_text add partition (year=2014,month=1,day=3);
+[localhost:21000] > alter table partitioned_text add partition (year=2014,month=1,day=4);
+[localhost:21000] > show partitions partitioned_text;
++-------+-------+-----+-------+--------+---------+--------------+-------------------+--------+-------------------+
+| year | month | day | #Rows | #Files | Size | Bytes Cached | Cache Replication | Format | Incremental stats |
++-------+-------+-----+-------+--------+---------+--------------+-------------------+--------+-------------------+
+| 2014 | 1 | 1 | -1 | 4 | 25.16MB | NOT CACHED | NOT CACHED | TEXT | false |
+| 2014 | 1 | 2 | -1 | 4 | 26.22MB | NOT CACHED | NOT CACHED | TEXT | false |
+| 2014 | 1 | 3 | -1 | 0 | 0B | NOT CACHED | NOT CACHED | TEXT | false |
+| 2014 | 1 | 4 | -1 | 0 | 0B | NOT CACHED | NOT CACHED | TEXT | false |
+| Total | | | -1 | 8 | 51.38MB | 0B | | | |
++-------+-------+-----+-------+--------+---------+--------------+-------------------+--------+-------------------+
+[localhost:21000] > show files in partitioned_text;
++----------------------------------------------------------------------------------------------------------------+--------+-------------------------+
+| path | size | partition |
++----------------------------------------------------------------------------------------------------------------+--------+-------------------------+
+| hdfs://<varname>impala_data_dir</varname>/show_files.db/partitioned_text/year=2014/month=1/day=1/80732d9dc80689f_1418645991_data.0. | 5.77MB | year=2014/month=1/day=1 |
+| hdfs://<varname>impala_data_dir</varname>/show_files.db/partitioned_text/year=2014/month=1/day=1/80732d9dc8068a0_1418645991_data.0. | 6.25MB | year=2014/month=1/day=1 |
+| hdfs://<varname>impala_data_dir</varname>/show_files.db/partitioned_text/year=2014/month=1/day=1/80732d9dc8068a1_147082319_data.0. | 7.16MB | year=2014/month=1/day=1 |
+| hdfs://<varname>impala_data_dir</varname>/show_files.db/partitioned_text/year=2014/month=1/day=1/80732d9dc8068a2_2111411753_data.0. | 5.98MB | year=2014/month=1/day=1 |
+| hdfs://<varname>impala_data_dir</varname>/show_files.db/partitioned_text/year=2014/month=1/day=2/21a828cf494b5bbb_501271652_data.0. | 6.42MB | year=2014/month=1/day=2 |
+| hdfs://<varname>impala_data_dir</varname>/show_files.db/partitioned_text/year=2014/month=1/day=2/21a828cf494b5bbc_501271652_data.0. | 6.62MB | year=2014/month=1/day=2 |
+| hdfs://<varname>impala_data_dir</varname>/show_files.db/partitioned_text/year=2014/month=1/day=2/21a828cf494b5bbd_1393490200_data.0. | 6.98MB | year=2014/month=1/day=2 |
+| hdfs://<varname>impala_data_dir</varname>/show_files.db/partitioned_text/year=2014/month=1/day=2/21a828cf494b5bbe_1393490200_data.0. | 6.20MB | year=2014/month=1/day=2 |
++----------------------------------------------------------------------------------------------------------------+--------+-------------------------+
+</codeblock>
+ <p>
+ The following example shows a <codeph>SHOW FILES</codeph> statement for a partitioned Parquet table.
+ The number and sizes of files are different from the equivalent partitioned text table
+ used in the previous example, because <codeph>INSERT</codeph> operations for Parquet tables
+ are parallelized differently than for text tables. (Also, the amount of data is so small
+ that it can be written to Parquet without involving all the hosts in this 4-node cluster.)
+ </p>
+<codeblock>[localhost:21000] > create table partitioned_parquet (x bigint, y int, s string) partitioned by (year bigint, month bigint, day bigint) stored as parquet;
+[localhost:21000] > insert into partitioned_parquet partition (year,month,day) select x, y, s, year, month, day from partitioned_text;
+[localhost:21000] > show partitions partitioned_parquet;
++-------+-------+-----+-------+--------+---------+--------------+-------------------+---------+-------------------+
+| year | month | day | #Rows | #Files | Size | Bytes Cached | Cache Replication | Format | Incremental stats |
++-------+-------+-----+-------+--------+---------+--------------+-------------------+---------+-------------------+
+| 2014 | 1 | 1 | -1 | 3 | 17.89MB | NOT CACHED | NOT CACHED | PARQUET | false |
+| 2014 | 1 | 2 | -1 | 3 | 17.89MB | NOT CACHED | NOT CACHED | PARQUET | false |
+| Total | | | -1 | 6 | 35.79MB | 0B | | | |
++-------+-------+-----+-------+--------+---------+--------------+-------------------+---------+-------------------+
+[localhost:21000] > show files in partitioned_parquet;
++---------------------------------------------------------------------------------------------------------+--------+-------------------------+
+| path | size | partition |
++---------------------------------------------------------------------------------------------------------+--------+-------------------------+
+| hdfs://<varname>impala_data_dir</varname>/show_files.db/partitioned_parquet/year=2014/month=1/day=1/1134113650_data.0.parq | 4.49MB | year=2014/month=1/day=1 |
+| hdfs://<varname>impala_data_dir</varname>/show_files.db/partitioned_parquet/year=2014/month=1/day=1/617567880_data.0.parq | 5.14MB | year=2014/month=1/day=1 |
+| hdfs://<varname>impala_data_dir</varname>/show_files.db/partitioned_parquet/year=2014/month=1/day=1/2099499416_data.0.parq | 8.27MB | year=2014/month=1/day=1 |
+| hdfs://<varname>impala_data_dir</varname>/show_files.db/partitioned_parquet/year=2014/month=1/day=2/945567189_data.0.parq | 8.80MB | year=2014/month=1/day=2 |
+| hdfs://<varname>impala_data_dir</varname>/show_files.db/partitioned_parquet/year=2014/month=1/day=2/2145850112_data.0.parq | 4.80MB | year=2014/month=1/day=2 |
+| hdfs://<varname>impala_data_dir</varname>/show_files.db/partitioned_parquet/year=2014/month=1/day=2/665613448_data.0.parq | 4.29MB | year=2014/month=1/day=2 |
++---------------------------------------------------------------------------------------------------------+--------+-------------------------+
+</codeblock>
+<p>
+ The following example shows output from the <codeph>SHOW FILES</codeph> statement
+ for a table where the data files are stored in Amazon S3:
+</p>
+<codeblock>[localhost:21000] > show files in s3_testing.sample_data_s3;
++-----------------------------------------------------------------------+---------+-----------+
+| path | size | partition |
++-----------------------------------------------------------------------+---------+-----------+
+| s3a://impala-demo/sample_data/e065453cba1988a6_1733868553_data.0.parq | 24.84MB | |
++-----------------------------------------------------------------------+---------+-----------+
+</codeblock>
+<!--
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ <xref href="impala_authorization.xml#authorization"/>
+ </p>
+-->
+ </conbody>
+ </concept>
+
+ <concept rev="2.0.0" id="show_roles">
+
+ <title>SHOW ROLES Statement</title>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Sentry"/>
+ <data name="Category" value="Roles"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ The <codeph>SHOW ROLES</codeph> statement displays roles. This syntax is available in CDH 5.2 and later
+ only, when you are using the Sentry authorization framework along with the Sentry service, as described in
+ <xref href="impala_authorization.xml#sentry_service"/>. It does not apply when you use the Sentry framework
+ with privileges defined in a policy file.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/security_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/show_security"/>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+ <p>
+ Depending on the roles set up within your organization by the <codeph>CREATE ROLE</codeph> statement, the
+ output might look something like this:
+ </p>
+
+<codeblock>show roles;
++-----------+
+| role_name |
++-----------+
+| analyst |
+| role1 |
+| sales |
+| superuser |
+| test_role |
++-----------+
+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/permissions_blurb_no"/>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ <xref href="impala_authorization.xml#authorization"/>
+ </p>
+ </conbody>
+ </concept>
+
+ <concept rev="2.0.0" id="show_current_role">
+
+ <title>SHOW CURRENT ROLE</title>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Sentry"/>
+ <data name="Category" value="Roles"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p rev="2.0.0">
+ The <codeph>SHOW CURRENT ROLE</codeph> statement displays roles assigned to the current user. This syntax
+ is available in CDH 5.2 and later only, when you are using the Sentry authorization framework along with
+ the Sentry service, as described in <xref href="impala_authorization.xml#sentry_service"/>. It does not
+ apply when you use the Sentry framework with privileges defined in a policy file.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/security_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/show_security"/>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+ <p>
+ Depending on the roles set up within your organization by the <codeph>CREATE ROLE</codeph> statement, the
+ output might look something like this:
+ </p>
+
+<codeblock>show current roles;
++-----------+
+| role_name |
++-----------+
+| role1 |
+| superuser |
++-----------+
+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/permissions_blurb_no"/>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ <xref href="impala_authorization.xml#authorization"/>
+ </p>
+ </conbody>
+ </concept>
+
+ <concept id="show_role_grant">
+
+ <title>SHOW ROLE GRANT Statement</title>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Sentry"/>
+ <data name="Category" value="Roles"/>
+ </metadata>
+ </prolog>
+
+
+ <conbody>
+
+ <p rev="2.0.0">
+<!-- Copied from Sentry docs. Turn into conref. I did some rewording for clarity. -->
+ The <codeph>SHOW ROLE GRANT</codeph> statement lists all the roles assigned to the specified group. This
+ statement is only allowed for Sentry administrative users and other users that are part of the specified
+ group. This syntax is available in CDH 5.2 and later only, when you are using the Sentry authorization
+ framework along with the Sentry service, as described in
+ <xref href="impala_authorization.xml#sentry_service"/>. It does not apply when you use the Sentry framework
+ with privileges defined in a policy file.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/security_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/show_security"/>
+
+<!--
+<p conref="../shared/impala_common.xml#common/example_blurb"/>
+<codeblock>To do: construct example for SHOW ROLE GRANT</codeblock>
+-->
+
+ <p conref="../shared/impala_common.xml#common/permissions_blurb_no"/>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ <xref href="impala_authorization.xml#authorization"/>
+ </p>
+ </conbody>
+ </concept>
+
+ <concept rev="2.0.0" id="show_grant_role">
+
+ <title>SHOW GRANT ROLE Statement</title>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Sentry"/>
+ <data name="Category" value="Roles"/>
+ </metadata>
+ </prolog>
+
+
+ <conbody>
+
+ <p>
+<!-- Copied from Sentry docs. Turn into conref. I did some rewording for clarity. -->
+ The <codeph>SHOW GRANT ROLE</codeph> statement lists all the grants for the given role name. This statement
+ is only allowed for Sentry administrative users and other users that have been granted the specified role.
+ This syntax is available in CDH 5.2 and later only, when you are using the Sentry authorization framework
+ along with the Sentry service, as described in <xref href="impala_authorization.xml#sentry_service"/>. It
+ does not apply when you use the Sentry framework with privileges defined in a policy file.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/security_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/show_security"/>
+
+<!--
+<p conref="../shared/impala_common.xml#common/example_blurb"/>
+<codeblock>To do: construct example for SHOW GRANT ROLE</codeblock>
+-->
+
+ <p conref="../shared/impala_common.xml#common/permissions_blurb_no"/>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ <xref href="impala_authorization.xml#authorization"/>
+ </p>
+ </conbody>
+ </concept>
+
+ <concept id="show_databases">
+
+ <title>SHOW DATABASES</title>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Databases"/>
+ <data name="Category" value="Schemas"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ The <codeph>SHOW DATABASES</codeph> statement is often the first one you issue when connecting to an
+ instance for the first time. You typically issue <codeph>SHOW DATABASES</codeph> to see the names you can
+ specify in a <codeph>USE <varname>db_name</varname></codeph> statement, then after switching to a database
+ you issue <codeph>SHOW TABLES</codeph> to see the names you can specify in <codeph>SELECT</codeph> and
+ <codeph>INSERT</codeph> statements.
+ </p>
+
+ <p>
+ The output of <codeph>SHOW DATABASES</codeph> includes the special <codeph>_impala_builtins</codeph>
+ database, which lets you view definitions of built-in functions, as described under <codeph>SHOW
+ FUNCTIONS</codeph>.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/security_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/show_security"/>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+ <p>
+ This example shows how you might locate a particular table on an unfamiliar system. The
+ <codeph>DEFAULT</codeph> database is the one you initially connect to; a database with that name is present
+ on every system. You can issue <codeph>SHOW TABLES IN <varname>db_name</varname></codeph> without going
+ into a database, or <codeph>SHOW TABLES</codeph> once you are inside a particular database.
+ </p>
+
+<codeblock>[localhost:21000] > show databases;
++--------------------+
+| name |
++--------------------+
+| _impala_builtins |
+| analyze_testing |
+| avro |
+| ctas |
+| d1 |
+| d2 |
+| d3 |
+| default |
+| file_formats |
+| hbase |
+| load_data |
+| partitioning |
+| regexp_testing |
+| reports |
+| temporary |
++--------------------+
+Returned 15 row(s) in 0.02s
+[localhost:21000] > show tables in file_formats;
++--------------------+
+| name |
++--------------------+
+| parquet_table |
+| rcfile_table |
+| sequencefile_table |
+| textfile_table |
++--------------------+
+Returned 4 row(s) in 0.01s
+[localhost:21000] > use file_formats;
+[localhost:21000] > show tables like '*parq*';
++--------------------+
+| name |
++--------------------+
+| parquet_table |
++--------------------+
+Returned 1 row(s) in 0.01s</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/permissions_blurb_no"/>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ <xref href="impala_databases.xml#databases"/>, <xref href="impala_create_database.xml#create_database"/>,
+ <xref href="impala_drop_database.xml#drop_database"/>, <xref href="impala_use.xml#use"/>,
+ <xref href="impala_show.xml#show_tables"/>,
+ <xref href="impala_show.xml#show_functions"/>
+ </p>
+ </conbody>
+ </concept>
+
+ <concept id="show_tables">
+
+ <title>SHOW TABLES Statement</title>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Tables"/>
+ <data name="Category" value="Schemas"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ Displays the names of tables. By default, lists tables in the current database, or with the
+ <codeph>IN</codeph> clause, in a specified database. By default, lists all tables, or with the
+ <codeph>LIKE</codeph> clause, only those whose names match a pattern with <codeph>*</codeph> wildcards.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/security_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/show_security"/>
+
+ <p rev="CDH-19187">
+ The user ID that the <cmdname>impalad</cmdname> daemon runs under,
+ typically the <codeph>impala</codeph> user, must have read and execute
+ permissions for all directories that are part of the table.
+ (A table could span multiple different HDFS directories if it is partitioned.
+ The directories could be widely scattered because a partition can reside
+ in an arbitrary HDFS directory based on its <codeph>LOCATION</codeph> attribute.)
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+ <p>
+ The following examples demonstrate the <codeph>SHOW TABLES</codeph> statement.
+ If the database contains no tables, the result set is empty.
+ If the database does contain tables, <codeph>SHOW TABLES IN <varname>db_name</varname></codeph>
+ lists all the table names. <codeph>SHOW TABLES</codeph> with no qualifiers lists
+ all the table names in the current database.
+ </p>
+
+<codeblock>create database empty_db;
+show tables in empty_db;
+Fetched 0 row(s) in 0.11s
+
+create database full_db;
+create table full_db.t1 (x int);
+create table full_db.t2 like full_db.t1;
+
+show tables in full_db;
++------+
+| name |
++------+
+| t1 |
+| t2 |
++------+
+
+use full_db;
+show tables;
++------+
+| name |
++------+
+| t1 |
+| t2 |
++------+
+</codeblock>
+
+ <p>
+ This example demonstrates how <codeph>SHOW TABLES LIKE '<varname>wildcard_pattern</varname>'</codeph>
+ lists table names that match a pattern, or multiple alternative patterns.
+ The ability to do wildcard matches for table names makes it helpful to establish naming conventions for tables to
+ conveniently locate a group of related tables.
+ </p>
+
+<codeblock>create table fact_tbl (x int);
+create table dim_tbl_1 (s string);
+create table dim_tbl_2 (s string);
+
+/* Asterisk is the wildcard character. Only 2 out of the 3 just-created tables are returned. */
+show tables like 'dim*';
++-----------+
+| name |
++-----------+
+| dim_tbl_1 |
+| dim_tbl_2 |
++-----------+
+
+/* We are already in the FULL_DB database, but just to be sure we can specify the database name also. */
+show tables in full_db like 'dim*';
++-----------+
+| name |
++-----------+
+| dim_tbl_1 |
+| dim_tbl_2 |
++-----------+
+
+/* The pipe character separates multiple wildcard patterns. */
+show tables like '*dim*|t*';
++-----------+
+| name |
++-----------+
+| dim_tbl_1 |
+| dim_tbl_2 |
+| t1 |
+| t2 |
++-----------+
+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/permissions_blurb_no"/>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ <xref href="impala_tables.xml#tables"/>, <xref href="impala_create_table.xml#create_table"/>,
+ <xref href="impala_alter_table.xml#alter_table"/>, <xref href="impala_drop_table.xml#drop_table"/>,
+ <xref href="impala_describe.xml#describe"/>, <xref href="impala_show.xml#show_create_table"/>,
+ <xref href="impala_show.xml#show_table_stats"/>,
+ <xref href="impala_show.xml#show_databases"/>,
+ <xref href="impala_show.xml#show_functions"/>
+ </p>
+ </conbody>
+ </concept>
+
+ <concept rev="1.2.1" id="show_create_table">
+
+ <title>SHOW CREATE TABLE Statement</title>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Schemas"/>
+ <data name="Category" value="Impala Data Types"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ As a schema changes over time, you might run a <codeph>CREATE TABLE</codeph> statement followed by several
+ <codeph>ALTER TABLE</codeph> statements. To capture the cumulative effect of all those statements,
+ <codeph>SHOW CREATE TABLE</codeph> displays a <codeph>CREATE TABLE</codeph> statement that would reproduce
+ the current structure of a table. You can use this output in scripts that set up or clone a group of
+ tables, rather than trying to reproduce the original sequence of <codeph>CREATE TABLE</codeph> and
+ <codeph>ALTER TABLE</codeph> statements. When creating variations on the original table, or cloning the
+ original table on a different system, you might need to edit the <codeph>SHOW CREATE TABLE</codeph> output
+ to change things such as the database name, <codeph>LOCATION</codeph> field, and so on that might be
+ different on the destination system.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/security_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/show_security"/>
+
+ <p conref="../shared/impala_common.xml#common/permissions_blurb_no"/>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+ <p>
+ The following example shows how various clauses from the <codeph>CREATE TABLE</codeph> statement are
+ represented in the output of <codeph>SHOW CREATE TABLE</codeph>.
+ </p>
+
+<codeblock>create table show_create_table_demo (id int comment "Unique ID", y double, s string)
+ partitioned by (year smallint)
+ stored as parquet;
+
+show create table show_create_table_demo;
++----------------------------------------------------------------------------------------+
+| result |
++----------------------------------------------------------------------------------------+
+| CREATE TABLE scratch.show_create_table_demo ( |
+| id INT COMMENT 'Unique ID', |
+| y DOUBLE, |
+| s STRING |
+| ) |
+| PARTITIONED BY ( |
+| year SMALLINT |
+| ) |
+| STORED AS PARQUET |
+| LOCATION 'hdfs://127.0.0.1:8020/user/hive/warehouse/scratch.db/show_create_table_demo' |
+| TBLPROPERTIES ('transient_lastDdlTime'='1418152582') |
++----------------------------------------------------------------------------------------+
+</codeblock>
+
+ <p>
+ The following example shows how, after a sequence of <codeph>ALTER TABLE</codeph> statements, the output
+ from <codeph>SHOW CREATE TABLE</codeph> represents the current state of the table. This output could be
+ used to create a matching table rather than executing the original <codeph>CREATE TABLE</codeph> and
+ sequence of <codeph>ALTER TABLE</codeph> statements.
+ </p>
+
+<codeblock>alter table show_create_table_demo drop column s;
+alter table show_create_table_demo set fileformat textfile;
+
+show create table show_create_table_demo;
++----------------------------------------------------------------------------------------+
+| result |
++----------------------------------------------------------------------------------------+
+| CREATE TABLE scratch.show_create_table_demo ( |
+| id INT COMMENT 'Unique ID', |
+| y DOUBLE |
+| ) |
+| PARTITIONED BY ( |
+| year SMALLINT |
+| ) |
+| STORED AS TEXTFILE |
+| LOCATION 'hdfs://127.0.0.1:8020/user/hive/warehouse/demo.db/show_create_table_demo' |
+| TBLPROPERTIES ('transient_lastDdlTime'='1418152638') |
++----------------------------------------------------------------------------------------+
+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ <xref href="impala_create_table.xml#create_table"/>, <xref href="impala_describe.xml#describe"/>,
+ <xref href="impala_show.xml#show_tables"/>
+ </p>
+ </conbody>
+ </concept>
+
+ <concept id="show_table_stats">
+
+ <title>SHOW TABLE STATS Statement</title>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Performance"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ The <codeph>SHOW TABLE STATS</codeph> and <codeph>SHOW COLUMN STATS</codeph> variants are important for
+ tuning performance and diagnosing performance issues, especially with the largest tables and the most
+ complex join queries.
+ </p>
+
+ <p>
+ Any values that are not available (because the <codeph>COMPUTE STATS</codeph> statement has not been run
+ yet) are displayed as <codeph>-1</codeph>.
+ </p>
+
+ <p>
+ <codeph>SHOW TABLE STATS</codeph> provides some general information about the table, such as the number of
+ files, overall size of the data, whether some or all of the data is in the HDFS cache, and the file format,
+ that is useful whether or not you have run the <codeph>COMPUTE STATS</codeph> statement. A
+ <codeph>-1</codeph> in the <codeph>#Rows</codeph> output column indicates that the <codeph>COMPUTE
+ STATS</codeph> statement has never been run for this table. If the table is partitioned, <codeph>SHOW TABLE
+ STATS</codeph> provides this information for each partition. (It produces the same output as the
+ <codeph>SHOW PARTITIONS</codeph> statement in this case.)
+ </p>
+
+ <p>
+ The output of <codeph>SHOW COLUMN STATS</codeph> is primarily only useful after the <codeph>COMPUTE
+ STATS</codeph> statement has been run on the table. A <codeph>-1</codeph> in the <codeph>#Distinct
+ Values</codeph> output column indicates that the <codeph>COMPUTE STATS</codeph> statement has never been
+ run for this table. Currently, Impala always leaves the <codeph>#Nulls</codeph> column as
+ <codeph>-1</codeph>, even after <codeph>COMPUTE STATS</codeph> has been run.
+ </p>
+
+ <p>
+ These <codeph>SHOW</codeph> statements work on actual tables only, not on views.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/security_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/show_security"/>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+ <p>
+ The following examples show how the <codeph>SHOW TABLE STATS</codeph> statement displays physical
+ information about a table and the associated data files:
+ </p>
+
+<codeblock>show table stats store_sales;
++-------+--------+----------+--------------+--------+-------------------+
+| #Rows | #Files | Size | Bytes Cached | Format | Incremental stats |
++-------+--------+----------+--------------+--------+-------------------+
+| -1 | 1 | 370.45MB | NOT CACHED | TEXT | false |
++-------+--------+----------+--------------+--------+-------------------+
+
+show table stats customer;
++-------+--------+---------+--------------+--------+-------------------+
+| #Rows | #Files | Size | Bytes Cached | Format | Incremental stats |
++-------+--------+---------+--------------+--------+-------------------+
+| -1 | 1 | 12.60MB | NOT CACHED | TEXT | false |
++-------+--------+---------+--------------+--------+-------------------+
+</codeblock>
+
+ <p>
+ The following example shows how, after a <codeph>COMPUTE STATS</codeph> or <codeph>COMPUTE INCREMENTAL
+ STATS</codeph> statement, the <codeph>#Rows</codeph> field is now filled in. Because the
+ <codeph>STORE_SALES</codeph> table in this example is not partitioned, the <codeph>COMPUTE INCREMENTAL
+ STATS</codeph> statement produces regular stats rather than incremental stats, therefore the
+ <codeph>Incremental stats</codeph> field remains <codeph>false</codeph>.
+ </p>
+
+<codeblock>compute stats customer;
++------------------------------------------+
+| summary |
++------------------------------------------+
+| Updated 1 partition(s) and 18 column(s). |
++------------------------------------------+
+
+show table stats customer;
++--------+--------+---------+--------------+--------+-------------------+
+| #Rows | #Files | Size | Bytes Cached | Format | Incremental stats |
++--------+--------+---------+--------------+--------+-------------------+
+| 100000 | 1 | 12.60MB | NOT CACHED | TEXT | false |
++--------+--------+---------+--------------+--------+-------------------+
+
+compute incremental stats store_sales;
++------------------------------------------+
+| summary |
++------------------------------------------+
+| Updated 1 partition(s) and 23 column(s). |
++------------------------------------------+
+
+show table stats store_sales;
++---------+--------+----------+--------------+--------+-------------------+
+| #Rows | #Files | Size | Bytes Cached | Format | Incremental stats |
++---------+--------+----------+--------------+--------+-------------------+
+| 2880404 | 1 | 370.45MB | NOT CACHED | TEXT | false |
++---------+--------+----------+--------------+--------+-------------------+
+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/permissions_blurb"/>
+ <p rev="CDH-19187">
+ The user ID that the <cmdname>impalad</cmdname> daemon runs under,
+ typically the <codeph>impala</codeph> user, must have read and execute
+ permissions for all directories that are part of the table.
+ (A table could span multiple different HDFS directories if it is partitioned.
+ The directories could be widely scattered because a partition can reside
+ in an arbitrary HDFS directory based on its <codeph>LOCATION</codeph> attribute.)
+ The Impala user must also have execute
+ permission for the database directory, and any parent directories of the database directory in HDFS.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ <xref href="impala_compute_stats.xml#compute_stats"/>, <xref href="impala_show.xml#show_column_stats"/>
+ </p>
+
+ <p>
+ See <xref href="impala_perf_stats.xml#perf_stats"/> for usage information and examples.
+ </p>
+ </conbody>
+ </concept>
+
+ <concept id="show_column_stats">
+
+ <title>SHOW COLUMN STATS Statement</title>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Performance"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ The <codeph>SHOW TABLE STATS</codeph> and <codeph>SHOW COLUMN STATS</codeph> variants are important for
+ tuning performance and diagnosing performance issues, especially with the largest tables and the most
+ complex join queries.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/security_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/show_security"/>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+ <p>
+ The following examples show the output of the <codeph>SHOW COLUMN STATS</codeph> statement for some tables,
+ before the <codeph>COMPUTE STATS</codeph> statement is run. Impala deduces some information, such as
+ maximum and average size for fixed-length columns, and leaves unknown values as <codeph>-1</codeph>.
+ </p>
+
+<codeblock>show column stats customer;
++------------------------+--------+------------------+--------+----------+----------+
+| Column | Type | #Distinct Values | #Nulls | Max Size | Avg Size |
++------------------------+--------+------------------+--------+----------+----------+
+| c_customer_sk | INT | -1 | -1 | 4 | 4 |
+| c_customer_id | STRING | -1 | -1 | -1 | -1 |
+| c_current_cdemo_sk | INT | -1 | -1 | 4 | 4 |
+| c_current_hdemo_sk | INT | -1 | -1 | 4 | 4 |
+| c_current_addr_sk | INT | -1 | -1 | 4 | 4 |
+| c_first_shipto_date_sk | INT | -1 | -1 | 4 | 4 |
+| c_first_sales_date_sk | INT | -1 | -1 | 4 | 4 |
+| c_salutation | STRING | -1 | -1 | -1 | -1 |
+| c_first_name | STRING | -1 | -1 | -1 | -1 |
+| c_last_name | STRING | -1 | -1 | -1 | -1 |
+| c_preferred_cust_flag | STRING | -1 | -1 | -1 | -1 |
+| c_birth_day | INT | -1 | -1 | 4 | 4 |
+| c_birth_month | INT | -1 | -1 | 4 | 4 |
+| c_birth_year | INT | -1 | -1 | 4 | 4 |
+| c_birth_country | STRING | -1 | -1 | -1 | -1 |
+| c_login | STRING | -1 | -1 | -1 | -1 |
+| c_email_address | STRING | -1 | -1 | -1 | -1 |
+| c_last_review_date | STRING | -1 | -1 | -1 | -1 |
++------------------------+--------+------------------+--------+----------+----------+
+
+show column stats store_sales;
++-----------------------+-------+------------------+--------+----------+----------+
+| Column | Type | #Distinct Values | #Nulls | Max Size | Avg Size |
++-----------------------+-------+------------------+--------+----------+----------+
+| ss_sold_date_sk | INT | -1 | -1 | 4 | 4 |
+| ss_sold_time_sk | INT | -1 | -1 | 4 | 4 |
+| ss_item_sk | INT | -1 | -1 | 4 | 4 |
+| ss_customer_sk | INT | -1 | -1 | 4 | 4 |
+| ss_cdemo_sk | INT | -1 | -1 | 4 | 4 |
+| ss_hdemo_sk | INT | -1 | -1 | 4 | 4 |
+| ss_addr_sk | INT | -1 | -1 | 4 | 4 |
+| ss_store_sk | INT | -1 | -1 | 4 | 4 |
+| ss_promo_sk | INT | -1 | -1 | 4 | 4 |
+| ss_ticket_number | INT | -1 | -1 | 4 | 4 |
+| ss_quantity | INT | -1 | -1 | 4 | 4 |
+| ss_wholesale_cost | FLOAT | -1 | -1 | 4 | 4 |
+| ss_list_price | FLOAT | -1 | -1 | 4 | 4 |
+| ss_sales_price | FLOAT | -1 | -1 | 4 | 4 |
+| ss_ext_discount_amt | FLOAT | -1 | -1 | 4 | 4 |
+| ss_ext_sales_price | FLOAT | -1 | -1 | 4 | 4 |
+| ss_ext_wholesale_cost | FLOAT | -1 | -1 | 4 | 4 |
+| ss_ext_list_price | FLOAT | -1 | -1 | 4 | 4 |
+| ss_ext_tax | FLOAT | -1 | -1 | 4 | 4 |
+| ss_coupon_amt | FLOAT | -1 | -1 | 4 | 4 |
+| ss_net_paid | FLOAT | -1 | -1 | 4 | 4 |
+| ss_net_paid_inc_tax | FLOAT | -1 | -1 | 4 | 4 |
+| ss_net_profit | FLOAT | -1 | -1 | 4 | 4 |
++-----------------------+-------+------------------+--------+----------+----------+
+</codeblock>
+
+ <p>
+ The following examples show the output of the <codeph>SHOW COLUMN STATS</codeph> statement for some tables,
+ after the <codeph>COMPUTE STATS</codeph> statement is run. Now most of the <codeph>-1</codeph> values are
+ changed to reflect the actual table data. The <codeph>#Nulls</codeph> column remains <codeph>-1</codeph>
+ because Impala does not use the number of <codeph>NULL</codeph> values to influence query planning.
+ </p>
+
+<codeblock>compute stats customer;
++------------------------------------------+
+| summary |
++------------------------------------------+
+| Updated 1 partition(s) and 18 column(s). |
++------------------------------------------+
+
+compute stats store_sales;
++------------------------------------------+
+| summary |
++------------------------------------------+
+| Updated 1 partition(s) and 23 column(s). |
++------------------------------------------+
+
+show column stats customer;
++------------------------+--------+------------------+--------+----------+--------+
+| Column | Type | #Distinct Values | #Nulls | Max Size | Avg Size |
++------------------------+--------+------------------+--------+----------+--------+
+| c_customer_sk | INT | 139017 | -1 | 4 | 4 |
+| c_customer_id | STRING | 111904 | -1 | 16 | 16 |
+| c_current_cdemo_sk | INT | 95837 | -1 | 4 | 4 |
+| c_current_hdemo_sk | INT | 8097 | -1 | 4 | 4 |
+| c_current_addr_sk | INT | 57334 | -1 | 4 | 4 |
+| c_first_shipto_date_sk | INT | 4374 | -1 | 4 | 4 |
+| c_first_sales_date_sk | INT | 4409 | -1 | 4 | 4 |
+| c_salutation | STRING | 7 | -1 | 4 | 3.1308 |
+| c_first_name | STRING | 3887 | -1 | 11 | 5.6356 |
+| c_last_name | STRING | 4739 | -1 | 13 | 5.9106 |
+| c_preferred_cust_flag | STRING | 3 | -1 | 1 | 0.9656 |
+| c_birth_day | INT | 31 | -1 | 4 | 4 |
+| c_birth_month | INT | 12 | -1 | 4 | 4 |
+| c_birth_year | INT | 71 | -1 | 4 | 4 |
+| c_birth_country | STRING | 205 | -1 | 20 | 8.4001 |
+| c_login | STRING | 1 | -1 | 0 | 0 |
+| c_email_address | STRING | 94492 | -1 | 46 | 26.485 |
+| c_last_review_date | STRING | 349 | -1 | 7 | 6.7561 |
++------------------------+--------+------------------+--------+----------+--------+
+
+show column stats store_sales;
++-----------------------+-------+------------------+--------+----------+----------+
+| Column | Type | #Distinct Values | #Nulls | Max Size | Avg Size |
++-----------------------+-------+------------------+--------+----------+----------+
+| ss_sold_date_sk | INT | 4395 | -1 | 4 | 4 |
+| ss_sold_time_sk | INT | 63617 | -1 | 4 | 4 |
+| ss_item_sk | INT | 19463 | -1 | 4 | 4 |
+| ss_customer_sk | INT | 122720 | -1 | 4 | 4 |
+| ss_cdemo_sk | INT | 242982 | -1 | 4 | 4 |
+| ss_hdemo_sk | INT | 8097 | -1 | 4 | 4 |
+| ss_addr_sk | INT | 70770 | -1 | 4 | 4 |
+| ss_store_sk | INT | 6 | -1 | 4 | 4 |
+| ss_promo_sk | INT | 355 | -1 | 4 | 4 |
+| ss_ticket_number | INT | 304098 | -1 | 4 | 4 |
+| ss_quantity | INT | 105 | -1 | 4 | 4 |
+| ss_wholesale_cost | FLOAT | 9600 | -1 | 4 | 4 |
+| ss_list_price | FLOAT | 22191 | -1 | 4 | 4 |
+| ss_sales_price | FLOAT | 20693 | -1 | 4 | 4 |
+| ss_ext_discount_amt | FLOAT | 228141 | -1 | 4 | 4 |
+| ss_ext_sales_price | FLOAT | 433550 | -1 | 4 | 4 |
+| ss_ext_wholesale_cost | FLOAT | 406291 | -1 | 4 | 4 |
+| ss_ext_list_price | FLOAT | 574871 | -1 | 4 | 4 |
+| ss_ext_tax | FLOAT | 91806 | -1 | 4 | 4 |
+| ss_coupon_amt | FLOAT | 228141 | -1 | 4 | 4 |
+| ss_net_paid | FLOAT | 493107 | -1 | 4 | 4 |
+| ss_net_paid_inc_tax | FLOAT | 653523 | -1 | 4 | 4 |
+| ss_net_profit | FLOAT | 611934 | -1 | 4 | 4 |
++-----------------------+-------+------------------+--------+----------+----------+
+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/permissions_blurb"/>
+ <p rev="CDH-19187">
+ The user ID that the <cmdname>impalad</cmdname> daemon runs under,
+ typically the <codeph>impala</codeph> user, must have read and execute
+ permissions for all directories that are part of the table.
+ (A table could span multiple different HDFS directories if it is partitioned.
+ The directories could be widely scattered because a partition can reside
+ in an arbitrary HDFS directory based on its <codeph>LOCATION</codeph> attribute.)
+ The Impala user must also have execute
+ permission for the database directory, and any parent directories of the database directory in HDFS.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ <xref href="impala_compute_stats.xml#compute_stats"/>, <xref href="impala_show.xml#show_table_stats"/>
+ </p>
+
+ <p>
+ See <xref href="impala_perf_stats.xml#perf_stats"/> for usage information and examples.
+ </p>
+ </conbody>
+ </concept>
+
+ <concept rev="1.4.0" id="show_partitions">
+
+ <title>SHOW PARTITIONS Statement</title>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Schemas"/>
+ <!-- At some point, need to figure out categories related to partitioning. (Partitioned Tables etc.) -->
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <codeph>SHOW PARTITIONS</codeph> displays information about each partition for a partitioned table. (The
+ output is the same as the <codeph>SHOW TABLE STATS</codeph> statement, but <codeph>SHOW PARTITIONS</codeph>
+ only works on a partitioned table.) Because it displays table statistics for all partitions, the output is
+ more informative if you have run the <codeph>COMPUTE STATS</codeph> statement after creating all the
+ partitions. See <xref href="impala_compute_stats.xml#compute_stats"/> for details. For example, on a
+ <codeph>CENSUS</codeph> table partitioned on the <codeph>YEAR</codeph> column:
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/security_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/show_security"/>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+<codeblock rev="1.4.0">[localhost:21000] > show partitions census;
++-------+-------+--------+------+---------+
+| year | #Rows | #Files | Size | Format |
++-------+-------+--------+------+---------+
+| 2000 | -1 | 0 | 0B | TEXT |
+| 2004 | -1 | 0 | 0B | TEXT |
+| 2008 | -1 | 0 | 0B | TEXT |
+| 2010 | -1 | 0 | 0B | TEXT |
+| 2011 | 4 | 1 | 22B | TEXT |
+| 2012 | 4 | 1 | 22B | TEXT |
+| 2013 | 1 | 1 | 231B | PARQUET |
+| Total | 9 | 3 | 275B | |
++-------+-------+--------+------+---------+
+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/permissions_blurb"/>
+ <p rev="CDH-19187">
+ The user ID that the <cmdname>impalad</cmdname> daemon runs under,
+ typically the <codeph>impala</codeph> user, must have read and execute
+ permissions for all directories that are part of the table.
+ (A table could span multiple different HDFS directories if it is partitioned.
+ The directories could be widely scattered because a partition can reside
+ in an arbitrary HDFS directory based on its <codeph>LOCATION</codeph> attribute.)
+ The Impala user must also have execute
+ permission for the database directory, and any parent directories of the database directory in HDFS.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ See <xref href="impala_perf_stats.xml#perf_stats"/> for usage information and examples.
+ </p>
+
+ <p>
+ <xref href="impala_show.xml#show_table_stats"/>, <xref href="impala_partitioning.xml#partitioning"/>
+ </p>
+ </conbody>
+ </concept>
+
+ <concept rev="1.3.0" id="show_functions">
+
+ <title>SHOW FUNCTIONS Statement</title>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala Functions"/>
+ <data name="Category" value="UDFs"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ By default, <codeph>SHOW FUNCTIONS</codeph> displays user-defined functions (UDFs) and <codeph>SHOW
+ AGGREGATE FUNCTIONS</codeph> displays user-defined aggregate functions (UDAFs) associated with a particular
+ database. The output from <codeph>SHOW FUNCTIONS</codeph> includes the argument signature of each function.
+ You specify this argument signature as part of the <codeph>DROP FUNCTION</codeph> statement. You might have
+ several UDFs with the same name, each accepting different argument data types.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/security_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/show_security"/>
+
+ <p conref="../shared/impala_common.xml#common/permissions_blurb_no"/>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+ <p>
+ To display Impala built-in functions, specify the special database name <codeph>_impala_builtins</codeph>:
+ </p>
+
+<codeblock>show functions in _impala_builtins;
++----------------+----------------------------------------+
+| return type | signature |
++----------------+----------------------------------------+
+| BOOLEAN | ifnull(BOOLEAN, BOOLEAN) |
+| TINYINT | ifnull(TINYINT, TINYINT) |
+| SMALLINT | ifnull(SMALLINT, SMALLINT) |
+| INT | ifnull(INT, INT) |
+...
+
+show functions in _impala_builtins like '*week*';
++-------------+------------------------------+
+| return type | signature |
++-------------+------------------------------+
+| INT | weekofyear(TIMESTAMP) |
+| TIMESTAMP | weeks_add(TIMESTAMP, INT) |
+| TIMESTAMP | weeks_add(TIMESTAMP, BIGINT) |
+| TIMESTAMP | weeks_sub(TIMESTAMP, INT) |
+| TIMESTAMP | weeks_sub(TIMESTAMP, BIGINT) |
+| INT | dayofweek(TIMESTAMP) |
++-------------+------------------------------+
+</codeblock>
+
+ <p>
+ To search for functions that use a particular data type, specify a case-sensitive data type name in all
+ capitals:
+ </p>
+
+<codeblock>show functions in _impala_builtins like '*BIGINT*';
++----------------------------------------+
+| name |
++----------------------------------------+
+| adddate(TIMESTAMP, BIGINT) |
+| bin(BIGINT) |
+| coalesce(BIGINT...) |
+...</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ <xref href="impala_functions_overview.xml#functions"/>, <xref href="impala_functions.xml#builtins"/>,
+ <xref href="impala_udf.xml#udfs"/>,
+ <xref href="impala_show.xml#show_databases"/>,
+ <xref href="impala_show.xml#show_tables"/>
+ </p>
+ </conbody>
+ </concept>
+
+ <concept rev="someday" audience="Cloudera" id="show_data_sources">
+
+ <title>SHOW DATA SOURCES Statement (CDH x.y and later only)</title>
+
+ <conbody>
+
+ <p>
+ <codeph>SHOW DATA SOURCES</codeph> lists the external data sources defined by the <codeph>CREATE DATA
+ SOURCE</codeph> statement. To show only those names matching a pattern, use the <codeph>LIKE</codeph>
+ clause with asterisks for wildcards, for example <codeph>SHOW DATA SOURCES LIKE '*sql*'</codeph>. These
+ data sources are global, not associated with a specific Impala database, so there is no <codeph>IN</codeph>
+ clause as in most other kinds of objects.
+ </p>
+
+<!--
+<p conref="../shared/impala_common.xml#common/example_blurb"/>
+<codeblock>To do: construct example for SHOW DATA SOURCES when that statement is externalized</codeblock>
+-->
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p></p>
+ </conbody>
+ </concept>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_smallint.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_smallint.xml b/docs/topics/impala_smallint.xml
new file mode 100644
index 0000000..3aae9ad
--- /dev/null
+++ b/docs/topics/impala_smallint.xml
@@ -0,0 +1,101 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="smallint">
+
+ <title>SMALLINT Data Type</title>
+ <titlealts><navtitle>SMALLINT</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="Impala Data Types"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Data Analysts"/>
+ <data name="Category" value="Developers"/>
+ <data name="Category" value="Schemas"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ A 2-byte integer data type used in <codeph>CREATE TABLE</codeph> and <codeph>ALTER TABLE</codeph> statements.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+ <p>
+ In the column definition of a <codeph>CREATE TABLE</codeph> statement:
+ </p>
+
+<codeblock><varname>column_name</varname> SMALLINT</codeblock>
+
+ <p>
+ <b>Range:</b> -32768 .. 32767. There is no <codeph>UNSIGNED</codeph> subtype.
+ </p>
+
+ <p>
+ <b>Conversions:</b> Impala automatically converts to a larger integer type (<codeph>INT</codeph> or
+ <codeph>BIGINT</codeph>) or a floating-point type (<codeph>FLOAT</codeph> or <codeph>DOUBLE</codeph>).
+ Use <codeph>CAST()</codeph> to convert to <codeph>TINYINT</codeph>, <codeph>STRING</codeph>,
+ or <codeph>TIMESTAMP</codeph>.
+ <ph conref="../shared/impala_common.xml#common/cast_int_to_timestamp"/>
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+
+ <p>
+ For a convenient and automated way to check the bounds of the <codeph>SMALLINT</codeph> type, call the
+ functions <codeph>MIN_SMALLINT()</codeph> and <codeph>MAX_SMALLINT()</codeph>.
+ </p>
+
+ <p>
+ If an integer value is too large to be represented as a <codeph>SMALLINT</codeph>, use an
+ <codeph>INT</codeph> instead.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/null_bad_numeric_cast"/>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+<codeblock>CREATE TABLE t1 (x SMALLINT);
+SELECT CAST(1000 AS SMALLINT);
+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/parquet_blurb"/>
+
+<!-- Duplicated under TINYINT and SMALLINT. Turn into a conref in both places. -->
+
+ <p rev="1.4.0">
+ Physically, Parquet files represent <codeph>TINYINT</codeph> and <codeph>SMALLINT</codeph> values as 32-bit
+ integers. Although Impala rejects attempts to insert out-of-range values into such columns, if you create a
+ new table with the <codeph>CREATE TABLE ... LIKE PARQUET</codeph> syntax, any <codeph>TINYINT</codeph> or
+ <codeph>SMALLINT</codeph> columns in the original table turn into <codeph>INT</codeph> columns in the new
+ table.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/partitioning_good"/>
+
+ <p conref="../shared/impala_common.xml#common/hbase_ok"/>
+
+ <p conref="../shared/impala_common.xml#common/text_bulky"/>
+
+<!-- <p conref="/Content/impala_common_xi44078.xml#common/compatibility_blurb"/> -->
+
+ <p conref="../shared/impala_common.xml#common/internals_2_bytes"/>
+
+ <p conref="../shared/impala_common.xml#common/added_forever"/>
+
+ <p conref="../shared/impala_common.xml#common/column_stats_constant"/>
+
+<!-- <p conref="/Content/impala_common_xi44078.xml#common/restrictions_blurb"/> -->
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ <xref href="impala_literals.xml#numeric_literals"/>, <xref href="impala_tinyint.xml#tinyint"/>,
+ <xref href="impala_smallint.xml#smallint"/>, <xref href="impala_int.xml#int"/>,
+ <xref href="impala_bigint.xml#bigint"/>, <xref href="impala_decimal.xml#decimal"/>,
+ <xref href="impala_math_functions.xml#math_functions"/>
+ </p>
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_stddev.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_stddev.xml b/docs/topics/impala_stddev.xml
new file mode 100644
index 0000000..0cdff45
--- /dev/null
+++ b/docs/topics/impala_stddev.xml
@@ -0,0 +1,116 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept rev="1.4" id="stddev">
+
+ <title>STDDEV, STDDEV_SAMP, STDDEV_POP Functions</title>
+ <titlealts><navtitle>STDDEV, STDDEV_SAMP, STDDEV_POP</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Impala Functions"/>
+ <data name="Category" value="Aggregate Functions"/>
+ <data name="Category" value="Querying"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">stddev() function</indexterm>
+ <indexterm audience="Cloudera">stddev_samp() function</indexterm>
+ <indexterm audience="Cloudera">stddev_pop() function</indexterm>
+ An aggregate function that returns the
+ <xref href="http://en.wikipedia.org/wiki/Standard_deviation" scope="external" format="html">standard
+ deviation</xref> of a set of numbers.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock>{ STDDEV | STDDEV_SAMP | STDDEV_POP } ([DISTINCT | ALL] <varname>expression</varname>)</codeblock>
+
+ <p>
+ This function works with any numeric data type.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/former_odd_return_type_string"/>
+
+ <p>
+ This function is typically used in mathematical formulas related to probability distributions.
+ </p>
+
+ <p>
+ The <codeph>STDDEV_POP()</codeph> and <codeph>STDDEV_SAMP()</codeph> functions compute the population
+ standard deviation and sample standard deviation, respectively, of the input values.
+ (<codeph>STDDEV()</codeph> is an alias for <codeph>STDDEV_SAMP()</codeph>.) Both functions evaluate all input
+ rows matched by the query. The difference is that <codeph>STDDEV_SAMP()</codeph> is scaled by
+ <codeph>1/(N-1)</codeph> while <codeph>STDDEV_POP()</codeph> is scaled by <codeph>1/N</codeph>.
+ </p>
+
+ <p>
+ If no input rows match the query, the result of any of these functions is <codeph>NULL</codeph>. If a single
+ input row matches the query, the result of any of these functions is <codeph>"0.0"</codeph>.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+ <p>
+ This example demonstrates how <codeph>STDDEV()</codeph> and <codeph>STDDEV_SAMP()</codeph> return the same
+ result, while <codeph>STDDEV_POP()</codeph> uses a slightly different calculation to reflect that the input
+ data represents the entire <q>population</q> rather than a sample from a larger one.
+ </p>
+
+<codeblock>[localhost:21000] > select stddev(score) from test_scores;
++---------------+
+| stddev(score) |
++---------------+
+| 28.5 |
++---------------+
+[localhost:21000] > select stddev_samp(score) from test_scores;
++--------------------+
+| stddev_samp(score) |
++--------------------+
+| 28.5 |
++--------------------+
+[localhost:21000] > select stddev_pop(score) from test_scores;
++-------------------+
+| stddev_pop(score) |
++-------------------+
+| 28.4858 |
++-------------------+
+</codeblock>
+
+ <p>
+ This example demonstrates that, because the return value of these aggregate functions is a
+ <codeph>STRING</codeph>, you must currently convert the result with <codeph>CAST</codeph>.
+ </p>
+
+<codeblock>[localhost:21000] > create table score_stats as select cast(stddev(score) as decimal(7,4)) `standard_deviation`, cast(variance(score) as decimal(7,4)) `variance` from test_scores;
++-------------------+
+| summary |
++-------------------+
+| Inserted 1 row(s) |
++-------------------+
+[localhost:21000] > desc score_stats;
++--------------------+--------------+---------+
+| name | type | comment |
++--------------------+--------------+---------+
+| standard_deviation | decimal(7,4) | |
+| variance | decimal(7,4) | |
++--------------------+--------------+---------+
+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/restrictions_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/analytic_not_allowed_caveat"/>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ The <codeph>STDDEV()</codeph>, <codeph>STDDEV_POP()</codeph>, and <codeph>STDDEV_SAMP()</codeph> functions
+ compute the standard deviation (square root of the variance) based on the results of
+ <codeph>VARIANCE()</codeph>, <codeph>VARIANCE_POP()</codeph>, and <codeph>VARIANCE_SAMP()</codeph>
+ respectively. See <xref href="impala_variance.xml#variance"/> for details about the variance property.
+ </p>
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_string.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_string.xml b/docs/topics/impala_string.xml
new file mode 100644
index 0000000..9ad77c3
--- /dev/null
+++ b/docs/topics/impala_string.xml
@@ -0,0 +1,161 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="string">
+
+ <title>STRING Data Type</title>
+ <titlealts><navtitle>STRING</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="Impala Data Types"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Data Analysts"/>
+ <data name="Category" value="Developers"/>
+ <data name="Category" value="Schemas"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ A data type used in <codeph>CREATE TABLE</codeph> and <codeph>ALTER TABLE</codeph> statements.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+ <p>
+ In the column definition of a <codeph>CREATE TABLE</codeph> statement:
+ </p>
+
+<codeblock><varname>column_name</varname> STRING</codeblock>
+
+ <p>
+ <b>Length:</b> Maximum of 32,767 bytes. Do not use any length constraint when declaring
+ <codeph>STRING</codeph> columns, as you might be familiar with from <codeph>VARCHAR</codeph>,
+ <codeph>CHAR</codeph>, or similar column types from relational database systems. <ph rev="2.0.0">If you do
+ need to manipulate string values with precise or maximum lengths, in Impala 2.0 and higher you can declare
+ columns as <codeph>VARCHAR(<varname>max_length</varname>)</codeph> or
+ <codeph>CHAR(<varname>length</varname>)</codeph>, but for best performance use <codeph>STRING</codeph>
+ where practical.</ph>
+ </p>
+
+ <p>
+ <b>Character sets:</b> For full support in all Impala subsystems, restrict string values to the ASCII
+ character set. UTF-8 character data can be stored in Impala and retrieved through queries, but UTF-8 strings
+ containing non-ASCII characters are not guaranteed to work properly with string manipulation functions,
+ comparison operators, or the <codeph>ORDER BY</codeph> clause. For any national language aspects such as
+ collation order or interpreting extended ASCII variants such as ISO-8859-1 or ISO-8859-2 encodings, Impala
+ does not include such metadata with the table definition. If you need to sort, manipulate, or display data
+ depending on those national language characteristics of string data, use logic on the application side.
+ </p>
+
+ <p>
+ <b>Conversions:</b>
+ </p>
+
+ <ul>
+ <li>
+ <p>
+ Impala does not automatically convert <codeph>STRING</codeph> to any numeric type. Impala does
+ automatically convert <codeph>STRING</codeph> to <codeph>TIMESTAMP</codeph> if the value matches one of
+ the accepted <codeph>TIMESTAMP</codeph> formats; see <xref href="impala_timestamp.xml#timestamp"/> for
+ details.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ You can use <codeph>CAST()</codeph> to convert <codeph>STRING</codeph> values to
+ <codeph>TINYINT</codeph>, <codeph>SMALLINT</codeph>, <codeph>INT</codeph>, <codeph>BIGINT</codeph>,
+ <codeph>FLOAT</codeph>, <codeph>DOUBLE</codeph>, or <codeph>TIMESTAMP</codeph>.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ You cannot directly cast a <codeph>STRING</codeph> value to <codeph>BOOLEAN</codeph>. You can use a
+ <codeph>CASE</codeph> expression to evaluate string values such as <codeph>'T'</codeph>,
+ <codeph>'true'</codeph>, and so on and return Boolean <codeph>true</codeph> and <codeph>false</codeph>
+ values as appropriate.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ You can cast a <codeph>BOOLEAN</codeph> value to <codeph>STRING</codeph>, returning <codeph>'1'</codeph>
+ for <codeph>true</codeph> values and <codeph>'0'</codeph> for <codeph>false</codeph> values.
+ </p>
+ </li>
+ </ul>
+
+ <p conref="../shared/impala_common.xml#common/partitioning_blurb"/>
+
+ <p>
+ Although it might be convenient to use <codeph>STRING</codeph> columns for partition keys, even when those
+ columns contain numbers, for performance and scalability it is much better to use numeric columns as
+ partition keys whenever practical. Although the underlying HDFS directory name might be the same in either
+ case, the in-memory storage for the partition key columns is more compact, and computations are faster, if
+ partition key columns such as <codeph>YEAR</codeph>, <codeph>MONTH</codeph>, <codeph>DAY</codeph> and so on
+ are declared as <codeph>INT</codeph>, <codeph>SMALLINT</codeph>, and so on.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/zero_length_strings"/>
+
+<!-- <p conref="/Content/impala_common_xi44078.xml#common/hbase_blurb"/> -->
+
+<!-- <p conref="/Content/impala_common_xi44078.xml#common/parquet_blurb"/> -->
+
+ <p conref="../shared/impala_common.xml#common/text_bulky"/>
+
+<!-- <p conref="/Content/impala_common_xi44078.xml#common/compatibility_blurb"/> -->
+
+<!-- <p conref="/Content/impala_common_xi44078.xml#common/internals_blurb"/> -->
+
+<!-- <p conref="/Content/impala_common_xi44078.xml#common/added_in_20"/> -->
+
+ <p conref="../shared/impala_common.xml#common/column_stats_variable"/>
+
+<!-- <p conref="/Content/impala_common_xi44078.xml#common/restrictions_blurb"/> -->
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+ <p>
+ The following examples demonstrate double-quoted and single-quoted string literals, and required escaping for
+ quotation marks within string literals:
+ </p>
+
+<codeblock>SELECT 'I am a single-quoted string';
+SELECT "I am a double-quoted string";
+SELECT 'I\'m a single-quoted string with an apostrophe';
+SELECT "I\'m a double-quoted string with an apostrophe";
+SELECT 'I am a "short" single-quoted string containing quotes';
+SELECT "I am a \"short\" double-quoted string containing quotes";
+</codeblock>
+
+ <p>
+ The following examples demonstrate calls to string manipulation functions to concatenate strings, convert
+ numbers to strings, or pull out substrings:
+ </p>
+
+<codeblock>SELECT CONCAT("Once upon a time, there were ", CAST(3 AS STRING), ' little pigs.');
+SELECT SUBSTR("hello world",7,5);
+</codeblock>
+
+ <p>
+ The following examples show how to perform operations on <codeph>STRING</codeph> columns within a table:
+ </p>
+
+<codeblock>CREATE TABLE t1 (s1 STRING, s2 STRING);
+INSERT INTO t1 VALUES ("hello", 'world'), (CAST(7 AS STRING), "wonders");
+SELECT s1, s2, length(s1) FROM t1 WHERE s2 LIKE 'w%';
+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ <xref href="impala_literals.xml#string_literals"/>, <xref href="impala_char.xml#char"/>,
+ <xref href="impala_varchar.xml#varchar"/>, <xref href="impala_string_functions.xml#string_functions"/>,
+ <xref href="impala_datetime_functions.xml#datetime_functions"/>
+ </p>
+ </conbody>
+</concept>
[11/22] incubator-impala git commit: First try at porting over the
source files necessary for the Impala SQL Reference.
Posted by jr...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_functions.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_functions.xml b/docs/topics/impala_functions.xml
new file mode 100644
index 0000000..527744b
--- /dev/null
+++ b/docs/topics/impala_functions.xml
@@ -0,0 +1,162 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="builtins">
+
+ <title id="title_functions">Impala Built-In Functions</title>
+ <titlealts><navtitle>Built-In Functions</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="Impala Functions"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Querying"/>
+ <data name="Category" value="Data Analysts"/>
+ <data name="Category" value="Developers"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <draft-comment translate="no">
+Opportunity to conref some material between here and the "Functions" topic under "Schema Objects".
+</draft-comment>
+
+ <p>
+ Impala supports several categories of built-in functions. These functions let you perform mathematical
+ calculations, string manipulation, date calculations, and other kinds of data transformations directly in
+ <codeph>SELECT</codeph> statements. The built-in functions let a SQL query return results with all
+ formatting, calculating, and type conversions applied, rather than performing time-consuming postprocessing
+ in another application. By applying function calls where practical, you can make a SQL query that is as
+ convenient as an expression in a procedural programming language or a formula in a spreadsheet.
+ </p>
+
+ <p>
+ The categories of functions supported by Impala are:
+ </p>
+
+ <ul>
+ <li>
+ <xref href="impala_math_functions.xml#math_functions"/>
+ </li>
+
+ <li>
+ <xref href="impala_conversion_functions.xml#conversion_functions"/>
+ </li>
+
+ <li>
+ <xref href="impala_datetime_functions.xml#datetime_functions"/>
+ </li>
+
+ <li>
+ <xref href="impala_conditional_functions.xml#conditional_functions"/>
+ </li>
+
+ <li>
+ <xref href="impala_string_functions.xml#string_functions"/>
+ </li>
+
+ <li>
+ Aggregation functions, explained in <xref href="impala_aggregate_functions.xml#aggregate_functions"/>.
+ </li>
+ </ul>
+
+ <p>
+ You call any of these functions through the <codeph>SELECT</codeph> statement. For most functions, you can
+ omit the <codeph>FROM</codeph> clause and supply literal values for any required arguments:
+ </p>
+
+<codeblock>select abs(-1);
++---------+
+| abs(-1) |
++---------+
+| 1 |
++---------+
+
+select concat('The rain ', 'in Spain');
++---------------------------------+
+| concat('the rain ', 'in spain') |
++---------------------------------+
+| The rain in Spain |
++---------------------------------+
+
+select power(2,5);
++-------------+
+| power(2, 5) |
++-------------+
+| 32 |
++-------------+
+</codeblock>
+
+ <p>
+ When you use a <codeph>FROM</codeph> clause and specify a column name as a function argument, the function is
+ applied for each item in the result set:
+ </p>
+
+<!-- TK: make real output for these; change the queries if necessary to use tables I already have. -->
+
+<codeblock>select concat('Country = ',country_code) from all_countries where population > 100000000;
+select round(price) as dollar_value from product_catalog where price between 0.0 and 100.0;
+</codeblock>
+
+ <p>
+ Typically, if any argument to a built-in function is <codeph>NULL</codeph>, the result value is also
+ <codeph>NULL</codeph>:
+ </p>
+
+<codeblock>select cos(null);
++-----------+
+| cos(null) |
++-----------+
+| NULL |
++-----------+
+
+select power(2,null);
++----------------+
+| power(2, null) |
++----------------+
+| NULL |
++----------------+
+
+select concat('a',null,'b');
++------------------------+
+| concat('a', null, 'b') |
++------------------------+
+| NULL |
++------------------------+
+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/aggr1"/>
+
+<codeblock conref="../shared/impala_common.xml#common/aggr2"/>
+
+ <p conref="../shared/impala_common.xml#common/aggr3"/>
+
+ <p>
+ Aggregate functions are a special category with different rules. These functions calculate a return value
+ across all the items in a result set, so they do require a <codeph>FROM</codeph> clause in the query:
+ </p>
+
+<!-- TK: make real output for these; change the queries if necessary to use tables I already have. -->
+
+<codeblock>select count(product_id) from product_catalog;
+select max(height), avg(height) from census_data where age > 20;
+</codeblock>
+
+ <p>
+ Aggregate functions also ignore <codeph>NULL</codeph> values rather than returning a <codeph>NULL</codeph>
+ result. For example, if some rows have <codeph>NULL</codeph> for a particular column, those rows are ignored
+ when computing the <codeph>AVG()</codeph> for that column. Likewise, specifying <codeph>COUNT(col_name)</codeph> in a query
+ counts only those rows where <codeph>col_name</codeph> contains a non-<codeph>NULL</codeph> value.
+ </p>
+
+ <p rev="2.0.0">
+ Analytic functions are a variation on aggregate functions. Instead of returning a single value, or an
+ identical value for each group of rows, they can compute values that vary based on a <q>window</q> consisting
+ of other rows around them in the result set.
+ </p>
+
+ <p outputclass="toc"/>
+
+ </conbody>
+
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_functions_overview.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_functions_overview.xml b/docs/topics/impala_functions_overview.xml
new file mode 100644
index 0000000..26a4d35
--- /dev/null
+++ b/docs/topics/impala_functions_overview.xml
@@ -0,0 +1,116 @@
+<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="functions">
+
+ <title>Overview of Impala Functions</title>
+ <titlealts><navtitle>Functions</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="Impala Functions"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Data Analysts"/>
+ <data name="Category" value="Developers"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ Functions let you apply arithmetic, string, or other computations and transformations to Impala data. You
+ typically use them in <codeph>SELECT</codeph> lists and <codeph>WHERE</codeph> clauses to filter and format
+ query results so that the result set is exactly what you want, with no further processing needed on the
+ application side.
+ </p>
+
+ <p>
+ Scalar functions return a single result for each input row. See <xref href="impala_functions.xml#builtins"/>.
+ </p>
+
+<codeblock>[localhost:21000] > select name, population from country where continent = 'North America' order by population desc limit 4;
+[localhost:21000] > select upper(name), population from country where continent = 'North America' order by population desc limit 4;
++-------------+------------+
+| upper(name) | population |
++-------------+------------+
+| USA | 320000000 |
+| MEXICO | 122000000 |
+| CANADA | 25000000 |
+| GUATEMALA | 16000000 |
++-------------+------------+
+</codeblock>
+ <p>
+ Aggregate functions combine the results from multiple rows:
+ either a single result for the entire table, or a separate result for each group of rows.
+ Aggregate functions are frequently used in combination with <codeph>GROUP BY</codeph>
+ and <codeph>HAVING</codeph> clauses in the <codeph>SELECT</codeph> statement.
+ See <xref href="impala_aggregate_functions.xml#aggregate_functions"/>.
+ </p>
+
+<codeblock>[localhost:21000] > select continent, <b>sum(population)</b> as howmany from country <b>group by continent</b> order by howmany desc;
++---------------+------------+
+| continent | howmany |
++---------------+------------+
+| Asia | 4298723000 |
+| Africa | 1110635000 |
+| Europe | 742452000 |
+| North America | 565265000 |
+| South America | 406740000 |
+| Oceania | 38304000 |
++---------------+------------+
+</codeblock>
+
+ <p>
+ User-defined functions (UDFs) let you code your own logic. They can be either scalar or aggregate functions.
+ UDFs let you implement important business or scientific logic using high-performance code for Impala to automatically parallelize.
+ You can also use UDFs to implement convenience functions to simplify reporting or porting SQL from other database systems.
+ See <xref href="impala_udf.xml#udfs"/>.
+ </p>
+
+<codeblock>[localhost:21000] > select <b>rot13('Hello world!')</b> as 'Weak obfuscation';
++------------------+
+| weak obfuscation |
++------------------+
+| Uryyb jbeyq! |
++------------------+
+[localhost:21000] > select <b>likelihood_of_new_subatomic_particle(sensor1, sensor2, sensor3)</b> as probability
+ > from experimental_results group by experiment;
+</codeblock>
+
+ <p>
+ Each function is associated with a specific database. For example, if you issue a <codeph>USE somedb</codeph>
+ statement followed by <codeph>CREATE FUNCTION somefunc</codeph>, the new function is created in the
+ <codeph>somedb</codeph> database, and you could refer to it through the fully qualified name
+ <codeph>somedb.somefunc</codeph>. You could then issue another <codeph>USE</codeph> statement
+ and create a function with the same name in a different database.
+ </p>
+
+ <p>
+ Impala built-in functions are associated with a special database named <codeph>_impala_builtins</codeph>,
+ which lets you refer to them from any database without qualifying the name.
+ </p>
+
+<codeblock>[localhost:21000] > show databases;
++-------------------------+
+| name |
++-------------------------+
+| <b>_impala_builtins</b> |
+| analytic_functions |
+| avro_testing |
+| data_file_size |
+...
+[localhost:21000] > show functions in _impala_builtins like '*subs*';
++-------------+-----------------------------------+
+| return type | signature |
++-------------+-----------------------------------+
+| STRING | substr(STRING, BIGINT) |
+| STRING | substr(STRING, BIGINT, BIGINT) |
+| STRING | substring(STRING, BIGINT) |
+| STRING | substring(STRING, BIGINT, BIGINT) |
++-------------+-----------------------------------+
+</codeblock>
+
+ <p>
+ <b>Related statements:</b> <xref href="impala_create_function.xml#create_function"/>,
+ <xref href="impala_drop_function.xml#drop_function"/>
+ </p>
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_grant.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_grant.xml b/docs/topics/impala_grant.xml
new file mode 100644
index 0000000..6aad41e
--- /dev/null
+++ b/docs/topics/impala_grant.xml
@@ -0,0 +1,117 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept rev="2.0.0" id="grant">
+
+ <title>GRANT Statement (CDH 5.2 or higher only)</title>
+ <titlealts><navtitle>GRANT (CDH 5.2 or higher only)</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="DDL"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Sentry"/>
+ <data name="Category" value="Roles"/>
+ <!-- Consider whether to go deeper into categories like Security for the Sentry-related statements. -->
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">GRANT statement</indexterm>
+<!-- Copied from Sentry docs. Turn into conref. I did some rewording for clarity. -->
+ The <codeph>GRANT</codeph> statement grants roles or privileges on specified objects to groups. Only Sentry
+ administrative users can grant roles to a group.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock rev="2.3.0 collevelauth">GRANT ROLE <varname>role_name</varname> TO GROUP <varname>group_name</varname>
+
+GRANT <varname>privilege</varname> ON <varname>object_type</varname> <varname>object_name</varname>
+ TO [ROLE] <varname>role_name</varname>
+ [WITH GRANT OPTION]
+
+<ph rev="2.3.0">privilege ::= SELECT | SELECT(<varname>column_name</varname>) | INSERT | ALL</ph>
+object_type ::= TABLE | DATABASE | SERVER | URI
+</codeblock>
+
+ <p>
+ Typically, the object name is an identifier. For URIs, it is a string literal.
+ </p>
+
+<!-- Turn privilege info into a conref or series of conrefs. (In both GRANT and REVOKE.) -->
+
+ <p conref="../shared/impala_common.xml#common/privileges_blurb"/>
+
+ <p>
+<!-- To do: The wording here can be fluid, and it's reused in several statements. Turn into a conref. -->
+ Only administrative users (initially, a predefined set of users specified in the Sentry service configuration
+ file) can use this statement.
+ </p>
+
+ <p>
+ The <codeph>WITH GRANT OPTION</codeph> clause allows members of the specified role to issue
+ <codeph>GRANT</codeph> and <codeph>REVOKE</codeph> statements for those same privileges.
+<!-- Copied from Sentry docs. Turn into conref. I did some rewording for clarity. -->
+ Hence, if a role has the <codeph>ALL</codeph> privilege on a database and the <codeph>WITH GRANT
+ OPTION</codeph> set, users granted that role can execute <codeph>GRANT</codeph>/<codeph>REVOKE</codeph>
+ statements only for that database or child tables of the database. This means a user could revoke the
+ privileges of the user that provided them the <codeph>GRANT OPTION</codeph>.
+ </p>
+
+ <p>
+<!-- Copied from Sentry docs. Turn into conref. Except I changed Hive to Impala. -->
+ Impala does not currently support revoking only the <codeph>WITH GRANT OPTION</codeph> from a privilege
+ previously granted to a role. To remove the <codeph>WITH GRANT OPTION</codeph>, revoke the privilege and
+ grant it again without the <codeph>WITH GRANT OPTION</codeph> flag.
+ </p>
+
+ <p rev="2.3.0 collevelauth">
+ The ability to grant or revoke <codeph>SELECT</codeph> privilege on specific columns is available
+ in CDH 5.5 / Impala 2.3 and higher. See <xref href="sg_hive_sql.xml#concept_c2q_4qx_p4/col_level_auth_sentry"/>
+ for details.
+ </p>
+
+<!-- Turn compatibility info into a conref or series of conrefs. (In both GRANT and REVOKE.) -->
+
+<!-- If they diverge during development, consider the version here in GRANT the authoritative one. -->
+
+ <p conref="../shared/impala_common.xml#common/compatibility_blurb"/>
+
+ <p>
+ <ul>
+ <li>
+ The Impala <codeph>GRANT</codeph> and <codeph>REVOKE</codeph> statements are available in CDH 5.2 and
+ later.
+ </li>
+
+ <li>
+ In CDH 5.1 and later, Impala can make use of any roles and privileges specified by the
+ <codeph>GRANT</codeph> and <codeph>REVOKE</codeph> statements in Hive, when your system is configured to
+ use the Sentry service instead of the file-based policy mechanism.
+ </li>
+
+ <li>
+ The Impala <codeph>GRANT</codeph> and <codeph>REVOKE</codeph> statements for privileges do not require
+ the <codeph>ROLE</codeph> keyword to be repeated before each role name, unlike the equivalent Hive
+ statements.
+ </li>
+
+ <li conref="../shared/impala_common.xml#common/grant_revoke_single"/>
+ </ul>
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/cancel_blurb_no"/>
+
+ <p conref="../shared/impala_common.xml#common/permissions_blurb_no"/>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ <xref href="impala_authorization.xml#authorization"/>, <xref href="impala_revoke.xml#revoke"/>,
+ <xref href="impala_create_role.xml#create_role"/>, <xref href="impala_drop_role.xml#drop_role"/>,
+ <xref href="impala_show.xml#show"/>
+ </p>
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_group_by.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_group_by.xml b/docs/topics/impala_group_by.xml
new file mode 100644
index 0000000..10b7de4
--- /dev/null
+++ b/docs/topics/impala_group_by.xml
@@ -0,0 +1,137 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="group_by">
+
+ <title>GROUP BY Clause</title>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Querying"/>
+ <data name="Category" value="Aggregate Functions"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ Specify the <codeph>GROUP BY</codeph> clause in queries that use aggregation functions, such as
+ <codeph><xref href="impala_count.xml#count">COUNT()</xref></codeph>,
+ <codeph><xref href="impala_sum.xml#sum">SUM()</xref></codeph>,
+ <codeph><xref href="impala_avg.xml#avg">AVG()</xref></codeph>,
+ <codeph><xref href="impala_min.xml#min">MIN()</xref></codeph>, and
+ <codeph><xref href="impala_max.xml#max">MAX()</xref></codeph>. Specify in the
+ <codeph><xref href="impala_group_by.xml#group_by">GROUP BY</xref></codeph> clause the names of all the
+ columns that do not participate in the aggregation operation.
+ </p>
+
+ <!-- Good to show an example of cases where ORDER BY does and doesn't work with complex types. -->
+ <p conref="../shared/impala_common.xml#common/complex_types_blurb"/>
+
+ <p rev="2.3.0">
+ In CDH 5.5 / Impala 2.3 and higher, the complex data types <codeph>STRUCT</codeph>,
+ <codeph>ARRAY</codeph>, and <codeph>MAP</codeph> are available. These columns cannot
+ be referenced directly in the <codeph>GROUP BY</codeph> clause.
+ When you query a complex type column, you use join notation to <q>unpack</q> the elements
+ of the complex type, and within the join query you can include an <codeph>ORDER BY</codeph>
+ clause to control the order in the result set of the scalar elements from the complex type.
+ See <xref href="impala_complex_types.xml#complex_types"/> for details about Impala support for complex types.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/zero_length_strings"/>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+ <p>
+ For example, the following query finds the 5 items that sold the highest total quantity (using the
+ <codeph>SUM()</codeph> function), and also counts the number of sales transactions for those items (using the
+ <codeph>COUNT()</codeph> function). Because the column representing the item IDs is not used in any
+ aggregation functions, we specify that column in the <codeph>GROUP BY</codeph> clause.
+ </p>
+
+<codeblock>select
+ <b>ss_item_sk</b> as Item,
+ <b>count</b>(ss_item_sk) as Times_Purchased,
+ <b>sum</b>(ss_quantity) as Total_Quantity_Purchased
+from store_sales
+ <b>group by ss_item_sk</b>
+ order by sum(ss_quantity) desc
+ limit 5;
++-------+-----------------+--------------------------+
+| item | times_purchased | total_quantity_purchased |
++-------+-----------------+--------------------------+
+| 9325 | 372 | 19072 |
+| 4279 | 357 | 18501 |
+| 7507 | 371 | 18475 |
+| 5953 | 369 | 18451 |
+| 16753 | 375 | 18446 |
++-------+-----------------+--------------------------+</codeblock>
+
+ <p>
+ The <codeph>HAVING</codeph> clause lets you filter the results of aggregate functions, because you cannot
+ refer to those expressions in the <codeph>WHERE</codeph> clause. For example, to find the 5 lowest-selling
+ items that were included in at least 100 sales transactions, we could use this query:
+ </p>
+
+<codeblock>select
+ <b>ss_item_sk</b> as Item,
+ <b>count</b>(ss_item_sk) as Times_Purchased,
+ <b>sum</b>(ss_quantity) as Total_Quantity_Purchased
+from store_sales
+ <b>group by ss_item_sk</b>
+ <b>having times_purchased >= 100</b>
+ order by sum(ss_quantity)
+ limit 5;
++-------+-----------------+--------------------------+
+| item | times_purchased | total_quantity_purchased |
++-------+-----------------+--------------------------+
+| 13943 | 105 | 4087 |
+| 2992 | 101 | 4176 |
+| 4773 | 107 | 4204 |
+| 14350 | 103 | 4260 |
+| 11956 | 102 | 4275 |
++-------+-----------------+--------------------------+</codeblock>
+
+ <p>
+ When performing calculations involving scientific or financial data, remember that columns with type
+ <codeph>FLOAT</codeph> or <codeph>DOUBLE</codeph> are stored as true floating-point numbers, which cannot
+ precisely represent every possible fractional value. Thus, if you include a <codeph>FLOAT</codeph> or
+ <codeph>DOUBLE</codeph> column in a <codeph>GROUP BY</codeph> clause, the results might not precisely match
+ literal values in your query or from an original Text data file. Use rounding operations, the
+ <codeph>BETWEEN</codeph> operator, or another arithmetic technique to match floating-point values that are
+ <q>near</q> literal values you expect. For example, this query on the <codeph>ss_wholesale_cost</codeph>
+ column returns cost values that are close but not identical to the original figures that were entered as
+ decimal fractions.
+ </p>
+
+<codeblock>select ss_wholesale_cost, avg(ss_quantity * ss_sales_price) as avg_revenue_per_sale
+ from store_sales
+ group by ss_wholesale_cost
+ order by avg_revenue_per_sale desc
+ limit 5;
++-------------------+----------------------+
+| ss_wholesale_cost | avg_revenue_per_sale |
++-------------------+----------------------+
+| 96.94000244140625 | 4454.351539300434 |
+| 95.93000030517578 | 4423.119941283189 |
+| 98.37999725341797 | 4332.516490316291 |
+| 97.97000122070312 | 4330.480601655014 |
+| 98.52999877929688 | 4291.316953108634 |
++-------------------+----------------------+</codeblock>
+
+ <p>
+ Notice how wholesale cost values originally entered as decimal fractions such as <codeph>96.94</codeph> and
+ <codeph>98.38</codeph> are slightly larger or smaller in the result set, due to precision limitations in the
+ hardware floating-point types. The imprecise representation of <codeph>FLOAT</codeph> and
+ <codeph>DOUBLE</codeph> values is why financial data processing systems often store currency using data types
+ that are less space-efficient but avoid these types of rounding errors.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+ <p>
+ <xref href="impala_select.xml#select"/>,
+ <xref href="impala_aggregate_functions.xml#aggregate_functions"/>
+ </p>
+
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_group_concat.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_group_concat.xml b/docs/topics/impala_group_concat.xml
new file mode 100644
index 0000000..b2a7ff6
--- /dev/null
+++ b/docs/topics/impala_group_concat.xml
@@ -0,0 +1,133 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept rev="1.2" id="group_concat">
+
+ <title>GROUP_CONCAT Function</title>
+ <titlealts><navtitle>GROUP_CONCAT</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Impala Functions"/>
+ <data name="Category" value="Aggregate Functions"/>
+ <data name="Category" value="Querying"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">group_concat() function</indexterm>
+ An aggregate function that returns a single string representing the argument values concatenated together
+ from each row of the result set. If the optional separator string is specified, the separator is added between
+ each pair of concatenated values. The default separator is a comma followed by a space.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<!-- Might allow DISTINCT at some point. Check: does it allow ALL now? -->
+
+<codeblock>GROUP_CONCAT([ALL] <varname>expression</varname> [, <varname>separator</varname>])</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/concat_blurb"/>
+
+ <p>
+ By default, returns a single string covering the whole result set. To include other columns or values in the
+ result set, or to produce multiple concatenated strings for subsets of rows, include a <codeph>GROUP
+ BY</codeph> clause in the query.
+ </p>
+
+ <p>
+ <b>Return type:</b> <codeph>STRING</codeph>
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/restrictions_blurb"/>
+
+ <p>
+ You cannot apply the <codeph>DISTINCT</codeph> operator to the argument of this function.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/analytic_not_allowed_caveat"/>
+
+ <p>
+ Currently, Impala returns an error if the result value grows larger than 1 GiB.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+ <p>
+ The following examples illustrate various aspects of the <codeph>GROUP_CONCAT()</codeph> function.
+ </p>
+
+ <p>
+ You can call the function directly on a <codeph>STRING</codeph> column. To use it with a numeric column, cast
+ the value to <codeph>STRING</codeph>.
+ </p>
+
+<codeblock>[localhost:21000] > create table t1 (x int, s string);
+[localhost:21000] > insert into t1 values (1, "one"), (3, "three"), (2, "two"), (1, "one");
+[localhost:21000] > select group_concat(s) from t1;
++----------------------+
+| group_concat(s) |
++----------------------+
+| one, three, two, one |
++----------------------+
+[localhost:21000] > select group_concat(cast(x as string)) from t1;
++---------------------------------+
+| group_concat(cast(x as string)) |
++---------------------------------+
+| 1, 3, 2, 1 |
++---------------------------------+
+</codeblock>
+
+ <p>
+ The optional separator lets you format the result in flexible ways. The separator can be an arbitrary string
+ expression, not just a single character.
+ </p>
+
+<codeblock>[localhost:21000] > select group_concat(s,"|") from t1;
++----------------------+
+| group_concat(s, '|') |
++----------------------+
+| one|three|two|one |
++----------------------+
+[localhost:21000] > select group_concat(s,'---') from t1;
++-------------------------+
+| group_concat(s, '---') |
++-------------------------+
+| one---three---two---one |
++-------------------------+
+</codeblock>
+
+ <p>
+ The default separator is a comma followed by a space. To get a comma-delimited result without extra spaces,
+ specify a delimiter character that is only a comma.
+ </p>
+
+<codeblock>[localhost:21000] > select group_concat(s,',') from t1;
++----------------------+
+| group_concat(s, ',') |
++----------------------+
+| one,three,two,one |
++----------------------+
+</codeblock>
+
+ <p>
+ Including a <codeph>GROUP BY</codeph> clause lets you produce a different concatenated result for each group
+ in the result set. In this example, the only <codeph>X</codeph> value that occurs more than once is
+ <codeph>1</codeph>, so that is the only row in the result set where <codeph>GROUP_CONCAT()</codeph> returns a
+ delimited value. For groups containing a single value, <codeph>GROUP_CONCAT()</codeph> returns the original
+ value of its <codeph>STRING</codeph> argument.
+ </p>
+
+<codeblock>[localhost:21000] > select x, group_concat(s) from t1 group by x;
++---+-----------------+
+| x | group_concat(s) |
++---+-----------------+
+| 2 | two |
+| 3 | three |
+| 1 | one, one |
++---+-----------------+
+</codeblock>
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_having.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_having.xml b/docs/topics/impala_having.xml
new file mode 100644
index 0000000..064a4a8
--- /dev/null
+++ b/docs/topics/impala_having.xml
@@ -0,0 +1,42 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="having">
+
+ <title>HAVING Clause</title>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Querying"/>
+ <data name="Category" value="Aggregate Functions"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ Performs a filter operation on a <codeph>SELECT</codeph> query, by examining the results of aggregation
+ functions rather than testing each individual table row. Therefore, it is always used in conjunction with a
+ function such as <codeph><xref href="impala_count.xml#count">COUNT()</xref></codeph>,
+ <codeph><xref href="impala_sum.xml#sum">SUM()</xref></codeph>,
+ <codeph><xref href="impala_avg.xml#avg">AVG()</xref></codeph>,
+ <codeph><xref href="impala_min.xml#min">MIN()</xref></codeph>, or
+ <codeph><xref href="impala_max.xml#max">MAX()</xref></codeph>, and typically with the
+ <codeph><xref href="impala_group_by.xml#group_by">GROUP BY</xref></codeph> clause also.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/restrictions_blurb"/>
+
+ <p rev="2.0.0">
+ The filter expression in the <codeph>HAVING</codeph> clause cannot include a scalar subquery.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+ <p>
+ <xref href="impala_select.xml#select"/>,
+ <xref href="impala_group_by.xml#group_by"/>,
+ <xref href="impala_aggregate_functions.xml#aggregate_functions"/>
+ </p>
+
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_hbase_cache_blocks.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_hbase_cache_blocks.xml b/docs/topics/impala_hbase_cache_blocks.xml
new file mode 100644
index 0000000..d42cbf6
--- /dev/null
+++ b/docs/topics/impala_hbase_cache_blocks.xml
@@ -0,0 +1,34 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="hbase_cache_blocks">
+
+ <title>HBASE_CACHE_BLOCKS Query Option</title>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="HBase"/>
+ <data name="Category" value="Impala Query Options"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">HBASE_CACHE_BLOCKS query option</indexterm>
+ Setting this option is equivalent to calling the <codeph>setCacheBlocks</codeph> method of the class
+ <xref href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/Scan.html" scope="external" format="html">org.apache.hadoop.hbase.client.Scan</xref>,
+ in an HBase Java application. Helps to control the memory pressure on the HBase region server, in conjunction
+ with the <codeph>HBASE_CACHING</codeph> query option.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/type_boolean"/>
+ <p conref="../shared/impala_common.xml#common/default_false_0"/>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+ <p>
+ <xref href="impala_hbase.xml#impala_hbase"/>,
+ <xref href="impala_hbase_caching.xml#hbase_caching"/>
+ </p>
+
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_hbase_caching.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_hbase_caching.xml b/docs/topics/impala_hbase_caching.xml
new file mode 100644
index 0000000..e543792
--- /dev/null
+++ b/docs/topics/impala_hbase_caching.xml
@@ -0,0 +1,39 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="hbase_caching">
+
+ <title>HBASE_CACHING Query Option</title>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="HBase"/>
+ <data name="Category" value="Impala Query Options"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">HBASE_CACHING query option</indexterm>
+ Setting this option is equivalent to calling the <codeph>setCaching</codeph> method of the class
+ <xref href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/Scan.html" scope="external" format="html">org.apache.hadoop.hbase.client.Scan</xref>,
+ in an HBase Java application. Helps to control the memory pressure on the HBase region server, in conjunction
+ with the <codeph>HBASE_CACHE_BLOCKS</codeph> query option.
+ </p>
+
+ <p>
+ <b>Type:</b> <codeph>INT</codeph>
+ </p>
+
+ <p>
+ <b>Default:</b> 0
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+ <p>
+ <xref href="impala_hbase.xml#impala_hbase"/>,
+ <xref href="impala_hbase_cache_blocks.xml#hbase_cache_blocks"/>
+ </p>
+
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_hints.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_hints.xml b/docs/topics/impala_hints.xml
new file mode 100644
index 0000000..429fb19
--- /dev/null
+++ b/docs/topics/impala_hints.xml
@@ -0,0 +1,247 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="hints">
+
+ <title>Query Hints in Impala SELECT Statements</title>
+ <titlealts><navtitle>Hints</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Querying"/>
+ <data name="Category" value="Performance"/>
+ <data name="Category" value="Troubleshooting"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">hints</indexterm>
+ The Impala SQL dialect supports query hints, for fine-tuning the inner workings of queries. Specify hints as
+ a temporary workaround for expensive queries, where missing statistics or other factors cause inefficient
+ performance.
+ </p>
+
+ <p>
+ Hints are most often used for the most resource-intensive kinds of Impala queries:
+ </p>
+
+ <ul>
+ <li>
+ Join queries involving large tables, where intermediate result sets are transmitted across the network to
+ evaluate the join conditions.
+ </li>
+
+ <li>
+ Inserting into partitioned Parquet tables, where many memory buffers could be allocated on each host to
+ hold intermediate results for each partition.
+ </li>
+ </ul>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+ <p>
+ You can represent the hints as keywords surrounded by <codeph>[]</codeph> square brackets; include the
+ brackets in the text of the SQL statement.
+ </p>
+
+<codeblock>SELECT STRAIGHT_JOIN <varname>select_list</varname> FROM
+<varname>join_left_hand_table</varname>
+ JOIN [{BROADCAST|SHUFFLE}]
+<varname>join_right_hand_table</varname>
+<varname>remainder_of_query</varname>;
+
+INSERT <varname>insert_clauses</varname>
+ [{SHUFFLE|NOSHUFFLE}]
+ SELECT <varname>remainder_of_query</varname>;
+</codeblock>
+
+ <p rev="2.0.0">
+ In Impala 2.0 and higher, or CDH 5.2 and higher, you can also specify the hints inside comments that use
+ either the <codeph>/* */</codeph> or <codeph>--</codeph> notation. Specify a <codeph>+</codeph> symbol
+ immediately before the hint name.
+ </p>
+
+<codeblock rev="2.0.0">SELECT STRAIGHT_JOIN <varname>select_list</varname> FROM
+<varname>join_left_hand_table</varname>
+ JOIN /* +BROADCAST|SHUFFLE */
+<varname>join_right_hand_table</varname>
+<varname>remainder_of_query</varname>;
+
+SELECT <varname>select_list</varname> FROM
+<varname>join_left_hand_table</varname>
+ JOIN -- +BROADCAST|SHUFFLE
+<varname>join_right_hand_table</varname>
+<varname>remainder_of_query</varname>;
+
+INSERT <varname>insert_clauses</varname>
+ /* +SHUFFLE|NOSHUFFLE */
+ SELECT <varname>remainder_of_query</varname>;
+
+INSERT <varname>insert_clauses</varname>
+ -- +SHUFFLE|NOSHUFFLE
+ SELECT <varname>remainder_of_query</varname>;
+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+
+ <p>
+ With both forms of hint syntax, include the <codeph>STRAIGHT_JOIN</codeph>
+ keyword immediately after the <codeph>SELECT</codeph> keyword to prevent Impala from
+ reordering the tables in a way that makes the hint ineffective.
+ </p>
+
+ <p>
+ To reduce the need to use hints, run the <codeph>COMPUTE STATS</codeph> statement against all tables involved
+ in joins, or used as the source tables for <codeph>INSERT ... SELECT</codeph> operations where the
+ destination is a partitioned Parquet table. Do this operation after loading data or making substantial
+ changes to the data within each table. Having up-to-date statistics helps Impala choose more efficient query
+ plans without the need for hinting. See <xref href="impala_perf_stats.xml#perf_stats"/> for details and
+ examples.
+ </p>
+
+ <p>
+ To see which join strategy is used for a particular query, examine the <codeph>EXPLAIN</codeph> output for
+ that query. See <xref href="impala_explain_plan.xml#perf_explain"/> for details and examples.
+ </p>
+
+ <p>
+ <b>Hints for join queries:</b>
+ </p>
+
+ <p>
+ The <codeph>[BROADCAST]</codeph> and <codeph>[SHUFFLE]</codeph> hints control the execution strategy for join
+ queries. Specify one of the following constructs immediately after the <codeph>JOIN</codeph> keyword in a
+ query:
+ </p>
+
+ <ul>
+ <li>
+ <codeph>[SHUFFLE]</codeph> - Makes that join operation use the <q>partitioned</q> technique, which divides
+ up corresponding rows from both tables using a hashing algorithm, sending subsets of the rows to other
+ nodes for processing. (The keyword <codeph>SHUFFLE</codeph> is used to indicate a <q>partitioned join</q>,
+ because that type of join is not related to <q>partitioned tables</q>.) Since the alternative
+ <q>broadcast</q> join mechanism is the default when table and index statistics are unavailable, you might
+ use this hint for queries where broadcast joins are unsuitable; typically, partitioned joins are more
+ efficient for joins between large tables of similar size.
+ </li>
+
+ <li>
+ <codeph>[BROADCAST]</codeph> - Makes that join operation use the <q>broadcast</q> technique that sends the
+ entire contents of the right-hand table to all nodes involved in processing the join. This is the default
+ mode of operation when table and index statistics are unavailable, so you would typically only need it if
+ stale metadata caused Impala to mistakenly choose a partitioned join operation. Typically, broadcast joins
+ are more efficient in cases where one table is much smaller than the other. (Put the smaller table on the
+ right side of the <codeph>JOIN</codeph> operator.)
+ </li>
+ </ul>
+
+ <p>
+ <b>Hints for INSERT ... SELECT queries:</b>
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/insert_hints"/>
+
+ <p>
+ <b>Suggestions versus directives:</b>
+ </p>
+
+ <p>
+ In early Impala releases, hints were always obeyed and so acted more like directives. Once Impala gained join
+ order optimizations, sometimes join queries were automatically reordered in a way that made a hint
+ irrelevant. Therefore, the hints act more like suggestions in Impala 1.2.2 and higher.
+ </p>
+
+ <p>
+ To force Impala to follow the hinted execution mechanism for a join query, include the
+ <codeph>STRAIGHT_JOIN</codeph> keyword in the <codeph>SELECT</codeph> statement. See
+ <xref href="impala_perf_joins.xml#straight_join"/> for details. When you use this technique, Impala does not
+ reorder the joined tables at all, so you must be careful to arrange the join order to put the largest table
+ (or subquery result set) first, then the smallest, second smallest, third smallest, and so on. This ordering lets Impala do the
+ most I/O-intensive parts of the query using local reads on the data nodes, and then reduce the size of the
+ intermediate result set as much as possible as each subsequent table or subquery result set is joined.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/restrictions_blurb"/>
+
+ <p>
+ Queries that include subqueries in the <codeph>WHERE</codeph> clause can be rewritten internally as join
+ queries. Currently, you cannot apply hints to the joins produced by these types of queries.
+ </p>
+
+ <p>
+ Because hints can prevent queries from taking advantage of new metadata or improvements in query planning,
+ use them only when required to work around performance issues, and be prepared to remove them when they are
+ no longer required, such as after a new Impala release or bug fix.
+ </p>
+
+ <p>
+ In particular, the <codeph>[BROADCAST]</codeph> and <codeph>[SHUFFLE]</codeph> hints are expected to be
+ needed much less frequently in Impala 1.2.2 and higher, because the join order optimization feature in
+ combination with the <codeph>COMPUTE STATS</codeph> statement now automatically choose join order and join
+ mechanism without the need to rewrite the query and add hints. See
+ <xref href="impala_perf_joins.xml#perf_joins"/> for details.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/compatibility_blurb"/>
+
+ <p rev="2.0.0">
+ The hints embedded within <codeph>--</codeph> comments are compatible with Hive queries. The hints embedded
+ within <codeph>/* */</codeph> comments or <codeph>[ ]</codeph> square brackets are not recognized by or not
+ compatible with Hive. For example, Hive raises an error for Impala hints within <codeph>/* */</codeph>
+ comments because it does not recognize the Impala hint names.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/view_blurb"/>
+
+ <p rev="2.0.0">
+ If you use a hint in the query that defines a view, the hint is preserved when you query the view. Impala
+ internally rewrites all hints in views to use the <codeph>--</codeph> comment notation, so that Hive can
+ query such views without errors due to unrecognized hint names.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+ <p>
+ For example, this query joins a large customer table with a small lookup table of less than 100 rows. The
+ right-hand table can be broadcast efficiently to all nodes involved in the join. Thus, you would use the
+ <codeph>[broadcast]</codeph> hint to force a broadcast join strategy:
+ </p>
+
+<codeblock>select straight_join customer.address, state_lookup.state_name
+ from customer join <b>[broadcast]</b> state_lookup
+ on customer.state_id = state_lookup.state_id;</codeblock>
+
+ <p>
+ This query joins two large tables of unpredictable size. You might benchmark the query with both kinds of
+ hints and find that it is more efficient to transmit portions of each table to other nodes for processing.
+ Thus, you would use the <codeph>[shuffle]</codeph> hint to force a partitioned join strategy:
+ </p>
+
+<codeblock>select straight_join weather.wind_velocity, geospatial.altitude
+ from weather join <b>[shuffle]</b> geospatial
+ on weather.lat = geospatial.lat and weather.long = geospatial.long;</codeblock>
+
+ <p>
+ For joins involving three or more tables, the hint applies to the tables on either side of that specific
+ <codeph>JOIN</codeph> keyword. The <codeph>STRAIGHT_JOIN</codeph> keyword ensures that joins are processed
+ in a predictable order from left to right. For example, this query joins
+ <codeph>t1</codeph> and <codeph>t2</codeph> using a partitioned join, then joins that result set to
+ <codeph>t3</codeph> using a broadcast join:
+ </p>
+
+<codeblock>select straight_join t1.name, t2.id, t3.price
+ from t1 join <b>[shuffle]</b> t2 join <b>[broadcast]</b> t3
+ on t1.id = t2.id and t2.id = t3.id;</codeblock>
+
+ <draft-comment translate="no"> This is a good place to add more sample output showing before and after EXPLAIN plans. </draft-comment>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ For more background information about join queries, see <xref href="impala_joins.xml#joins"/>. For
+ performance considerations, see <xref href="impala_perf_joins.xml#perf_joins"/>.
+ </p>
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_identifiers.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_identifiers.xml b/docs/topics/impala_identifiers.xml
new file mode 100644
index 0000000..55477ed
--- /dev/null
+++ b/docs/topics/impala_identifiers.xml
@@ -0,0 +1,114 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="identifiers">
+
+ <title>Overview of Impala Identifiers</title>
+ <titlealts><navtitle>Identifiers</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Data Analysts"/>
+ <data name="Category" value="Developers"/>
+ <data name="Category" value="Querying"/>
+ <data name="Category" value="Databases"/>
+ <data name="Category" value="Tables"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ Identifiers are the names of databases, tables, or columns that you specify in a SQL statement. The rules for
+ identifiers govern what names you can give to things you create, the notation for referring to names
+ containing unusual characters, and other aspects such as case sensitivity.
+ </p>
+
+ <ul>
+ <li>
+ <p>
+ The minimum length of an identifier is 1 character.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ The maximum length of an identifier is currently 128 characters, enforced by the metastore database.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ An identifier must start with an alphabetic character. The remainder can contain any combination of
+ alphanumeric characters and underscores. Quoting the identifier with backticks has no effect on the allowed
+ characters in the name.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ An identifier can contain only ASCII characters.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ To use an identifier name that matches one of the Impala reserved keywords (listed in
+ <xref href="impala_reserved_words.xml#reserved_words"/>), surround the identifier with <codeph>``</codeph>
+ characters (backticks). Quote the reserved word even if it is part of a fully qualified name.
+ The following example shows how a reserved word can be used as a column name if it is quoted
+ with backticks in the <codeph>CREATE TABLE</codeph> statement, and how the column name
+ must also be quoted with backticks in a query:
+ </p>
+<codeblock>[localhost:21000] > create table reserved (`data` string);
+
+[localhost:21000] > select data from reserved;
+ERROR: AnalysisException: Syntax error in line 1:
+select data from reserved
+ ^
+Encountered: DATA
+Expected: ALL, CASE, CAST, DISTINCT, EXISTS, FALSE, IF, INTERVAL, NOT, NULL, STRAIGHT_JOIN, TRUE, IDENTIFIER
+CAUSED BY: Exception: Syntax error
+
+[localhost:21000] > select reserved.data from reserved;
+ERROR: AnalysisException: Syntax error in line 1:
+select reserved.data from reserved
+ ^
+Encountered: DATA
+Expected: IDENTIFIER
+CAUSED BY: Exception: Syntax error
+
+[localhost:21000] > select reserved.`data` from reserved;
+
+[localhost:21000] >
+</codeblock>
+
+ <note type="important">
+ Because the list of reserved words grows over time as new SQL syntax is added,
+ consider adopting coding conventions (especially for any automated scripts
+ or in packaged applications) to always quote all identifiers with backticks.
+ Quoting all identifiers protects your SQL from compatibility issues if
+ new reserved words are added in later releases.
+ </note>
+
+ </li>
+
+ <li>
+ <p>
+ Impala identifiers are always case-insensitive. That is, tables named <codeph>t1</codeph> and
+ <codeph>T1</codeph> always refer to the same table, regardless of quote characters. Internally, Impala
+ always folds all specified table and column names to lowercase. This is why the column headers in query
+ output are always displayed in lowercase.
+ </p>
+ </li>
+ </ul>
+
+ <p>
+ See <xref href="impala_aliases.xml#aliases"/> for how to define shorter or easier-to-remember aliases if the
+ original names are long or cryptic identifiers.
+ <ph conref="../shared/impala_common.xml#common/aliases_vs_identifiers"/>
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/views_vs_identifiers"/>
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_insert.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_insert.xml b/docs/topics/impala_insert.xml
new file mode 100644
index 0000000..6d0f68b
--- /dev/null
+++ b/docs/topics/impala_insert.xml
@@ -0,0 +1,676 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="insert">
+
+ <title>INSERT Statement</title>
+ <titlealts><navtitle>INSERT</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="ETL"/>
+ <data name="Category" value="Ingest"/>
+ <data name="Category" value="DML"/>
+ <data name="Category" value="Data Analysts"/>
+ <data name="Category" value="Developers"/>
+ <data name="Category" value="Tables"/>
+ <data audience="impala_next" name="Category" value="Kudu"/>
+ <!-- This is such an important statement, think if there are more applicable categories. -->
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">INSERT statement</indexterm>
+ Impala supports inserting into tables and partitions that you create with the Impala <codeph>CREATE
+ TABLE</codeph> statement, or pre-defined tables and partitions created through Hive.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock>[<varname>with_clause</varname>]
+INSERT { INTO | OVERWRITE } [TABLE] <varname>table_name</varname>
+ [(<varname>column_list</varname>)]
+ [ PARTITION (<varname>partition_clause</varname>)]
+{
+ [<varname>hint_clause</varname>] <varname>select_statement</varname>
+ | VALUES (<varname>value</varname> [, <varname>value</varname> ...]) [, (<varname>value</varname> [, <varname>value</varname> ...]) ...]
+}
+
+partition_clause ::= <varname>col_name</varname> [= <varname>constant</varname>] [, <varname>col_name</varname> [= <varname>constant</varname>] ...]
+
+hint_clause ::= [SHUFFLE] | [NOSHUFFLE] (Note: the square brackets are part of the syntax.)
+</codeblock>
+
+ <p>
+ <b>Appending or replacing (INTO and OVERWRITE clauses):</b>
+ </p>
+
+ <p>
+ The <codeph>INSERT INTO</codeph> syntax appends data to a table. The existing data files are left as-is, and
+ the inserted data is put into one or more new data files.
+ </p>
+
+ <p>
+ The <codeph>INSERT OVERWRITE</codeph> syntax replaces the data in a table.
+<!-- What happens with INSERT OVERWRITE if the target is a single partition or multiple partitions? -->
+<!-- If that gets too detailed, cover later under "Partitioning Considerations". -->
+ Currently, the overwritten data files are deleted immediately; they do not go through the HDFS trash
+ mechanism.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/complex_types_blurb"/>
+
+ <p rev="2.3.0">
+ The <codeph>INSERT</codeph> statement currently does not support writing data files
+ containing complex types (<codeph>ARRAY</codeph>, <codeph>STRUCT</codeph>, and <codeph>MAP</codeph>).
+ To prepare Parquet data for such tables, you generate the data files outside Impala and then
+ use <codeph>LOAD DATA</codeph> or <codeph>CREATE EXTERNAL TABLE</codeph> to associate those
+ data files with the table. Currently, such tables must use the Parquet file format.
+ See <xref href="impala_complex_types.xml#complex_types"/> for details about working with complex types.
+ </p>
+
+ <p rev="kudu" audience="impala_next">
+ <b>Ignoring duplicate partition keys for Kudu tables (IGNORE clause)</b>
+ </p>
+
+ <p rev="kudu" audience="impala_next">
+ Normally, an <codeph>INSERT</codeph> operation into a Kudu table fails if
+ it would result in duplicate partition key columns for any rows.
+ Specify <codeph>INSERT IGNORE <varname>rest_of_statement</varname></codeph> to
+ make the <codeph>INSERT</codeph> continue in this case. The rows that would
+ have duplicate partition key columns are not inserted.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+
+ <p>
+ Impala currently supports:
+ </p>
+
+ <ul>
+ <li>
+ Copy data from another table using a <codeph>SELECT</codeph> query. In Impala 1.2.1 and higher, you can
+ combine <codeph>CREATE TABLE</codeph> and <codeph>INSERT</codeph> operations into a single step with the
+ <codeph>CREATE TABLE AS SELECT</codeph> syntax, which bypasses the actual <codeph>INSERT</codeph> keyword.
+ </li>
+
+ <li>
+ An optional <xref href="impala_with.xml#with"><codeph>WITH</codeph> clause</xref> before the
+ <codeph>INSERT</codeph> keyword, to define a subquery referenced in the <codeph>SELECT</codeph> portion.
+ </li>
+
+ <li>
+ Create one or more new rows using constant expressions through the <codeph>VALUES</codeph> clause. (The
+ <codeph>VALUES</codeph> clause was added in Impala 1.0.1.)
+ </li>
+
+ <li rev="1.1">
+ <p>
+ By default, the first column of each newly inserted row goes into the first column of the table, the
+ second column into the second column, and so on.
+ </p>
+ <p>
+ You can also specify the columns to be inserted, an arbitrarily ordered subset of the columns in the
+ destination table, by specifying a column list immediately after the name of the destination table. This
+ feature lets you adjust the inserted columns to match the layout of a <codeph>SELECT</codeph> statement,
+ rather than the other way around. (This feature was added in Impala 1.1.)
+ </p>
+ <p>
+ The number of columns mentioned in the column list (known as the <q>column permutation</q>) must match
+ the number of columns in the <codeph>SELECT</codeph> list or the <codeph>VALUES</codeph> tuples. The
+ order of columns in the column permutation can be different than in the underlying table, and the columns
+ of each input row are reordered to match. If the number of columns in the column permutation is less than
+ in the destination table, all unmentioned columns are set to <codeph>NULL</codeph>.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ For a partitioned table, the optional <codeph>PARTITION</codeph> clause identifies which partition or
+ partitions the new values go into. If a partition key column is given a constant value such as
+ <codeph>PARTITION (year=2012)</codeph> or <codeph>PARTITION (year=2012, month=2)</codeph>, all the
+ inserted rows use those same values for those partition key columns and you omit any corresponding
+ columns in the source table from the <codeph>SELECT</codeph> list. This form is known as <q>static
+ partitioning</q>.
+ </p>
+ <p>
+ If a partition key column is mentioned but not assigned a value, such as in <codeph>PARTITION (year,
+ region)</codeph> (both columns unassigned) or <codeph>PARTITION(year, region='CA')</codeph>
+ (<codeph>year</codeph> column unassigned), the unassigned columns are filled in with the final columns of
+ the <codeph>SELECT</codeph> list. In this case, the number of columns in the <codeph>SELECT</codeph> list
+ must equal the number of columns in the column permutation plus the number of partition key columns not
+ assigned a constant value. This form is known as <q>dynamic partitioning</q>.
+ </p>
+ <p>
+ See <xref href="impala_partitioning.xml#partition_static_dynamic"/> for examples and performance
+ characteristics of static and dynamic partitioned inserts.
+ </p>
+ </li>
+
+ <li rev="1.2.2">
+ An optional hint clause immediately before the <codeph>SELECT</codeph> keyword, to fine-tune the behavior
+ when doing an <codeph>INSERT ... SELECT</codeph> operation into partitioned Parquet tables. The hint
+ keywords are <codeph>[SHUFFLE]</codeph> and <codeph>[NOSHUFFLE]</codeph>, including the square brackets.
+ Inserting into partitioned Parquet tables can be a resource-intensive operation because it potentially
+ involves many files being written to HDFS simultaneously, and separate
+ <ph rev="parquet_block_size">large</ph> memory buffers being allocated to buffer the data for each
+ partition. For usage details, see <xref href="impala_parquet.xml#parquet_etl"/>.
+ </li>
+ </ul>
+
+ <note>
+ <ul>
+ <li>
+ Insert commands that partition or add files result in changes to Hive metadata. Because Impala uses Hive
+ metadata, such changes may necessitate a metadata refresh. For more information, see the
+ <xref href="impala_refresh.xml#refresh" format="dita">REFRESH</xref> function.
+ </li>
+
+ <li>
+ Currently, Impala can only insert data into tables that use the text and Parquet formats. For other file
+ formats, insert the data using Hive and use Impala to query it.
+ </li>
+
+ <li>
+ As an alternative to the <codeph>INSERT</codeph> statement, if you have existing data files elsewhere in
+ HDFS, the <codeph>LOAD DATA</codeph> statement can move those files into a table. This statement works
+ with tables of any file format.
+ </li>
+ </ul>
+ </note>
+
+ <p conref="../shared/impala_common.xml#common/dml_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+
+ <p>
+ When you insert the results of an expression, particularly of a built-in function call, into a small numeric
+ column such as <codeph>INT</codeph>, <codeph>SMALLINT</codeph>, <codeph>TINYINT</codeph>, or
+ <codeph>FLOAT</codeph>, you might need to use a <codeph>CAST()</codeph> expression to coerce values into the
+ appropriate type. Impala does not automatically convert from a larger type to a smaller one. For example, to
+ insert cosine values into a <codeph>FLOAT</codeph> column, write <codeph>CAST(COS(angle) AS FLOAT)</codeph>
+ in the <codeph>INSERT</codeph> statement to make the conversion explicit.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/insert_parquet_blocksize"/>
+
+ <p conref="../shared/impala_common.xml#common/sync_ddl_blurb"/>
+
+ <note conref="../shared/impala_common.xml#common/compute_stats_next"/>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+ <p>
+ The following example sets up new tables with the same definition as the <codeph>TAB1</codeph> table from the
+ <xref href="impala_tutorial.xml#tutorial" format="dita">Tutorial</xref> section, using different file
+ formats, and demonstrates inserting data into the tables created with the <codeph>STORED AS TEXTFILE</codeph>
+ and <codeph>STORED AS PARQUET</codeph> clauses:
+ </p>
+
+<codeblock>CREATE DATABASE IF NOT EXISTS file_formats;
+USE file_formats;
+
+DROP TABLE IF EXISTS text_table;
+CREATE TABLE text_table
+( id INT, col_1 BOOLEAN, col_2 DOUBLE, col_3 TIMESTAMP )
+STORED AS TEXTFILE;
+
+DROP TABLE IF EXISTS parquet_table;
+CREATE TABLE parquet_table
+( id INT, col_1 BOOLEAN, col_2 DOUBLE, col_3 TIMESTAMP )
+STORED AS PARQUET;</codeblock>
+
+ <p>
+ With the <codeph>INSERT INTO TABLE</codeph> syntax, each new set of inserted rows is appended to any existing
+ data in the table. This is how you would record small amounts of data that arrive continuously, or ingest new
+ batches of data alongside the existing data. For example, after running 2 <codeph>INSERT INTO TABLE</codeph>
+ statements with 5 rows each, the table contains 10 rows total:
+ </p>
+
+<codeblock>[localhost:21000] > insert into table text_table select * from default.tab1;
+Inserted 5 rows in 0.41s
+
+[localhost:21000] > insert into table text_table select * from default.tab1;
+Inserted 5 rows in 0.46s
+
+[localhost:21000] > select count(*) from text_table;
++----------+
+| count(*) |
++----------+
+| 10 |
++----------+
+Returned 1 row(s) in 0.26s</codeblock>
+
+ <p>
+ With the <codeph>INSERT OVERWRITE TABLE</codeph> syntax, each new set of inserted rows replaces any existing
+ data in the table. This is how you load data to query in a data warehousing scenario where you analyze just
+ the data for a particular day, quarter, and so on, discarding the previous data each time. You might keep the
+ entire set of data in one raw table, and transfer and transform certain rows into a more compact and
+ efficient form to perform intensive analysis on that subset.
+ </p>
+
+ <p>
+ For example, here we insert 5 rows into a table using the <codeph>INSERT INTO</codeph> clause, then replace
+ the data by inserting 3 rows with the <codeph>INSERT OVERWRITE</codeph> clause. Afterward, the table only
+ contains the 3 rows from the final <codeph>INSERT</codeph> statement.
+ </p>
+
+<codeblock>[localhost:21000] > insert into table parquet_table select * from default.tab1;
+Inserted 5 rows in 0.35s
+
+[localhost:21000] > insert overwrite table parquet_table select * from default.tab1 limit 3;
+Inserted 3 rows in 0.43s
+[localhost:21000] > select count(*) from parquet_table;
++----------+
+| count(*) |
++----------+
+| 3 |
++----------+
+Returned 1 row(s) in 0.43s</codeblock>
+
+ <p>
+ The <codeph><xref href="impala_insert.xml#values">VALUES</xref></codeph> clause lets you insert one or more
+ rows by specifying constant values for all the columns. The number, types, and order of the expressions must
+ match the table definition.
+ </p>
+
+ <note id="insert_values_warning">
+ The <codeph>INSERT ... VALUES</codeph> technique is not suitable for loading large quantities of data into
+ HDFS-based tables, because the insert operations cannot be parallelized, and each one produces a separate
+ data file. Use it for setting up small dimension tables or tiny amounts of data for experimenting with SQL
+ syntax, or with HBase tables. Do not use it for large ETL jobs or benchmark tests for load operations. Do not
+ run scripts with thousands of <codeph>INSERT ... VALUES</codeph> statements that insert a single row each
+ time. If you do run <codeph>INSERT ... VALUES</codeph> operations to load data into a staging table as one
+ stage in an ETL pipeline, include multiple row values if possible within each <codeph>VALUES</codeph> clause,
+ and use a separate database to make cleanup easier if the operation does produce many tiny files.
+ </note>
+
+ <p>
+ The following example shows how to insert one row or multiple rows, with expressions of different types,
+ using literal values, expressions, and function return values:
+ </p>
+
+<codeblock>create table val_test_1 (c1 int, c2 float, c3 string, c4 boolean, c5 timestamp);
+insert into val_test_1 values (100, 99.9/10, 'abc', true, now());
+create table val_test_2 (id int, token string);
+insert overwrite val_test_2 values (1, 'a'), (2, 'b'), (-1,'xyzzy');</codeblock>
+
+ <p>
+ These examples show the type of <q>not implemented</q> error that you see when attempting to insert data into
+ a table with a file format that Impala currently does not write to:
+ </p>
+
+<codeblock>DROP TABLE IF EXISTS sequence_table;
+CREATE TABLE sequence_table
+( id INT, col_1 BOOLEAN, col_2 DOUBLE, col_3 TIMESTAMP )
+STORED AS SEQUENCEFILE;
+
+DROP TABLE IF EXISTS rc_table;
+CREATE TABLE rc_table
+( id INT, col_1 BOOLEAN, col_2 DOUBLE, col_3 TIMESTAMP )
+STORED AS RCFILE;
+
+[localhost:21000] > insert into table rc_table select * from default.tab1;
+Remote error
+Backend 0:RC_FILE not implemented.
+
+[localhost:21000] > insert into table sequence_table select * from default.tab1;
+Remote error
+Backend 0:SEQUENCE_FILE not implemented. </codeblock>
+
+ <p>
+ Inserting data into partitioned tables requires slightly different syntax that divides the partitioning
+ columns from the others:
+ </p>
+
+<codeblock>create table t1 (i int) <b>partitioned by (x int, y string)</b>;
+-- Select an INT column from another table.
+-- All inserted rows will have the same x and y values, as specified in the INSERT statement.
+-- This technique of specifying all the partition key values is known as static partitioning.
+insert into t1 <b>partition(x=10, y='a')</b> select c1 from some_other_table;
+-- Select two INT columns from another table.
+-- All inserted rows will have the same y value, as specified in the INSERT statement.
+-- Values from c2 go into t1.x.
+-- Any partitioning columns whose value is not specified are filled in
+-- from the columns specified last in the SELECT list.
+-- This technique of omitting some partition key values is known as dynamic partitioning.
+insert into t1 <b>partition(x, y='b')</b> select c1, c2 from some_other_table;
+-- Select an INT and a STRING column from another table.
+-- All inserted rows will have the same x value, as specified in the INSERT statement.
+-- Values from c3 go into t1.y.
+insert into t1 <b>partition(x=20, y)</b> select c1, c3 from some_other_table;</codeblock>
+
+ <p rev="1.1">
+ The following examples show how you can copy the data in all the columns from one table to another, copy the
+ data from only some columns, or specify the columns in the select list in a different order than they
+ actually appear in the table:
+ </p>
+
+<codeblock>-- Start with 2 identical tables.
+create table t1 (c1 int, c2 int);
+create table t2 like t1;
+
+-- If there is no () part after the destination table name,
+-- all columns must be specified, either as * or by name.
+insert into t2 select * from t1;
+insert into t2 select c1, c2 from t1;
+
+-- With the () notation following the destination table name,
+-- you can omit columns (all values for that column are NULL
+-- in the destination table), and/or reorder the values
+-- selected from the source table. This is the "column permutation" feature.
+insert into t2 (c1) select c1 from t1;
+insert into t2 (c2, c1) select c1, c2 from t1;
+
+-- The column names can be entirely different in the source and destination tables.
+-- You can copy any columns, not just the corresponding ones, from the source table.
+-- But the number and type of selected columns must match the columns mentioned in the () part.
+alter table t2 replace columns (x int, y int);
+insert into t2 (y) select c1 from t1;
+
+-- For partitioned tables, all the partitioning columns must be mentioned in the () column list
+-- or a PARTITION clause; these columns cannot be defaulted to NULL.
+create table pt1 (x int, y int) partitioned by (z int);
+-- The values from c1 are copied into the column x in the new table,
+-- all in the same partition based on a constant value for z.
+-- The values of y in the new table are all NULL.
+insert into pt1 (x) partition (z=5) select c1 from t1;
+-- Again we omit the values for column y so they are all NULL.
+-- The inserted x values can go into different partitions, based on
+-- the different values inserted into the partitioning column z.
+insert into pt1 (x,z) select x, z from t2;
+</codeblock>
+
+ <p>
+ <codeph>SELECT *</codeph> for a partitioned table requires that all partition key columns in the source table
+ be declared as the last columns in the <codeph>CREATE TABLE</codeph> statement. You still include a
+ <codeph>PARTITION</codeph> clause listing all the partition key columns. These partition columns are
+ automatically mapped to the last columns from the <codeph>SELECT *</codeph> list.
+ </p>
+
+<codeblock>create table source (x int, y int, year int, month int, day int);
+create table destination (x int, y int) partitioned by (year int, month int, day int);
+...load some data into the unpartitioned source table...
+-- Insert a single partition of data.
+-- The SELECT * means you cannot specify partition (year=2014, month, day).
+insert overwrite destination partition (year, month, day) select * from source where year=2014;
+-- Insert the data for all year/month/day combinations.
+insert overwrite destination partition (year, month, day) select * from source;
+
+-- If one of the partition columns is omitted from the source table,
+-- then you can specify a specific value for that column in the PARTITION clause.
+-- Here the source table holds only data from 2014, and so does not include a year column.
+create table source_2014 (x int, y int, month int, day int);
+...load some data into the unpartitioned source_2014 table...
+insert overwrite destination partition (year=2014, month, day) select * from source_2014;
+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/insert_sort_blurb"/>
+
+ <p>
+ <b>Concurrency considerations:</b> Each <codeph>INSERT</codeph> operation creates new data files with unique
+ names, so you can run multiple <codeph>INSERT INTO</codeph> statements simultaneously without filename
+ conflicts.
+<!--
+If data is inserted into a table by a statement issued to a different
+<cmdname>impalad</cmdname> node,
+issue a <codeph>REFRESH <varname>table_name</varname></codeph>
+statement to make the node you are connected to aware of this new data.
+-->
+ While data is being inserted into an Impala table, the data is staged temporarily in a subdirectory inside
+ the data directory; during this period, you cannot issue queries against that table in Hive. If an
+ <codeph>INSERT</codeph> operation fails, the temporary data file and the subdirectory could be left behind in
+ the data directory. If so, remove the relevant subdirectory and any data files it contains manually, by
+ issuing an <codeph>hdfs dfs -rm -r</codeph> command, specifying the full path of the work subdirectory, whose
+ name ends in <codeph>_dir</codeph>.
+ </p>
+ </conbody>
+
+ <concept id="values">
+
+ <title>VALUES Clause</title>
+
+ <conbody>
+
+ <p>
+ The <codeph>VALUES</codeph> clause is a general-purpose way to specify the columns of one or more rows,
+ typically within an <codeph><xref href="impala_insert.xml#insert">INSERT</xref></codeph> statement.
+ </p>
+
+ <note conref="../shared/impala_common.xml#common/insert_values_warning">
+ <p/>
+ </note>
+
+ <p>
+ The following examples illustrate:
+ </p>
+
+ <ul>
+ <li>
+ How to insert a single row using a <codeph>VALUES</codeph> clause.
+ </li>
+
+ <li>
+ How to insert multiple rows using a <codeph>VALUES</codeph> clause.
+ </li>
+
+ <li>
+ How the row or rows from a <codeph>VALUES</codeph> clause can be appended to a table through
+ <codeph>INSERT INTO</codeph>, or replace the contents of the table through <codeph>INSERT
+ OVERWRITE</codeph>.
+ </li>
+
+ <li>
+ How the entries in a <codeph>VALUES</codeph> clause can be literals, function results, or any other kind
+ of expression. See <xref href="impala_literals.xml#literals"/> for the notation to use for literal
+ values, especially <xref href="impala_literals.xml#string_literals"/> for quoting and escaping
+ conventions for strings. See <xref href="impala_operators.xml#operators"/> and
+ <xref href="impala_functions.xml#builtins"/> for other things you can include in expressions with the
+ <codeph>VALUES</codeph> clause.
+ </li>
+ </ul>
+
+<codeblock>[localhost:21000] > describe val_example;
+Query: describe val_example
+Query finished, fetching results ...
++-------+---------+---------+
+| name | type | comment |
++-------+---------+---------+
+| id | int | |
+| col_1 | boolean | |
+| col_2 | double | |
++-------+---------+---------+
+
+[localhost:21000] > insert into val_example values (1,true,100.0);
+Inserted 1 rows in 0.30s
+[localhost:21000] > select * from val_example;
++----+-------+-------+
+| id | col_1 | col_2 |
++----+-------+-------+
+| 1 | true | 100 |
++----+-------+-------+
+
+[localhost:21000] > insert overwrite val_example values (10,false,pow(2,5)), (50,true,10/3);
+Inserted 2 rows in 0.16s
+[localhost:21000] > select * from val_example;
++----+-------+-------------------+
+| id | col_1 | col_2 |
++----+-------+-------------------+
+| 10 | false | 32 |
+| 50 | true | 3.333333333333333 |
++----+-------+-------------------+</codeblock>
+
+ <p>
+ When used in an <codeph>INSERT</codeph> statement, the Impala <codeph>VALUES</codeph> clause can specify
+ some or all of the columns in the destination table, and the columns can be specified in a different order
+ than they actually appear in the table. To specify a different set or order of columns than in the table,
+ use the syntax:
+ </p>
+
+<codeblock>INSERT INTO <varname>destination</varname>
+ (<varname>col_x</varname>, <varname>col_y</varname>, <varname>col_z</varname>)
+ VALUES
+ (<varname>val_x</varname>, <varname>val_y</varname>, <varname>val_z</varname>);
+</codeblock>
+
+ <p>
+ Any columns in the table that are not listed in the <codeph>INSERT</codeph> statement are set to
+ <codeph>NULL</codeph>.
+ </p>
+
+<!--
+ <p>
+ does not support specifying a subset of the
+ columns in the table or specifying the columns in a different order. Use a
+ <codeph>VALUES</codeph> clause with all the column values in the same order as
+ the table definition, using <codeph>NULL</codeph> values for any columns you
+ want to omit from the <codeph>INSERT</codeph> operation.
+ </p>
+-->
+
+ <p>
+ To use a <codeph>VALUES</codeph> clause like a table in other statements, wrap it in parentheses and use
+ <codeph>AS</codeph> clauses to specify aliases for the entire object and any columns you need to refer to:
+ </p>
+
+<codeblock>[localhost:21000] > select * from (values(4,5,6),(7,8,9)) as t;
++---+---+---+
+| 4 | 5 | 6 |
++---+---+---+
+| 4 | 5 | 6 |
+| 7 | 8 | 9 |
++---+---+---+
+[localhost:21000] > select * from (values(1 as c1, true as c2, 'abc' as c3),(100,false,'xyz')) as t;
++-----+-------+-----+
+| c1 | c2 | c3 |
++-----+-------+-----+
+| 1 | true | abc |
+| 100 | false | xyz |
++-----+-------+-----+</codeblock>
+
+ <p>
+ For example, you might use a tiny table constructed like this from constant literals or function return
+ values as part of a longer statement involving joins or <codeph>UNION ALL</codeph>.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/hdfs_blurb"/>
+
+ <p>
+ Impala physically writes all inserted files under the ownership of its default user, typically
+ <codeph>impala</codeph>. Therefore, this user must have HDFS write permission in the corresponding table
+ directory.
+ </p>
+
+ <p>
+ The permission requirement is independent of the authorization performed by the Sentry framework. (If the
+ connected user is not authorized to insert into a table, Sentry blocks that operation immediately,
+ regardless of the privileges available to the <codeph>impala</codeph> user.) Files created by Impala are
+ not owned by and do not inherit permissions from the connected user.
+ </p>
+
+ <p>
+ The number of data files produced by an <codeph>INSERT</codeph> statement depends on the size of the
+ cluster, the number of data blocks that are processed, the partition key columns in a partitioned table,
+ and the mechanism Impala uses for dividing the work in parallel. Do not assume that an
+ <codeph>INSERT</codeph> statement will produce some particular number of output files. In case of
+ performance issues with data written by Impala, check that the output files do not suffer from issues such
+ as many tiny files or many tiny partitions. (In the Hadoop context, even files or partitions of a few tens
+ of megabytes are considered <q>tiny</q>.)
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/insert_hidden_work_directory"/>
+
+ <p conref="../shared/impala_common.xml#common/hbase_blurb"/>
+
+ <p>
+ You can use the <codeph>INSERT</codeph> statement with HBase tables as follows:
+ </p>
+
+ <ul>
+ <li>
+ <p>
+ You can insert a single row or a small set of rows into an HBase table with the <codeph>INSERT ...
+ VALUES</codeph> syntax. This is a good use case for HBase tables with Impala, because HBase tables are
+ not subject to the same kind of fragmentation from many small insert operations as HDFS tables are.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ You can insert any number of rows at once into an HBase table using the <codeph>INSERT ...
+ SELECT</codeph> syntax.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ If more than one inserted row has the same value for the HBase key column, only the last inserted row
+ with that value is visible to Impala queries. You can take advantage of this fact with <codeph>INSERT
+ ... VALUES</codeph> statements to effectively update rows one at a time, by inserting new rows with the
+ same key values as existing rows. Be aware that after an <codeph>INSERT ... SELECT</codeph> operation
+ copying from an HDFS table, the HBase table might contain fewer rows than were inserted, if the key
+ column in the source table contained duplicate values.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ You cannot <codeph>INSERT OVERWRITE</codeph> into an HBase table. New rows are always appended.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ When you create an Impala or Hive table that maps to an HBase table, the column order you specify with
+ the <codeph>INSERT</codeph> statement might be different than the order you declare with the
+ <codeph>CREATE TABLE</codeph> statement. Behind the scenes, HBase arranges the columns based on how
+ they are divided into column families. This might cause a mismatch during insert operations, especially
+ if you use the syntax <codeph>INSERT INTO <varname>hbase_table</varname> SELECT * FROM
+ <varname>hdfs_table</varname></codeph>. Before inserting data, verify the column order by issuing a
+ <codeph>DESCRIBE</codeph> statement for the table, and adjust the order of the select list in the
+ <codeph>INSERT</codeph> statement.
+ </p>
+ </li>
+ </ul>
+
+ <p>
+ See <xref href="impala_hbase.xml#impala_hbase"/> for more details about using Impala with HBase.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/s3_blurb"/>
+ <p conref="../shared/impala_common.xml#common/s3_dml"/>
+
+ <p conref="../shared/impala_common.xml#common/security_blurb"/>
+ <p conref="../shared/impala_common.xml#common/redaction_yes"/>
+
+ <p conref="../shared/impala_common.xml#common/cancel_blurb_yes"/>
+
+ <p conref="../shared/impala_common.xml#common/permissions_blurb"/>
+ <p rev="CDH-19187">
+ The user ID that the <cmdname>impalad</cmdname> daemon runs under,
+ typically the <codeph>impala</codeph> user, must have read
+ permission for the files in the source directory of an <codeph>INSERT ... SELECT</codeph>
+ operation, and write permission for all affected directories in the destination table.
+ (An <codeph>INSERT</codeph> operation could write files to multiple different HDFS directories
+ if the destination table is partitioned.)
+ This user must also have write permission to create a temporary work directory
+ in the top-level HDFS directory of the destination table.
+ An <codeph>INSERT OVERWRITE</codeph> operation does not require write permission on
+ the original data files in the table, only on the table directories themselves.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/restrictions_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/char_varchar_cast_from_string"/>
+
+ <p conref="../shared/impala_common.xml#common/related_options"/>
+
+ <p rev="1.3.1" conref="../shared/impala_common.xml#common/insert_inherit_permissions"/>
+ </conbody>
+ </concept>
+
+<!-- Values clause -->
+</concept>
+<!-- INSERT statement -->
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_int.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_int.xml b/docs/topics/impala_int.xml
new file mode 100644
index 0000000..514d377
--- /dev/null
+++ b/docs/topics/impala_int.xml
@@ -0,0 +1,95 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="int">
+
+ <title>INT Data Type</title>
+ <titlealts><navtitle>INT</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="Impala Data Types"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Data Analysts"/>
+ <data name="Category" value="Developers"/>
+ <data name="Category" value="Schemas"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ A 4-byte integer data type used in <codeph>CREATE TABLE</codeph> and <codeph>ALTER TABLE</codeph> statements.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+ <p>
+ In the column definition of a <codeph>CREATE TABLE</codeph> statement:
+ </p>
+
+<codeblock><varname>column_name</varname> INT</codeblock>
+
+ <p>
+ <b>Range:</b> -2147483648 .. 2147483647. There is no <codeph>UNSIGNED</codeph> subtype.
+ </p>
+
+ <p>
+ <b>Conversions:</b> Impala automatically converts to a larger integer type (<codeph>BIGINT</codeph>) or a
+ floating-point type (<codeph>FLOAT</codeph> or <codeph>DOUBLE</codeph>). Use
+ <codeph>CAST()</codeph> to convert to <codeph>TINYINT</codeph>, <codeph>SMALLINT</codeph>,
+ <codeph>STRING</codeph>, or <codeph>TIMESTAMP</codeph>.
+ <ph conref="../shared/impala_common.xml#common/cast_int_to_timestamp"/>
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+
+ <p>
+ The data type <codeph>INTEGER</codeph> is an alias for <codeph>INT</codeph>.
+ </p>
+
+ <p>
+ For a convenient and automated way to check the bounds of the <codeph>INT</codeph> type, call the functions
+ <codeph>MIN_INT()</codeph> and <codeph>MAX_INT()</codeph>.
+ </p>
+
+ <p>
+ If an integer value is too large to be represented as an <codeph>INT</codeph>, use a <codeph>BIGINT</codeph>
+ instead.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/null_bad_numeric_cast"/>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+<codeblock>CREATE TABLE t1 (x INT);
+SELECT CAST(1000 AS INT);
+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/partitioning_good"/>
+
+ <p conref="../shared/impala_common.xml#common/hbase_ok"/>
+
+ <p conref="../shared/impala_common.xml#common/parquet_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/text_bulky"/>
+
+<!-- <p conref="/Content/impala_common_xi44078.xml#common/compatibility_blurb"/> -->
+
+ <p conref="../shared/impala_common.xml#common/internals_4_bytes"/>
+
+ <p conref="../shared/impala_common.xml#common/added_forever"/>
+
+ <p conref="../shared/impala_common.xml#common/column_stats_constant"/>
+
+<!-- <p conref="/Content/impala_common_xi44078.xml#common/restrictions_blurb"/> -->
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ <xref href="impala_literals.xml#numeric_literals"/>, <xref href="impala_tinyint.xml#tinyint"/>,
+ <xref href="impala_smallint.xml#smallint"/>, <xref href="impala_int.xml#int"/>,
+ <xref href="impala_bigint.xml#bigint"/>, <xref href="impala_decimal.xml#decimal"/>,
+ <xref href="impala_math_functions.xml#math_functions"/>
+ </p>
+ </conbody>
+</concept>
[12/22] incubator-impala git commit: First try at porting over the
source files necessary for the Impala SQL Reference.
Posted by jr...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_double.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_double.xml b/docs/topics/impala_double.xml
new file mode 100644
index 0000000..f1d1756
--- /dev/null
+++ b/docs/topics/impala_double.xml
@@ -0,0 +1,100 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="double">
+
+ <title>DOUBLE Data Type</title>
+ <titlealts><navtitle>DOUBLE</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="Impala Data Types"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Data Analysts"/>
+ <data name="Category" value="Developers"/>
+ <data name="Category" value="Schemas"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ A double precision floating-point data type used in <codeph>CREATE TABLE</codeph> and <codeph>ALTER
+ TABLE</codeph> statements.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+ <p>
+ In the column definition of a <codeph>CREATE TABLE</codeph> statement:
+ </p>
+
+<codeblock><varname>column_name</varname> DOUBLE</codeblock>
+
+ <p>
+ <b>Range:</b> 4.94065645841246544e-324 .. 1.79769313486231570e+308, positive or negative
+ </p>
+
+ <p>
+ <b>Precision:</b> 15 to 17 significant digits, depending on usage. The number of significant digits does
+ not depend on the position of the decimal point.
+ </p>
+
+ <p>
+ <b>Representation:</b> The values are stored in 8 bytes, using
+ <xref href="https://en.wikipedia.org/wiki/Double-precision_floating-point_format" scope="external" format="html">IEEE 754 Double Precision Binary Floating Point</xref> format.
+ </p>
+
+ <p>
+ <b>Conversions:</b> Impala does not automatically convert <codeph>DOUBLE</codeph> to any other type. You can
+ use <codeph>CAST()</codeph> to convert <codeph>DOUBLE</codeph> values to <codeph>FLOAT</codeph>,
+ <codeph>TINYINT</codeph>, <codeph>SMALLINT</codeph>, <codeph>INT</codeph>, <codeph>BIGINT</codeph>,
+ <codeph>STRING</codeph>, <codeph>TIMESTAMP</codeph>, or <codeph>BOOLEAN</codeph>. You can use exponential
+ notation in <codeph>DOUBLE</codeph> literals or when casting from <codeph>STRING</codeph>, for example
+ <codeph>1.0e6</codeph> to represent one million.
+ <ph conref="../shared/impala_common.xml#common/cast_int_to_timestamp"/>
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+
+ <p>
+ The data type <codeph>REAL</codeph> is an alias for <codeph>DOUBLE</codeph>.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+<codeblock>CREATE TABLE t1 (x DOUBLE);
+SELECT CAST(1000.5 AS DOUBLE);
+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/partitioning_imprecise"/>
+
+ <p conref="../shared/impala_common.xml#common/hbase_ok"/>
+
+ <p conref="../shared/impala_common.xml#common/parquet_ok"/>
+
+ <p conref="../shared/impala_common.xml#common/text_bulky"/>
+
+<!-- <p conref="/Content/impala_common_xi44078.xml#common/compatibility_blurb"/> -->
+
+ <p conref="../shared/impala_common.xml#common/internals_8_bytes"/>
+
+<!-- <p conref="/Content/impala_common_xi44078.xml#common/added_in_20"/> -->
+
+ <p conref="../shared/impala_common.xml#common/column_stats_constant"/>
+
+ <p conref="../shared/impala_common.xml#common/restrictions_blurb"/>
+
+<!-- This conref appears under SUM(), AVG(), FLOAT, and DOUBLE topics. -->
+
+ <p conref="../shared/impala_common.xml#common/sum_double"/>
+
+ <p conref="../shared/impala_common.xml#common/float_double_decimal_caveat"/>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ <xref href="impala_literals.xml#numeric_literals"/>, <xref href="impala_math_functions.xml#math_functions"/>,
+ <xref href="impala_float.xml#float"/>
+ </p>
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_drop_database.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_drop_database.xml b/docs/topics/impala_drop_database.xml
new file mode 100644
index 0000000..c6a1b64
--- /dev/null
+++ b/docs/topics/impala_drop_database.xml
@@ -0,0 +1,124 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="drop_database">
+
+ <title>DROP DATABASE Statement</title>
+ <titlealts><navtitle>DROP DATABASE</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Databases"/>
+ <data name="Category" value="DDL"/>
+ <data name="Category" value="Schemas"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">DROP DATABASE statement</indexterm>
+ Removes a database from the system. The physical operations involve removing the metadata for the database
+ from the metastore, and deleting the corresponding <codeph>*.db</codeph> directory from HDFS.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock>DROP (DATABASE|SCHEMA) [IF EXISTS] <varname>database_name</varname> <ph rev="2.3.0">[RESTRICT | CASCADE]</ph>;</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/ddl_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+
+ <p>
+ By default, the database must be empty before it can be dropped, to avoid losing any data.
+ </p>
+
+ <p rev="2.3.0">
+ In CDH 5.5 / Impala 2.3 and higher, you can include the <codeph>CASCADE</codeph>
+ clause to make Impala drop all tables and other objects in the database before dropping the database itself.
+ The <codeph>RESTRICT</codeph> clause enforces the original requirement that the database be empty
+ before being dropped. Because the <codeph>RESTRICT</codeph> behavior is still the default, this
+ clause is optional.
+ </p>
+
+ <p rev="2.3.0">
+ The automatic dropping resulting from the <codeph>CASCADE</codeph> clause follows the same rules as the
+ corresponding <codeph>DROP TABLE</codeph>, <codeph>DROP VIEW</codeph>, and <codeph>DROP FUNCTION</codeph> statements.
+ In particular, the HDFS directories and data files for any external tables are left behind when the
+ tables are removed.
+ </p>
+
+ <p>
+ When you do not use the <codeph>CASCADE</codeph> clause, drop or move all the objects inside the database manually
+ before dropping the database itself:
+ </p>
+
+ <ul>
+ <li>
+ <p>
+ Use the <codeph>SHOW TABLES</codeph> statement to locate all tables and views in the database,
+ and issue <codeph>DROP TABLE</codeph> and <codeph>DROP VIEW</codeph> statements to remove them all.
+ </p>
+ </li>
+ <li>
+ <p>
+ Use the <codeph>SHOW FUNCTIONS</codeph> and <codeph>SHOW AGGREGATE FUNCTIONS</codeph> statements
+ to locate all user-defined functions in the database, and issue <codeph>DROP FUNCTION</codeph>
+ and <codeph>DROP AGGREGATE FUNCTION</codeph> statements to remove them all.
+ </p>
+ </li>
+ <li>
+ <p>
+ To keep tables or views contained by a database while removing the database itself, use
+ <codeph>ALTER TABLE</codeph> and <codeph>ALTER VIEW</codeph> to move the relevant
+ objects to a different database before dropping the original database.
+ </p>
+ </li>
+ </ul>
+
+ <p>
+ You cannot drop the current database, that is, the database your session connected to
+ either through the <codeph>USE</codeph> statement or the <codeph>-d</codeph> option of <cmdname>impala-shell</cmdname>.
+ Issue a <codeph>USE</codeph> statement to switch to a different database first.
+ Because the <codeph>default</codeph> database is always available, issuing
+ <codeph>USE default</codeph> is a convenient way to leave the current database
+ before dropping it.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/hive_blurb"/>
+
+ <p>
+ When you drop a database in Impala, the database can no longer be used by Hive.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+<!-- Better to conref the same examples in both places. -->
+
+ <p>
+ See <xref href="impala_create_database.xml#create_database"/> for examples covering <codeph>CREATE
+ DATABASE</codeph>, <codeph>USE</codeph>, and <codeph>DROP DATABASE</codeph>.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/cancel_blurb_no"/>
+
+ <p conref="../shared/impala_common.xml#common/permissions_blurb"/>
+ <p rev="CDH-19187">
+ The user ID that the <cmdname>impalad</cmdname> daemon runs under,
+ typically the <codeph>impala</codeph> user, must have write
+ permission for the directory associated with the database.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+ <codeblock conref="../shared/impala_common.xml#common/create_drop_db_example"/>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ <xref href="impala_databases.xml#databases"/>, <xref href="impala_create_database.xml#create_database"/>,
+ <xref href="impala_use.xml#use"/>, <xref href="impala_show.xml#show_databases"/>, <xref href="impala_drop_table.xml#drop_table"/>
+ </p>
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_drop_function.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_drop_function.xml b/docs/topics/impala_drop_function.xml
new file mode 100644
index 0000000..51a4d90
--- /dev/null
+++ b/docs/topics/impala_drop_function.xml
@@ -0,0 +1,60 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept rev="1.2" id="drop_function">
+
+ <title>DROP FUNCTION Statement</title>
+ <titlealts><navtitle>DROP FUNCTION</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="DDL"/>
+ <data name="Category" value="Impala Functions"/>
+ <data name="Category" value="UDFs"/>
+ <data name="Category" value="Schemas"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">DROP FUNCTION statement</indexterm>
+ Removes a user-defined function (UDF), so that it is not available for execution during Impala
+ <codeph>SELECT</codeph> or <codeph>INSERT</codeph> operations.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock>DROP [AGGREGATE] FUNCTION [IF EXISTS] [<varname>db_name</varname>.]<varname>function_name</varname>(<varname>type</varname>[, <varname>type</varname>...])</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/ddl_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+
+ <p>
+ Because the same function name could be overloaded with different argument signatures, you specify the
+ argument types to identify the exact function to drop.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/restrictions_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/udf_persistence_restriction"/>
+
+ <p conref="../shared/impala_common.xml#common/cancel_blurb_no"/>
+
+ <p conref="../shared/impala_common.xml#common/permissions_blurb"/>
+ <p rev="CDH-19187">
+ The user ID that the <cmdname>impalad</cmdname> daemon runs under,
+ typically the <codeph>impala</codeph> user, does not need any
+ particular HDFS permissions to perform this statement.
+ All read and write operations are on the metastore database,
+ not HDFS files and directories.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ <xref href="impala_udf.xml#udfs"/>, <xref href="impala_create_function.xml#create_function"/>
+ </p>
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_drop_role.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_drop_role.xml b/docs/topics/impala_drop_role.xml
new file mode 100644
index 0000000..35d2157
--- /dev/null
+++ b/docs/topics/impala_drop_role.xml
@@ -0,0 +1,67 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept rev="1.4.0" id="drop_role">
+
+ <title>DROP ROLE Statement (CDH 5.2 or higher only)</title>
+ <titlealts><navtitle>DROP ROLE (CDH 5.2 or higher only)</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="DDL"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Sentry"/>
+ <data name="Category" value="Roles"/>
+ <!-- Consider whether to go deeper into categories like Security for the Sentry-related statements. -->
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">DROP ROLE statement</indexterm>
+<!-- Copied from Sentry docs. Turn into conref. I did some rewording for clarity. -->
+ The <codeph>DROP ROLE</codeph> statement removes a role from the metastore database. Once dropped, the role
+ is revoked for all users to whom it was previously assigned, and all privileges granted to that role are
+ revoked. Queries that are already executing are not affected. Impala verifies the role information
+ approximately every 60 seconds, so the effects of <codeph>DROP ROLE</codeph> might not take effect for new
+ Impala queries for a brief period.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock>DROP ROLE <varname>role_name</varname>
+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/privileges_blurb"/>
+
+ <p>
+ Only administrative users (initially, a predefined set of users specified in the Sentry service configuration
+ file) can use this statement.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/compatibility_blurb"/>
+
+ <p>
+ Impala makes use of any roles and privileges specified by the <codeph>GRANT</codeph> and
+ <codeph>REVOKE</codeph> statements in Hive, and Hive makes use of any roles and privileges specified by the
+ <codeph>GRANT</codeph> and <codeph>REVOKE</codeph> statements in Impala. The Impala <codeph>GRANT</codeph>
+ and <codeph>REVOKE</codeph> statements for privileges do not require the <codeph>ROLE</codeph> keyword to be
+ repeated before each role name, unlike the equivalent Hive statements.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ <xref href="impala_authorization.xml#authorization"/>, <xref href="impala_grant.xml#grant"/>,
+ <xref href="impala_revoke.xml#revoke"/>, <xref href="impala_create_role.xml#create_role"/>,
+ <xref href="impala_show.xml#show"/>
+ </p>
+
+<!-- To do: nail down the new SHOW syntax, e.g. SHOW ROLES, SHOW CURRENT ROLES, SHOW GROUPS. -->
+
+ <p conref="../shared/impala_common.xml#common/cancel_blurb_no"/>
+
+ <p conref="../shared/impala_common.xml#common/permissions_blurb_no"/>
+
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_drop_stats.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_drop_stats.xml b/docs/topics/impala_drop_stats.xml
new file mode 100644
index 0000000..56697f4
--- /dev/null
+++ b/docs/topics/impala_drop_stats.xml
@@ -0,0 +1,275 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept rev="2.1.0" id="drop_stats" xml:lang="en-US">
+
+ <title>DROP STATS Statement</title>
+ <titlealts><navtitle>DROP STATS</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="DDL"/>
+ <data name="Category" value="Tables"/>
+ <data name="Category" value="Performance"/>
+ <data name="Category" value="Scalability"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">DROP STATS statement</indexterm>
+ Removes the specified statistics from a table or partition. The statistics were originally created by the
+ <codeph>COMPUTE STATS</codeph> or <codeph>COMPUTE INCREMENTAL STATS</codeph> statement.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock rev="2.1.0">DROP STATS [<varname>database_name</varname>.]<varname>table_name</varname>
+DROP INCREMENTAL STATS [<varname>database_name</varname>.]<varname>table_name</varname> PARTITION (<varname>partition_spec</varname>)
+
+<varname>partition_spec</varname> ::= <varname>partition_col</varname>=<varname>constant_value</varname>
+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/incremental_partition_spec"/>
+
+ <p>
+ <codeph>DROP STATS</codeph> removes all statistics from the table, whether created by <codeph>COMPUTE
+ STATS</codeph> or <codeph>COMPUTE INCREMENTAL STATS</codeph>.
+ </p>
+
+ <p rev="2.1.0">
+ <codeph>DROP INCREMENTAL STATS</codeph> only affects incremental statistics for a single partition, specified
+ through the <codeph>PARTITION</codeph> clause. The incremental stats are marked as outdated, so that they are
+ recomputed by the next <codeph>COMPUTE INCREMENTAL STATS</codeph> statement.
+ </p>
+
+<!-- To do: what release was this added in? -->
+
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+
+ <p>
+ You typically use this statement when the statistics for a table or a partition have become stale due to data
+ files being added to or removed from the associated HDFS data directories, whether by manual HDFS operations
+ or <codeph>INSERT</codeph>, <codeph>INSERT OVERWRITE</codeph>, or <codeph>LOAD DATA</codeph> statements, or
+ adding or dropping partitions.
+ </p>
+
+ <p>
+ When a table or partition has no associated statistics, Impala treats it as essentially zero-sized when
+ constructing the execution plan for a query. In particular, the statistics influence the order in which
+ tables are joined in a join query. To ensure proper query planning and good query performance and
+ scalability, make sure to run <codeph>COMPUTE STATS</codeph> or <codeph>COMPUTE INCREMENTAL STATS</codeph> on
+ the table or partition after removing any stale statistics.
+ </p>
+
+ <p>
+ Dropping the statistics is not required for an unpartitioned table or a partitioned table covered by the
+ original type of statistics. A subsequent <codeph>COMPUTE STATS</codeph> statement replaces any existing
+ statistics with new ones, for all partitions, regardless of whether the old ones were outdated. Therefore,
+ this statement was rarely used before the introduction of incremental statistics.
+ </p>
+
+ <p>
+ Dropping the statistics is required for a partitioned table containing incremental statistics, to make a
+ subsequent <codeph>COMPUTE INCREMENTAL STATS</codeph> statement rescan an existing partition. See
+ <xref href="impala_perf_stats.xml#perf_stats"/> for information about incremental statistics, a new feature
+ available in Impala 2.1.0 and higher.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/ddl_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/cancel_blurb_no"/>
+
+ <p conref="../shared/impala_common.xml#common/permissions_blurb"/>
+ <p rev="CDH-19187">
+ The user ID that the <cmdname>impalad</cmdname> daemon runs under,
+ typically the <codeph>impala</codeph> user, does not need any
+ particular HDFS permissions to perform this statement.
+ All read and write operations are on the metastore database,
+ not HDFS files and directories.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+ <p>
+ The following example shows a partitioned table that has associated statistics produced by the
+ <codeph>COMPUTE INCREMENTAL STATS</codeph> statement, and how the situation evolves as statistics are dropped
+ from specific partitions, then the entire table.
+ </p>
+
+ <p>
+ Initially, all table and column statistics are filled in.
+ </p>
+
+<!-- Note: chopped off any excess characters at position 87 and after,
+ to avoid weird wrapping in PDF.
+ Applies to any subsequent examples with output from SHOW ... STATS too. -->
+
+<codeblock>show table stats item_partitioned;
++-------------+-------+--------+----------+--------------+---------+------------------
+| i_category | #Rows | #Files | Size | Bytes Cached | Format | Incremental stats
++-------------+-------+--------+----------+--------------+---------+------------------
+| Books | 1733 | 1 | 223.74KB | NOT CACHED | PARQUET | true
+| Children | 1786 | 1 | 230.05KB | NOT CACHED | PARQUET | true
+| Electronics | 1812 | 1 | 232.67KB | NOT CACHED | PARQUET | true
+| Home | 1807 | 1 | 232.56KB | NOT CACHED | PARQUET | true
+| Jewelry | 1740 | 1 | 223.72KB | NOT CACHED | PARQUET | true
+| Men | 1811 | 1 | 231.25KB | NOT CACHED | PARQUET | true
+| Music | 1860 | 1 | 237.90KB | NOT CACHED | PARQUET | true
+| Shoes | 1835 | 1 | 234.90KB | NOT CACHED | PARQUET | true
+| Sports | 1783 | 1 | 227.97KB | NOT CACHED | PARQUET | true
+| Women | 1790 | 1 | 226.27KB | NOT CACHED | PARQUET | true
+| Total | 17957 | 10 | 2.25MB | 0B | |
++-------------+-------+--------+----------+--------------+---------+------------------
+show column stats item_partitioned;
++------------------+-----------+------------------+--------+----------+---------------
+| Column | Type | #Distinct Values | #Nulls | Max Size | Avg Size
++------------------+-----------+------------------+--------+----------+---------------
+| i_item_sk | INT | 19443 | -1 | 4 | 4
+| i_item_id | STRING | 9025 | -1 | 16 | 16
+| i_rec_start_date | TIMESTAMP | 4 | -1 | 16 | 16
+| i_rec_end_date | TIMESTAMP | 3 | -1 | 16 | 16
+| i_item_desc | STRING | 13330 | -1 | 200 | 100.3028030395
+| i_current_price | FLOAT | 2807 | -1 | 4 | 4
+| i_wholesale_cost | FLOAT | 2105 | -1 | 4 | 4
+| i_brand_id | INT | 965 | -1 | 4 | 4
+| i_brand | STRING | 725 | -1 | 22 | 16.17760086059
+| i_class_id | INT | 16 | -1 | 4 | 4
+| i_class | STRING | 101 | -1 | 15 | 7.767499923706
+| i_category_id | INT | 10 | -1 | 4 | 4
+| i_manufact_id | INT | 1857 | -1 | 4 | 4
+| i_manufact | STRING | 1028 | -1 | 15 | 11.32950019836
+| i_size | STRING | 8 | -1 | 11 | 4.334599971771
+| i_formulation | STRING | 12884 | -1 | 20 | 19.97999954223
+| i_color | STRING | 92 | -1 | 10 | 5.380899906158
+| i_units | STRING | 22 | -1 | 7 | 4.186900138854
+| i_container | STRING | 2 | -1 | 7 | 6.992599964141
+| i_manager_id | INT | 105 | -1 | 4 | 4
+| i_product_name | STRING | 19094 | -1 | 25 | 18.02330017089
+| i_category | STRING | 10 | 0 | -1 | -1
++------------------+-----------+------------------+--------+----------+---------------
+</codeblock>
+
+ <p>
+ To remove statistics for particular partitions, use the <codeph>DROP INCREMENTAL STATS</codeph> statement.
+ After removing statistics for two partitions, the table-level statistics reflect that change in the
+ <codeph>#Rows</codeph> and <codeph>Incremental stats</codeph> fields. The counts, maximums, and averages of
+ the column-level statistics are unaffected.
+ </p>
+
+ <note>
+ (It is possible that the row count might be preserved in future after a <codeph>DROP INCREMENTAL
+ STATS</codeph> statement. Check the resolution of the issue
+ <xref href="https://issues.cloudera.org/browse/IMPALA-1615" scope="external" format="html">IMPALA-1615</xref>.)
+ </note>
+
+<codeblock>drop incremental stats item_partitioned partition (i_category='Sports');
+drop incremental stats item_partitioned partition (i_category='Electronics');
+
+show table stats item_partitioned
++-------------+-------+--------+----------+--------------+---------+------------------
+| i_category | #Rows | #Files | Size | Bytes Cached | Format | Incremental stats
++-------------+-------+--------+----------+--------------+---------+------------------
+| Books | 1733 | 1 | 223.74KB | NOT CACHED | PARQUET | true
+| Children | 1786 | 1 | 230.05KB | NOT CACHED | PARQUET | true
+| Electronics | -1 | 1 | 232.67KB | NOT CACHED | PARQUET | false
+| Home | 1807 | 1 | 232.56KB | NOT CACHED | PARQUET | true
+| Jewelry | 1740 | 1 | 223.72KB | NOT CACHED | PARQUET | true
+| Men | 1811 | 1 | 231.25KB | NOT CACHED | PARQUET | true
+| Music | 1860 | 1 | 237.90KB | NOT CACHED | PARQUET | true
+| Shoes | 1835 | 1 | 234.90KB | NOT CACHED | PARQUET | true
+| Sports | -1 | 1 | 227.97KB | NOT CACHED | PARQUET | false
+| Women | 1790 | 1 | 226.27KB | NOT CACHED | PARQUET | true
+| Total | 17957 | 10 | 2.25MB | 0B | |
++-------------+-------+--------+----------+--------------+---------+------------------
+show column stats item_partitioned
++------------------+-----------+------------------+--------+----------+---------------
+| Column | Type | #Distinct Values | #Nulls | Max Size | Avg Size
++------------------+-----------+------------------+--------+----------+---------------
+| i_item_sk | INT | 19443 | -1 | 4 | 4
+| i_item_id | STRING | 9025 | -1 | 16 | 16
+| i_rec_start_date | TIMESTAMP | 4 | -1 | 16 | 16
+| i_rec_end_date | TIMESTAMP | 3 | -1 | 16 | 16
+| i_item_desc | STRING | 13330 | -1 | 200 | 100.3028030395
+| i_current_price | FLOAT | 2807 | -1 | 4 | 4
+| i_wholesale_cost | FLOAT | 2105 | -1 | 4 | 4
+| i_brand_id | INT | 965 | -1 | 4 | 4
+| i_brand | STRING | 725 | -1 | 22 | 16.17760086059
+| i_class_id | INT | 16 | -1 | 4 | 4
+| i_class | STRING | 101 | -1 | 15 | 7.767499923706
+| i_category_id | INT | 10 | -1 | 4 | 4
+| i_manufact_id | INT | 1857 | -1 | 4 | 4
+| i_manufact | STRING | 1028 | -1 | 15 | 11.32950019836
+| i_size | STRING | 8 | -1 | 11 | 4.334599971771
+| i_formulation | STRING | 12884 | -1 | 20 | 19.97999954223
+| i_color | STRING | 92 | -1 | 10 | 5.380899906158
+| i_units | STRING | 22 | -1 | 7 | 4.186900138854
+| i_container | STRING | 2 | -1 | 7 | 6.992599964141
+| i_manager_id | INT | 105 | -1 | 4 | 4
+| i_product_name | STRING | 19094 | -1 | 25 | 18.02330017089
+| i_category | STRING | 10 | 0 | -1 | -1
++------------------+-----------+------------------+--------+----------+---------------
+</codeblock>
+
+ <p>
+ To remove all statistics from the table, whether produced by <codeph>COMPUTE STATS</codeph> or
+ <codeph>COMPUTE INCREMENTAL STATS</codeph>, use the <codeph>DROP STATS</codeph> statement without the
+ <codeph>INCREMENTAL</codeph> clause. Now, both table-level and column-level statistics are reset.
+ </p>
+
+<codeblock>drop stats item_partitioned;
+
+show table stats item_partitioned
++-------------+-------+--------+----------+--------------+---------+------------------
+| i_category | #Rows | #Files | Size | Bytes Cached | Format | Incremental stats
++-------------+-------+--------+----------+--------------+---------+------------------
+| Books | -1 | 1 | 223.74KB | NOT CACHED | PARQUET | false
+| Children | -1 | 1 | 230.05KB | NOT CACHED | PARQUET | false
+| Electronics | -1 | 1 | 232.67KB | NOT CACHED | PARQUET | false
+| Home | -1 | 1 | 232.56KB | NOT CACHED | PARQUET | false
+| Jewelry | -1 | 1 | 223.72KB | NOT CACHED | PARQUET | false
+| Men | -1 | 1 | 231.25KB | NOT CACHED | PARQUET | false
+| Music | -1 | 1 | 237.90KB | NOT CACHED | PARQUET | false
+| Shoes | -1 | 1 | 234.90KB | NOT CACHED | PARQUET | false
+| Sports | -1 | 1 | 227.97KB | NOT CACHED | PARQUET | false
+| Women | -1 | 1 | 226.27KB | NOT CACHED | PARQUET | false
+| Total | -1 | 10 | 2.25MB | 0B | |
++-------------+-------+--------+----------+--------------+---------+------------------
+show column stats item_partitioned
++------------------+-----------+------------------+--------+----------+----------+
+| Column | Type | #Distinct Values | #Nulls | Max Size | Avg Size |
++------------------+-----------+------------------+--------+----------+----------+
+| i_item_sk | INT | -1 | -1 | 4 | 4 |
+| i_item_id | STRING | -1 | -1 | -1 | -1 |
+| i_rec_start_date | TIMESTAMP | -1 | -1 | 16 | 16 |
+| i_rec_end_date | TIMESTAMP | -1 | -1 | 16 | 16 |
+| i_item_desc | STRING | -1 | -1 | -1 | -1 |
+| i_current_price | FLOAT | -1 | -1 | 4 | 4 |
+| i_wholesale_cost | FLOAT | -1 | -1 | 4 | 4 |
+| i_brand_id | INT | -1 | -1 | 4 | 4 |
+| i_brand | STRING | -1 | -1 | -1 | -1 |
+| i_class_id | INT | -1 | -1 | 4 | 4 |
+| i_class | STRING | -1 | -1 | -1 | -1 |
+| i_category_id | INT | -1 | -1 | 4 | 4 |
+| i_manufact_id | INT | -1 | -1 | 4 | 4 |
+| i_manufact | STRING | -1 | -1 | -1 | -1 |
+| i_size | STRING | -1 | -1 | -1 | -1 |
+| i_formulation | STRING | -1 | -1 | -1 | -1 |
+| i_color | STRING | -1 | -1 | -1 | -1 |
+| i_units | STRING | -1 | -1 | -1 | -1 |
+| i_container | STRING | -1 | -1 | -1 | -1 |
+| i_manager_id | INT | -1 | -1 | 4 | 4 |
+| i_product_name | STRING | -1 | -1 | -1 | -1 |
+| i_category | STRING | 10 | 0 | -1 | -1 |
++------------------+-----------+------------------+--------+----------+----------+
+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ <xref href="impala_compute_stats.xml#compute_stats"/>, <xref href="impala_show.xml#show_table_stats"/>,
+ <xref href="impala_show.xml#show_column_stats"/>, <xref href="impala_perf_stats.xml#perf_stats"/>
+ </p>
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_drop_table.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_drop_table.xml b/docs/topics/impala_drop_table.xml
new file mode 100644
index 0000000..33cb726
--- /dev/null
+++ b/docs/topics/impala_drop_table.xml
@@ -0,0 +1,142 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="drop_table">
+
+ <title>DROP TABLE Statement</title>
+ <titlealts><navtitle>DROP TABLE</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="DDL"/>
+ <data name="Category" value="Tables"/>
+ <data name="Category" value="Schemas"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">DROP TABLE statement</indexterm>
+ Removes an Impala table. Also removes the underlying HDFS data files for internal tables, although not for
+ external tables.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock>DROP TABLE [IF EXISTS] [<varname>db_name</varname>.]<varname>table_name</varname> <ph rev="2.3.0">[PURGE]</ph></codeblock>
+
+ <p>
+ <b>IF EXISTS clause:</b>
+ </p>
+
+ <p>
+ The optional <codeph>IF EXISTS</codeph> clause makes the statement succeed whether or not the table exists.
+ If the table does exist, it is dropped; if it does not exist, the statement has no effect. This capability is
+ useful in standardized setup scripts that remove existing schema objects and create new ones. By using some
+ combination of <codeph>IF EXISTS</codeph> for the <codeph>DROP</codeph> statements and <codeph>IF NOT
+ EXISTS</codeph> clauses for the <codeph>CREATE</codeph> statements, the script can run successfully the first
+ time you run it (when the objects do not exist yet) and subsequent times (when some or all of the objects do
+ already exist).
+ </p>
+
+ <p rev="2.3.0">
+ <b>PURGE clause:</b>
+ </p>
+
+ <p rev="2.3.0">
+ The optional <codeph>PURGE</codeph> keyword, available in CDH 5.5 / Impala 2.3 and higher,
+ causes Impala to remove the associated HDFS data files
+ immediately, rather than going through the HDFS trashcan mechanism. Use this keyword when dropping
+ a table if it is crucial to remove the data as quickly as possible to free up space, or if there is
+ a problem with the trashcan, such as the trashcan not being configured or being in a different
+ HDFS encryption zone than the data files.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/ddl_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+
+ <p>
+ By default, Impala removes the associated HDFS directory and data files for the table. If you issue a
+ <codeph>DROP TABLE</codeph> and the data files are not deleted, it might be for the following reasons:
+ </p>
+
+ <ul>
+ <li>
+ If the table was created with the
+ <codeph><xref href="impala_tables.xml#external_tables">EXTERNAL</xref></codeph> clause, Impala leaves all
+ files and directories untouched. Use external tables when the data is under the control of other Hadoop
+ components, and Impala is only used to query the data files from their original locations.
+ </li>
+
+ <li>
+ Impala might leave the data files behind unintentionally, if there is no HDFS location available to hold
+ the HDFS trashcan for the <codeph>impala</codeph> user. See
+ <xref href="impala_prereqs.xml#prereqs_account"/> for the procedure to set up the required HDFS home
+ directory.
+ </li>
+ </ul>
+
+ <p>
+ Make sure that you are in the correct database before dropping a table, either by issuing a
+ <codeph>USE</codeph> statement first or by using a fully qualified name
+ <codeph><varname>db_name</varname>.<varname>table_name</varname></codeph>.
+ </p>
+
+ <p>
+ If you intend to issue a <codeph>DROP DATABASE</codeph> statement, first issue <codeph>DROP TABLE</codeph>
+ statements to remove all the tables in that database.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+<codeblock>create database temporary;
+use temporary;
+create table unimportant (x int);
+create table trivial (s string);
+-- Drop a table in the current database.
+drop table unimportant;
+-- Switch to a different database.
+use default;
+-- To drop a table in a different database...
+drop table trivial;
+<i>ERROR: AnalysisException: Table does not exist: default.trivial</i>
+-- ...use a fully qualified name.
+drop table temporary.trivial;</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/disk_space_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/s3_blurb"/>
+ <p rev="2.2.0">
+ Although Impala cannot write new data to a table stored in the Amazon
+ S3 filesystem, the <codeph>DROP TABLE</codeph> statement can remove data files from S3
+ if the associated S3 table is an internal table.
+ See <xref href="impala_s3.xml#s3"/> for details about working with S3 tables.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/cancel_blurb_no"/>
+
+ <p conref="../shared/impala_common.xml#common/permissions_blurb"/>
+ <p rev="CDH-19187">
+ For an internal table, the user ID that the <cmdname>impalad</cmdname> daemon runs under,
+ typically the <codeph>impala</codeph> user, must have write
+ permission for all the files and directories that make up the table.
+ </p>
+ <p>
+ For an external table, dropping the table only involves changes to metadata in the metastore database.
+ Because Impala does not remove any HDFS files or directories when external tables are dropped,
+ no particular permissions are needed for the associated HDFS files or directories.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ <xref href="impala_tables.xml#tables"/>,
+ <xref href="impala_alter_table.xml#alter_table"/>, <xref href="impala_create_table.xml#create_table"/>,
+ <xref href="impala_partitioning.xml#partitioning"/>, <xref href="impala_tables.xml#internal_tables"/>,
+ <xref href="impala_tables.xml#external_tables"/>
+ </p>
+
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_drop_view.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_drop_view.xml b/docs/topics/impala_drop_view.xml
new file mode 100644
index 0000000..edcab58
--- /dev/null
+++ b/docs/topics/impala_drop_view.xml
@@ -0,0 +1,48 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept rev="1.1" id="drop_view">
+
+ <title>DROP VIEW Statement</title>
+ <titlealts><navtitle>DROP VIEW</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="DDL"/>
+ <data name="Category" value="Schemas"/>
+ <data name="Category" value="Tables"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">DROP VIEW statement</indexterm>
+ Removes the specified view, which was originally created by the <codeph>CREATE VIEW</codeph> statement.
+ Because a view is purely a logical construct (an alias for a query) with no physical data behind it,
+ <codeph>DROP VIEW</codeph> only involves changes to metadata in the metastore database, not any data files in
+ HDFS.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock>DROP VIEW [IF EXISTS] [<varname>db_name</varname>.]<varname>view_name</varname></codeblock>
+
+ <p conref="../shared/impala_common.xml#common/ddl_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/cancel_blurb_no"/>
+
+ <p conref="../shared/impala_common.xml#common/permissions_blurb_no"/>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/create_drop_view_examples"/>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ <xref href="impala_views.xml#views"/>, <xref href="impala_create_view.xml#create_view"/>,
+ <xref href="impala_alter_view.xml#alter_view"/>
+ </p>
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_exec_single_node_rows_threshold.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_exec_single_node_rows_threshold.xml b/docs/topics/impala_exec_single_node_rows_threshold.xml
new file mode 100644
index 0000000..fa3007d
--- /dev/null
+++ b/docs/topics/impala_exec_single_node_rows_threshold.xml
@@ -0,0 +1,91 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept rev="2.0.0" id="exec_single_node_rows_threshold" xml:lang="en-US">
+
+ <title>EXEC_SINGLE_NODE_ROWS_THRESHOLD Query Option</title>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="Impala Query Options"/>
+ <data name="Category" value="Scalability"/>
+ <data name="Category" value="Performance"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">EXEC_SINGLE_NODE_ROWS_THRESHOLD query option</indexterm>
+ This setting controls the cutoff point (in terms of number of rows scanned) below which Impala treats a query
+ as a <q>small</q> query, turning off optimizations such as parallel execution and native code generation. The
+ overhead for these optimizations is applicable for queries involving substantial amounts of data, but it
+ makes sense to skip them for queries involving tiny amounts of data. Reducing the overhead for small queries
+ allows Impala to complete them more quickly, keeping YARN resources, admission control slots, and so on
+ available for data-intensive queries.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock>SET EXEC_SINGLE_NODE_ROWS_THRESHOLD=<varname>number_of_rows</varname></codeblock>
+
+ <p>
+ <b>Type:</b> numeric
+ </p>
+
+ <p>
+ <b>Default:</b> 100
+ </p>
+
+ <p>
+ <b>Usage notes:</b> Typically, you increase the default value to make this optimization apply to more queries.
+ If incorrect or corrupted table and column statistics cause Impala to apply this optimization
+ incorrectly to queries that actually involve substantial work, you might see the queries being slower as a
+ result of remote reads. In that case, recompute statistics with the <codeph>COMPUTE STATS</codeph>
+ or <codeph>COMPUTE INCREMENTAL STATS</codeph> statement. If there is a problem collecting accurate
+ statistics, you can turn this feature off by setting the value to -1.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/internals_blurb"/>
+
+ <p>
+ This setting applies to query fragments where the amount of data to scan can be accurately determined, either
+ through table and column statistics, or by the presence of a <codeph>LIMIT</codeph> clause. If Impala cannot
+ accurately estimate the size of the input data, this setting does not apply.
+ </p>
+
+ <p rev="2.3.0">
+ In CDH 5.5 / Impala 2.3 and higher, where Impala supports the complex data types <codeph>STRUCT</codeph>,
+ <codeph>ARRAY</codeph>, and <codeph>MAP</codeph>, if a query refers to any column of those types,
+ the small-query optimization is turned off for that query regardless of the
+ <codeph>EXEC_SINGLE_NODE_ROWS_THRESHOLD</codeph> setting.
+ </p>
+
+ <p>
+ For a query that is determined to be <q>small</q>, all work is performed on the coordinator node. This might
+ result in some I/O being performed by remote reads. The savings from not distributing the query work and not
+ generating native code are expected to outweigh any overhead from the remote reads.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+ <p>
+ A common use case is to query just a few rows from a table to inspect typical data values. In this example,
+ Impala does not parallelize the query or perform native code generation because the result set is guaranteed
+ to be smaller than the threshold value from this query option:
+ </p>
+
+<codeblock>SET EXEC_SINGLE_NODE_ROWS_THRESHOLD=500;
+SELECT * FROM enormous_table LIMIT 300;
+</codeblock>
+
+<!-- Don't have any other places that tie into this particular optimization technique yet.
+Potentially: conceptual topics about code generation, distributed queries
+
+<p conref="/Content/impala_common_xi44078.xml#common/related_info"/>
+<p>
+</p>
+-->
+
+ </conbody>
+
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_explain.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_explain.xml b/docs/topics/impala_explain.xml
new file mode 100644
index 0000000..c9e8846
--- /dev/null
+++ b/docs/topics/impala_explain.xml
@@ -0,0 +1,224 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="explain">
+
+ <title>EXPLAIN Statement</title>
+ <titlealts><navtitle>EXPLAIN</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Querying"/>
+ <data name="Category" value="Reports"/>
+ <data name="Category" value="Planning"/>
+ <data name="Category" value="Performance"/>
+ <data name="Category" value="Troubleshooting"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">EXPLAIN statement</indexterm>
+ Returns the execution plan for a statement, showing the low-level mechanisms that Impala will use to read the
+ data, divide the work among nodes in the cluster, and transmit intermediate and final results across the
+ network. Use <codeph>explain</codeph> followed by a complete <codeph>SELECT</codeph> query. For example:
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock>EXPLAIN { <varname>select_query</varname> | <varname>ctas_stmt</varname> | <varname>insert_stmt</varname> }
+</codeblock>
+
+ <p>
+ The <varname>select_query</varname> is a <codeph>SELECT</codeph> statement, optionally prefixed by a
+ <codeph>WITH</codeph> clause. See <xref href="impala_select.xml#select"/> for details.
+ </p>
+
+ <p>
+ The <varname>insert_stmt</varname> is an <codeph>INSERT</codeph> statement that inserts into or overwrites an
+ existing table. It can use either the <codeph>INSERT ... SELECT</codeph> or <codeph>INSERT ...
+ VALUES</codeph> syntax. See <xref href="impala_insert.xml#insert"/> for details.
+ </p>
+
+ <p>
+ The <varname>ctas_stmt</varname> is a <codeph>CREATE TABLE</codeph> statement using the <codeph>AS
+ SELECT</codeph> clause, typically abbreviated as a <q>CTAS</q> operation. See
+ <xref href="impala_create_table.xml#create_table"/> for details.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+
+ <p>
+ You can interpret the output to judge whether the query is performing efficiently, and adjust the query
+ and/or the schema if not. For example, you might change the tests in the <codeph>WHERE</codeph> clause, add
+ hints to make join operations more efficient, introduce subqueries, change the order of tables in a join, add
+ or change partitioning for a table, collect column statistics and/or table statistics in Hive, or any other
+ performance tuning steps.
+ </p>
+
+ <p>
+ The <codeph>EXPLAIN</codeph> output reminds you if table or column statistics are missing from any table
+ involved in the query. These statistics are important for optimizing queries involving large tables or
+ multi-table joins. See <xref href="impala_compute_stats.xml#compute_stats"/> for how to gather statistics,
+ and <xref href="impala_perf_stats.xml#perf_stats"/> for how to use this information for query tuning.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/explain_interpret"/>
+
+ <p>
+ If you come from a traditional database background and are not familiar with data warehousing, keep in mind
+ that Impala is optimized for full table scans across very large tables. The structure and distribution of
+ this data is typically not suitable for the kind of indexing and single-row lookups that are common in OLTP
+ environments. Seeing a query scan entirely through a large table is common and is not necessarily an
+ indication of an inefficient query. Of course, if you can reduce the volume of scanned data by orders of magnitude, for
+ example by using a query that affects only certain partitions within a partitioned table, then you might be
+ able to optimize a query so that it executes in seconds rather than minutes.
+ </p>
+
+ <p>
+ For more information and examples to help you interpret <codeph>EXPLAIN</codeph> output, see
+ <xref href="impala_explain_plan.xml#perf_explain"/>.
+ </p>
+
+ <p rev="1.2">
+ <b>Extended EXPLAIN output:</b>
+ </p>
+
+ <p rev="1.2">
+ For performance tuning of complex queries, and capacity planning (such as using the admission control and
+ resource management features), you can enable more detailed and informative output for the
+ <codeph>EXPLAIN</codeph> statement. In the <cmdname>impala-shell</cmdname> interpreter, issue the command
+ <codeph>SET EXPLAIN_LEVEL=<varname>level</varname></codeph>, where <varname>level</varname> is an integer
+ from 0 to 3 or corresponding mnemonic values <codeph>minimal</codeph>, <codeph>standard</codeph>,
+ <codeph>extended</codeph>, or <codeph>verbose</codeph>.
+ </p>
+
+ <p rev="1.2">
+ When extended <codeph>EXPLAIN</codeph> output is enabled, <codeph>EXPLAIN</codeph> statements print
+ information about estimated memory requirements, minimum number of virtual cores, and so on that you can use
+ to fine-tune the resource management options explained in
+ <xref href="impala_resource_management.xml#rm_options"/>. (The estimated memory requirements are
+ intentionally on the high side, to allow a margin for error, to avoid cancelling a query unnecessarily if you
+ set the <codeph>MEM_LIMIT</codeph> option to the estimated memory figure.)
+ </p>
+
+ <p>
+ See <xref href="impala_explain_level.xml#explain_level"/> for details and examples.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+ <p>
+ This example shows how the standard <codeph>EXPLAIN</codeph> output moves from the lowest (physical) level to
+ the higher (logical) levels. The query begins by scanning a certain amount of data; each node performs an
+ aggregation operation (evaluating <codeph>COUNT(*)</codeph>) on some subset of data that is local to that
+ node; the intermediate results are transmitted back to the coordinator node (labelled here as the
+ <codeph>EXCHANGE</codeph> node); lastly, the intermediate results are summed to display the final result.
+ </p>
+
+<codeblock id="explain_plan_simple">[impalad-host:21000] > explain select count(*) from customer_address;
++----------------------------------------------------------+
+| Explain String |
++----------------------------------------------------------+
+| Estimated Per-Host Requirements: Memory=42.00MB VCores=1 |
+| |
+| 03:AGGREGATE [MERGE FINALIZE] |
+| | output: sum(count(*)) |
+| | |
+| 02:EXCHANGE [PARTITION=UNPARTITIONED] |
+| | |
+| 01:AGGREGATE |
+| | output: count(*) |
+| | |
+| 00:SCAN HDFS [default.customer_address] |
+| partitions=1/1 size=5.25MB |
++----------------------------------------------------------+
+</codeblock>
+
+ <p>
+ These examples show how the extended <codeph>EXPLAIN</codeph> output becomes more accurate and informative as
+ statistics are gathered by the <codeph>COMPUTE STATS</codeph> statement. Initially, much of the information
+ about data size and distribution is marked <q>unavailable</q>. Impala can determine the raw data size, but
+ not the number of rows or number of distinct values for each column without additional analysis. The
+ <codeph>COMPUTE STATS</codeph> statement performs this analysis, so a subsequent <codeph>EXPLAIN</codeph>
+ statement has additional information to use in deciding how to optimize the distributed query.
+ </p>
+
+ <draft-comment translate="no">
+Re-run these examples with more substantial tables populated with data.
+</draft-comment>
+
+<codeblock rev="1.2">[localhost:21000] > set explain_level=extended;
+EXPLAIN_LEVEL set to extended
+[localhost:21000] > explain select x from t1;
++----------------------------------------------------------+
+| Explain String |
++----------------------------------------------------------+
+| Estimated Per-Host Requirements: Memory=32.00MB VCores=1 |
+| |
+| 01:EXCHANGE [PARTITION=UNPARTITIONED] |
+| | hosts=1 per-host-mem=unavailable |
+<b>| | tuple-ids=0 row-size=4B cardinality=unavailable |</b>
+| | |
+| 00:SCAN HDFS [default.t1, PARTITION=RANDOM]               |
+| partitions=1/1 size=36B                                   |
+<b>| table stats: unavailable |</b>
+<b>| column stats: unavailable |</b>
+| hosts=1 per-host-mem=32.00MB |
+<b>| tuple-ids=0 row-size=4B cardinality=unavailable |</b>
++----------------------------------------------------------+
+</codeblock>
+
+<codeblock rev="1.2">[localhost:21000] > compute stats t1;
++-----------------------------------------+
+| summary |
++-----------------------------------------+
+| Updated 1 partition(s) and 1 column(s). |
++-----------------------------------------+
+[localhost:21000] > explain select x from t1;
++----------------------------------------------------------+
+| Explain String |
++----------------------------------------------------------+
+| Estimated Per-Host Requirements: Memory=64.00MB VCores=1 |
+| |
+| 01:EXCHANGE [PARTITION=UNPARTITIONED] |
+| | hosts=1 per-host-mem=unavailable |
+| | tuple-ids=0 row-size=4B cardinality=0 |
+| | |
+| 00:SCAN HDFS [default.t1, PARTITION=RANDOM] |
+| partitions=1/1 size=36B |
+<b>| table stats: 0 rows total |</b>
+<b>| column stats: all |</b>
+| hosts=1 per-host-mem=64.00MB |
+<b>| tuple-ids=0 row-size=4B cardinality=0 |</b>
++----------------------------------------------------------+
+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/security_blurb"/>
+ <p conref="../shared/impala_common.xml#common/redaction_yes"/>
+
+ <p conref="../shared/impala_common.xml#common/cancel_blurb_no"/>
+
+ <p conref="../shared/impala_common.xml#common/permissions_blurb"/>
+ <p rev="CDH-19187">
+ <!-- Doublecheck these details. Does EXPLAIN really need any permissions? -->
+ The user ID that the <cmdname>impalad</cmdname> daemon runs under,
+ typically the <codeph>impala</codeph> user, must have read
+ and execute permissions for all applicable directories in all source tables
+ for the query that is being explained.
+ (A <codeph>SELECT</codeph> operation could read files from multiple different HDFS directories
+ if the source table is partitioned.)
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+ <p>
+ <xref href="impala_select.xml#select"/>,
+ <xref href="impala_insert.xml#insert"/>,
+ <xref href="impala_create_table.xml#create_table"/>,
+ <xref href="impala_explain_plan.xml#explain_plan"/>
+ </p>
+
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_explain_level.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_explain_level.xml b/docs/topics/impala_explain_level.xml
new file mode 100644
index 0000000..f54e8a8
--- /dev/null
+++ b/docs/topics/impala_explain_level.xml
@@ -0,0 +1,338 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept rev="1.2" id="explain_level">
+
+ <title>EXPLAIN_LEVEL Query Option</title>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="Impala Query Options"/>
+ <data name="Category" value="Troubleshooting"/>
+ <data name="Category" value="Querying"/>
+ <data name="Category" value="Performance"/>
+ <data name="Category" value="Reports"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">EXPLAIN_LEVEL query option</indexterm>
+ Controls the amount of detail provided in the output of the <codeph>EXPLAIN</codeph> statement. The basic
+ output can help you identify high-level performance issues such as scanning a higher volume of data or more
+ partitions than you expect. The higher levels of detail show how intermediate results flow between nodes and
+ how different SQL operations such as <codeph>ORDER BY</codeph>, <codeph>GROUP BY</codeph>, joins, and
+ <codeph>WHERE</codeph> clauses are implemented within a distributed query.
+ </p>
+
+ <p>
+ <b>Type:</b> <codeph>STRING</codeph> or <codeph>INT</codeph>
+ </p>
+
+ <p>
+ <b>Default:</b> <codeph>1</codeph>
+ </p>
+
+ <p>
+ <b>Arguments:</b>
+ </p>
+
+ <p>
+ The allowed range of numeric values for this option is 0 to 3:
+ </p>
+
+ <ul>
+ <li>
+ <codeph>0</codeph> or <codeph>MINIMAL</codeph>: A barebones list, one line per operation. Primarily useful
+ for checking the join order in very long queries where the regular <codeph>EXPLAIN</codeph> output is too
+ long to read easily.
+ </li>
+
+ <li>
+ <codeph>1</codeph> or <codeph>STANDARD</codeph>: The default level of detail, showing the logical way that
+ work is split up for the distributed query.
+ </li>
+
+ <li>
+ <codeph>2</codeph> or <codeph>EXTENDED</codeph>: Includes additional detail about how the query planner
+ uses statistics in its decision-making process, to understand how a query could be tuned by gathering
+ statistics, using query hints, adding or removing predicates, and so on.
+ </li>
+
+ <li>
+ <codeph>3</codeph> or <codeph>VERBOSE</codeph>: The maximum level of detail, showing how work is split up
+ within each node into <q>query fragments</q> that are connected in a pipeline. This extra detail is
+ primarily useful for low-level performance testing and tuning within Impala itself, rather than for
+ rewriting the SQL code at the user level.
+ </li>
+ </ul>
+
+ <note>
+ Prior to Impala 1.3, the allowed argument range for <codeph>EXPLAIN_LEVEL</codeph> was 0 to 1: level 0 had
+ the mnemonic <codeph>NORMAL</codeph>, and level 1 was <codeph>VERBOSE</codeph>. In Impala 1.3 and higher,
+ <codeph>NORMAL</codeph> is not a valid mnemonic value, and <codeph>VERBOSE</codeph> still applies to the
+ highest level of detail but now corresponds to level 3. You might need to adjust the values if you have any
+ older <codeph>impala-shell</codeph> script files that set the <codeph>EXPLAIN_LEVEL</codeph> query option.
+ </note>
+
+ <p>
+ Changing the value of this option controls the amount of detail in the output of the <codeph>EXPLAIN</codeph>
+ statement. The extended information from level 2 or 3 is especially useful during performance tuning, when
+ you need to confirm whether the work for the query is distributed the way you expect, particularly for the
+ most resource-intensive operations such as join queries against large tables, queries against tables with
+ large numbers of partitions, and insert operations for Parquet tables. The extended information also helps to
+ check estimated resource usage when you use the admission control or resource management features explained
+ in <xref href="impala_resource_management.xml#resource_management"/>. See
+ <xref href="impala_explain.xml#explain"/> for the syntax of the <codeph>EXPLAIN</codeph> statement, and
+ <xref href="impala_explain_plan.xml#perf_explain"/> for details about how to use the extended information.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+
+ <p>
+ As always, read the <codeph>EXPLAIN</codeph> output from bottom to top. The lowest lines represent the
+ initial work of the query (scanning data files), the lines in the middle represent calculations done on each
+ node and how intermediate results are transmitted from one node to another, and the topmost lines represent
+ the final results being sent back to the coordinator node.
+ </p>
+
+ <p>
+ The numbers in the left column are generated internally during the initial planning phase and do not
+ represent the actual order of operations, so it is not significant if they appear out of order in the
+ <codeph>EXPLAIN</codeph> output.
+ </p>
+
+ <p>
+ At all <codeph>EXPLAIN</codeph> levels, the plan contains a warning if any tables in the query are missing
+ statistics. Use the <codeph>COMPUTE STATS</codeph> statement to gather statistics for each table and suppress
+ this warning. See <xref href="impala_perf_stats.xml#perf_stats"/> for details about how the statistics help
+ query performance.
+ </p>
+
+ <p>
+ The <codeph>PROFILE</codeph> command in <cmdname>impala-shell</cmdname> always starts with an explain plan
+ showing full detail, the same as with <codeph>EXPLAIN_LEVEL=3</codeph>. <ph rev="1.4.0">After the explain
+ plan comes the executive summary, the same output as produced by the <codeph>SUMMARY</codeph> command in
+ <cmdname>impala-shell</cmdname>.</ph>
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+ <p>
+ These examples use a trivial, empty table to illustrate how the essential aspects of query planning are shown
+ in <codeph>EXPLAIN</codeph> output:
+ </p>
+
+<codeblock>[localhost:21000] > create table t1 (x int, s string);
+[localhost:21000] > set explain_level=1;
+[localhost:21000] > explain select count(*) from t1;
++------------------------------------------------------------------------------------+
+| Explain String |
++------------------------------------------------------------------------------------+
+| Estimated Per-Host Requirements: Memory=10.00MB VCores=1 |
+| WARNING: The following tables are missing relevant table and/or column statistics. |
+| explain_plan.t1 |
+| |
+| 03:AGGREGATE [MERGE FINALIZE] |
+| | output: sum(count(*)) |
+| | |
+| 02:EXCHANGE [PARTITION=UNPARTITIONED] |
+| | |
+| 01:AGGREGATE |
+| | output: count(*) |
+| | |
+| 00:SCAN HDFS [explain_plan.t1] |
+| partitions=1/1 size=0B |
++------------------------------------------------------------------------------------+
+[localhost:21000] > explain select * from t1;
++------------------------------------------------------------------------------------+
+| Explain String |
++------------------------------------------------------------------------------------+
+| Estimated Per-Host Requirements: Memory=-9223372036854775808B VCores=0 |
+| WARNING: The following tables are missing relevant table and/or column statistics. |
+| explain_plan.t1 |
+| |
+| 01:EXCHANGE [PARTITION=UNPARTITIONED] |
+| | |
+| 00:SCAN HDFS [explain_plan.t1] |
+| partitions=1/1 size=0B |
++------------------------------------------------------------------------------------+
+[localhost:21000] > set explain_level=2;
+[localhost:21000] > explain select * from t1;
++------------------------------------------------------------------------------------+
+| Explain String |
++------------------------------------------------------------------------------------+
+| Estimated Per-Host Requirements: Memory=-9223372036854775808B VCores=0 |
+| WARNING: The following tables are missing relevant table and/or column statistics. |
+| explain_plan.t1 |
+| |
+| 01:EXCHANGE [PARTITION=UNPARTITIONED] |
+| | hosts=0 per-host-mem=unavailable |
+| | tuple-ids=0 row-size=19B cardinality=unavailable |
+| | |
+| 00:SCAN HDFS [explain_plan.t1, PARTITION=RANDOM] |
+| partitions=1/1 size=0B |
+| table stats: unavailable |
+| column stats: unavailable |
+| hosts=0 per-host-mem=0B |
+| tuple-ids=0 row-size=19B cardinality=unavailable |
++------------------------------------------------------------------------------------+
+[localhost:21000] > set explain_level=3;
+[localhost:21000] > explain select * from t1;
++------------------------------------------------------------------------------------+
+| Explain String |
++------------------------------------------------------------------------------------+
+| Estimated Per-Host Requirements: Memory=-9223372036854775808B VCores=0 |
+<b>| WARNING: The following tables are missing relevant table and/or column statistics. |</b>
+<b>| explain_plan.t1 |</b>
+| |
+| F01:PLAN FRAGMENT [PARTITION=UNPARTITIONED] |
+| 01:EXCHANGE [PARTITION=UNPARTITIONED] |
+| hosts=0 per-host-mem=unavailable |
+| tuple-ids=0 row-size=19B cardinality=unavailable |
+| |
+| F00:PLAN FRAGMENT [PARTITION=RANDOM] |
+| DATASTREAM SINK [FRAGMENT=F01, EXCHANGE=01, PARTITION=UNPARTITIONED] |
+| 00:SCAN HDFS [explain_plan.t1, PARTITION=RANDOM] |
+| partitions=1/1 size=0B |
+<b>| table stats: unavailable |</b>
+<b>| column stats: unavailable |</b>
+| hosts=0 per-host-mem=0B |
+| tuple-ids=0 row-size=19B cardinality=unavailable |
++------------------------------------------------------------------------------------+
+</codeblock>
+
+ <p>
+ As the warning message demonstrates, most of the information needed for Impala to do efficient query
+ planning, and for you to understand the performance characteristics of the query, requires running the
+ <codeph>COMPUTE STATS</codeph> statement for the table:
+ </p>
+
+<codeblock>[localhost:21000] > compute stats t1;
++-----------------------------------------+
+| summary |
++-----------------------------------------+
+| Updated 1 partition(s) and 2 column(s). |
++-----------------------------------------+
+[localhost:21000] > explain select * from t1;
++------------------------------------------------------------------------+
+| Explain String |
++------------------------------------------------------------------------+
+| Estimated Per-Host Requirements: Memory=-9223372036854775808B VCores=0 |
+| |
+| F01:PLAN FRAGMENT [PARTITION=UNPARTITIONED] |
+| 01:EXCHANGE [PARTITION=UNPARTITIONED] |
+| hosts=0 per-host-mem=unavailable |
+| tuple-ids=0 row-size=20B cardinality=0 |
+| |
+| F00:PLAN FRAGMENT [PARTITION=RANDOM] |
+| DATASTREAM SINK [FRAGMENT=F01, EXCHANGE=01, PARTITION=UNPARTITIONED] |
+| 00:SCAN HDFS [explain_plan.t1, PARTITION=RANDOM] |
+| partitions=1/1 size=0B |
+<b>| table stats: 0 rows total |</b>
+<b>| column stats: all |</b>
+| hosts=0 per-host-mem=0B |
+| tuple-ids=0 row-size=20B cardinality=0 |
++------------------------------------------------------------------------+
+</codeblock>
+
+ <p>
+ Joins and other complicated, multi-part queries are the ones where you most commonly need to examine the
+ <codeph>EXPLAIN</codeph> output and customize the amount of detail in the output. This example shows the
+ default <codeph>EXPLAIN</codeph> output for a three-way join query, then the equivalent output with a
+ <codeph>[SHUFFLE]</codeph> hint to change the join mechanism between the first two tables from a broadcast
+ join to a shuffle join.
+ </p>
+
+<codeblock>[localhost:21000] > set explain_level=1;
+[localhost:21000] > explain select one.*, two.*, three.* from t1 one, t1 two, t1 three where one.x = two.x and two.x = three.x;
++------------------------------------------------------------------------------------+
+| Explain String |
++------------------------------------------------------------------------------------+
+| Estimated Per-Host Requirements: Memory=4.00GB VCores=3 |
+| |
+| 07:EXCHANGE [PARTITION=UNPARTITIONED] |
+| | |
+<b>| 04:HASH JOIN [INNER JOIN, BROADCAST] |</b>
+| | hash predicates: two.x = three.x |
+| | |
+<b>| |--06:EXCHANGE [BROADCAST] |</b>
+| | | |
+| | 02:SCAN HDFS [explain_plan.t1 three] |
+| | partitions=1/1 size=0B |
+| | |
+<b>| 03:HASH JOIN [INNER JOIN, BROADCAST] |</b>
+| | hash predicates: one.x = two.x |
+| | |
+<b>| |--05:EXCHANGE [BROADCAST] |</b>
+| | | |
+| | 01:SCAN HDFS [explain_plan.t1 two] |
+| | partitions=1/1 size=0B |
+| | |
+| 00:SCAN HDFS [explain_plan.t1 one] |
+| partitions=1/1 size=0B |
++------------------------------------------------------------------------------------+
+[localhost:21000] > explain select one.*, two.*, three.* from t1 one join [shuffle] t1 two join t1 three where one.x = two.x and two.x = three.x;
++------------------------------------------------------------------------------------+
+| Explain String |
++------------------------------------------------------------------------------------+
+| Estimated Per-Host Requirements: Memory=4.00GB VCores=3 |
+| |
+| 08:EXCHANGE [PARTITION=UNPARTITIONED] |
+| | |
+<b>| 04:HASH JOIN [INNER JOIN, BROADCAST] |</b>
+| | hash predicates: two.x = three.x |
+| | |
+<b>| |--07:EXCHANGE [BROADCAST] |</b>
+| | | |
+| | 02:SCAN HDFS [explain_plan.t1 three] |
+| | partitions=1/1 size=0B |
+| | |
+<b>| 03:HASH JOIN [INNER JOIN, PARTITIONED] |</b>
+| | hash predicates: one.x = two.x |
+| | |
+<b>| |--06:EXCHANGE [PARTITION=HASH(two.x)] |</b>
+| | | |
+| | 01:SCAN HDFS [explain_plan.t1 two] |
+| | partitions=1/1 size=0B |
+| | |
+<b>| 05:EXCHANGE [PARTITION=HASH(one.x)] |</b>
+| | |
+| 00:SCAN HDFS [explain_plan.t1 one] |
+| partitions=1/1 size=0B |
++------------------------------------------------------------------------------------+
+</codeblock>
+
+ <p>
+ For a join involving many different tables, the default <codeph>EXPLAIN</codeph> output might stretch over
+ several pages, and the only details you care about might be the join order and the mechanism (broadcast or
+ shuffle) for joining each pair of tables. In that case, you might set <codeph>EXPLAIN_LEVEL</codeph> to its
+ lowest value of 0, to focus on just the join order and join mechanism for each stage. The following example
+ shows how the rows from the first and second joined tables are hashed and divided among the nodes of the
+ cluster for further filtering; then the entire contents of the third table are broadcast to all nodes for the
+ final stage of join processing.
+ </p>
+
+<codeblock>[localhost:21000] > set explain_level=0;
+[localhost:21000] > explain select one.*, two.*, three.* from t1 one join [shuffle] t1 two join t1 three where one.x = two.x and two.x = three.x;
++---------------------------------------------------------+
+| Explain String |
++---------------------------------------------------------+
+| Estimated Per-Host Requirements: Memory=4.00GB VCores=3 |
+| |
+| 08:EXCHANGE [PARTITION=UNPARTITIONED] |
+<b>| 04:HASH JOIN [INNER JOIN, BROADCAST] |</b>
+<b>| |--07:EXCHANGE [BROADCAST] |</b>
+| | 02:SCAN HDFS [explain_plan.t1 three] |
+<b>| 03:HASH JOIN [INNER JOIN, PARTITIONED] |</b>
+<b>| |--06:EXCHANGE [PARTITION=HASH(two.x)] |</b>
+| | 01:SCAN HDFS [explain_plan.t1 two] |
+<b>| 05:EXCHANGE [PARTITION=HASH(one.x)] |</b>
+| 00:SCAN HDFS [explain_plan.t1 one] |
++---------------------------------------------------------+
+</codeblock>
+
+<!-- Consider adding a related info section to collect the xrefs earlier on this page. -->
+
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_float.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_float.xml b/docs/topics/impala_float.xml
new file mode 100644
index 0000000..51e3311
--- /dev/null
+++ b/docs/topics/impala_float.xml
@@ -0,0 +1,94 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="float">
+
+ <title>FLOAT Data Type</title>
+ <titlealts><navtitle>FLOAT</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="Impala Data Types"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Data Analysts"/>
+ <data name="Category" value="Developers"/>
+ <data name="Category" value="Schemas"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ A single precision floating-point data type used in <codeph>CREATE TABLE</codeph> and <codeph>ALTER
+ TABLE</codeph> statements.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+ <p>
+ In the column definition of a <codeph>CREATE TABLE</codeph> statement:
+ </p>
+
+<codeblock><varname>column_name</varname> FLOAT</codeblock>
+
+ <p>
+ <b>Range:</b> 1.40129846432481707e-45 .. 3.40282346638528860e+38, positive or negative
+ </p>
+
+ <p>
+ <b>Precision:</b> 6 to 9 significant digits, depending on usage. The number of significant digits does
+ not depend on the position of the decimal point.
+ </p>
+
+ <p>
+ <b>Representation:</b> The values are stored in 4 bytes, using
+ <xref href="https://en.wikipedia.org/wiki/Single-precision_floating-point_format" scope="external" format="html">IEEE 754 Single Precision Binary Floating Point</xref> format.
+ </p>
+
+ <p>
+ <b>Conversions:</b> Impala automatically converts <codeph>FLOAT</codeph> to more precise
+ <codeph>DOUBLE</codeph> values, but not the other way around. You can use <codeph>CAST()</codeph> to convert
+ <codeph>FLOAT</codeph> values to <codeph>TINYINT</codeph>, <codeph>SMALLINT</codeph>, <codeph>INT</codeph>,
+ <codeph>BIGINT</codeph>, <codeph>STRING</codeph>, <codeph>TIMESTAMP</codeph>, or <codeph>BOOLEAN</codeph>.
+ You can use exponential notation in <codeph>FLOAT</codeph> literals or when casting from
+ <codeph>STRING</codeph>, for example <codeph>1.0e6</codeph> to represent one million.
+ <ph conref="../shared/impala_common.xml#common/cast_int_to_timestamp"/>
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+<codeblock>CREATE TABLE t1 (x FLOAT);
+SELECT CAST(1000.5 AS FLOAT);
+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/partitioning_imprecise"/>
+
+ <p conref="../shared/impala_common.xml#common/hbase_ok"/>
+
+ <p conref="../shared/impala_common.xml#common/parquet_ok"/>
+
+ <p conref="../shared/impala_common.xml#common/text_bulky"/>
+
+<!-- <p conref="/Content/impala_common_xi44078.xml#common/compatibility_blurb"/> -->
+
+ <p conref="../shared/impala_common.xml#common/internals_4_bytes"/>
+
+<!-- <p conref="/Content/impala_common_xi44078.xml#common/added_in_20"/> -->
+
+ <p conref="../shared/impala_common.xml#common/column_stats_constant"/>
+
+ <p conref="../shared/impala_common.xml#common/restrictions_blurb"/>
+
+<!-- This conref appears under SUM(), AVG(), FLOAT, and DOUBLE topics. -->
+
+ <p conref="../shared/impala_common.xml#common/sum_double"/>
+
+ <p conref="../shared/impala_common.xml#common/float_double_decimal_caveat"/>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ <xref href="impala_literals.xml#numeric_literals"/>, <xref href="impala_math_functions.xml#math_functions"/>,
+ <xref href="impala_double.xml#double"/>
+ </p>
+ </conbody>
+</concept>
[06/22] incubator-impala git commit: First try at porting over the
source files necessary for the Impala SQL Reference.
Posted by jr...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_porting.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_porting.xml b/docs/topics/impala_porting.xml
new file mode 100644
index 0000000..c9c8e52
--- /dev/null
+++ b/docs/topics/impala_porting.xml
@@ -0,0 +1,622 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="porting">
+
+ <title>Porting SQL from Other Database Systems to Impala</title>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Databases"/>
+ <data name="Category" value="Hive"/>
+ <data name="Category" value="Oracle"/>
+ <data name="Category" value="MySQL"/>
+ <data name="Category" value="PostgreSQL"/>
+ <data name="Category" value="Troubleshooting"/>
+ <data name="Category" value="Porting"/>
+ <data name="Category" value="Data Analysts"/>
+ <data name="Category" value="Developers"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">porting</indexterm>
+ Although Impala uses standard SQL for queries, you might need to modify SQL source when bringing applications
+ to Impala, due to variations in data types, built-in functions, vendor language extensions, and
+ Hadoop-specific syntax. Even when SQL is working correctly, you might make further minor modifications for
+ best performance.
+ </p>
+
+ <p outputclass="toc inpage"/>
+ </conbody>
+
+ <concept id="porting_ddl_dml">
+
+ <title>Porting DDL and DML Statements</title>
+
+ <conbody>
+
+ <p>
+ When adapting SQL code from a traditional database system to Impala, expect to find a number of differences
+ in the DDL statements that you use to set up the schema. Clauses related to physical layout of files,
+ tablespaces, and indexes have no equivalent in Impala. You might restructure your schema considerably to
+ account for the Impala partitioning scheme and Hadoop file formats.
+ </p>
+
+ <p>
+ Expect SQL queries to have a much higher degree of compatibility. With modest rewriting to address vendor
+ extensions and features not yet supported in Impala, you might be able to run identical or almost-identical
+ query text on both systems.
+ </p>
+
+ <p>
+ Therefore, consider separating out the DDL into a separate Impala-specific setup script. Focus your reuse
+ and ongoing tuning efforts on the code for SQL queries.
+ </p>
+ </conbody>
+ </concept>
+
+ <concept id="porting_data_types">
+
+ <title>Porting Data Types from Other Database Systems</title>
+
+ <conbody>
+
+ <ul>
+ <li>
+ <p>
+ Change any <codeph>VARCHAR</codeph>, <codeph>VARCHAR2</codeph>, and <codeph>CHAR</codeph> columns to
+ <codeph>STRING</codeph>. Remove any length constraints from the column declarations; for example,
+ change <codeph>VARCHAR(32)</codeph> or <codeph>CHAR(1)</codeph> to <codeph>STRING</codeph>. Impala is
+ very flexible about the length of string values; it does not impose any length constraints
+ or do any special processing (such as blank-padding) for <codeph>STRING</codeph> columns.
+ (In Impala 2.0 and higher, there are data types <codeph>VARCHAR</codeph> and <codeph>CHAR</codeph>,
+ with length constraints for both types and blank-padding for <codeph>CHAR</codeph>.
+ However, for performance reasons, it is still preferable to use <codeph>STRING</codeph>
+ columns where practical.)
+ </p>
+ </li>
+
+ <li>
+ <p>
+ For national language character types such as <codeph>NCHAR</codeph>, <codeph>NVARCHAR</codeph>, or
+ <codeph>NCLOB</codeph>, be aware that while Impala can store and query UTF-8 character data, currently
+ some string manipulation operations only work correctly with ASCII data. See
+ <xref href="impala_string.xml#string"/> for details.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ Change any <codeph>DATE</codeph>, <codeph>DATETIME</codeph>, or <codeph>TIME</codeph> columns to
+ <codeph>TIMESTAMP</codeph>. Remove any precision constraints. Remove any timezone clauses, and make
+ sure your application logic or ETL process accounts for the fact that Impala expects all
+ <codeph>TIMESTAMP</codeph> values to be in
+ <xref href="http://en.wikipedia.org/wiki/Coordinated_Universal_Time" scope="external" format="html">Coordinated
+ Universal Time (UTC)</xref>. See <xref href="impala_timestamp.xml#timestamp"/> for information about
+ the <codeph>TIMESTAMP</codeph> data type, and
+ <xref href="impala_datetime_functions.xml#datetime_functions"/> for conversion functions for different
+ date and time formats.
+ </p>
+ <p>
+ You might also need to adapt date- and time-related literal values and format strings to use the
+ supported Impala date and time formats. If you have date and time literals with different separators or
+ different numbers of <codeph>YY</codeph>, <codeph>MM</codeph>, and so on placeholders than Impala
+ expects, consider using calls to <codeph>regexp_replace()</codeph> to transform those values to the
+ Impala-compatible format. See <xref href="impala_timestamp.xml#timestamp"/> for information about the
+ allowed formats for date and time literals, and
+ <xref href="impala_string_functions.xml#string_functions"/> for string conversion functions such as
+ <codeph>regexp_replace()</codeph>.
+ </p>
+ <p>
+ Instead of <codeph>SYSDATE</codeph>, call the function <codeph>NOW()</codeph>.
+ </p>
+ <p>
+ Instead of adding or subtracting directly from a date value to produce a value <varname>N</varname>
+ days in the past or future, use an <codeph>INTERVAL</codeph> expression, for example <codeph>NOW() +
+ INTERVAL 30 DAYS</codeph>.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ Although Impala supports <codeph>INTERVAL</codeph> expressions for datetime arithmetic, as shown in
+ <xref href="impala_timestamp.xml#timestamp"/>, <codeph>INTERVAL</codeph> is not available as a column
+ data type in Impala. For any <codeph>INTERVAL</codeph> values stored in tables, convert them to numeric
+ values that you can add or subtract using the functions in
+ <xref href="impala_datetime_functions.xml#datetime_functions"/>. For example, if you had a table
+ <codeph>DEADLINES</codeph> with an <codeph>INT</codeph> column <codeph>TIME_PERIOD</codeph>, you could
+ construct dates N days in the future like so:
+ </p>
+<codeblock>SELECT NOW() + INTERVAL time_period DAYS from deadlines;</codeblock>
+ </li>
+
+ <li>
+ <p>
+ For <codeph>YEAR</codeph> columns, change to the smallest Impala integer type that has sufficient
+ range. See <xref href="impala_datatypes.xml#datatypes"/> for details about ranges, casting, and so on
+ for the various numeric data types.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ Change any <codeph>DECIMAL</codeph> and <codeph>NUMBER</codeph> types. If fixed-point precision is not
+ required, you can use <codeph>FLOAT</codeph> or <codeph>DOUBLE</codeph> on the Impala side depending on
+ the range of values. For applications that require precise decimal values, such as financial data, you
+ might need to make more extensive changes to table structure and application logic, such as using
+ separate integer columns for dollars and cents, or encoding numbers as string values and writing UDFs
+ to manipulate them. See <xref href="impala_datatypes.xml#datatypes"/> for details about ranges,
+ casting, and so on for the various numeric data types.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ <codeph>FLOAT</codeph>, <codeph>DOUBLE</codeph>, and <codeph>REAL</codeph> types are supported in
+ Impala. Remove any precision and scale specifications. (In Impala, <codeph>REAL</codeph> is just an
+ alias for <codeph>DOUBLE</codeph>; columns declared as <codeph>REAL</codeph> are turned into
+ <codeph>DOUBLE</codeph> behind the scenes.) See <xref href="impala_datatypes.xml#datatypes"/> for
+ details about ranges, casting, and so on for the various numeric data types.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ Most integer types from other systems have equivalents in Impala, perhaps under different names such as
+ <codeph>BIGINT</codeph> instead of <codeph>INT8</codeph>. For any that are unavailable, for example
+ <codeph>MEDIUMINT</codeph>, switch to the smallest Impala integer type that has sufficient range.
+ Remove any precision specifications. See <xref href="impala_datatypes.xml#datatypes"/> for details
+ about ranges, casting, and so on for the various numeric data types.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ Remove any <codeph>UNSIGNED</codeph> constraints. All Impala numeric types are signed. See
+ <xref href="impala_datatypes.xml#datatypes"/> for details about ranges, casting, and so on for the
+ various numeric data types.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ For any types holding bitwise values, use an integer type with enough range to hold all the relevant
+ bits within a positive integer. See <xref href="impala_datatypes.xml#datatypes"/> for details about
+ ranges, casting, and so on for the various numeric data types.
+ </p>
+ <p>
+ For example, <codeph>TINYINT</codeph> has a maximum positive value of 127, not 256, so to manipulate
+ 8-bit bitfields as positive numbers switch to the next largest type <codeph>SMALLINT</codeph>.
+ </p>
+<codeblock>[localhost:21000] > select cast(127*2 as tinyint);
++--------------------------+
+| cast(127 * 2 as tinyint) |
++--------------------------+
+| -2 |
++--------------------------+
+[localhost:21000] > select cast(128 as tinyint);
++----------------------+
+| cast(128 as tinyint) |
++----------------------+
+| -128 |
++----------------------+
+[localhost:21000] > select cast(127*2 as smallint);
++---------------------------+
+| cast(127 * 2 as smallint) |
++---------------------------+
+| 254 |
++---------------------------+</codeblock>
+ <p>
+ Impala does not support notation such as <codeph>b'0101'</codeph> for bit literals.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ For BLOB values, use <codeph>STRING</codeph> to represent <codeph>CLOB</codeph> or
+ <codeph>TEXT</codeph> types (character based large objects) up to 32 KB in size. Binary large objects
+            such as <codeph>BLOB</codeph>, <codeph>RAW</codeph>, <codeph>BINARY</codeph>, and
+ <codeph>VARBINARY</codeph> do not currently have an equivalent in Impala.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ For Boolean-like types such as <codeph>BOOL</codeph>, use the Impala <codeph>BOOLEAN</codeph> type.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ Because Impala currently does not support composite or nested types, any spatial data types in other
+ database systems do not have direct equivalents in Impala. You could represent spatial values in string
+ format and write UDFs to process them. See <xref href="impala_udf.xml#udfs"/> for details. Where
+ practical, separate spatial types into separate tables so that Impala can still work with the
+ non-spatial data.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ Take out any <codeph>DEFAULT</codeph> clauses. Impala can use data files produced from many different
+ sources, such as Pig, Hive, or MapReduce jobs. The fast import mechanisms of <codeph>LOAD DATA</codeph>
+ and external tables mean that Impala is flexible about the format of data files, and Impala does not
+ necessarily validate or cleanse data before querying it. When copying data through Impala
+ <codeph>INSERT</codeph> statements, you can use conditional functions such as <codeph>CASE</codeph> or
+ <codeph>NVL</codeph> to substitute some other value for <codeph>NULL</codeph> fields; see
+ <xref href="impala_conditional_functions.xml#conditional_functions"/> for details.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ Take out any constraints from your <codeph>CREATE TABLE</codeph> and <codeph>ALTER TABLE</codeph>
+ statements, for example <codeph>PRIMARY KEY</codeph>, <codeph>FOREIGN KEY</codeph>,
+ <codeph>UNIQUE</codeph>, <codeph>NOT NULL</codeph>, <codeph>UNSIGNED</codeph>, or
+ <codeph>CHECK</codeph> constraints. Impala can use data files produced from many different sources,
+ such as Pig, Hive, or MapReduce jobs. Therefore, Impala expects initial data validation to happen
+ earlier during the ETL or ELT cycle. After data is loaded into Impala tables, you can perform queries
+ to test for <codeph>NULL</codeph> values. When copying data through Impala <codeph>INSERT</codeph>
+ statements, you can use conditional functions such as <codeph>CASE</codeph> or <codeph>NVL</codeph> to
+ substitute some other value for <codeph>NULL</codeph> fields; see
+ <xref href="impala_conditional_functions.xml#conditional_functions"/> for details.
+ </p>
+ <p>
+ Do as much verification as practical before loading data into Impala. After data is loaded into Impala,
+ you can do further verification using SQL queries to check if values have expected ranges, if values
+ are <codeph>NULL</codeph> or not, and so on. If there is a problem with the data, you will need to
+ re-run earlier stages of the ETL process, or do an <codeph>INSERT ... SELECT</codeph> statement in
+ Impala to copy the faulty data to a new table and transform or filter out the bad values.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ Take out any <codeph>CREATE INDEX</codeph>, <codeph>DROP INDEX</codeph>, and <codeph>ALTER
+ INDEX</codeph> statements, and equivalent <codeph>ALTER TABLE</codeph> statements. Remove any
+ <codeph>INDEX</codeph>, <codeph>KEY</codeph>, or <codeph>PRIMARY KEY</codeph> clauses from
+ <codeph>CREATE TABLE</codeph> and <codeph>ALTER TABLE</codeph> statements. Impala is optimized for bulk
+ read operations for data warehouse-style queries, and therefore does not support indexes for its
+ tables.
+ </p>
+ </li>
+
+ <li>
+ <p>
+            Calls to built-in functions with out-of-range or otherwise incorrect arguments return
+ <codeph>NULL</codeph> in Impala as opposed to raising exceptions. (This rule applies even when the
+ <codeph>ABORT_ON_ERROR=true</codeph> query option is in effect.) Run small-scale queries using
+ representative data to doublecheck that calls to built-in functions are returning expected values
+ rather than <codeph>NULL</codeph>. For example, unsupported <codeph>CAST</codeph> operations do not
+ raise an error in Impala:
+ </p>
+<codeblock>select cast('foo' as int);
++--------------------+
+| cast('foo' as int) |
++--------------------+
+| NULL |
++--------------------+</codeblock>
+ </li>
+
+ <li>
+ <p>
+ For any other type not supported in Impala, you could represent their values in string format and write
+ UDFs to process them. See <xref href="impala_udf.xml#udfs"/> for details.
+ </p>
+ </li>
+
+ <li>
+ <p>
+            To detect the presence of unsupported or unconvertible data types in data files, do initial testing
+ with the <codeph>ABORT_ON_ERROR=true</codeph> query option in effect. This option causes queries to
+ fail immediately if they encounter disallowed type conversions. See
+ <xref href="impala_abort_on_error.xml#abort_on_error"/> for details. For example:
+ </p>
+<codeblock>set abort_on_error=true;
+select count(*) from (select * from t1);
+-- The above query will fail if the data files for T1 contain any
+-- values that can't be converted to the expected Impala data types.
+-- For example, if T1.C1 is defined as INT but the column contains
+-- floating-point values like 1.1, the query will return an error.</codeblock>
+ </li>
+ </ul>
+ </conbody>
+ </concept>
+
+ <concept id="porting_statements">
+
+ <title>SQL Statements to Remove or Adapt</title>
+
+ <conbody>
+
+ <p>
+ Some SQL statements or clauses that you might be familiar with are not currently supported in Impala:
+ </p>
+
+ <ul>
+ <li>
+ <p>
+ Impala has no <codeph>DELETE</codeph> statement. Impala is intended for data warehouse-style operations
+ where you do bulk moves and transforms of large quantities of data. Instead of using
+ <codeph>DELETE</codeph>, use <codeph>INSERT OVERWRITE</codeph> to entirely replace the contents of a
+ table or partition, or use <codeph>INSERT ... SELECT</codeph> to copy a subset of data (everything but
+ the rows you intended to delete) from one table to another. See <xref href="impala_dml.xml#dml"/> for
+ an overview of Impala DML statements.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ Impala has no <codeph>UPDATE</codeph> statement. Impala is intended for data warehouse-style operations
+ where you do bulk moves and transforms of large quantities of data. Instead of using
+ <codeph>UPDATE</codeph>, do all necessary transformations early in the ETL process, such as in the job
+ that generates the original data, or when copying from one table to another to convert to a particular
+ file format or partitioning scheme. See <xref href="impala_dml.xml#dml"/> for an overview of Impala DML
+ statements.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ Impala has no transactional statements, such as <codeph>COMMIT</codeph> or <codeph>ROLLBACK</codeph>.
+ Impala effectively works like the <codeph>AUTOCOMMIT</codeph> mode in some database systems, where
+ changes take effect as soon as they are made.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ If your database, table, column, or other names conflict with Impala reserved words, use different
+ names or quote the names with backticks. See <xref href="impala_reserved_words.xml#reserved_words"/>
+ for the current list of Impala reserved words.
+ </p>
+ <p>
+ Conversely, if you use a keyword that Impala does not recognize, it might be interpreted as a table or
+ column alias. For example, in <codeph>SELECT * FROM t1 NATURAL JOIN t2</codeph>, Impala does not
+ recognize the <codeph>NATURAL</codeph> keyword and interprets it as an alias for the table
+ <codeph>t1</codeph>. If you experience any unexpected behavior with queries, check the list of reserved
+ words to make sure all keywords in join and <codeph>WHERE</codeph> clauses are recognized.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ Impala supports subqueries only in the <codeph>FROM</codeph> clause of a query, not within the
+            <codeph>WHERE</codeph> clause. Therefore, you cannot use clauses such as <codeph>WHERE
+ <varname>column</varname> IN (<varname>subquery</varname>)</codeph>. Also, Impala does not allow
+ <codeph>EXISTS</codeph> or <codeph>NOT EXISTS</codeph> clauses (although <codeph>EXISTS</codeph> is a
+ reserved keyword).
+ </p>
+ </li>
+
+ <li>
+ <p>
+ Impala supports <codeph>UNION</codeph> and <codeph>UNION ALL</codeph> set operators, but not
+ <codeph>INTERSECT</codeph>. <ph conref="../shared/impala_common.xml#common/union_all_vs_union"/>
+ </p>
+ </li>
+
+ <li>
+ <p>
+ Within queries, Impala requires query aliases for any subqueries:
+ </p>
+<codeblock>-- Without the alias 'contents_of_t1' at the end, query gives syntax error.
+select count(*) from (select * from t1) contents_of_t1;</codeblock>
+ </li>
+
+ <li>
+ <p>
+ When an alias is declared for an expression in a query, that alias cannot be referenced again within
+ the same query block:
+ </p>
+<codeblock>-- Can't reference AVERAGE twice in the SELECT list where it's defined.
+select avg(x) as average, average+1 from t1 group by x;
+ERROR: AnalysisException: couldn't resolve column reference: 'average'
+
+-- Although it can be referenced again later in the same query.
+select avg(x) as average from t1 group by x having average > 3;</codeblock>
+ <p>
+ For Impala, either repeat the expression again, or abstract the expression into a <codeph>WITH</codeph>
+ clause, creating named columns that can be referenced multiple times anywhere in the base query:
+ </p>
+<codeblock>-- The following 2 query forms are equivalent.
+select avg(x) as average, avg(x)+1 from t1 group by x;
+with avg_t as (select avg(x) average from t1 group by x) select average, average+1 from avg_t;</codeblock>
+<!-- An alternative bunch of queries to use in the example above.
+[localhost:21000] > select x*x as x_squared from t1;
+
+[localhost:21000] > select x*x as x_squared from t1 where x_squared < 100;
+ERROR: AnalysisException: couldn't resolve column reference: 'x_squared'
+[localhost:21000] > select x*x as x_squared, x_squared * pi() as pi_x_squared from t1;
+ERROR: AnalysisException: couldn't resolve column reference: 'x_squared'
+[localhost:21000] > select x*x as x_squared from t1 group by x_squared;
+
+[localhost:21000] > select x*x as x_squared from t1 group by x_squared having x_squared < 100;
+-->
+ </li>
+
+ <li>
+ <p>
+ Impala does not support certain rarely used join types that are less appropriate for high-volume tables
+ used for data warehousing. In some cases, Impala supports join types but requires explicit syntax to
+ ensure you do not do inefficient joins of huge tables by accident. For example, Impala does not support
+ natural joins or anti-joins, and requires the <codeph>CROSS JOIN</codeph> operator for Cartesian
+ products. See <xref href="impala_joins.xml#joins"/> for details on the syntax for Impala join clauses.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ Impala has a limited choice of partitioning types. Partitions are defined based on each distinct
+ combination of values for one or more partition key columns. Impala does not redistribute or check data
+ to create evenly distributed partitions; you must choose partition key columns based on your knowledge
+ of the data volume and distribution. Adapt any tables that use range, list, hash, or key partitioning
+ to use the Impala partition syntax for <codeph>CREATE TABLE</codeph> and <codeph>ALTER TABLE</codeph>
+ statements. Impala partitioning is similar to range partitioning where every range has exactly one
+ value, or key partitioning where the hash function produces a separate bucket for every combination of
+ key values. See <xref href="impala_partitioning.xml#partitioning"/> for usage details, and
+ <xref href="impala_create_table.xml#create_table"/> and
+ <xref href="impala_alter_table.xml#alter_table"/> for syntax.
+ </p>
+ <note>
+ Because the number of separate partitions is potentially higher than in other database systems, keep a
+ close eye on the number of partitions and the volume of data in each one; scale back the number of
+ partition key columns if you end up with too many partitions with a small volume of data in each one.
+ Remember, to distribute work for a query across a cluster, you need at least one HDFS block per node.
+ HDFS blocks are typically multiple megabytes, <ph rev="parquet_block_size">especially</ph> for Parquet
+ files. Therefore, if each partition holds only a few megabytes of data, you are unlikely to see much
+ parallelism in the query because such a small amount of data is typically processed by a single node.
+ </note>
+ </li>
+
+ <li>
+ <p>
+ For <q>top-N</q> queries, Impala uses the <codeph>LIMIT</codeph> clause rather than comparing against a
+ pseudocolumn named <codeph>ROWNUM</codeph> or <codeph>ROW_NUM</codeph>. See
+ <xref href="impala_limit.xml#limit"/> for details.
+ </p>
+ </li>
+ </ul>
+ </conbody>
+ </concept>
+
+ <concept id="porting_antipatterns">
+
+    <title>SQL Constructs to Double-Check</title>
+
+ <conbody>
+
+ <p>
+ Some SQL constructs that are supported have behavior or defaults more oriented towards convenience than
+ optimal performance. Also, sometimes machine-generated SQL, perhaps issued through JDBC or ODBC
+ applications, might have inefficiencies or exceed internal Impala limits. As you port SQL code, be alert
+ and change these things where appropriate:
+ </p>
+
+ <ul>
+ <li>
+ <p>
+ A <codeph>CREATE TABLE</codeph> statement with no <codeph>STORED AS</codeph> clause creates data files
+ in plain text format, which is convenient for data interchange but not a good choice for high-volume
+ data with high-performance queries. See <xref href="impala_file_formats.xml#file_formats"/> for why and
+ how to use specific file formats for compact data and high-performance queries. Especially see
+ <xref href="impala_parquet.xml#parquet"/>, for details about the file format most heavily optimized for
+ large-scale data warehouse queries.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ A <codeph>CREATE TABLE</codeph> statement with no <codeph>PARTITIONED BY</codeph> clause stores all the
+ data files in the same physical location, which can lead to scalability problems when the data volume
+ becomes large.
+ </p>
+ <p>
+ On the other hand, adapting tables that were already partitioned in a different database system could
+ produce an Impala table with a high number of partitions and not enough data in each one, leading to
+ underutilization of Impala's parallel query features.
+ </p>
+ <p>
+ See <xref href="impala_partitioning.xml#partitioning"/> for details about setting up partitioning and
+ tuning the performance of queries on partitioned tables.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ The <codeph>INSERT ... VALUES</codeph> syntax is suitable for setting up toy tables with a few rows for
+ functional testing, but because each such statement creates a separate tiny file in HDFS, it is not a
+ scalable technique for loading megabytes or gigabytes (let alone petabytes) of data. Consider revising
+ your data load process to produce raw data files outside of Impala, then setting up Impala external
+ tables or using the <codeph>LOAD DATA</codeph> statement to use those data files instantly in Impala
+ tables, with no conversion or indexing stage. See <xref href="impala_tables.xml#external_tables"/> and
+ <xref href="impala_load_data.xml#load_data"/> for details about the Impala techniques for working with
+ data files produced outside of Impala; see <xref href="impala_tutorial.xml#tutorial_etl"/> for examples
+ of ETL workflow for Impala.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ If your ETL process is not optimized for Hadoop, you might end up with highly fragmented small data
+ files, or a single giant data file that cannot take advantage of distributed parallel queries or
+ partitioning. In this case, use an <codeph>INSERT ... SELECT</codeph> statement to copy the data into a
+ new table and reorganize into a more efficient layout in the same operation. See
+ <xref href="impala_insert.xml#insert"/> for details about the <codeph>INSERT</codeph> statement.
+ </p>
+ <p>
+ You can do <codeph>INSERT ... SELECT</codeph> into a table with a more efficient file format (see
+ <xref href="impala_file_formats.xml#file_formats"/>) or from an unpartitioned table into a partitioned
+ one (see <xref href="impala_partitioning.xml#partitioning"/>).
+ </p>
+ </li>
+
+ <li>
+ <p>
+ The number of expressions allowed in an Impala query might be smaller than for some other database
+ systems, causing failures for very complicated queries (typically produced by automated SQL
+ generators). Where practical, keep the number of expressions in the <codeph>WHERE</codeph> clauses to
+ approximately 2000 or fewer. As a workaround, set the query option
+ <codeph>DISABLE_CODEGEN=true</codeph> if queries fail for this reason. See
+ <xref href="impala_disable_codegen.xml#disable_codegen"/> for details.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ If practical, rewrite <codeph>UNION</codeph> queries to use the <codeph>UNION ALL</codeph> operator
+ instead. <ph conref="../shared/impala_common.xml#common/union_all_vs_union"/>
+ </p>
+ </li>
+ </ul>
+ </conbody>
+ </concept>
+
+ <concept id="porting_next">
+
+ <title>Next Porting Steps after Verifying Syntax and Semantics</title>
+
+ <conbody>
+
+ <p>
+ Throughout this section, some of the decisions you make during the porting process also have a substantial
+      impact on performance. After your SQL code is ported and working correctly, double-check the
+ performance-related aspects of your schema design, physical layout, and queries to make sure that the
+ ported application is taking full advantage of Impala's parallelism, performance-related SQL features, and
+ integration with Hadoop components.
+ </p>
+
+ <ul>
+ <li>
+ Have you run the <codeph>COMPUTE STATS</codeph> statement on each table involved in join queries? Have
+ you also run <codeph>COMPUTE STATS</codeph> for each table used as the source table in an <codeph>INSERT
+ ... SELECT</codeph> or <codeph>CREATE TABLE AS SELECT</codeph> statement?
+ </li>
+
+ <li>
+ Are you using the most efficient file format for your data volumes, table structure, and query
+ characteristics?
+ </li>
+
+ <li>
+ Are you using partitioning effectively? That is, have you partitioned on columns that are often used for
+ filtering in <codeph>WHERE</codeph> clauses? Have you partitioned at the right granularity so that there
+ is enough data in each partition to parallelize the work for each query?
+ </li>
+
+ <li>
+ Does your ETL process produce a relatively small number of multi-megabyte data files (good) rather than a
+ huge number of small files (bad)?
+ </li>
+ </ul>
+
+ <p>
+ See <xref href="impala_performance.xml#performance"/> for details about the whole performance tuning
+ process.
+ </p>
+ </conbody>
+ </concept>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_query_options.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_query_options.xml b/docs/topics/impala_query_options.xml
new file mode 100644
index 0000000..1011746
--- /dev/null
+++ b/docs/topics/impala_query_options.xml
@@ -0,0 +1,75 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="query_options">
+
+ <title>Query Options for the SET Statement</title>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala Query Options"/>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="impala-shell"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Querying"/>
+ <data name="Category" value="Configuring"/>
+ <data name="Category" value="Data Analysts"/>
+ <data name="Category" value="Developers"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ You can specify the following options using the <codeph>SET</codeph> statement, and those settings affect all
+ queries issued from that session.
+ </p>
+
+ <p>
+ Some query options are useful in day-to-day operations for improving usability, performance, or flexibility.
+ </p>
+
+ <p>
+ Other query options control special-purpose aspects of Impala operation and are intended primarily for
+ advanced debugging or troubleshooting.
+ </p>
+
+ <p>
+ Options with Boolean parameters can be set to 1 or <codeph>true</codeph> to enable, or 0 or <codeph>false</codeph>
+ to turn off.
+ </p>
+
+ <note rev="2.0.0">
+ In Impala 2.0 and later, you can set query options directly through the JDBC and ODBC interfaces by using the
+ <codeph>SET</codeph> statement. Formerly, <codeph>SET</codeph> was only available as a command within the
+ <cmdname>impala-shell</cmdname> interpreter.
+ </note>
+
+<!-- This is the list including defaults from the pre-release 1.2 impala-shell:
+ ABORT_ON_DEFAULT_LIMIT_EXCEEDED: 0
+ ABORT_ON_ERROR: 0
+ ALLOW_UNSUPPORTED_FORMATS: 0
+ BATCH_SIZE: 0
+ DEBUG_ACTION:
+ DEFAULT_ORDER_BY_LIMIT: -1
+ DISABLE_CODEGEN: 0
+ HBASE_CACHE_BLOCKS: 0
+ HBASE_CACHING: 0
+ MAX_ERRORS: 0
+ MAX_IO_BUFFERS: 0
+ MAX_SCAN_RANGE_LENGTH: 0
+ MEM_LIMIT: 0
+ NUM_NODES: 0
+ NUM_SCANNER_THREADS: 0
+ PARQUET_COMPRESSION_CODEC: SNAPPY
+ PARQUET_FILE_SIZE: 0
+ SUPPORT_START_OVER: false
+-->
+
+ <p outputclass="toc"/>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ <xref href="impala_set.xml#set"/>
+ </p>
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_query_timeout_s.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_query_timeout_s.xml b/docs/topics/impala_query_timeout_s.xml
new file mode 100644
index 0000000..41f2918
--- /dev/null
+++ b/docs/topics/impala_query_timeout_s.xml
@@ -0,0 +1,51 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept rev="2.0.0" id="query_timeout_s">
+
+ <title>QUERY_TIMEOUT_S Query Option</title>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="Impala Query Options"/>
+ <data name="Category" value="Querying"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">QUERY_TIMEOUT_S query option</indexterm>
+ Sets the idle query timeout value for the session, in seconds. Queries that sit idle for longer than the
+ timeout value are automatically cancelled. If the system administrator specified the
+ <codeph>--idle_query_timeout</codeph> startup option, <codeph>QUERY_TIMEOUT_S</codeph> must be smaller than
+ or equal to the <codeph>--idle_query_timeout</codeph> value.
+ </p>
+
+ <note conref="../shared/impala_common.xml#common/timeout_clock_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock>SET QUERY_TIMEOUT_S=<varname>seconds</varname>;</codeblock>
+
+<!-- Don't have a compelling example to show at this time because the 'idle' aspect only applies
+ when the client is careless and leaves the query open. Can't easily demonstrate in impala-shell.
+
+ <p conref="/Content/impala_common_xi44078.xml#common/example_blurb"/>
+-->
+
+ <p>
+ <b>Type:</b> numeric
+ </p>
+
+ <p>
+ <b>Default:</b> 0 (no timeout if <codeph>--idle_query_timeout</codeph> not in effect; otherwise, use
+ <codeph>--idle_query_timeout</codeph> value)
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ <xref href="impala_timeouts.xml#timeouts"/>
+ </p>
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_real.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_real.xml b/docs/topics/impala_real.xml
new file mode 100644
index 0000000..e6430e3
--- /dev/null
+++ b/docs/topics/impala_real.xml
@@ -0,0 +1,46 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="real">
+
+ <title>REAL Data Type</title>
+ <titlealts><navtitle>REAL</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="Impala Data Types"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Data Analysts"/>
+ <data name="Category" value="Developers"/>
+ <data name="Category" value="Schemas"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ An alias for the <codeph>DOUBLE</codeph> data type. See <xref href="impala_double.xml#double"/> for details.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+ <p>
+ These examples show how you can use the type names <codeph>REAL</codeph> and <codeph>DOUBLE</codeph>
+      interchangeably, and behind the scenes Impala always treats them as <codeph>DOUBLE</codeph>.
+ </p>
+
+<codeblock>[localhost:21000] > create table r1 (x real);
+[localhost:21000] > describe r1;
++------+--------+---------+
+| name | type | comment |
++------+--------+---------+
+| x | double | |
++------+--------+---------+
+[localhost:21000] > insert into r1 values (1.5), (cast (2.2 as double));
+[localhost:21000] > select cast (1e6 as real);
++---------------------------+
+| cast(1000000.0 as double) |
++---------------------------+
+| 1000000 |
++---------------------------+</codeblock>
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_refresh.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_refresh.xml b/docs/topics/impala_refresh.xml
new file mode 100644
index 0000000..ee022d5
--- /dev/null
+++ b/docs/topics/impala_refresh.xml
@@ -0,0 +1,234 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="refresh">
+
+ <title>REFRESH Statement</title>
+ <titlealts><navtitle>REFRESH</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="DDL"/>
+ <data name="Category" value="Tables"/>
+ <data name="Category" value="Hive"/>
+ <data name="Category" value="Metastore"/>
+ <data name="Category" value="ETL"/>
+ <data name="Category" value="Ingest"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">REFRESH statement</indexterm>
+ To accurately respond to queries, the Impala node that acts as the coordinator (the node to which you are
+ connected through <cmdname>impala-shell</cmdname>, JDBC, or ODBC) must have current metadata about those
+ databases and tables that are referenced in Impala queries. If you are not familiar with the way Impala uses
+ metadata and how it shares the same metastore database as Hive, see
+ <xref href="impala_hadoop.xml#intro_metastore"/> for background information.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock>REFRESH [<varname>db_name</varname>.]<varname>table_name</varname></codeblock>
+
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+
+ <p>
+ Use the <codeph>REFRESH</codeph> statement to load the latest metastore metadata and block location data for
+ a particular table in these scenarios:
+ </p>
+
+ <ul>
+ <li>
+ After loading new data files into the HDFS data directory for the table. (Once you have set up an ETL
+ pipeline to bring data into Impala on a regular basis, this is typically the most frequent reason why
+ metadata needs to be refreshed.)
+ </li>
+
+ <li>
+ After issuing <codeph>ALTER TABLE</codeph>, <codeph>INSERT</codeph>, <codeph>LOAD DATA</codeph>, or other
+ table-modifying SQL statement in Hive.
+ </li>
+ </ul>
+
+ <p>
+ You only need to issue the <codeph>REFRESH</codeph> statement on the node to which you connect to issue
+ queries. The coordinator node divides the work among all the Impala nodes in a cluster, and sends read
+ requests for the correct HDFS blocks without relying on the metadata on the other nodes.
+ </p>
+
+ <p>
+ <codeph>REFRESH</codeph> reloads the metadata for the table from the metastore database, and does an
+ incremental reload of the low-level block location data to account for any new data files added to the HDFS
+ data directory for the table. It is a low-overhead, single-table operation, specifically tuned for the common
+ scenario where new data files are added to HDFS.
+ </p>
+
+ <p>
+ Only the metadata for the specified table is flushed. The table must already exist and be known to Impala,
+ either because the <codeph>CREATE TABLE</codeph> statement was run in Impala rather than Hive, or because a
+ previous <codeph>INVALIDATE METADATA</codeph> statement caused Impala to reload its entire metadata catalog.
+ </p>
+
+ <note>
+ <p rev="1.2">
+ In Impala 1.2 and higher, the catalog service broadcasts any changed metadata as a result of Impala
+ <codeph>ALTER TABLE</codeph>, <codeph>INSERT</codeph> and <codeph>LOAD DATA</codeph> statements to all
+ Impala nodes. Thus, the <codeph>REFRESH</codeph> statement is only required if you load data through Hive
+ or by manipulating data files in HDFS directly. See <xref href="impala_components.xml#intro_catalogd"/> for
+ more information on the catalog service.
+ </p>
+ <p rev="1.2.1">
+ In Impala 1.2.1 and higher, another way to avoid inconsistency across nodes is to enable the
+ <codeph>SYNC_DDL</codeph> query option before performing a DDL statement or an <codeph>INSERT</codeph> or
+ <codeph>LOAD DATA</codeph>.
+ </p>
+ <p>
+ The functionality of the <codeph>REFRESH</codeph> statement has changed in Impala 1.1 and higher. Now the
+ table name is a required parameter. To flush the metadata for all tables, use the
+ <codeph><xref href="impala_invalidate_metadata.xml#invalidate_metadata">INVALIDATE METADATA</xref></codeph>
+ command.
+ </p>
+ <draft-comment translate="no"> Almost-identical wording here, under INVALIDATE METADATA, and in Release Notes :: New Features. Makes sense to conref. </draft-comment>
+ <p>
+ Because <codeph>REFRESH <varname>table_name</varname></codeph> only works for tables that Impala is already
+ aware of, when you create a new table in the Hive shell, you must enter <codeph>INVALIDATE
+ METADATA</codeph> with no table parameter before you can see the new table in
+ <cmdname>impala-shell</cmdname>. Once the table is known to Impala, you can issue <codeph>REFRESH
+ <varname>table_name</varname></codeph> as needed after you add more data files for that table.
+ </p>
+ </note>
+
+ <p conref="../shared/impala_common.xml#common/refresh_vs_invalidate"/>
+
+ <p>
+ A metadata update for an <codeph>impalad</codeph> instance <b>is</b> required if:
+ </p>
+
+ <ul>
+ <li>
+ A metadata change occurs.
+ </li>
+
+ <li>
+ <b>and</b> the change is made through Hive.
+ </li>
+
+ <li>
+ <b>and</b> the change is made to a database to which clients such as the Impala shell or ODBC directly
+ connect.
+ </li>
+ </ul>
+
+ <p rev="1.2">
+ A metadata update for an Impala node is <b>not</b> required after you run <codeph>ALTER TABLE</codeph>,
+ <codeph>INSERT</codeph>, or other table-modifying statement in Impala rather than Hive. Impala handles the
+ metadata synchronization automatically through the catalog service.
+ </p>
+
+ <p>
+ Database and table metadata is typically modified by:
+ </p>
+
+ <ul>
+ <li>
+ Hive - through <codeph>ALTER</codeph>, <codeph>CREATE</codeph>, <codeph>DROP</codeph> or
+ <codeph>INSERT</codeph> operations.
+ </li>
+
+ <li>
+ Impalad - through <codeph>CREATE TABLE</codeph>, <codeph>ALTER TABLE</codeph>, and <codeph>INSERT</codeph>
+ operations. <ph rev="1.2">In Impala 1.2 and higher, such changes are propagated to all Impala nodes by the
+ Impala catalog service.</ph>
+ </li>
+ </ul>
+
+ <p>
+ <codeph>REFRESH</codeph> causes the metadata for that table to be immediately reloaded. For a huge table,
+ that process could take a noticeable amount of time; but doing the refresh up front avoids an unpredictable
+ delay later, for example if the next reference to the table is during a benchmark test.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/sync_ddl_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+
+
+ <p>
+ The following example shows how you might use the <codeph>REFRESH</codeph> statement after manually adding
+ new HDFS data files to the Impala data directory for a table:
+ </p>
+
+<codeblock>[impalad-host:21000] > refresh t1;
+[impalad-host:21000] > refresh t2;
+[impalad-host:21000] > select * from t1;
+...
+[impalad-host:21000] > select * from t2;
+... </codeblock>
+
+ <p>
+ For more examples of using <codeph>REFRESH</codeph> and <codeph>INVALIDATE METADATA</codeph> with a
+ combination of Impala and Hive operations, see <xref href="impala_tutorial.xml#tutorial_impala_hive"/>.
+ </p>
+
+ <p>
+ <b>Related impalad options:</b>
+ </p>
+
+ <p>
+ In Impala 1.0, the <codeph>-r</codeph> option of <cmdname>impala-shell</cmdname> issued
+ <codeph>REFRESH</codeph> to reload metadata for all tables.
+ </p>
+
+ <p>
+ In Impala 1.1 and higher, this option issues <codeph>INVALIDATE METADATA</codeph> because
+ <codeph>REFRESH</codeph> now requires a table name parameter. Due to the expense of reloading the metadata
+ for all tables, the <cmdname>impala-shell</cmdname> <codeph>-r</codeph> option is not recommended for
+ day-to-day use in a production environment.
+ </p>
+
+ <p rev="1.2">
+ In Impala 1.2 and higher, the <codeph>-r</codeph> option is needed even less frequently, because metadata
+ changes caused by SQL statements in Impala are automatically broadcast to all Impala nodes.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/permissions_blurb"/>
+ <p rev="CDH-19187">
+ The user ID that the <cmdname>impalad</cmdname> daemon runs under,
+ typically the <codeph>impala</codeph> user, must have execute
+ permissions for all the relevant directories holding table data.
+ (A table could have data spread across multiple directories,
+ or in unexpected paths, if it uses partitioning or
+ specifies a <codeph>LOCATION</codeph> attribute for
+ individual partitions or the entire table.)
+ Issues with permissions might not cause an immediate error for this statement,
+ but subsequent statements such as <codeph>SELECT</codeph>
+ or <codeph>SHOW TABLE STATS</codeph> could fail.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/hdfs_blurb"/>
+
+ <p>
+ The <codeph>REFRESH</codeph> command checks HDFS permissions of the underlying data files and directories,
+ caching this information so that a statement can be cancelled immediately if for example the
+ <codeph>impala</codeph> user does not have permission to write to the data directory for the table. Impala
+ reports any lack of write permissions as an <codeph>INFO</codeph> message in the log file, in case that
+ represents an oversight. If you change HDFS permissions to make data readable or writeable by the Impala
+ user, issue another <codeph>REFRESH</codeph> to make Impala aware of the change.
+ </p>
+
+ <note conref="../shared/impala_common.xml#common/compute_stats_next"/>
+
+ <p conref="../shared/impala_common.xml#common/s3_blurb"/>
+ <p conref="../shared/impala_common.xml#common/s3_metadata"/>
+
+ <p conref="../shared/impala_common.xml#common/cancel_blurb_no"/>
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+ <p>
+ <xref href="impala_hadoop.xml#intro_metastore"/>,
+ <xref href="impala_invalidate_metadata.xml#invalidate_metadata"/>
+ </p>
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_request_pool.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_request_pool.xml b/docs/topics/impala_request_pool.xml
new file mode 100644
index 0000000..cf2a811
--- /dev/null
+++ b/docs/topics/impala_request_pool.xml
@@ -0,0 +1,45 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept rev="1.3.0" id="request_pool">
+
+ <title>REQUEST_POOL Query Option</title>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="Resource Management"/>
+ <data name="Category" value="Impala Query Options"/>
+ <data name="Category" value="Admission Control"/>
+ <data name="Category" value="YARN"/>
+ <data name="Category" value="Llama"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">REQUEST_POOL query option</indexterm>
+ The pool or queue name that queries should be submitted to. Only applies when you enable the Impala admission
+ control feature (CDH 4 or CDH 5; see <xref href="impala_admission.xml#admission_control"/>), or the YARN
+ resource management feature (CDH 5 only; see
+ <xref href="impala_resource_management.xml#resource_management"/>). Specifies the name of the pool used by
+ requests from Impala to the resource manager.
+ </p>
+
+ <p>
+ Formerly known as <codeph>YARN_POOL</codeph> during the CDH 5 beta period. Renamed to reflect that it can be
+ used both with YARN and with the lightweight admission control feature introduced in Impala 1.3.
+ </p>
+
+ <p>
+ <b>Type:</b> <codeph>STRING</codeph>
+ </p>
+
+ <p>
+ <b>Default:</b> empty (use the user-to-pool mapping defined by an <cmdname>impalad</cmdname> startup option
+ in the Impala configuration file)
+ </p>
+
+<!-- Worth adding a couple of related info links here. -->
+
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_reservation_request_timeout.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_reservation_request_timeout.xml b/docs/topics/impala_reservation_request_timeout.xml
new file mode 100644
index 0000000..0316e44
--- /dev/null
+++ b/docs/topics/impala_reservation_request_timeout.xml
@@ -0,0 +1,35 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept rev="1.2" id="reservation_request_timeout">
+
+ <title>RESERVATION_REQUEST_TIMEOUT Query Option (CDH 5 only)</title>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="Impala Query Options"/>
+ <data name="Category" value="Resource Management"/>
+ <data name="Category" value="YARN"/>
+ <data name="Category" value="Llama"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">RESERVATION_REQUEST_TIMEOUT query option</indexterm>
+ Maximum number of milliseconds Impala will wait for a reservation to be completely granted or denied. Used in
+ conjunction with the Impala resource management feature in Impala 1.2 and higher with CDH 5.
+ </p>
+
+ <p>
+ <b>Type:</b> numeric
+ </p>
+
+ <p>
+ <b>Default:</b> 300000 (5 minutes)
+ </p>
+
+<!-- Worth adding a couple of related info links here. -->
+
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_revoke.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_revoke.xml b/docs/topics/impala_revoke.xml
new file mode 100644
index 0000000..88fbbf9
--- /dev/null
+++ b/docs/topics/impala_revoke.xml
@@ -0,0 +1,96 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept rev="2.0.0" id="revoke">
+
+ <title>REVOKE Statement (CDH 5.2 or higher only)</title>
+ <titlealts><navtitle>REVOKE (CDH 5.2 or higher only)</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="DDL"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Sentry"/>
+ <data name="Category" value="Roles"/>
+ <!-- Consider whether to go deeper into categories like Security for the Sentry-related statements. -->
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">REVOKE statement</indexterm>
+<!-- Copied from Sentry docs. Turn into conref. I did some rewording for clarity. -->
+ The <codeph>REVOKE</codeph> statement revokes roles or privileges on a specified object from groups. Only
+ Sentry administrative users can revoke the role from a group. The revocation has a cascading effect. For
+ example, revoking the <codeph>ALL</codeph> privilege on a database also revokes the same privilege for all
+ the tables in that database.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock rev="2.3.0 collevelauth">REVOKE ROLE <varname>role_name</varname> FROM GROUP <varname>group_name</varname>
+
+REVOKE <varname>privilege</varname> ON <varname>object_type</varname> <varname>object_name</varname>
+ FROM [ROLE] <varname>role_name</varname>
+
+<ph rev="2.3.0">privilege ::= SELECT | SELECT(<varname>column_name</varname>) | INSERT | ALL</ph>
+object_type ::= TABLE | DATABASE | SERVER | URI
+</codeblock>
+
+ <p>
+ Typically, the object name is an identifier. For URIs, it is a string literal.
+ </p>
+
+ <p rev="2.3.0 collevelauth">
+ The ability to grant or revoke <codeph>SELECT</codeph> privilege on specific columns is available
+ in CDH 5.5 / Impala 2.3 and higher. See <xref href="sg_hive_sql.xml#concept_c2q_4qx_p4/col_level_auth_sentry"/>
+ for details.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/privileges_blurb"/>
+
+ <p>
+ Only administrative users (those with <codeph>ALL</codeph> privileges on the server, defined in the Sentry
+ policy file) can use this statement.
+ </p>
+
+<!-- Turn compatibility info into a conref or series of conrefs. (In both GRANT and REVOKE.) -->
+
+ <p conref="../shared/impala_common.xml#common/compatibility_blurb"/>
+
+ <p>
+ <ul>
+ <li>
+ The Impala <codeph>GRANT</codeph> and <codeph>REVOKE</codeph> statements are available in CDH 5.2 and
+ higher.
+ </li>
+
+ <li>
+ In CDH 5.1 and higher, Impala makes use of any roles and privileges specified by the
+ <codeph>GRANT</codeph> and <codeph>REVOKE</codeph> statements in Hive, when your system is configured to
+ use the Sentry service instead of the file-based policy mechanism.
+ </li>
+
+ <li>
+ The Impala <codeph>GRANT</codeph> and <codeph>REVOKE</codeph> statements do not require the
+ <codeph>ROLE</codeph> keyword to be repeated before each role name, unlike the equivalent Hive
+ statements.
+ </li>
+
+ <li conref="../shared/impala_common.xml#common/grant_revoke_single"/>
+ </ul>
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/cancel_blurb_no"/>
+
+ <p conref="../shared/impala_common.xml#common/permissions_blurb_no"/>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+      <xref href="impala_authorization.xml#authorization"/>, <xref href="impala_grant.xml#grant"/>,
+ <xref href="impala_create_role.xml#create_role"/>, <xref href="impala_drop_role.xml#drop_role"/>,
+ <xref href="impala_show.xml#show"/>
+ </p>
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_schema_objects.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_schema_objects.xml b/docs/topics/impala_schema_objects.xml
new file mode 100644
index 0000000..d8abe12
--- /dev/null
+++ b/docs/topics/impala_schema_objects.xml
@@ -0,0 +1,57 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="schema_objects">
+
+ <title>Impala Schema Objects and Object Names</title>
+ <titlealts><navtitle>Schema Objects and Object Names</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Data Analysts"/>
+ <data name="Category" value="Developers"/>
+ <data name="Category" value="Querying"/>
+ <data name="Category" value="Schemas"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">schema objects</indexterm>
+ With Impala, you work with schema objects that are familiar to database users: primarily databases, tables, views,
+ and functions. The SQL syntax to work with these objects is explained in
+ <xref href="impala_langref_sql.xml#langref_sql"/>. This section explains the conceptual knowledge you need to
+ work with these objects and the various ways to specify their names.
+ </p>
+
+ <p>
+ Within a table, partitions can also be considered a kind of object. Partitioning is an important subject for
+ Impala, with its own documentation section covering use cases and performance considerations. See
+ <xref href="impala_partitioning.xml#partitioning"/> for details.
+ </p>
+
+ <p>
+ Impala does not have a counterpart of the <q>tablespace</q> notion from some database systems. By default,
+ all the data files for a database, table, or partition are located within nested folders within the HDFS file
+ system. You can also specify a particular HDFS location for a given Impala table or partition. The raw data
+ for these objects is represented as a collection of data files, providing the flexibility to load data by
+ simply moving files into the expected HDFS location.
+ </p>
+
+ <p>
+ Information about the schema objects is held in the
+ <xref href="impala_hadoop.xml#intro_metastore">metastore</xref> database. This database is shared between
+ Impala and Hive, allowing each to create, drop, and query each other's databases, tables, and so on. When
+ Impala makes a change to schema objects through a <codeph>CREATE</codeph>, <codeph>ALTER</codeph>,
+ <codeph>DROP</codeph>, <codeph>INSERT</codeph>, or <codeph>LOAD DATA</codeph> statement, it broadcasts those
+ changes to all nodes in the cluster through the <xref href="impala_components.xml#intro_catalogd">catalog
+ service</xref>. When you make such changes through Hive or directly through manipulating HDFS files, you use
+ the <xref href="impala_refresh.xml#refresh">REFRESH</xref> or
+ <xref href="impala_invalidate_metadata.xml#invalidate_metadata">INVALIDATE METADATA</xref> statements on the
+ Impala side to recognize the newly loaded data, new tables, and so on.
+ </p>
+
+ <p outputclass="toc"/>
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_select.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_select.xml b/docs/topics/impala_select.xml
new file mode 100644
index 0000000..db63f71
--- /dev/null
+++ b/docs/topics/impala_select.xml
@@ -0,0 +1,203 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="select">
+
+ <title>SELECT Statement</title>
+ <titlealts><navtitle>SELECT</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Querying"/>
+ <data name="Category" value="Reports"/>
+ <data name="Category" value="Tables"/>
+ <data name="Category" value="Data Analysts"/>
+ <data name="Category" value="Developers"/>
+ <!-- This is such an important statement, think if there are more applicable categories. -->
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">SELECT statement</indexterm>
+ The <codeph>SELECT</codeph> statement performs queries, retrieving data from one or more tables and producing
+ result sets consisting of rows and columns.
+ </p>
+
+ <p>
+ The Impala <codeph><xref href="impala_insert.xml#insert">INSERT</xref></codeph> statement also typically ends
+ with a <codeph>SELECT</codeph> statement, to define data to copy from one table to another.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock>[WITH <i>name</i> AS (<i>select_expression</i>) [, ...] ]
+SELECT
+ [ALL | DISTINCT]
+ [STRAIGHT_JOIN]
+ <i>expression</i> [, <i>expression</i> ...]
+FROM <i>table_reference</i> [, <i>table_reference</i> ...]
+[[FULL | [LEFT | RIGHT] INNER | [LEFT | RIGHT] OUTER | [LEFT | RIGHT] SEMI | [LEFT | RIGHT] ANTI | CROSS]
+ JOIN <i>table_reference</i>
+ [ON <i>join_equality_clauses</i> | USING (<varname>col1</varname>[, <varname>col2</varname> ...])]] ...
+WHERE <i>conditions</i>
+GROUP BY { <i>column</i> | <i>expression</i> [, ...] }
+HAVING <i>conditions</i>
+ORDER BY { <i>column</i> | <i>expression</i> [ASC | DESC] [NULLS FIRST | NULLS LAST] [, ...] }
+LIMIT <i>expression</i> [OFFSET <i>expression</i>]
+[UNION [ALL] <i>select_statement</i>] ...
+</codeblock>
+
+ <p>
+ Impala <codeph>SELECT</codeph> queries support:
+ </p>
+
+ <ul>
+ <li>
+ SQL scalar data types: <codeph><xref href="impala_boolean.xml#boolean">BOOLEAN</xref></codeph>,
+ <codeph><xref href="impala_tinyint.xml#tinyint">TINYINT</xref></codeph>,
+ <codeph><xref href="impala_smallint.xml#smallint">SMALLINT</xref></codeph>,
+ <codeph><xref href="impala_int.xml#int">INT</xref></codeph>,
+ <codeph><xref href="impala_bigint.xml#bigint">BIGINT</xref></codeph>,
+ <codeph><xref href="impala_decimal.xml#decimal">DECIMAL</xref></codeph>,
+ <codeph><xref href="impala_float.xml#float">FLOAT</xref></codeph>,
+ <codeph><xref href="impala_double.xml#double">DOUBLE</xref></codeph>,
+ <codeph><xref href="impala_timestamp.xml#timestamp">TIMESTAMP</xref></codeph>,
+ <codeph><xref href="impala_string.xml#string">STRING</xref></codeph>,
+ <codeph><xref href="impala_varchar.xml#varchar">VARCHAR</xref></codeph>,
+ <codeph><xref href="impala_char.xml#char">CHAR</xref></codeph>.
+ </li>
+
+<!-- To do: Consider promoting 'querying complex types' to its own subtopic or pseudo-heading. -->
+ <li rev="2.3.0">
+ The complex data types <codeph>ARRAY</codeph>, <codeph>STRUCT</codeph>, and <codeph>MAP</codeph>,
+ are available in CDH 5.5 / Impala 2.3 and higher.
+ Queries involving these types typically involve special qualified names
+ using dot notation for referring to the complex column fields,
+ and join clauses for bringing the complex columns into the result set.
+ See <xref href="impala_complex_types.xml#complex_types"/> for details.
+ </li>
+
+ <li rev="1.1">
+ An optional <xref href="impala_with.xml#with"><codeph>WITH</codeph> clause</xref> before the
+ <codeph>SELECT</codeph> keyword, to define a subquery whose name or column names can be referenced from
+ later in the main query. This clause lets you abstract repeated clauses, such as aggregation functions,
+ that are referenced multiple times in the same query.
+ </li>
+
+ <li>
+ By default, one <codeph>DISTINCT</codeph> clause per query. See <xref href="impala_distinct.xml#distinct"/>
+ for details. See <xref href="impala_appx_count_distinct.xml#appx_count_distinct"/> for a query option to
+ allow multiple <codeph>COUNT(DISTINCT)</codeph> expressions in the same query.
+ </li>
+
+ <li>
+ Subqueries in a <codeph>FROM</codeph> clause. In CDH 5.2 / Impala 2.0 and higher,
+ subqueries can also go in the <codeph>WHERE</codeph> clause, for example with the
+ <codeph>IN()</codeph>, <codeph>EXISTS</codeph>, and <codeph>NOT EXISTS</codeph> operators.
+ </li>
+
+ <li>
+ <codeph>WHERE</codeph>, <codeph>GROUP BY</codeph>, <codeph>HAVING</codeph> clauses.
+ </li>
+
+ <li rev="obwl">
+ <codeph><xref href="impala_order_by.xml#order_by">ORDER BY</xref></codeph>. Prior to Impala 1.4.0, Impala
+ required that queries using an <codeph>ORDER BY</codeph> clause also include a
+ <codeph><xref href="impala_limit.xml#limit">LIMIT</xref></codeph> clause. In Impala 1.4.0 and higher, this
+ restriction is lifted; sort operations that would exceed the Impala memory limit automatically use a
+ temporary disk work area to perform the sort.
+ </li>
+
+ <li>
+ <p conref="../shared/impala_common.xml#common/join_types"/>
+ <p>
+ See <xref href="impala_joins.xml#joins"/> for details and examples of join queries.
+ </p>
+ </li>
+
+ <li>
+ <codeph>UNION ALL</codeph>.
+ </li>
+
+ <li>
+ <codeph>LIMIT</codeph>.
+ </li>
+
+ <li>
+ External tables.
+ </li>
+
+ <li>
+ Relational operators such as greater than, less than, or equal to.
+ </li>
+
+ <li>
+ Arithmetic operators such as addition or subtraction.
+ </li>
+
+ <li>
+ Logical/Boolean operators <codeph>AND</codeph>, <codeph>OR</codeph>, and <codeph>NOT</codeph>. Impala does
+ not support the corresponding symbols <codeph>&amp;&amp;</codeph>, <codeph>||</codeph>, and
+ <codeph>!</codeph>.
+ </li>
+
+ <li>
+ Common SQL built-in functions such as <codeph>COUNT</codeph>, <codeph>SUM</codeph>, <codeph>CAST</codeph>,
+ <codeph>LIKE</codeph>, <codeph>IN</codeph>, <codeph>BETWEEN</codeph>, and <codeph>COALESCE</codeph>. Impala
+ specifically supports built-ins described in <xref href="impala_functions.xml#builtins"/>.
+ </li>
+ </ul>
+
+ <p conref="../shared/impala_common.xml#common/ignore_file_extensions"/>
+
+ <p conref="../shared/impala_common.xml#common/security_blurb"/>
+ <p conref="../shared/impala_common.xml#common/redaction_yes"/>
+
+ <p conref="../shared/impala_common.xml#common/cancel_blurb_yes"/>
+
+ <p conref="../shared/impala_common.xml#common/permissions_blurb"/>
+ <p rev="CDH-19187">
+ The user ID that the <cmdname>impalad</cmdname> daemon runs under,
+ typically the <codeph>impala</codeph> user, must have read
+ permissions for the files in all applicable directories in all source tables,
+ and read and execute permissions for the relevant data directories.
+ (A <codeph>SELECT</codeph> operation could read files from multiple different HDFS directories
+ if the source table is partitioned.)
+ If a query attempts to read a data file and is unable to because of an HDFS permission error,
+ the query halts and does not return any further results.
+ </p>
+
+ <p outputclass="toc"/>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ The <codeph>SELECT</codeph> syntax is so extensive that it forms its own category of statements: queries. The
+ other major classifications of SQL statements are data definition language (see
+ <xref href="impala_ddl.xml#ddl"/>) and data manipulation language (see <xref href="impala_dml.xml#dml"/>).
+ </p>
+
+ <p>
+ Because the focus of Impala is on fast queries with interactive response times over huge data sets, query
+ performance and scalability are important considerations. See
+ <xref href="impala_performance.xml#performance"/> and <xref href="impala_scalability.xml#scalability"/> for
+ details.
+ </p>
+ </conbody>
+
+ <concept id="where" audience="Cloudera">
+
+<!-- WHERE hidden for the moment until there's the chance to add some reasonably comprehensive content
+
+ and make it its own file. -->
+
+ <title>WHERE Clause</title>
+
+ <conbody>
+
+ <p/>
+ </conbody>
+ </concept>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_set.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_set.xml b/docs/topics/impala_set.xml
new file mode 100644
index 0000000..afa6777
--- /dev/null
+++ b/docs/topics/impala_set.xml
@@ -0,0 +1,90 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept rev="2.0.0" id="set">
+
+ <title>SET Statement</title>
+ <titlealts><navtitle>SET</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Querying"/>
+ <data name="Category" value="Configuring"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">SET statement</indexterm>
+ Specifies values for query options that control the runtime behavior of other statements within the same
+ session.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock>SET [<varname>query_option</varname>=<varname>option_value</varname>]
+</codeblock>
+
+ <p>
+ <codeph>SET</codeph> with no arguments returns a result set consisting of all available query options and
+ their current values.
+ </p>
+
+ <p>
+ The query option name and any string argument values are case-insensitive.
+ </p>
+
+ <p>
+ Each query option has a specific allowed notation for its arguments. Boolean options can be enabled and
+ disabled by assigning values of either <codeph>true</codeph> and <codeph>false</codeph>, or
+ <codeph>1</codeph> and <codeph>0</codeph>. Some numeric options accept a final character signifying the unit,
+ such as <codeph>2g</codeph> for 2 gigabytes or <codeph>100m</codeph> for 100 megabytes. See
+ <xref href="impala_query_options.xml#query_options"/> for the details of each query option.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+
+ <p>
+ <codeph>MEM_LIMIT</codeph> is probably the most commonly used query option. You can specify a high value to
+ allow a resource-intensive query to complete. For testing how queries would work on memory-constrained
+ systems, you might specify an artificially low value.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/complex_types_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+ <p>
+ The following example sets some numeric and some Boolean query options to control usage of memory, disk
+ space, and timeout periods, then runs a query whose success could depend on the options in effect:
+ </p>
+
+<codeblock>set mem_limit=64g;
+set DISABLE_UNSAFE_SPILLS=true;
+set parquet_file_size=400m;
+set RESERVATION_REQUEST_TIMEOUT=900000;
+insert overwrite parquet_table select c1, c2, count(c3) from text_table group by c1, c2, c3;
+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/added_in_20"/>
+
+ <p>
+ <codeph>SET</codeph> has always been available as an <cmdname>impala-shell</cmdname> command. Promoting it to
+ a SQL statement lets you use this feature in client applications through the JDBC and ODBC APIs.
+ </p>
+
+<!-- <p conref="/Content/impala_common_xi44078.xml#common/jdbc_blurb"/> -->
+
+ <p conref="../shared/impala_common.xml#common/cancel_blurb_no"/>
+
+ <p conref="../shared/impala_common.xml#common/permissions_blurb_no"/>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ See <xref href="impala_query_options.xml#query_options"/> for the query options you can adjust using this
+ statement.
+ </p>
+ </conbody>
+</concept>
[08/22] incubator-impala git commit: First try at porting over the
source files necessary for the Impala SQL Reference.
Posted by jr...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_max.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_max.xml b/docs/topics/impala_max.xml
new file mode 100644
index 0000000..b989785
--- /dev/null
+++ b/docs/topics/impala_max.xml
@@ -0,0 +1,192 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="max">
+
+ <title>MAX Function</title>
+ <titlealts><navtitle>MAX</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Impala Functions"/>
+ <data name="Category" value="Analytic Functions"/>
+ <data name="Category" value="Aggregate Functions"/>
+ <data name="Category" value="Querying"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">max() function</indexterm>
+ An aggregate function that returns the maximum value from a set of numbers. Opposite of the
+ <codeph>MIN</codeph> function. Its single argument can be a numeric column, or the numeric result of a function
+ or expression applied to the column value. Rows with a <codeph>NULL</codeph> value for the specified column
+ are ignored. If the table is empty, or all the values supplied to <codeph>MAX</codeph> are
+ <codeph>NULL</codeph>, <codeph>MAX</codeph> returns <codeph>NULL</codeph>.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock>MAX([DISTINCT | ALL] <varname>expression</varname>) [OVER (<varname>analytic_clause</varname>)]</codeblock>
+
+ <p>
+ When the query contains a <codeph>GROUP BY</codeph> clause, returns one value for each combination of
+ grouping values.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/restrictions_sliding_window"/>
+
+ <p conref="../shared/impala_common.xml#common/return_type_same_except_string"/>
+
+ <p conref="../shared/impala_common.xml#common/complex_types_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/complex_types_aggregation_explanation"/>
+
+ <p conref="../shared/impala_common.xml#common/complex_types_aggregation_example"/>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+<codeblock>-- Find the largest value for this column in the table.
+select max(c1) from t1;
+-- Find the largest value for this column from a subset of the table.
+select max(c1) from t1 where month = 'January' and year = '2013';
+-- Find the largest value from a set of numeric function results.
+select max(length(s)) from t1;
+-- Can also be used in combination with DISTINCT and/or GROUP BY.
+-- Return more than one result.
+select month, year, max(purchase_price) from store_stats group by month, year;
+-- Filter the input to eliminate duplicates before performing the calculation.
+select max(distinct x) from t1;
+</codeblock>
+
+ <p rev="2.0.0">
+ The following examples show how to use <codeph>MAX()</codeph> in an analytic context. They use a table
+ containing integers from 1 to 10. Notice how the <codeph>MAX()</codeph> is reported for each input value, as
+ opposed to the <codeph>GROUP BY</codeph> clause which condenses the result set.
+<codeblock>select x, property, max(x) over (partition by property) as max from int_t where property in ('odd','even');
++----+----------+-----+
+| x | property | max |
++----+----------+-----+
+| 2 | even | 10 |
+| 4 | even | 10 |
+| 6 | even | 10 |
+| 8 | even | 10 |
+| 10 | even | 10 |
+| 1 | odd | 9 |
+| 3 | odd | 9 |
+| 5 | odd | 9 |
+| 7 | odd | 9 |
+| 9 | odd | 9 |
++----+----------+-----+
+</codeblock>
+
+Adding an <codeph>ORDER BY</codeph> clause lets you experiment with results that are cumulative or apply to a moving
+set of rows (the <q>window</q>). The following examples use <codeph>MAX()</codeph> in an analytic context
+(that is, with an <codeph>OVER()</codeph> clause) to display the largest value of <codeph>X</codeph>
+encountered up to each row in the result set. The examples use two columns in the <codeph>ORDER BY</codeph>
+clause to produce a sequence of values that rises and falls, to illustrate how the <codeph>MAX()</codeph>
+result only increases or stays the same throughout each partition within the result set.
+The basic <codeph>ORDER BY x</codeph> clause implicitly
+activates a window clause of <codeph>RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW</codeph>,
+which is effectively the same as <codeph>ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW</codeph>,
+therefore all of these examples produce the same results:
+
+<codeblock>select x, property,
+ max(x) <b>over (order by property, x desc)</b> as 'maximum to this point'
+from int_t where property in ('prime','square');
++---+----------+-----------------------+
+| x | property | maximum to this point |
++---+----------+-----------------------+
+| 7 | prime | 7 |
+| 5 | prime | 7 |
+| 3 | prime | 7 |
+| 2 | prime | 7 |
+| 9 | square | 9 |
+| 4 | square | 9 |
+| 1 | square | 9 |
++---+----------+-----------------------+
+
+select x, property,
+ max(x) over
+ (
+ <b>order by property, x desc</b>
+ <b>rows between unbounded preceding and current row</b>
+ ) as 'maximum to this point'
+from int_t where property in ('prime','square');
++---+----------+-----------------------+
+| x | property | maximum to this point |
++---+----------+-----------------------+
+| 7 | prime | 7 |
+| 5 | prime | 7 |
+| 3 | prime | 7 |
+| 2 | prime | 7 |
+| 9 | square | 9 |
+| 4 | square | 9 |
+| 1 | square | 9 |
++---+----------+-----------------------+
+
+select x, property,
+ max(x) over
+ (
+ <b>order by property, x desc</b>
+ <b>range between unbounded preceding and current row</b>
+ ) as 'maximum to this point'
+from int_t where property in ('prime','square');
++---+----------+-----------------------+
+| x | property | maximum to this point |
++---+----------+-----------------------+
+| 7 | prime | 7 |
+| 5 | prime | 7 |
+| 3 | prime | 7 |
+| 2 | prime | 7 |
+| 9 | square | 9 |
+| 4 | square | 9 |
+| 1 | square | 9 |
++---+----------+-----------------------+
+</codeblock>
+
+The following examples show how to construct a moving window, with a running maximum taking into account all rows before
+and 1 row after the current row.
+Because of a restriction in the Impala <codeph>RANGE</codeph> syntax, this type of
+moving window is possible with the <codeph>ROWS BETWEEN</codeph> clause but not the <codeph>RANGE BETWEEN</codeph> clause.
+Because of an extra Impala restriction on the <codeph>MAX()</codeph> and <codeph>MIN()</codeph> functions in an
+analytic context, the lower bound must be <codeph>UNBOUNDED PRECEDING</codeph>.
+<codeblock>select x, property,
+ max(x) over
+ (
+ <b>order by property, x</b>
+ <b>rows between unbounded preceding and 1 following</b>
+ ) as 'local maximum'
+from int_t where property in ('prime','square');
++---+----------+---------------+
+| x | property | local maximum |
++---+----------+---------------+
+| 2 | prime | 3 |
+| 3 | prime | 5 |
+| 5 | prime | 7 |
+| 7 | prime | 7 |
+| 1 | square | 7 |
+| 4 | square | 9 |
+| 9 | square | 9 |
++---+----------+---------------+
+
+-- Doesn't work because of syntax restriction on RANGE clause.
+select x, property,
+ max(x) over
+ (
+ <b>order by property, x</b>
+ <b>range between unbounded preceding and 1 following</b>
+ ) as 'local maximum'
+from int_t where property in ('prime','square');
+ERROR: AnalysisException: RANGE is only supported with both the lower and upper bounds UNBOUNDED or one UNBOUNDED and the other CURRENT ROW.
+</codeblock>
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ <xref href="impala_analytic_functions.xml#analytic_functions"/>, <xref href="impala_min.xml#min"/>,
+ <xref href="impala_avg.xml#avg"/>
+ </p>
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_max_errors.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_max_errors.xml b/docs/topics/impala_max_errors.xml
new file mode 100644
index 0000000..86f3618
--- /dev/null
+++ b/docs/topics/impala_max_errors.xml
@@ -0,0 +1,44 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="max_errors">
+
+ <title>MAX_ERRORS Query Option</title>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="Impala Query Options"/>
+ <data name="Category" value="Troubleshooting"/>
+ <data name="Category" value="Logs"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">MAX_ERRORS query option</indexterm>
+ Maximum number of non-fatal errors for any particular query that are recorded in the Impala log file. For
+ example, if a billion-row table had a non-fatal data error in every row, you could diagnose the problem
+ without all billion errors being logged. Unspecified or 0 indicates the built-in default value of 1000.
+ </p>
+
+ <p>
+ This option only controls how many errors are reported. To specify whether Impala continues or halts when it
+ encounters such errors, use the <codeph>ABORT_ON_ERROR</codeph> option.
+ </p>
+
+ <p>
+ <b>Type:</b> numeric
+ </p>
+
+ <p>
+ <b>Default:</b> 0 (meaning 1000 errors)
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+ <p>
+ <xref href="impala_abort_on_error.xml#abort_on_error"/>,
+ <xref href="impala_logging.xml#logging"/>
+ </p>
+
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_max_io_buffers.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_max_io_buffers.xml b/docs/topics/impala_max_io_buffers.xml
new file mode 100644
index 0000000..b08c57e
--- /dev/null
+++ b/docs/topics/impala_max_io_buffers.xml
@@ -0,0 +1,28 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="max_io_buffers">
+
+ <title>MAX_IO_BUFFERS Query Option</title>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="Impala Query Options"/>
+ <data name="Category" value="Deprecated Features"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ Deprecated query option. Currently has no effect.
+ </p>
+
+ <p>
+ <b>Type:</b> numeric
+ </p>
+
+ <p>
+ <b>Default:</b> 0
+ </p>
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_max_scan_range_length.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_max_scan_range_length.xml b/docs/topics/impala_max_scan_range_length.xml
new file mode 100644
index 0000000..a790fc7
--- /dev/null
+++ b/docs/topics/impala_max_scan_range_length.xml
@@ -0,0 +1,45 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="max_scan_range_length">
+
+ <title>MAX_SCAN_RANGE_LENGTH Query Option</title>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="Impala Query Options"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">MAX_SCAN_RANGE_LENGTH query option</indexterm>
+ Maximum length of the scan range. Interacts with the number of HDFS blocks in the table to determine how many
+ CPU cores across the cluster are involved with the processing for a query. (Each core processes one scan
+ range.)
+ </p>
+
+ <p>
+ Lowering the value can sometimes increase parallelism if you have unused CPU capacity, but a too-small value
+ can limit query performance because each scan range involves extra overhead.
+ </p>
+
+ <p>
+ Only applicable to HDFS tables. Has no effect on Parquet tables. Unspecified or 0 indicates backend default,
+ which is the same as the HDFS block size for each table.
+ </p>
+
+ <p>
+ Although the scan range can be arbitrarily long, Impala internally uses an 8 MB read buffer so that it can
+ query tables with huge block sizes without allocating equivalent blocks of memory.
+ </p>
+
+ <p>
+ <b>Type:</b> numeric
+ </p>
+
+ <p>
+ <b>Default:</b> 0
+ </p>
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_mem_limit.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_mem_limit.xml b/docs/topics/impala_mem_limit.xml
new file mode 100644
index 0000000..fd12953
--- /dev/null
+++ b/docs/topics/impala_mem_limit.xml
@@ -0,0 +1,208 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="mem_limit">
+
+ <title>MEM_LIMIT Query Option</title>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="Impala Query Options"/>
+ <data name="Category" value="Scalability"/>
+ <data name="Category" value="Memory"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">MEM_LIMIT query option</indexterm>
+ When resource management is not enabled, defines the maximum amount of memory a query can allocate on each node.
+ Therefore, the total memory that can be used by a query is the <codeph>MEM_LIMIT</codeph> times the number of nodes.
+ </p>
+
+ <p rev="CDH-32135">
+ There are two levels of memory limit for Impala.
+ The <codeph>-mem_limit</codeph> startup option sets an overall limit for the <cmdname>impalad</cmdname> process
+ (which handles multiple queries concurrently).
+ That limit is typically expressed in terms of a percentage of the RAM available on the host, such as <codeph>-mem_limit=70%</codeph>.
+ The <codeph>MEM_LIMIT</codeph> query option, which you set through <cmdname>impala-shell</cmdname>
+ or the <codeph>SET</codeph> statement in a JDBC or ODBC application, applies to each individual query.
+ The <codeph>MEM_LIMIT</codeph> query option is usually expressed as a fixed size such as <codeph>10gb</codeph>,
+ and must always be less than the <cmdname>impalad</cmdname> memory limit.
+ </p>
+
+ <p rev="CDH-32135">
+ If query processing exceeds the specified memory limit on any node, either the per-query limit or the
+ <cmdname>impalad</cmdname> limit, Impala cancels the query automatically.
+ Memory limits are checked periodically during query processing, so the actual memory in use
+ might briefly exceed the limit without the query being cancelled.
+ </p>
+
+ <p>
+ When resource management is enabled in CDH 5, the mechanism for this option changes. If set, it overrides the
+ automatic memory estimate from Impala. Impala requests this amount of memory from YARN on each node, and the
+ query does not proceed until that much memory is available. The actual memory used by the query could be
+ lower, since some queries use much less memory than others. With resource management, the
+ <codeph>MEM_LIMIT</codeph> setting acts both as a hard limit on the amount of memory a query can use on any
+ node (enforced by YARN) and a guarantee that that much memory will be available on each node while the query
+ is being executed. When resource management is enabled but no <codeph>MEM_LIMIT</codeph> setting is
+ specified, Impala estimates the amount of memory needed on each node for each query, requests that much
+ memory from YARN before starting the query, and then internally sets the <codeph>MEM_LIMIT</codeph> on each
+ node to the requested amount of memory during the query. Thus, if the query takes more memory than was
+ originally estimated, Impala detects that the <codeph>MEM_LIMIT</codeph> is exceeded and cancels the query
+ itself.
+ </p>
+
+ <p>
+ <b>Type:</b> numeric
+ </p>
+
+ <p rev="CDH-32135">
+ <b>Units:</b> A numeric argument represents memory size in bytes; you can also use a suffix of <codeph>m</codeph> or <codeph>mb</codeph>
+ for megabytes, or more commonly <codeph>g</codeph> or <codeph>gb</codeph> for gigabytes. If you specify a value in an unrecognized
+ format, subsequent queries fail with an error.
+ </p>
+
+ <p rev="CDH-32135">
+ <b>Default:</b> 0 (unlimited)
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+
+ <p rev="CDH-32135">
+ The <codeph>MEM_LIMIT</codeph> setting is primarily useful in a high-concurrency setting,
+ or on a cluster with a workload shared between Impala and other data processing components.
+ You can prevent any query from accidentally using much more memory than expected,
+ which could negatively impact other Impala queries.
+ </p>
+
+ <p rev="CDH-32135">
+ Use the output of the <codeph>SUMMARY</codeph> command in <cmdname>impala-shell</cmdname>
+ to get a report of memory used for each phase of your most heavyweight queries on each node,
+ and then set a <codeph>MEM_LIMIT</codeph> somewhat higher than that.
+ See <xref href="impala_explain_plan.xml#perf_summary"/> for usage information about
+ the <codeph>SUMMARY</codeph> command.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb" rev="CDH-32135"/>
+
+ <p rev="CDH-32135">
+ The following examples show how to set the <codeph>MEM_LIMIT</codeph> query option
+ using a fixed number of bytes, or suffixes representing gigabytes or megabytes.
+ </p>
+
+<codeblock rev="CDH-32135">
+[localhost:21000] > set mem_limit=3000000000;
+MEM_LIMIT set to 3000000000
+[localhost:21000] > select 5;
+Query: select 5
++---+
+| 5 |
++---+
+| 5 |
++---+
+
+[localhost:21000] > set mem_limit=3g;
+MEM_LIMIT set to 3g
+[localhost:21000] > select 5;
+Query: select 5
++---+
+| 5 |
++---+
+| 5 |
++---+
+
+[localhost:21000] > set mem_limit=3gb;
+MEM_LIMIT set to 3gb
+[localhost:21000] > select 5;
++---+
+| 5 |
++---+
+| 5 |
++---+
+
+[localhost:21000] > set mem_limit=3m;
+MEM_LIMIT set to 3m
+[localhost:21000] > select 5;
++---+
+| 5 |
++---+
+| 5 |
++---+
+[localhost:21000] > set mem_limit=3mb;
+MEM_LIMIT set to 3mb
+[nightly55-2.vpc.cloudera.com:21000] > select 5;
++---+
+| 5 |
++---+
+</codeblock>
+
+ <p rev="CDH-32135">
+ The following examples show how unrecognized <codeph>MEM_LIMIT</codeph>
+ values lead to errors for subsequent queries.
+ </p>
+
+<codeblock rev="CDH-32135">
+[localhost:21000] > set mem_limit=3tb;
+MEM_LIMIT set to 3tb
+[localhost:21000] > select 5;
+ERROR: Failed to parse query memory limit from '3tb'.
+
+[localhost:21000] > set mem_limit=xyz;
+MEM_LIMIT set to xyz
+[localhost:21000] > select 5;
+Query: select 5
+ERROR: Failed to parse query memory limit from 'xyz'.
+</codeblock>
+
+ <p rev="CDH-32135">
+ The following example shows the automatic query cancellation
+ when the <codeph>MEM_LIMIT</codeph> value is exceeded
+ on any host involved in the Impala query. First it runs a
+ successful query and checks the largest amount of memory
+ used on any node for any stage of the query.
+ Then it sets an artificially low <codeph>MEM_LIMIT</codeph>
+ setting so that the same query cannot run.
+ </p>
+
+<codeblock rev="CDH-32135">
+[localhost:21000] > select count(*) from customer;
+Query: select count(*) from customer
++----------+
+| count(*) |
++----------+
+| 150000 |
++----------+
+
+[localhost:21000] > select count(distinct c_name) from customer;
+Query: select count(distinct c_name) from customer
++------------------------+
+| count(distinct c_name) |
++------------------------+
+| 150000 |
++------------------------+
+
+[localhost:21000] > summary;
++--------------+--------+----------+----------+---------+------------+----------+---------------+---------------+
+| Operator | #Hosts | Avg Time | Max Time | #Rows | Est. #Rows | Peak Mem | Est. Peak Mem | Detail |
++--------------+--------+----------+----------+---------+------------+----------+---------------+---------------+
+| 06:AGGREGATE | 1 | 230.00ms | 230.00ms | 1 | 1 | 16.00 KB | -1 B | FINALIZE |
+| 05:EXCHANGE | 1 | 43.44us | 43.44us | 1 | 1 | 0 B | -1 B | UNPARTITIONED |
+| 02:AGGREGATE | 1 | 227.14ms | 227.14ms | 1 | 1 | 12.00 KB | 10.00 MB | |
+| 04:AGGREGATE | 1 | 126.27ms | 126.27ms | 150.00K | 150.00K | 15.17 MB | 10.00 MB | |
+| 03:EXCHANGE | 1 | 44.07ms | 44.07ms | 150.00K | 150.00K | 0 B | 0 B | HASH(c_name) |
+<b>| 01:AGGREGATE | 1 | 361.94ms | 361.94ms | 150.00K | 150.00K | 23.04 MB | 10.00 MB | |</b>
+| 00:SCAN HDFS | 1 | 43.64ms | 43.64ms | 150.00K | 150.00K | 24.19 MB | 64.00 MB | tpch.customer |
++--------------+--------+----------+----------+---------+------------+----------+---------------+---------------+
+
+[localhost:21000] > set mem_limit=15mb;
+MEM_LIMIT set to 15mb
+[localhost:21000] > select count(distinct c_name) from customer;
+Query: select count(distinct c_name) from customer
+ERROR:
+Memory limit exceeded
+Query did not have enough memory to get the minimum required buffers in the block manager.
+</codeblock>
+
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_min.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_min.xml b/docs/topics/impala_min.xml
new file mode 100644
index 0000000..a63fc4c
--- /dev/null
+++ b/docs/topics/impala_min.xml
@@ -0,0 +1,191 @@
+<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="min">
+
+ <title>MIN Function</title>
+ <titlealts><navtitle>MIN</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Impala Functions"/>
+ <data name="Category" value="Analytic Functions"/>
+ <data name="Category" value="Aggregate Functions"/>
+ <data name="Category" value="Querying"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">min() function</indexterm>
+ An aggregate function that returns the minimum value from a set of numbers. Opposite of the
+ <codeph>MAX</codeph> function. Its single argument can be a numeric column, or the numeric result of a function
+ or expression applied to the column value. Rows with a <codeph>NULL</codeph> value for the specified column
+ are ignored. If the table is empty, or all the values supplied to <codeph>MIN</codeph> are
+ <codeph>NULL</codeph>, <codeph>MIN</codeph> returns <codeph>NULL</codeph>.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock>MIN([DISTINCT | ALL] <varname>expression</varname>) [OVER (<varname>analytic_clause</varname>)]</codeblock>
+
+ <p>
+ When the query contains a <codeph>GROUP BY</codeph> clause, returns one value for each combination of
+ grouping values.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/restrictions_sliding_window"/>
+
+ <p conref="../shared/impala_common.xml#common/return_type_same_except_string"/>
+
+ <p conref="../shared/impala_common.xml#common/complex_types_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/complex_types_aggregation_explanation"/>
+
+ <p conref="../shared/impala_common.xml#common/complex_types_aggregation_example"/>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+<codeblock>-- Find the smallest value for this column in the table.
+select min(c1) from t1;
+-- Find the smallest value for this column from a subset of the table.
+select min(c1) from t1 where month = 'January' and year = '2013';
+-- Find the smallest value from a set of numeric function results.
+select min(length(s)) from t1;
+-- Can also be used in combination with DISTINCT and/or GROUP BY.
+-- Return more than one result.
+select month, year, min(purchase_price) from store_stats group by month, year;
+-- Filter the input to eliminate duplicates before performing the calculation.
+select min(distinct x) from t1;
+</codeblock>
+
+ <p rev="2.0.0">
+ The following examples show how to use <codeph>MIN()</codeph> in an analytic context. They use a table
+ containing integers from 1 to 10. Notice how the <codeph>MIN()</codeph> is reported for each input value, as
+ opposed to the <codeph>GROUP BY</codeph> clause which condenses the result set.
+<codeblock>select x, property, min(x) over (partition by property) as min from int_t where property in ('odd','even');
++----+----------+-----+
+| x | property | min |
++----+----------+-----+
+| 2 | even | 2 |
+| 4 | even | 2 |
+| 6 | even | 2 |
+| 8 | even | 2 |
+| 10 | even | 2 |
+| 1 | odd | 1 |
+| 3 | odd | 1 |
+| 5 | odd | 1 |
+| 7 | odd | 1 |
+| 9 | odd | 1 |
++----+----------+-----+
+</codeblock>
+
+Adding an <codeph>ORDER BY</codeph> clause lets you experiment with results that are cumulative or apply to a moving
+set of rows (the <q>window</q>). The following examples use <codeph>MIN()</codeph> in an analytic context
+(that is, with an <codeph>OVER()</codeph> clause) to display the smallest value of <codeph>X</codeph>
+encountered up to each row in the result set. The examples use two columns in the <codeph>ORDER BY</codeph>
+clause to produce a sequence of values that rises and falls, to illustrate how the <codeph>MIN()</codeph>
+result only decreases or stays the same throughout each partition within the result set.
+The basic <codeph>ORDER BY x</codeph> clause implicitly
+activates a window clause of <codeph>RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW</codeph>,
+which is effectively the same as <codeph>ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW</codeph>,
+therefore all of these examples produce the same results:
+
+<codeblock>select x, property, min(x) <b>over (order by property, x desc)</b> as 'minimum to this point'
+ from int_t where property in ('prime','square');
++---+----------+-----------------------+
+| x | property | minimum to this point |
++---+----------+-----------------------+
+| 7 | prime | 7 |
+| 5 | prime | 5 |
+| 3 | prime | 3 |
+| 2 | prime | 2 |
+| 9 | square | 2 |
+| 4 | square | 2 |
+| 1 | square | 1 |
++---+----------+-----------------------+
+
+select x, property,
+ min(x) over
+ (
+ <b>order by property, x desc</b>
+ <b>range between unbounded preceding and current row</b>
+ ) as 'minimum to this point'
+from int_t where property in ('prime','square');
++---+----------+-----------------------+
+| x | property | minimum to this point |
++---+----------+-----------------------+
+| 7 | prime | 7 |
+| 5 | prime | 5 |
+| 3 | prime | 3 |
+| 2 | prime | 2 |
+| 9 | square | 2 |
+| 4 | square | 2 |
+| 1 | square | 1 |
++---+----------+-----------------------+
+
+select x, property,
+ min(x) over
+ (
+ <b>order by property, x desc</b>
+ <b>rows between unbounded preceding and current row</b>
+ ) as 'minimum to this point'
+from int_t where property in ('prime','square');
++---+----------+-----------------------+
+| x | property | minimum to this point |
++---+----------+-----------------------+
+| 7 | prime | 7 |
+| 5 | prime | 5 |
+| 3 | prime | 3 |
+| 2 | prime | 2 |
+| 9 | square | 2 |
+| 4 | square | 2 |
+| 1 | square | 1 |
++---+----------+-----------------------+
+</codeblock>
+
+The following examples show how to construct a moving window, with a running minimum taking into account all rows before
+and 1 row after the current row.
+Because of a restriction in the Impala <codeph>RANGE</codeph> syntax, this type of
+moving window is possible with the <codeph>ROWS BETWEEN</codeph> clause but not the <codeph>RANGE BETWEEN</codeph> clause.
+Because of an extra Impala restriction on the <codeph>MAX()</codeph> and <codeph>MIN()</codeph> functions in an
+analytic context, the lower bound must be <codeph>UNBOUNDED PRECEDING</codeph>.
+<codeblock>select x, property,
+ min(x) over
+ (
+ <b>order by property, x desc</b>
+ <b>rows between unbounded preceding and 1 following</b>
+ ) as 'local minimum'
+from int_t where property in ('prime','square');
++---+----------+---------------+
+| x | property | local minimum |
++---+----------+---------------+
+| 7 | prime | 5 |
+| 5 | prime | 3 |
+| 3 | prime | 2 |
+| 2 | prime | 2 |
+| 9 | square | 2 |
+| 4 | square | 1 |
+| 1 | square | 1 |
++---+----------+---------------+
+
+-- Doesn't work because of syntax restriction on RANGE clause.
+select x, property,
+ min(x) over
+ (
+ <b>order by property, x desc</b>
+ <b>range between unbounded preceding and 1 following</b>
+ ) as 'local minimum'
+from int_t where property in ('prime','square');
+ERROR: AnalysisException: RANGE is only supported with both the lower and upper bounds UNBOUNDED or one UNBOUNDED and the other CURRENT ROW.
+</codeblock>
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ <xref href="impala_analytic_functions.xml#analytic_functions"/>, <xref href="impala_max.xml#max"/>,
+ <xref href="impala_avg.xml#avg"/>
+ </p>
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_misc_functions.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_misc_functions.xml b/docs/topics/impala_misc_functions.xml
new file mode 100644
index 0000000..bb9f062
--- /dev/null
+++ b/docs/topics/impala_misc_functions.xml
@@ -0,0 +1,148 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="misc_functions">
+
+ <title>Impala Miscellaneous Functions</title>
+ <titlealts><navtitle>Miscellaneous Functions</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="Impala Functions"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Data Analysts"/>
+ <data name="Category" value="Developers"/>
+ <data name="Category" value="Querying"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ Impala supports the following utility functions that do not operate on a particular column or data type:
+ </p>
+
+ <dl>
+ <dlentry rev="1.3.0" id="current_database">
+
+ <dt>
+ <codeph>current_database()</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">current_database() function</indexterm>
+ <b>Purpose:</b> Returns the database that the session is currently using, either <codeph>default</codeph>
+ if no database has been selected, or whatever database the session switched to through a
+ <codeph>USE</codeph> statement or the <cmdname>impalad</cmdname><codeph>-d</codeph> option.
+ <p>
+ <b>Return type:</b> <codeph>string</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry rev="5.4.5" id="effective_user">
+
+ <dt>
+ <codeph>effective_user()</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">effective_user() function</indexterm>
+ <b>Purpose:</b> Typically returns the same value as <codeph>user()</codeph>,
+ except if delegation is enabled, in which case it returns the ID of the delegated user.
+ <p>
+ <b>Return type:</b> <codeph>string</codeph>
+ </p>
+ <p>
+ <b>Added in:</b> CDH 5.4.5
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry rev="1.3.0" id="pid">
+
+ <dt>
+ <codeph>pid()</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">pid() function</indexterm>
+ <b>Purpose:</b> Returns the process ID of the <cmdname>impalad</cmdname> daemon that the session is
+ connected to. You can use it during low-level debugging, to issue Linux commands that trace the process, show its
+ arguments, and so on, for the <cmdname>impalad</cmdname> process.
+ <p>
+ <b>Return type:</b> <codeph>int</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry audience="Cloudera" id="sleep">
+
+ <dt>
+ <codeph>sleep(int ms)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">sleep() function</indexterm>
+ <b>Purpose:</b> Pauses the query for a specified number of milliseconds. For slowing down queries with
+ small result sets enough to monitor runtime execution, memory usage, or other factors that otherwise
+ would be difficult to capture during the brief interval of query execution. When used in the
+ <codeph>SELECT</codeph> list, it is called once for each row in the result set; adjust the number of
+ milliseconds accordingly. For example, a query <codeph>SELECT *, sleep(5) FROM
+ table_with_1000_rows</codeph> would take at least 5 seconds to complete (5 milliseconds * 1000 rows in
+ result set). To avoid an excessive number of concurrent queries, use this function for troubleshooting on
+ test and development systems, not for production queries.
+ <p>
+ <b>Return type:</b> N/A
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry rev="1.1" id="user">
+
+ <dt>
+ <codeph>user()</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">user() function</indexterm>
+ <b>Purpose:</b> Returns the username of the Linux user who is connected to the <cmdname>impalad</cmdname>
+ daemon. Typically called a single time, in a query without any <codeph>FROM</codeph> clause, to
+ understand how authorization settings apply in a security context; once you know the logged-in user name,
+ you can check which groups that user belongs to, and from the list of groups you can check which roles
+ are available to those groups through the authorization policy file.
+ <p conref="../shared/impala_common.xml#common/user_kerberized"/>
+ <p>
+ When delegation is enabled, consider calling the <codeph>effective_user()</codeph> function instead.
+ </p>
+ <p>
+ <b>Return type:</b> <codeph>string</codeph>
+ </p>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="version">
+
+ <dt>
+ <codeph>version()</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="Cloudera">version() function</indexterm>
+ <b>Purpose:</b> Returns information such as the precise version number and build date for the
+ <codeph>impalad</codeph> daemon that you are currently connected to. Typically used to confirm that you
+ are connected to the expected level of Impala to use a particular feature, or to connect to several nodes
+ and confirm they are all running the same level of <cmdname>impalad</cmdname>.
+ <p>
+ <b>Return type:</b> <codeph>string</codeph> (with one or more embedded newlines)
+ </p>
+ </dd>
+
+ </dlentry>
+ </dl>
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_ndv.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_ndv.xml b/docs/topics/impala_ndv.xml
new file mode 100644
index 0000000..a1e5527
--- /dev/null
+++ b/docs/topics/impala_ndv.xml
@@ -0,0 +1,133 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept rev="1.2.1" id="ndv">
+
+ <title>NDV Function</title>
+ <titlealts><navtitle>NDV</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Impala Functions"/>
+ <data name="Category" value="Aggregate Functions"/>
+ <data name="Category" value="Querying"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">ndv() function</indexterm>
+ An aggregate function that returns an approximate value similar to the result of <codeph>COUNT(DISTINCT
+ <varname>col</varname>)</codeph>, the <q>number of distinct values</q>. It is much faster than the
+ combination of <codeph>COUNT</codeph> and <codeph>DISTINCT</codeph>, and uses a constant amount of memory and
+ thus is less memory-intensive for columns with high cardinality.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock>NDV([DISTINCT | ALL] <varname>expression</varname>)</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+
+ <p rev="1.2.2">
+ This is the mechanism used internally by the <codeph>COMPUTE STATS</codeph> statement for computing the
+ number of distinct values in a column.
+ </p>
+
+ <p>
+ Because this number is an estimate, it might not reflect the precise number of different values in the
+ column, especially if the cardinality is very low or very high. If the estimated number is higher than the
+ number of rows in the table, Impala adjusts the value internally during query planning.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/former_odd_return_type_string"/>
+
+<!-- <p conref="/Content/impala_common_xi44078.xml#common/restrictions_sliding_window"/> -->
+
+ <p conref="../shared/impala_common.xml#common/complex_types_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/complex_types_aggregation_explanation"/>
+
+ <p conref="../shared/impala_common.xml#common/complex_types_aggregation_example"/>
+
+ <p conref="../shared/impala_common.xml#common/restrictions_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/analytic_not_allowed_caveat"/>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+ <p>
+ The following example queries a billion-row table to illustrate the relative performance of
+ <codeph>COUNT(DISTINCT)</codeph> and <codeph>NDV()</codeph>. It shows how <codeph>COUNT(DISTINCT)</codeph>
+ gives a precise answer, but is inefficient for large-scale data where an approximate result is sufficient.
+ The <codeph>NDV()</codeph> function gives an approximate result but is much faster.
+ </p>
+
+<codeblock>select count(distinct col1) from sample_data;
++---------------------+
+| count(distinct col1)|
++---------------------+
+| 100000 |
++---------------------+
+Fetched 1 row(s) in 20.13s
+
+select cast(ndv(col1) as bigint) as col1 from sample_data;
++----------+
+| col1 |
++----------+
+| 139017 |
++----------+
+Fetched 1 row(s) in 8.91s
+</codeblock>
+
+ <p>
+ The following example shows how you can code multiple <codeph>NDV()</codeph> calls in a single query, to
+ easily learn which columns have substantially more or fewer distinct values. This technique is faster than
+ running a sequence of queries with <codeph>COUNT(DISTINCT)</codeph> calls.
+ </p>
+
+<codeblock>select cast(ndv(col1) as bigint) as col1, cast(ndv(col2) as bigint) as col2,
+ cast(ndv(col3) as bigint) as col3, cast(ndv(col4) as bigint) as col4
+ from sample_data;
++----------+-----------+------------+-----------+
+| col1 | col2 | col3 | col4 |
++----------+-----------+------------+-----------+
+| 139017 | 282 | 46 | 145636240 |
++----------+-----------+------------+-----------+
+Fetched 1 row(s) in 34.97s
+
+select count(distinct col1) from sample_data;
++---------------------+
+| count(distinct col1)|
++---------------------+
+| 100000 |
++---------------------+
+Fetched 1 row(s) in 20.13s
+
+select count(distinct col2) from sample_data;
++----------------------+
+| count(distinct col2) |
++----------------------+
+| 278 |
++----------------------+
+Fetched 1 row(s) in 20.09s
+
+select count(distinct col3) from sample_data;
++-----------------------+
+| count(distinct col3) |
++-----------------------+
+| 46 |
++-----------------------+
+Fetched 1 row(s) in 19.12s
+
+select count(distinct col4) from sample_data;
++----------------------+
+| count(distinct col4) |
++----------------------+
+| 147135880 |
++----------------------+
+Fetched 1 row(s) in 266.95s
+</codeblock>
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_num_nodes.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_num_nodes.xml b/docs/topics/impala_num_nodes.xml
new file mode 100644
index 0000000..75ae8e8
--- /dev/null
+++ b/docs/topics/impala_num_nodes.xml
@@ -0,0 +1,45 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="num_nodes">
+
+ <title>NUM_NODES Query Option</title>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="Impala Query Options"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">NUM_NODES query option</indexterm>
+ Limit the number of nodes that process a query, typically during debugging.
+
+ </p>
+
+ <p>
+ <b>Type:</b> numeric
+ </p>
+
+<p>
+ <b>Allowed values:</b> Only accepts the values 0
+ (meaning all nodes) or 1 (meaning all work is done on the coordinator node).
+</p>
+
+ <p>
+ <b>Default:</b> 0
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+
+ <p>
+ If you are diagnosing a problem that you suspect is due to a timing issue caused by
+ distributed query processing, you can set <codeph>NUM_NODES=1</codeph> to verify
+ if the problem still occurs when all the work is done on a single node.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/num_nodes_tip"/>
+
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_num_scanner_threads.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_num_scanner_threads.xml b/docs/topics/impala_num_scanner_threads.xml
new file mode 100644
index 0000000..27cf883
--- /dev/null
+++ b/docs/topics/impala_num_scanner_threads.xml
@@ -0,0 +1,32 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="num_scanner_threads">
+
+ <title>NUM_SCANNER_THREADS Query Option</title>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="Impala Query Options"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">NUM_SCANNER_THREADS query option</indexterm>
+ Maximum number of scanner threads (on each node) used for each query. By default, Impala uses as many cores
+ as are available (one thread per core). You might lower this value if queries are using excessive resources
+ on a busy cluster. Impala imposes a maximum value automatically, so a high value has no practical effect.
+ </p>
+
+ <p>
+ <b>Type:</b> numeric
+ </p>
+
+ <p>
+ <b>Default:</b> 0
+ </p>
+
+ <note conref="../shared/impala_common.xml#common/compute_stats_parquet"/>
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_offset.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_offset.xml b/docs/topics/impala_offset.xml
new file mode 100644
index 0000000..c9c073d
--- /dev/null
+++ b/docs/topics/impala_offset.xml
@@ -0,0 +1,64 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept rev="1.2.1" id="offset">
+
+ <title>OFFSET Clause</title>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Querying"/>
+ <data name="Category" value="Reports"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ The <codeph>OFFSET</codeph> clause in a <codeph>SELECT</codeph> query causes the result set to start some
+ number of rows after the logical first item. The result set is numbered starting from zero, so <codeph>OFFSET
+ 0</codeph> produces the same result as leaving out the <codeph>OFFSET</codeph> clause. Always use this clause
+ in combination with <codeph>ORDER BY</codeph> (so that it is clear which item should be first, second, and so
+ on) and <codeph>LIMIT</codeph> (so that the result set covers a bounded range, such as items 0-9, 100-199,
+ and so on).
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/limit_and_offset"/>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+ <p>
+ The following example shows how you could run a <q>paging</q> query originally written for a traditional
+ database application. Because typical Impala queries process megabytes or gigabytes of data and read large
+ data files from disk each time, it is inefficient to run a separate query to retrieve each small group of
+ items. Use this technique only for compatibility while porting older applications, then rewrite the
+ application code to use a single query with a large result set, and display pages of results from the cached
+ result set.
+ </p>
+
+<codeblock>[localhost:21000] > create table numbers (x int);
+[localhost:21000] > insert into numbers select x from very_long_sequence;
+Inserted 1000000 rows in 1.34s
+[localhost:21000] > select x from numbers order by x limit 5 offset 0;
++----+
+| x |
++----+
+| 1 |
+| 2 |
+| 3 |
+| 4 |
+| 5 |
++----+
+[localhost:21000] > select x from numbers order by x limit 5 offset 5;
++----+
+| x |
++----+
+| 6 |
+| 7 |
+| 8 |
+| 9 |
+| 10 |
++----+
+</codeblock>
+ </conbody>
+</concept>
[03/22] incubator-impala git commit: First try at porting over the
source files necessary for the Impala SQL Reference.
Posted by jr...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_timestamp.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_timestamp.xml b/docs/topics/impala_timestamp.xml
new file mode 100644
index 0000000..c469b54
--- /dev/null
+++ b/docs/topics/impala_timestamp.xml
@@ -0,0 +1,441 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="timestamp">
+
+ <title>TIMESTAMP Data Type</title>
+ <titlealts><navtitle>TIMESTAMP</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="Impala Data Types"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Data Analysts"/>
+ <data name="Category" value="Developers"/>
+ <data name="Category" value="Dates and Times"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ A data type used in <codeph>CREATE TABLE</codeph> and <codeph>ALTER TABLE</codeph> statements, representing a
+ point in time.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+ <p>
+ In the column definition of a <codeph>CREATE TABLE</codeph> statement:
+ </p>
+
+<codeblock><varname>column_name</varname> TIMESTAMP</codeblock>
+
+ <p>
+ <b>Range:</b> Allowed date values range from 1400-01-01 to 9999-12-31; this range is different from the Hive
+ <codeph>TIMESTAMP</codeph> type. Internally, the resolution of the time portion of a
+ <codeph>TIMESTAMP</codeph> value is in nanoseconds.
+ </p>
+
+ <p>
+ <b>INTERVAL expressions:</b>
+ </p>
+
+ <p>
+ You can perform date arithmetic by adding or subtracting a specified number of time units, using the
+ <codeph>INTERVAL</codeph> keyword and the <codeph>+</codeph> and <codeph>-</codeph> operators or
+ <codeph>date_add()</codeph> and <codeph>date_sub()</codeph> functions. You can specify units as
+ <codeph>YEAR[S]</codeph>, <codeph>MONTH[S]</codeph>, <codeph>WEEK[S]</codeph>, <codeph>DAY[S]</codeph>,
+ <codeph>HOUR[S]</codeph>, <codeph>MINUTE[S]</codeph>, <codeph>SECOND[S]</codeph>,
+ <codeph>MILLISECOND[S]</codeph>, <codeph>MICROSECOND[S]</codeph>, and <codeph>NANOSECOND[S]</codeph>. You can
+ only specify one time unit in each interval expression, for example <codeph>INTERVAL 3 DAYS</codeph> or
+ <codeph>INTERVAL 25 HOURS</codeph>, but you can produce any granularity by adding together successive
+ <codeph>INTERVAL</codeph> values, such as <codeph><varname>timestamp_value</varname> + INTERVAL 3 WEEKS -
+ INTERVAL 1 DAY + INTERVAL 10 MICROSECONDS</codeph>.
+ </p>
+
+ <p>
+ For example:
+ </p>
+
+<codeblock>select now() + interval 1 day;
+select date_sub(now(), interval 5 minutes);
+insert into auction_details
+ select auction_id, auction_start_time, auction_start_time + interval 2 days + interval 12 hours
+ from new_auctions;</codeblock>
+
+ <p>
+ <b>Time zones:</b>
+ </p>
+
+ <p>
+ By default, Impala does not store timestamps using the local timezone, to avoid undesired results from
+ unexpected time zone issues. Timestamps are stored and interpreted relative to UTC, both when written to or
+ read from data files, and when converted to or from Unix time values through functions such as
+ <codeph>from_unixtime()</codeph> or <codeph>unix_timestamp()</codeph>. To convert such a
+ <codeph>TIMESTAMP</codeph> value to one that represents the date and time in a specific time zone, convert
+ the original value with the <codeph>from_utc_timestamp()</codeph> function.
+ </p>
+
+ <p>
+ Because Impala does not assume that <codeph>TIMESTAMP</codeph> values are in any particular time zone, you
+ must be conscious of the time zone aspects of data that you query, insert, or convert.
+ </p>
+
+ <p>
+ For consistency with Unix system calls, the <codeph>TIMESTAMP</codeph> returned by the <codeph>now()</codeph>
+ function represents the local time in the system time zone, rather than in UTC. To store values relative to
+ the current time in a portable way, convert any <codeph>now()</codeph> return values using the
+ <codeph>to_utc_timestamp()</codeph> function first. For example, the following example shows that the current
+ time in California (where Cloudera HQ is located) is shortly after 2 PM. If that value was written to a data
+ file, and shipped off to a distant server to be analyzed alongside other data from far-flung locations, the
+ dates and times would not match up precisely because of time zone differences. Therefore, the
+ <codeph>to_utc_timestamp()</codeph> function converts it using a common reference point, the UTC time zone
+ (descended from the old Greenwich Mean Time standard). The <codeph>'PDT'</codeph> argument indicates that the
+ original value is from the Pacific time zone with Daylight Saving Time in effect. When servers in all
+ geographic locations run the same transformation on any local date and time values (with the appropriate time
+ zone argument), the stored data uses a consistent representation. Impala queries can use functions such as
+ <codeph>EXTRACT()</codeph>, <codeph>MIN()</codeph>, <codeph>AVG()</codeph>, and so on to do time-series
+ analysis on those timestamps.
+ </p>
+
+<codeblock>[localhost:21000] > select now();
++-------------------------------+
+| now() |
++-------------------------------+
+| 2015-04-09 14:07:46.580465000 |
++-------------------------------+
+[localhost:21000] > select to_utc_timestamp(now(), 'PDT');
++--------------------------------+
+| to_utc_timestamp(now(), 'pdt') |
++--------------------------------+
+| 2015-04-09 21:08:07.664547000 |
++--------------------------------+
+</codeblock>
+
+ <p>
+ The converse function, <codeph>from_utc_timestamp()</codeph>, lets you take stored <codeph>TIMESTAMP</codeph>
+ data or calculated results and convert back to local date and time for processing on the application side.
+ The following example shows how you might represent some future date (such as the ending date and time of an
+ auction) in UTC, and then convert back to local time when convenient for reporting or other processing. The
+ final query in the example tests whether this arbitrary UTC date and time has passed yet, by converting it
+ back to the local time zone and comparing it against the current date and time.
+ </p>
+
+<codeblock>[localhost:21000] > select to_utc_timestamp(now() + interval 2 weeks, 'PDT');
++---------------------------------------------------+
+| to_utc_timestamp(now() + interval 2 weeks, 'pdt') |
++---------------------------------------------------+
+| 2015-04-23 21:08:34.152923000 |
++---------------------------------------------------+
+[localhost:21000] > select from_utc_timestamp('2015-04-23 21:08:34.152923000','PDT');
++------------------------------------------------------------+
+| from_utc_timestamp('2015-04-23 21:08:34.152923000', 'pdt') |
++------------------------------------------------------------+
+| 2015-04-23 14:08:34.152923000 |
++------------------------------------------------------------+
+[localhost:21000] > select from_utc_timestamp('2015-04-23 21:08:34.152923000','PDT') < now();
++--------------------------------------------------------------------+
+| from_utc_timestamp('2015-04-23 21:08:34.152923000', 'pdt') < now() |
++--------------------------------------------------------------------+
+| false |
++--------------------------------------------------------------------+
+</codeblock>
+
+ <p rev="2.2.0">
+ If you have data files written by Hive, those <codeph>TIMESTAMP</codeph> values represent the local timezone
+ of the host where the data was written, potentially leading to inconsistent results when processed by Impala.
+ To avoid compatibility problems or having to code workarounds, you can specify one or both of these
+ <cmdname>impalad</cmdname> startup flags: <codeph>-use_local_tz_for_unix_timestamp_conversions=true</codeph>
+ <codeph>-convert_legacy_hive_parquet_utc_timestamps=true</codeph>. Although
+ <codeph>-convert_legacy_hive_parquet_utc_timestamps</codeph> is turned off by default to avoid performance overhead, Cloudera recommends
+ turning it on when processing <codeph>TIMESTAMP</codeph> columns in Parquet files written by Hive, to avoid unexpected behavior.
+ </p>
+
+ <p rev="2.2.0">
+ The <codeph>-use_local_tz_for_unix_timestamp_conversions</codeph> setting affects conversions from
+ <codeph>TIMESTAMP</codeph> to <codeph>BIGINT</codeph>, or from <codeph>BIGINT</codeph>
+ to <codeph>TIMESTAMP</codeph>. By default, Impala treats all <codeph>TIMESTAMP</codeph> values as UTC,
+ to simplify analysis of time-series data from different geographic regions. When you enable the
+ <codeph>-use_local_tz_for_unix_timestamp_conversions</codeph> setting, these operations
+ treat the input values as if they are in the local time zone of the host doing the processing.
+ See <xref href="impala_datetime_functions.xml#datetime_functions"/> for the list of functions
+ affected by the <codeph>-use_local_tz_for_unix_timestamp_conversions</codeph> setting.
+ </p>
+
+ <p>
+ The following sequence of examples shows how the interpretation of <codeph>TIMESTAMP</codeph> values in
+ Parquet tables is affected by the <codeph>-convert_legacy_hive_parquet_utc_timestamps</codeph>
+ setting.
+ </p>
+
+ <p>
+ Regardless of the <codeph>-convert_legacy_hive_parquet_utc_timestamps</codeph> setting,
+ <codeph>TIMESTAMP</codeph> columns in text tables can be written and read interchangeably by Impala and Hive:
+ </p>
+
+<codeblock>Impala DDL and queries for text table:
+
+[localhost:21000] > create table t1 (x timestamp);
+[localhost:21000] > insert into t1 values (now()), (now() + interval 1 day);
+[localhost:21000] > select x from t1;
++-------------------------------+
+| x |
++-------------------------------+
+| 2015-04-07 15:43:02.892403000 |
+| 2015-04-08 15:43:02.892403000 |
++-------------------------------+
+[localhost:21000] > select to_utc_timestamp(x, 'PDT') from t1;
++-------------------------------+
+| to_utc_timestamp(x, 'pdt') |
++-------------------------------+
+| 2015-04-07 22:43:02.892403000 |
+| 2015-04-08 22:43:02.892403000 |
++-------------------------------+
+
+Hive query for text table:
+
+hive> select * from t1;
+OK
+2015-04-07 15:43:02.892403
+2015-04-08 15:43:02.892403
+Time taken: 1.245 seconds, Fetched: 2 row(s)
+</codeblock>
+
+ <p>
+ When the table uses Parquet format, Impala expects any time zone adjustment to be applied prior to writing,
+ while <codeph>TIMESTAMP</codeph> values written by Hive are adjusted to be in the UTC time zone. When Hive
+ queries Parquet data files that it wrote, it adjusts the <codeph>TIMESTAMP</codeph> values back to the local
+ time zone, while Impala does no conversion. Hive does no time zone conversion when it queries Impala-written
+ Parquet files.
+ </p>
+
+<codeblock>Impala DDL and queries for Parquet table:
+
+[localhost:21000] > create table p1 stored as parquet as select x from t1;
++-------------------+
+| summary |
++-------------------+
+| Inserted 2 row(s) |
++-------------------+
+[localhost:21000] > select x from p1;
++-------------------------------+
+| x |
++-------------------------------+
+| 2015-04-07 15:43:02.892403000 |
+| 2015-04-08 15:43:02.892403000 |
++-------------------------------+
+
+Hive DDL and queries for Parquet table:
+
+hive> create table h1 (x timestamp) stored as parquet;
+OK
+hive> insert into h1 select * from p1;
+...
+OK
+Time taken: 35.573 seconds
+hive> select x from p1;
+OK
+2015-04-07 15:43:02.892403
+2015-04-08 15:43:02.892403
+Time taken: 0.324 seconds, Fetched: 2 row(s)
+hive> select x from h1;
+OK
+2015-04-07 15:43:02.892403
+2015-04-08 15:43:02.892403
+Time taken: 0.197 seconds, Fetched: 2 row(s)
+</codeblock>
+
+ <p>
+ The discrepancy arises when Impala queries the Hive-created Parquet table. The underlying values in the
+ <codeph>TIMESTAMP</codeph> column are different from the ones written by Impala, even though they were copied
+ from one table to another by an <codeph>INSERT ... SELECT</codeph> statement in Hive. Hive did an implicit
+ conversion from the local time zone to UTC as it wrote the values to Parquet.
+ </p>
+
+<codeblock>Impala query for TIMESTAMP values from Impala-written and Hive-written data:
+
+[localhost:21000] > select * from p1;
++-------------------------------+
+| x |
++-------------------------------+
+| 2015-04-07 15:43:02.892403000 |
+| 2015-04-08 15:43:02.892403000 |
++-------------------------------+
+Fetched 2 row(s) in 0.29s
+[localhost:21000] > select * from h1;
++-------------------------------+
+| x |
++-------------------------------+
+| 2015-04-07 22:43:02.892403000 |
+| 2015-04-08 22:43:02.892403000 |
++-------------------------------+
+Fetched 2 row(s) in 0.41s
+
+Underlying integer values for Impala-written and Hive-written data:
+
+[localhost:21000] > select cast(x as bigint) from p1;
++-------------------+
+| cast(x as bigint) |
++-------------------+
+| 1428421382 |
+| 1428507782 |
++-------------------+
+Fetched 2 row(s) in 0.38s
+[localhost:21000] > select cast(x as bigint) from h1;
++-------------------+
+| cast(x as bigint) |
++-------------------+
+| 1428446582 |
+| 1428532982 |
++-------------------+
+Fetched 2 row(s) in 0.20s
+</codeblock>
+
+ <p>
+ When the <codeph>-convert_legacy_hive_parquet_utc_timestamps</codeph> setting is enabled, Impala recognizes
+ the Parquet data files written by Hive, and applies the same UTC-to-local-timezone conversion logic during
+ the query as Hive uses, making the contents of the Impala-written <codeph>P1</codeph> table and the
+ Hive-written <codeph>H1</codeph> table appear identical, whether represented as <codeph>TIMESTAMP</codeph>
+ values or the underlying <codeph>BIGINT</codeph> integers:
+ </p>
+
+<codeblock>[localhost:21000] > select x from p1;
++-------------------------------+
+| x |
++-------------------------------+
+| 2015-04-07 15:43:02.892403000 |
+| 2015-04-08 15:43:02.892403000 |
++-------------------------------+
+Fetched 2 row(s) in 0.37s
+[localhost:21000] > select x from h1;
++-------------------------------+
+| x |
++-------------------------------+
+| 2015-04-07 15:43:02.892403000 |
+| 2015-04-08 15:43:02.892403000 |
++-------------------------------+
+Fetched 2 row(s) in 0.19s
+[localhost:21000] > select cast(x as bigint) from p1;
++-------------------+
+| cast(x as bigint) |
++-------------------+
+| 1428446582 |
+| 1428532982 |
++-------------------+
+Fetched 2 row(s) in 0.29s
+[localhost:21000] > select cast(x as bigint) from h1;
++-------------------+
+| cast(x as bigint) |
++-------------------+
+| 1428446582 |
+| 1428532982 |
++-------------------+
+Fetched 2 row(s) in 0.22s
+</codeblock>
+
+ <p>
+ <b>Conversions:</b>
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/timestamp_conversions"/>
+
+ <p>
+ In Impala 1.3 and higher, the <codeph>FROM_UNIXTIME()</codeph> and <codeph>UNIX_TIMESTAMP()</codeph>
+ functions allow a wider range of format strings, with more flexibility in element order, repetition of letter
+ placeholders, and separator characters. In CDH 5.5 / Impala 2.3 and higher, the <codeph>UNIX_TIMESTAMP()</codeph>
+ function also allows a numeric timezone offset to be specified as part of the input string.
+ See <xref href="impala_datetime_functions.xml#datetime_functions"/> for details.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/y2k38"/>
+
+ <p>
+ <b>Partitioning:</b>
+ </p>
+
+ <p>
+ Although you cannot use a <codeph>TIMESTAMP</codeph> column as a partition key, you can extract the
+ individual years, months, days, hours, and so on and partition based on those columns. Because the partition
+ key column values are represented in HDFS directory names, rather than as fields in the data files
+ themselves, you can also keep the original <codeph>TIMESTAMP</codeph> values if desired, without duplicating
+ data or wasting storage space. See <xref href="impala_partitioning.xml#partition_key_columns"/> for more
+ details on partitioning with date and time values.
+ </p>
+
+<codeblock>[localhost:21000] > create table timeline (event string) partitioned by (happened timestamp);
+ERROR: AnalysisException: Type 'TIMESTAMP' is not supported as partition-column type in column: happened
+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+<codeblock>select cast('1966-07-30' as timestamp);
+select cast('1985-09-25 17:45:30.005' as timestamp);
+select cast('08:30:00' as timestamp);
+select hour('1970-01-01 15:30:00'); -- Succeeds, returns 15.
+select hour('1970-01-01 15:30'); -- Returns NULL because seconds field required.
+select hour('1970-01-01 27:30:00'); -- Returns NULL because hour value out of range.
+select dayofweek('2004-06-13'); -- Returns 1, representing Sunday.
+select dayname('2004-06-13'); -- Returns 'Sunday'.
+select date_add('2004-06-13', 365); -- Returns 2005-06-13 with zeros for hh:mm:ss fields.
+select day('2004-06-13'); -- Returns 13.
+select datediff('1989-12-31','1984-09-01'); -- How many days between these 2 dates?
+select now(); -- Returns current date and time in local timezone.
+
+create table dates_and_times (t timestamp);
+insert into dates_and_times values
+ ('1966-07-30'), ('1985-09-25 17:45:30.005'), ('08:30:00'), (now());
+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/null_bad_timestamp_cast"/>
+
+ <p conref="../shared/impala_common.xml#common/partitioning_worrisome"/>
+
+ <p conref="../shared/impala_common.xml#common/hbase_ok"/>
+
+ <p conref="../shared/impala_common.xml#common/parquet_ok"/>
+
+ <p conref="../shared/impala_common.xml#common/text_bulky"/>
+
+<!-- <p conref="/Content/impala_common_xi44078.xml#common/compatibility_blurb"/> -->
+
+ <p conref="../shared/impala_common.xml#common/internals_16_bytes"/>
+
+ <p conref="../shared/impala_common.xml#common/added_forever"/>
+
+ <p conref="../shared/impala_common.xml#common/column_stats_constant"/>
+
+ <p conref="../shared/impala_common.xml#common/restrictions_blurb"/>
+
+ <p>
+ If you cast a <codeph>STRING</codeph> with an unrecognized format to a <codeph>TIMESTAMP</codeph>, the result
+ is <codeph>NULL</codeph> rather than an error. Make sure to test your data pipeline to be sure any textual
+ date and time values are in a format that Impala <codeph>TIMESTAMP</codeph> can recognize.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/avro_no_timestamp"/>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <ul>
+ <li>
+<!-- The Timestamp Literals topic is pretty brief. Consider adding more examples there. -->
+ <xref href="impala_literals.xml#timestamp_literals"/>.
+ </li>
+
+ <li>
+ To convert to or from different date formats, or perform date arithmetic, use the date and time functions
+ described in <xref href="impala_datetime_functions.xml#datetime_functions"/>. In particular, the
+ <codeph>from_unixtime()</codeph> function requires a case-sensitive format string such as
+ <codeph>"yyyy-MM-dd HH:mm:ss.SSSS"</codeph>, matching one of the allowed variations of a
+ <codeph>TIMESTAMP</codeph> value (date plus time, only date, only time, optional fractional seconds).
+ </li>
+
+ <li>
+ See <xref href="impala_langref_unsupported.xml#langref_hiveql_delta"/> for details about differences in
+ <codeph>TIMESTAMP</codeph> handling between Impala and Hive.
+ </li>
+ </ul>
+
+ </conbody>
+
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_tinyint.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_tinyint.xml b/docs/topics/impala_tinyint.xml
new file mode 100644
index 0000000..2b1b3a8
--- /dev/null
+++ b/docs/topics/impala_tinyint.xml
@@ -0,0 +1,101 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="tinyint">
+
+ <title>TINYINT Data Type</title>
+ <titlealts><navtitle>TINYINT</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="Impala Data Types"/>
+ <data name="Category" value="SQL"/>
+ <data name="Category" value="Data Analysts"/>
+ <data name="Category" value="Developers"/>
+ <data name="Category" value="Schemas"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ A 1-byte integer data type used in <codeph>CREATE TABLE</codeph> and <codeph>ALTER TABLE</codeph> statements.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+ <p>
+ In the column definition of a <codeph>CREATE TABLE</codeph> statement:
+ </p>
+
+<codeblock><varname>column_name</varname> TINYINT</codeblock>
+
+ <p>
+ <b>Range:</b> -128 .. 127. There is no <codeph>UNSIGNED</codeph> subtype.
+ </p>
+
+ <p>
+ <b>Conversions:</b> Impala automatically converts to a larger integer type (<codeph>SMALLINT</codeph>,
+ <codeph>INT</codeph>, or <codeph>BIGINT</codeph>) or a floating-point type (<codeph>FLOAT</codeph> or
+ <codeph>DOUBLE</codeph>). Use <codeph>CAST()</codeph> to convert to <codeph>STRING</codeph> or
+ <codeph>TIMESTAMP</codeph>.
+ <ph conref="../shared/impala_common.xml#common/cast_int_to_timestamp"/>
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+
+ <p>
+ For a convenient and automated way to check the bounds of the <codeph>TINYINT</codeph> type, call the
+ functions <codeph>MIN_TINYINT()</codeph> and <codeph>MAX_TINYINT()</codeph>.
+ </p>
+
+ <p>
+ If an integer value is too large to be represented as a <codeph>TINYINT</codeph>, use a
+ <codeph>SMALLINT</codeph> instead.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/null_bad_numeric_cast"/>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+<codeblock>CREATE TABLE t1 (x TINYINT);
+SELECT CAST(100 AS TINYINT);
+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/parquet_blurb"/>
+
+<!-- Duplicated under TINYINT and SMALLINT. Turn into a conref in both places. -->
+
+ <p rev="1.4.0">
+ Physically, Parquet files represent <codeph>TINYINT</codeph> and <codeph>SMALLINT</codeph> values as 32-bit
+ integers. Although Impala rejects attempts to insert out-of-range values into such columns, if you create a
+ new table with the <codeph>CREATE TABLE ... LIKE PARQUET</codeph> syntax, any <codeph>TINYINT</codeph> or
+ <codeph>SMALLINT</codeph> columns in the original table turn into <codeph>INT</codeph> columns in the new
+ table.
+ </p>
+
+<!-- <p conref="/Content/impala_common_xi44078.xml#common/partitioning_blurb"/> -->
+
+ <p conref="../shared/impala_common.xml#common/hbase_ok"/>
+
+ <p conref="../shared/impala_common.xml#common/text_bulky"/>
+
+<!-- <p conref="/Content/impala_common_xi44078.xml#common/compatibility_blurb"/> -->
+
+ <p conref="../shared/impala_common.xml#common/internals_1_bytes"/>
+
+ <p conref="../shared/impala_common.xml#common/added_forever"/>
+
+ <p conref="../shared/impala_common.xml#common/column_stats_constant"/>
+
+<!-- <p conref="/Content/impala_common_xi44078.xml#common/restrictions_blurb"/> -->
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ <xref href="impala_literals.xml#numeric_literals"/>, <xref href="impala_tinyint.xml#tinyint"/>,
+ <xref href="impala_smallint.xml#smallint"/>, <xref href="impala_int.xml#int"/>,
+ <xref href="impala_bigint.xml#bigint"/>, <xref href="impala_decimal.xml#decimal"/>,
+ <xref href="impala_math_functions.xml#math_functions"/>
+ </p>
+ </conbody>
+</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_truncate_table.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_truncate_table.xml b/docs/topics/impala_truncate_table.xml
new file mode 100644
index 0000000..9f0d00b
--- /dev/null
+++ b/docs/topics/impala_truncate_table.xml
@@ -0,0 +1,151 @@
+<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept rev="2.3.0 5.5.0" id="truncate_table">
+
+ <title>TRUNCATE TABLE Statement (CDH 5.5 or higher only)</title>
+ <titlealts><navtitle>TRUNCATE TABLE</navtitle></titlealts>
+ <prolog>
+ <metadata>
+ <data name="Category" value="Impala"/>
+ <data name="Category" value="SQL"/>
+ </metadata>
+ </prolog>
+
+ <conbody>
+
+ <p>
+ <indexterm audience="Cloudera">TRUNCATE TABLE statement</indexterm>
+ Removes the data from an Impala table while leaving the table itself.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<!-- <codeblock>TRUNCATE TABLE [IF EXISTS] [<varname>db_name</varname>.]<varname>table_name</varname></codeblock> -->
+<codeblock>TRUNCATE TABLE [<varname>db_name</varname>.]<varname>table_name</varname></codeblock>
+
+ <p conref="../shared/impala_common.xml#common/ddl_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+
+ <p>
+ Often used to empty tables that are used during ETL cycles, after the data has been copied to another
+ table for the next stage of processing. This statement is a low-overhead alternative to dropping and
+ recreating the table, or using <codeph>INSERT OVERWRITE</codeph> to replace the data during the
+ next ETL cycle.
+ </p>
+
+ <p>
+ This statement removes all the data and associated data files in the table. It can remove data files from internal tables,
+ external tables, partitioned tables, and tables mapped to HBase or the Amazon Simple Storage Service (S3).
+ The data removal applies to the entire table, including all partitions of a partitioned table.
+ </p>
+
+ <p>
+ Any statistics produced by the <codeph>COMPUTE STATS</codeph> statement are reset when the data is removed.
+ </p>
+
+ <p>
+ Make sure that you are in the correct database before truncating a table, either by issuing a
+ <codeph>USE</codeph> statement first or by using a fully qualified name
+ <codeph><varname>db_name</varname>.<varname>table_name</varname></codeph>.
+ </p>
+
+<!-- IF EXISTS apparently not implemented for this first go-round. Filing a JIRA about that:
+ <p>
+ The optional <codeph>IF EXISTS</codeph> clause makes the statement succeed whether or not the table exists.
+ If the table does exist, it is truncated; if it does not exist, the statement has no effect. This capability is
+ useful in standardized setup scripts that might be run both before and after some of the tables exist.
+ </p>
+-->
+
+ <p>
+ Any HDFS data files removed by this statement go into the HDFS trashcan, from which you can recover them
+ within a defined time interval if this operation turns out to be a mistake.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/disk_space_blurb"/>
+
+ <p conref="../shared/impala_common.xml#common/s3_blurb"/>
+ <p rev="2.2.0">
+ Although Impala cannot write new data to a table stored in the Amazon
+ S3 filesystem, the <codeph>TRUNCATE TABLE</codeph> statement can remove data files from S3.
+ See <xref href="impala_s3.xml#s3"/> for details about working with S3 tables.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/cancel_blurb_no"/>
+
+ <p conref="../shared/impala_common.xml#common/permissions_blurb"/>
+ <p rev="CDH-19187">
+ The user ID that the <cmdname>impalad</cmdname> daemon runs under,
+ typically the <codeph>impala</codeph> user, must have write
+ permission for all the files and directories that make up the table.
+ </p>
+
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+ <p>
+ The following example shows a table containing some data and with table and column statistics.
+ After the <codeph>TRUNCATE TABLE</codeph> statement, the data is removed and the statistics
+ are reset.
+ </p>
+
+<codeblock>CREATE TABLE truncate_demo (x INT);
+INSERT INTO truncate_demo VALUES (1), (2), (4), (8);
+SELECT COUNT(*) FROM truncate_demo;
++----------+
+| count(*) |
++----------+
+| 4 |
++----------+
+COMPUTE STATS truncate_demo;
++-----------------------------------------+
+| summary |
++-----------------------------------------+
+| Updated 1 partition(s) and 1 column(s). |
++-----------------------------------------+
+SHOW TABLE STATS truncate_demo;
++-------+--------+------+--------------+-------------------+--------+-------------------+
+| #Rows | #Files | Size | Bytes Cached | Cache Replication | Format | Incremental stats |
++-------+--------+------+--------------+-------------------+--------+-------------------+
+| 4 | 1 | 8B | NOT CACHED | NOT CACHED | TEXT | false |
++-------+--------+------+--------------+-------------------+--------+-------------------+
+SHOW COLUMN STATS truncate_demo;
++--------+------+------------------+--------+----------+----------+
+| Column | Type | #Distinct Values | #Nulls | Max Size | Avg Size |
++--------+------+------------------+--------+----------+----------+
+| x | INT | 4 | -1 | 4 | 4 |
++--------+------+------------------+--------+----------+----------+
+
+-- After this statement, the data and the table/column stats will be gone.
+TRUNCATE TABLE truncate_demo;
+
+SELECT COUNT(*) FROM truncate_demo;
++----------+
+| count(*) |
++----------+
+| 0 |
++----------+
+SHOW TABLE STATS truncate_demo;
++-------+--------+------+--------------+-------------------+--------+-------------------+
+| #Rows | #Files | Size | Bytes Cached | Cache Replication | Format | Incremental stats |
++-------+--------+------+--------------+-------------------+--------+-------------------+
+| -1 | 0 | 0B | NOT CACHED | NOT CACHED | TEXT | false |
++-------+--------+------+--------------+-------------------+--------+-------------------+
+SHOW COLUMN STATS truncate_demo;
++--------+------+------------------+--------+----------+----------+
+| Column | Type | #Distinct Values | #Nulls | Max Size | Avg Size |
++--------+------+------------------+--------+----------+----------+
+| x | INT | -1 | -1 | 4 | 4 |
++--------+------+------------------+--------+----------+----------+
+</codeblock>
+
+ <p conref="../shared/impala_common.xml#common/related_info"/>
+
+ <p>
+ <xref href="impala_tables.xml#tables"/>,
+ <xref href="impala_alter_table.xml#alter_table"/>, <xref href="impala_create_table.xml#create_table"/>,
+ <xref href="impala_partitioning.xml#partitioning"/>, <xref href="impala_tables.xml#internal_tables"/>,
+ <xref href="impala_tables.xml#external_tables"/>
+ </p>
+
+ </conbody>
+</concept>