You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by jb...@apache.org on 2017/04/12 18:25:43 UTC
[39/51] [partial] incubator-impala git commit: IMPALA-4181 [DOCS]
Publish rendered Impala documentation to ASF site
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/75c46918/docs/build/html/topics/impala_create_table.html
----------------------------------------------------------------------
diff --git a/docs/build/html/topics/impala_create_table.html b/docs/build/html/topics/impala_create_table.html
new file mode 100644
index 0000000..2f88c58
--- /dev/null
+++ b/docs/build/html/topics/impala_create_table.html
@@ -0,0 +1,1250 @@
+<!DOCTYPE html
+ SYSTEM "about:legacy-compat">
+<html lang="en"><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8"><meta charset="UTF-8"><meta name="copyright" content="(C) Copyright 2017"><meta name="DC.rights.owner" content="(C) Copyright 2017"><meta name="DC.Type" content="concept"><meta name="DC.Relation" scheme="URI" content="../topics/impala_langref_sql.html"><meta name="prodname" content="Impala"><meta name="prodname" content="Impala"><meta name="version" content="Impala 2.8.x"><meta name="version" content="Impala 2.8.x"><meta name="DC.Format" content="XHTML"><meta name="DC.Identifier" content="create_table"><link rel="stylesheet" type="text/css" href="../commonltr.css"><title>CREATE TABLE Statement</title></head><body class="impala sql_statement" id="create_table"><main role="main"><article role="article" aria-labelledby="ariaid-title1">
+
+ <h1 class="title topictitle1 impala_title sql_statement_title" id="ariaid-title1">CREATE TABLE Statement</h1>
+
+
+
+
+
+ <div class="body conbody">
+
+ <p class="p">
+
+ Creates a new table and specifies its characteristics. While creating a table, you
+ optionally specify aspects such as:
+ </p>
+
+ <ul class="ul">
+ <li class="li">
+ Whether the table is internal or external.
+ </li>
+
+ <li class="li">
+ The columns and associated data types.
+ </li>
+
+ <li class="li">
+ The columns used for physically partitioning the data.
+ </li>
+
+ <li class="li">
+ The file format for data files.
+ </li>
+
+ <li class="li">
+ The HDFS directory where the data files are located.
+ </li>
+ </ul>
+
+ <p class="p">
+ <strong class="ph b">Syntax:</strong>
+ </p>
+
+ <p class="p">
+ The general syntax for creating a table and specifying its columns is as follows:
+ </p>
+
+ <p class="p">
+ <strong class="ph b">Explicit column definitions:</strong>
+ </p>
+
+<pre class="pre codeblock"><code>CREATE [EXTERNAL] TABLE [IF NOT EXISTS] [<var class="keyword varname">db_name</var>.]<var class="keyword varname">table_name</var>
+ (<var class="keyword varname">col_name</var> <var class="keyword varname">data_type</var>
+ [COMMENT '<var class="keyword varname">col_comment</var>']
+ [, ...]
+ )
+ [PARTITIONED BY (<var class="keyword varname">col_name</var> <var class="keyword varname">data_type</var> [COMMENT '<var class="keyword varname">col_comment</var>'], ...)]
+ [COMMENT '<var class="keyword varname">table_comment</var>']
+ [WITH SERDEPROPERTIES ('<var class="keyword varname">key1</var>'='<var class="keyword varname">value1</var>', '<var class="keyword varname">key2</var>'='<var class="keyword varname">value2</var>', ...)]
+ [
+ [ROW FORMAT <var class="keyword varname">row_format</var>] [STORED AS <var class="keyword varname">file_format</var>]
+ ]
+ [LOCATION '<var class="keyword varname">hdfs_path</var>']
+ [TBLPROPERTIES ('<var class="keyword varname">key1</var>'='<var class="keyword varname">value1</var>', '<var class="keyword varname">key2</var>'='<var class="keyword varname">value2</var>', ...)]
+<span class="ph"> [CACHED IN '<var class="keyword varname">pool_name</var>'</span> <span class="ph">[WITH REPLICATION = <var class="keyword varname">integer</var>]</span> | UNCACHED]
+</code></pre>
+
+ <p class="p">
+ <strong class="ph b">CREATE TABLE AS SELECT:</strong>
+ </p>
+
+<pre class="pre codeblock"><code>CREATE [EXTERNAL] TABLE [IF NOT EXISTS] [<var class="keyword varname">db_name</var>.]<var class="keyword varname">table_name</var>
+ <span class="ph">[PARTITIONED BY (<var class="keyword varname">col_name</var>[, ...])]</span>
+ [COMMENT '<var class="keyword varname">table_comment</var>']
+ [WITH SERDEPROPERTIES ('<var class="keyword varname">key1</var>'='<var class="keyword varname">value1</var>', '<var class="keyword varname">key2</var>'='<var class="keyword varname">value2</var>', ...)]
+ [
+ [ROW FORMAT <var class="keyword varname">row_format</var>] <span class="ph">[STORED AS <var class="keyword varname">ctas_file_format</var>]</span>
+ ]
+ [LOCATION '<var class="keyword varname">hdfs_path</var>']
+ [TBLPROPERTIES ('<var class="keyword varname">key1</var>'='<var class="keyword varname">value1</var>', '<var class="keyword varname">key2</var>'='<var class="keyword varname">value2</var>', ...)]
+<span class="ph"> [CACHED IN '<var class="keyword varname">pool_name</var>'</span> <span class="ph">[WITH REPLICATION = <var class="keyword varname">integer</var>]</span> | UNCACHED]
+AS
+ <var class="keyword varname">select_statement</var></code></pre>
+
+<pre class="pre codeblock"><code>primitive_type:
+ TINYINT
+ | SMALLINT
+ | INT
+ | BIGINT
+ | BOOLEAN
+ | FLOAT
+ | DOUBLE
+ <span class="ph">| DECIMAL</span>
+ | STRING
+ <span class="ph">| CHAR</span>
+ <span class="ph">| VARCHAR</span>
+ | TIMESTAMP
+
+<span class="ph">complex_type:
+ struct_type
+ | array_type
+ | map_type
+
+struct_type: STRUCT < <var class="keyword varname">name</var> : <var class="keyword varname">primitive_or_complex_type</var> [COMMENT '<var class="keyword varname">comment_string</var>'], ... >
+
+array_type: ARRAY < <var class="keyword varname">primitive_or_complex_type</var> >
+
+map_type: MAP < <var class="keyword varname">primitive_type</var>, <var class="keyword varname">primitive_or_complex_type</var> >
+</span>
+row_format:
+ DELIMITED [FIELDS TERMINATED BY '<var class="keyword varname">char</var>' [ESCAPED BY '<var class="keyword varname">char</var>']]
+ [LINES TERMINATED BY '<var class="keyword varname">char</var>']
+
+file_format:
+ PARQUET
+ | TEXTFILE
+ | AVRO
+ | SEQUENCEFILE
+ | RCFILE
+
+<span class="ph">ctas_file_format:
+ PARQUET
+ | TEXTFILE</span>
+</code></pre>
+
+ <p class="p">
+ <strong class="ph b">Column definitions inferred from data file:</strong>
+ </p>
+
+<pre class="pre codeblock"><code>CREATE [EXTERNAL] TABLE [IF NOT EXISTS] [<var class="keyword varname">db_name</var>.]<var class="keyword varname">table_name</var>
+ LIKE PARQUET '<var class="keyword varname">hdfs_path_of_parquet_file</var>'
+ [COMMENT '<var class="keyword varname">table_comment</var>']
+ [PARTITIONED BY (<var class="keyword varname">col_name</var> <var class="keyword varname">data_type</var> [COMMENT '<var class="keyword varname">col_comment</var>'], ...)]
+ [WITH SERDEPROPERTIES ('<var class="keyword varname">key1</var>'='<var class="keyword varname">value1</var>', '<var class="keyword varname">key2</var>'='<var class="keyword varname">value2</var>', ...)]
+ [
+ [ROW FORMAT <var class="keyword varname">row_format</var>] [STORED AS <var class="keyword varname">file_format</var>]
+ ]
+ [LOCATION '<var class="keyword varname">hdfs_path</var>']
+ [TBLPROPERTIES ('<var class="keyword varname">key1</var>'='<var class="keyword varname">value1</var>', '<var class="keyword varname">key2</var>'='<var class="keyword varname">value2</var>', ...)]
+<span class="ph"> [CACHED IN '<var class="keyword varname">pool_name</var>'</span> <span class="ph">[WITH REPLICATION = <var class="keyword varname">integer</var>]</span> | UNCACHED]
+data_type:
+ <var class="keyword varname">primitive_type</var>
+ | array_type
+ | map_type
+ | struct_type
+</code></pre>
+
+ <p class="p">
+ <strong class="ph b">Kudu tables:</strong>
+ </p>
+
+<pre class="pre codeblock"><code>CREATE TABLE [IF NOT EXISTS] [<var class="keyword varname">db_name</var>.]<var class="keyword varname">table_name</var>
+ (<var class="keyword varname">col_name</var> <var class="keyword varname">data_type</var>
+ <span class="ph">[<var class="keyword varname">kudu_column_attribute</var> ...]</span>
+ [COMMENT '<var class="keyword varname">col_comment</var>']
+ [, ...]
+ [PRIMARY KEY (<var class="keyword varname">col_name</var>[, ...])]
+ )
+  <span class="ph">[PARTITION BY <var class="keyword varname">kudu_partition_clause</var>]</span>
+ [COMMENT '<var class="keyword varname">table_comment</var>']
+ STORED AS KUDU
+ [TBLPROPERTIES ('<var class="keyword varname">key1</var>'='<var class="keyword varname">value1</var>', '<var class="keyword varname">key2</var>'='<var class="keyword varname">value2</var>', ...)]
+</code></pre>
+
+ <div class="p">
+ <strong class="ph b">Kudu column attributes:</strong>
+<pre class="pre codeblock"><code>
+ PRIMARY KEY
+| [NOT] NULL
+| ENCODING <var class="keyword varname">codec</var>
+| COMPRESSION <var class="keyword varname">algorithm</var>
+| DEFAULT <var class="keyword varname">constant</var>
+| BLOCK_SIZE <var class="keyword varname">number</var>
+</code></pre>
+ </div>
+
+ <div class="p">
+ <strong class="ph b">kudu_partition_clause:</strong>
+<pre class="pre codeblock"><code>
+kudu_partition_clause ::= PARTITION BY [<var class="keyword varname">hash_clause</var>] [, <var class="keyword varname">range_clause</var> [ , <var class="keyword varname">range_clause</var> ] ]
+
+hash_clause ::=
+ HASH [ (<var class="keyword varname">pk_col</var> [, ...]) ]
+ PARTITIONS <var class="keyword varname">n</var>
+
+range_clause ::=
+ RANGE [ (<var class="keyword varname">pk_col</var> [, ...]) ]
+ (
+ {
+ PARTITION <var class="keyword varname">constant_expression</var> <var class="keyword varname">range_comparison_operator</var> VALUES <var class="keyword varname">range_comparison_operator</var> <var class="keyword varname">constant_expression</var>
+ | PARTITION VALUE = <var class="keyword varname">constant_expression_or_tuple</var>
+ }
+ [, ...]
+ )
+
+range_comparison_operator ::= { < | <= }
+</code></pre>
+ </div>
+
+ <p class="p">
+ <strong class="ph b">External Kudu tables:</strong>
+ </p>
+
+<pre class="pre codeblock"><code>CREATE EXTERNAL TABLE [IF NOT EXISTS] [<var class="keyword varname">db_name</var>.]<var class="keyword varname">table_name</var>
+ [COMMENT '<var class="keyword varname">table_comment</var>']
+ STORED AS KUDU
+ [TBLPROPERTIES ('kudu.table_name'='<var class="keyword varname">internal_kudu_name</var>')]
+</code></pre>
+
+ <p class="p">
+ <strong class="ph b">CREATE TABLE AS SELECT for Kudu tables:</strong>
+ </p>
+
+<pre class="pre codeblock"><code>CREATE TABLE [IF NOT EXISTS] [<var class="keyword varname">db_name</var>.]<var class="keyword varname">table_name</var>
+ [PRIMARY KEY (<var class="keyword varname">col_name</var>[, ...])]
+  [PARTITION BY <var class="keyword varname">kudu_partition_clause</var>]
+ [COMMENT '<var class="keyword varname">table_comment</var>']
+ STORED AS KUDU
+ [TBLPROPERTIES ('<var class="keyword varname">key1</var>'='<var class="keyword varname">value1</var>', '<var class="keyword varname">key2</var>'='<var class="keyword varname">value2</var>', ...)]
+AS
+ <var class="keyword varname">select_statement</var></code></pre>
+
+ <p class="p">
+ <strong class="ph b">Statement type:</strong> DDL
+ </p>
+
+
+
+ <p class="p">
+ <strong class="ph b">Column definitions:</strong>
+ </p>
+
+ <p class="p">
+ Depending on the form of the <code class="ph codeph">CREATE TABLE</code> statement, the column
+ definitions are required or not allowed.
+ </p>
+
+ <p class="p">
+ With the <code class="ph codeph">CREATE TABLE AS SELECT</code> and <code class="ph codeph">CREATE TABLE LIKE</code>
+ syntax, you do not specify the columns at all; the column names and types are derived from
+ the source table, query, or data file.
+ </p>
+
+ <p class="p">
+      With the basic <code class="ph codeph">CREATE TABLE</code> syntax, you must list one or more columns,
+      specifying for each column its name, type, and optionally a comment, in addition to any columns used as partitioning
+ keys. There is one exception where the column list is not required: when creating an Avro
+ table with the <code class="ph codeph">STORED AS AVRO</code> clause, you can omit the list of columns
+ and specify the same metadata as part of the <code class="ph codeph">TBLPROPERTIES</code> clause.
+ </p>
+
+ <p class="p">
+ <strong class="ph b">Complex type considerations:</strong>
+ </p>
+
+ <p class="p">
+ The Impala complex types (<code class="ph codeph">STRUCT</code>, <code class="ph codeph">ARRAY</code>, or
+ <code class="ph codeph">MAP</code>) are available in <span class="keyword">Impala 2.3</span> and higher.
+ Because you can nest these types (for example, to make an array of maps or a struct with
+ an array field), these types are also sometimes referred to as nested types. See
+ <a class="xref" href="impala_complex_types.html#complex_types">Complex Types (Impala 2.3 or higher only)</a> for usage details.
+ </p>
+
+
+
+ <p class="p">
+ Impala can create tables containing complex type columns, with any supported file format.
+ Because currently Impala can only query complex type columns in Parquet tables, creating
+ tables with complex type columns and other file formats such as text is of limited use.
+ For example, you might create a text table including some columns with complex types with
+      Impala, and use Hive as part of your ETL pipeline to ingest the nested type data and copy it to an
+ identical Parquet table. Or you might create a partitioned table containing complex type
+ columns using one file format, and use <code class="ph codeph">ALTER TABLE</code> to change the file
+ format of individual partitions to Parquet; Impala can then query only the Parquet-format
+ partitions in that table.
+ </p>
+
+ <p class="p">
+ Partitioned tables can contain complex type columns.
+ All the partition key columns must be scalar types.
+ </p>
+
+ <p class="p">
+ <strong class="ph b">Internal and external tables (EXTERNAL and LOCATION clauses):</strong>
+ </p>
+
+ <p class="p">
+ By default, Impala creates an <span class="q">"internal"</span> table, where Impala manages the underlying
+ data files for the table, and physically deletes the data files when you drop the table.
+ If you specify the <code class="ph codeph">EXTERNAL</code> clause, Impala treats the table as an
+ <span class="q">"external"</span> table, where the data files are typically produced outside Impala and
+ queried from their original locations in HDFS, and Impala leaves the data files in place
+ when you drop the table. For details about internal and external tables, see
+ <a class="xref" href="impala_tables.html#tables">Overview of Impala Tables</a>.
+ </p>
+
+ <p class="p">
+ Typically, for an external table you include a <code class="ph codeph">LOCATION</code> clause to specify
+ the path to the HDFS directory where Impala reads and writes files for the table. For
+ example, if your data pipeline produces Parquet files in the HDFS directory
+ <span class="ph filepath">/user/etl/destination</span>, you might create an external table as follows:
+ </p>
+
+<pre class="pre codeblock"><code>CREATE EXTERNAL TABLE external_parquet (c1 INT, c2 STRING, c3 TIMESTAMP)
+ STORED AS PARQUET LOCATION '/user/etl/destination';
+</code></pre>
+
+ <p class="p">
+ Although the <code class="ph codeph">EXTERNAL</code> and <code class="ph codeph">LOCATION</code> clauses are often
+ specified together, <code class="ph codeph">LOCATION</code> is optional for external tables, and you can
+ also specify <code class="ph codeph">LOCATION</code> for internal tables. The difference is all about
+ whether Impala <span class="q">"takes control"</span> of the underlying data files and moves them when you
+ rename the table, or deletes them when you drop the table. For more about internal and
+ external tables and how they interact with the <code class="ph codeph">LOCATION</code> attribute, see
+ <a class="xref" href="impala_tables.html#tables">Overview of Impala Tables</a>.
+ </p>
+
+ <p class="p">
+ <strong class="ph b">Partitioned tables (PARTITIONED BY clause):</strong>
+ </p>
+
+ <p class="p">
+ The <code class="ph codeph">PARTITIONED BY</code> clause divides the data files based on the values from
+ one or more specified columns. Impala queries can use the partition metadata to minimize
+ the amount of data that is read from disk or transmitted across the network, particularly
+ during join queries. For details about partitioning, see
+ <a class="xref" href="impala_partitioning.html#partitioning">Partitioning for Impala Tables</a>.
+ </p>
+
+ <div class="note note note_note"><span class="note__title notetitle">Note:</span>
+ <p class="p">
+ All Kudu tables require partitioning, which involves different syntax than non-Kudu
+ tables. See the <code class="ph codeph">PARTITION BY</code> clause, rather than <code class="ph codeph">PARTITIONED
+ BY</code>, for Kudu tables.
+ </p>
+ </div>
+
+ <p class="p">
+ Prior to <span class="keyword">Impala 2.5</span>, you could use a partitioned table as the
+ source and copy data from it, but could not specify any partitioning clauses for the new
+ table. In <span class="keyword">Impala 2.5</span> and higher, you can now use the
+ <code class="ph codeph">PARTITIONED BY</code> clause with a <code class="ph codeph">CREATE TABLE AS SELECT</code>
+ statement. See the examples under the following discussion of the <code class="ph codeph">CREATE TABLE AS
+ SELECT</code> syntax variation.
+ </p>
+
+ <p class="p">
+ <strong class="ph b">Kudu considerations:</strong>
+ </p>
+
+ <p class="p">
+ Because Kudu tables do not support clauses related to HDFS and S3 data files and
+ partitioning mechanisms, the syntax associated with the <code class="ph codeph">STORED AS KUDU</code>
+ clause is shown separately in the above syntax descriptions. Kudu tables have their own
+ syntax for <code class="ph codeph">CREATE TABLE</code>, <code class="ph codeph">CREATE EXTERNAL TABLE</code>, and
+ <code class="ph codeph">CREATE TABLE AS SELECT</code>. All internal Kudu tables require a
+ <code class="ph codeph">PARTITION BY</code> clause, different than the <code class="ph codeph">PARTITIONED BY</code>
+ clause for HDFS-backed tables.
+ </p>
+
+ <p class="p">
+ Here are some examples of creating empty Kudu tables:
+ </p>
+
+<pre class="pre codeblock"><code>
+-- Single-column primary key.
+CREATE TABLE kudu_t1 (id BIGINT PRIMARY KEY, s STRING, b BOOLEAN)
+ PARTITION BY HASH (id) PARTITIONS 20 STORED AS KUDU;
+
+-- Multi-column primary key.
+CREATE TABLE kudu_t2 (id BIGINT, s STRING, b BOOLEAN, PRIMARY KEY (id,s))
+ PARTITION BY HASH (s) PARTITIONS 30 STORED AS KUDU;
+
+-- Meaningful primary key column is good for range partitioning.
+CREATE TABLE kudu_t3 (id BIGINT, year INT, s STRING,
+ b BOOLEAN, PRIMARY KEY (id,year))
+ PARTITION BY HASH (id) PARTITIONS 20,
+ RANGE (year) (PARTITION 1980 <= VALUES < 1990,
+ PARTITION 1990 <= VALUES < 2000,
+ PARTITION VALUE = 2001,
+ PARTITION 2001 < VALUES)
+ STORED AS KUDU;
+
+</code></pre>
+
+ <p class="p">
+ Here is an example of creating an external Kudu table:
+ </p>
+
+<pre class="pre codeblock"><code>
+-- Inherits column definitions from original table.
+-- For tables created through Impala, the kudu.table_name property
+-- comes from DESCRIBE FORMATTED output from the original table.
+CREATE EXTERNAL TABLE external_t1 STORED AS KUDU
+ TBLPROPERTIES ('kudu.table_name'='kudu_tbl_created_via_api');
+
+</code></pre>
+
+ <p class="p">
+ Here is an example of <code class="ph codeph">CREATE TABLE AS SELECT</code> syntax for a Kudu table:
+ </p>
+
+<pre class="pre codeblock"><code>
+-- The CTAS statement defines the primary key and partitioning scheme.
+-- The rest of the column definitions are derived from the select list.
+CREATE TABLE ctas_t1
+ PRIMARY KEY (id) PARTITION BY HASH (id) PARTITIONS 10
+ STORED AS KUDU
+ AS SELECT id, s FROM kudu_t1;
+
+</code></pre>
+
+ <p class="p">
+ The following <code class="ph codeph">CREATE TABLE</code> clauses are not supported for Kudu tables:
+ </p>
+
+ <ul class="ul">
+ <li class="li">
+ <code class="ph codeph">PARTITIONED BY</code> (Kudu tables use the clause <code class="ph codeph">PARTITION
+ BY</code> instead)
+ </li>
+
+ <li class="li">
+ <code class="ph codeph">LOCATION</code>
+ </li>
+
+ <li class="li">
+      <code class="ph codeph">ROW FORMAT</code>
+ </li>
+
+ <li class="li">
+ <code class="ph codeph">CACHED IN | UNCACHED</code>
+ </li>
+
+ <li class="li">
+ <code class="ph codeph">WITH SERDEPROPERTIES</code>
+ </li>
+ </ul>
+
+ <p class="p">
+ For more on the <code class="ph codeph">PRIMARY KEY</code> clause, see
+ <a class="xref" href="impala_kudu.html#kudu_primary_key">Primary Key Columns for Kudu Tables</a> and
+ <a class="xref" href="impala_kudu.html#kudu_primary_key_attribute">PRIMARY KEY Attribute</a>.
+ </p>
+
+ <p class="p">
+ For more on the <code class="ph codeph">NULL</code> and <code class="ph codeph">NOT NULL</code> attributes, see
+ <a class="xref" href="impala_kudu.html#kudu_not_null_attribute">NULL | NOT NULL Attribute</a>.
+ </p>
+
+ <p class="p">
+ For more on the <code class="ph codeph">ENCODING</code> attribute, see
+ <a class="xref" href="impala_kudu.html#kudu_encoding_attribute">ENCODING Attribute</a>.
+ </p>
+
+ <p class="p">
+ For more on the <code class="ph codeph">COMPRESSION</code> attribute, see
+ <a class="xref" href="impala_kudu.html#kudu_compression_attribute">COMPRESSION Attribute</a>.
+ </p>
+
+ <p class="p">
+ For more on the <code class="ph codeph">DEFAULT</code> attribute, see
+ <a class="xref" href="impala_kudu.html#kudu_default_attribute">DEFAULT Attribute</a>.
+ </p>
+
+ <p class="p">
+ For more on the <code class="ph codeph">BLOCK_SIZE</code> attribute, see
+ <a class="xref" href="impala_kudu.html#kudu_block_size_attribute">BLOCK_SIZE Attribute</a>.
+ </p>
+
+ <p class="p">
+ <strong class="ph b">Partitioning for Kudu tables (PARTITION BY clause)</strong>
+ </p>
+
+ <p class="p">
+ For Kudu tables, you specify logical partitioning across one or more columns using the
+ <code class="ph codeph">PARTITION BY</code> clause. In contrast to partitioning for HDFS-based tables,
+ multiple values for a partition key column can be located in the same partition. The
+ optional <code class="ph codeph">HASH</code> clause lets you divide one or a set of partition key
+ columns into a specified number of buckets. You can use more than one
+ <code class="ph codeph">HASH</code> clause, specifying a distinct set of partition key columns for each.
+ The optional <code class="ph codeph">RANGE</code> clause further subdivides the partitions, based on a
+ set of comparison operations for the partition key columns.
+ </p>
+
+ <p class="p">
+ Here are some examples of the <code class="ph codeph">PARTITION BY HASH</code> syntax:
+ </p>
+
+<pre class="pre codeblock"><code>
+-- Apply hash function to 1 primary key column.
+create table hash_t1 (x bigint, y bigint, s string, primary key (x,y))
+ partition by hash (x) partitions 10
+ stored as kudu;
+
+-- Apply hash function to a different primary key column.
+create table hash_t2 (x bigint, y bigint, s string, primary key (x,y))
+ partition by hash (y) partitions 10
+ stored as kudu;
+
+-- Apply hash function to both primary key columns.
+-- In this case, the total number of partitions is 10.
+create table hash_t3 (x bigint, y bigint, s string, primary key (x,y))
+ partition by hash (x,y) partitions 10
+ stored as kudu;
+
+-- When the column list is omitted, apply hash function to all primary key columns.
+create table hash_t4 (x bigint, y bigint, s string, primary key (x,y))
+ partition by hash partitions 10
+ stored as kudu;
+
+-- Hash the X values independently from the Y values.
+-- In this case, the total number of partitions is 10 x 20.
+create table hash_t5 (x bigint, y bigint, s string, primary key (x,y))
+ partition by hash (x) partitions 10, hash (y) partitions 20
+ stored as kudu;
+
+</code></pre>
+
+ <p class="p">
+ Here are some examples of the <code class="ph codeph">PARTITION BY RANGE</code> syntax:
+ </p>
+
+<pre class="pre codeblock"><code>
+-- Create partitions that cover every possible value of X.
+-- Ranges that span multiple values use the keyword VALUES between
+-- a pair of < and <= comparisons.
+create table range_t1 (x bigint, s string, s2 string, primary key (x, s))
+ partition by range (x)
+ (
+ partition 0 <= values <= 49, partition 50 <= values <= 100,
+ partition values < 0, partition 100 < values
+ )
+ stored as kudu;
+
+-- Create partitions that cover some possible values of X.
+-- Values outside the covered range(s) are rejected.
+-- New range partitions can be added through ALTER TABLE.
+create table range_t2 (x bigint, s string, s2 string, primary key (x, s))
+ partition by range (x)
+ (
+ partition 0 <= values <= 49, partition 50 <= values <= 100
+ )
+ stored as kudu;
+
+-- A range can also specify a single specific value, using the keyword VALUE
+-- with an = comparison.
+create table range_t3 (x bigint, s string, s2 string, primary key (x, s))
+ partition by range (s)
+ (
+ partition value = 'Yes', partition value = 'No', partition value = 'Maybe'
+ )
+ stored as kudu;
+
+-- Using multiple columns in the RANGE clause and tuples inside the partition spec
+-- only works for partitions specified with the VALUE= syntax.
+create table range_t4 (x bigint, s string, s2 string, primary key (x, s))
+ partition by range (x,s)
+ (
+ partition value = (0,'zero'), partition value = (1,'one'), partition value = (2,'two')
+ )
+ stored as kudu;
+
+</code></pre>
+
+ <p class="p">
+ Here are some examples combining both <code class="ph codeph">HASH</code> and <code class="ph codeph">RANGE</code>
+ syntax for the <code class="ph codeph">PARTITION BY</code> clause:
+ </p>
+
+<pre class="pre codeblock"><code>
+-- Values from each range partition are hashed into 10 associated buckets.
+-- Total number of partitions in this case is 10 x 2.
+create table combined_t1 (x bigint, s string, s2 string, primary key (x, s))
+ partition by hash (x) partitions 10, range (x)
+ (
+ partition 0 <= values <= 49, partition 50 <= values <= 100
+ )
+ stored as kudu;
+
+-- The hash partitioning and range partitioning can apply to different columns.
+-- But all the columns used in either partitioning scheme must be from the primary key.
+create table combined_t2 (x bigint, s string, s2 string, primary key (x, s))
+ partition by hash (s) partitions 10, range (x)
+ (
+ partition 0 <= values <= 49, partition 50 <= values <= 100
+ )
+ stored as kudu;
+
+</code></pre>
+
+ <p class="p">
+ For more usage details and examples of the Kudu partitioning syntax, see
+ <a class="xref" href="impala_kudu.html">Using Impala to Query Kudu Tables</a>.
+ </p>
+
+ <p class="p">
+ <strong class="ph b">Specifying file format (STORED AS and ROW FORMAT clauses):</strong>
+ </p>
+
+ <p class="p">
+ The <code class="ph codeph">STORED AS</code> clause identifies the format of the underlying data files.
+ Currently, Impala can query more types of file formats than it can create or insert into.
+ Use Hive to perform any create or data load operations that are not currently available in
+ Impala. For example, Impala can create an Avro, SequenceFile, or RCFile table but cannot
+ insert data into it. There are also Impala-specific procedures for using compression with
+ each kind of file format. For details about working with data files of various formats,
+ see <a class="xref" href="impala_file_formats.html#file_formats">How Impala Works with Hadoop File Formats</a>.
+ </p>
+
+ <div class="note note note_note"><span class="note__title notetitle">Note:</span>
+ In Impala 1.4.0 and higher, Impala can create Avro tables, which formerly required doing
+ the <code class="ph codeph">CREATE TABLE</code> statement in Hive. See
+ <a class="xref" href="impala_avro.html#avro">Using the Avro File Format with Impala Tables</a> for details and examples.
+ </div>
+
+ <p class="p">
+ By default (when no <code class="ph codeph">STORED AS</code> clause is specified), data files in Impala
+ tables are created as text files with Ctrl-A (hex 01) characters as the delimiter.
+
+ Specify the <code class="ph codeph">ROW FORMAT DELIMITED</code> clause to produce or ingest data files
+ that use a different delimiter character such as tab or <code class="ph codeph">|</code>, or a different
+ line end character such as carriage return or newline. When specifying delimiter and line
+ end characters with the <code class="ph codeph">FIELDS TERMINATED BY</code> and <code class="ph codeph">LINES TERMINATED
+ BY</code> clauses, use <code class="ph codeph">'\t'</code> for tab, <code class="ph codeph">'\n'</code> for newline
+ or linefeed, <code class="ph codeph">'\r'</code> for carriage return, and
+ <code class="ph codeph">\</code><code class="ph codeph">0</code> for ASCII <code class="ph codeph">nul</code> (hex 00). For more
+ examples of text tables, see <a class="xref" href="impala_txtfile.html#txtfile">Using Text Data Files with Impala Tables</a>.
+ </p>
+
+ <p class="p">
+ The <code class="ph codeph">ESCAPED BY</code> clause applies both to text files that you create through
+ an <code class="ph codeph">INSERT</code> statement to an Impala <code class="ph codeph">TEXTFILE</code> table, and to
+ existing data files that you put into an Impala table directory. (You can ingest existing
+ data files either by creating the table with <code class="ph codeph">CREATE EXTERNAL TABLE ...
+ LOCATION</code>, the <code class="ph codeph">LOAD DATA</code> statement, or through an HDFS operation
+ such as <code class="ph codeph">hdfs dfs -put <var class="keyword varname">file</var>
+ <var class="keyword varname">hdfs_path</var></code>.) Choose an escape character that is not used
+ anywhere else in the file, and put it in front of each instance of the delimiter character
+ that occurs within a field value. Surrounding field values with quotation marks does not
+ help Impala to parse fields with embedded delimiter characters; the quotation marks are
+ considered to be part of the column value. If you want to use <code class="ph codeph">\</code> as the
+ escape character, specify the clause in <span class="keyword cmdname">impala-shell</span> as <code class="ph codeph">ESCAPED
+ BY '\\'</code>.
+ </p>
+
+ <div class="note note note_note"><span class="note__title notetitle">Note:</span>
+ The <code class="ph codeph">CREATE TABLE</code> clauses <code class="ph codeph">FIELDS TERMINATED BY</code>, <code class="ph codeph">ESCAPED
+ BY</code>, and <code class="ph codeph">LINES TERMINATED BY</code> have special rules for the string literal used for
+ their argument, because they all require a single character. You can use a regular character surrounded by
+ single or double quotation marks, an octal sequence such as <code class="ph codeph">'\054'</code> (representing a comma),
+ or an integer in the range '-127'..'128' (with quotation marks but no backslash), which is interpreted as a
+ single-byte ASCII character. Negative values are subtracted from 256; for example, <code class="ph codeph">FIELDS
+ TERMINATED BY '-2'</code> sets the field delimiter to ASCII code 254, the <span class="q">"Icelandic Thorn"</span>
+ character used as a delimiter by some data formats.
+ </div>
+
+ <p class="p">
+ <strong class="ph b">Cloning tables (LIKE clause):</strong>
+ </p>
+
+ <p class="p">
+ To create an empty table with the same columns, comments, and other attributes as another
+ table, use the following variation. The <code class="ph codeph">CREATE TABLE ... LIKE</code> form allows
+ a restricted set of clauses, currently only the <code class="ph codeph">LOCATION</code>,
+ <code class="ph codeph">COMMENT</code>, and <code class="ph codeph">STORED AS</code> clauses.
+ </p>
+
+<pre class="pre codeblock"><code>CREATE [EXTERNAL] TABLE [IF NOT EXISTS] [<var class="keyword varname">db_name</var>.]<var class="keyword varname">table_name</var>
+ <span class="ph">LIKE { [<var class="keyword varname">db_name</var>.]<var class="keyword varname">table_name</var> | PARQUET '<var class="keyword varname">hdfs_path_of_parquet_file</var>' }</span>
+ [COMMENT '<var class="keyword varname">table_comment</var>']
+ [STORED AS <var class="keyword varname">file_format</var>]
+ [LOCATION '<var class="keyword varname">hdfs_path</var>']</code></pre>
+
+ <div class="note note note_note"><span class="note__title notetitle">Note:</span>
+ <p class="p">
+ To clone the structure of a table and transfer data into it in a single operation, use
+ the <code class="ph codeph">CREATE TABLE AS SELECT</code> syntax described in the next subsection.
+ </p>
+ </div>
+
+ <p class="p">
+ When you clone the structure of an existing table using the <code class="ph codeph">CREATE TABLE ...
+ LIKE</code> syntax, the new table keeps the same file format as the original one, so you
+ only need to specify the <code class="ph codeph">STORED AS</code> clause if you want to use a different
+ file format, or when specifying a view as the original table. (Creating a table
+ <span class="q">"like"</span> a view produces a text table by default.)
+ </p>
+
+ <p class="p">
+ Although normally Impala cannot create an HBase table directly, Impala can clone the
+ structure of an existing HBase table with the <code class="ph codeph">CREATE TABLE ... LIKE</code>
+ syntax, preserving the file format and metadata from the original table.
+ </p>
+
+ <p class="p">
+ There are some exceptions to the ability to use <code class="ph codeph">CREATE TABLE ... LIKE</code>
+ with an Avro table. For example, you cannot use this technique for an Avro table that is
+ specified with an Avro schema but no columns. When in doubt, check if a <code class="ph codeph">CREATE
+ TABLE ... LIKE</code> operation works in Hive; if not, it typically will not work in
+ Impala either.
+ </p>
+
+ <p class="p">
+ If the original table is partitioned, the new table inherits the same partition key
+ columns. Because the new table is initially empty, it does not inherit the actual
+ partitions that exist in the original one. To create partitions in the new table, insert
+ data or issue <code class="ph codeph">ALTER TABLE ... ADD PARTITION</code> statements.
+ </p>
+
+ <p class="p">
+ Prior to Impala 1.4.0, it was not possible to use the <code class="ph codeph">CREATE TABLE LIKE
+ <var class="keyword varname">view_name</var></code> syntax. In Impala 1.4.0 and higher, you can create a table with the
+ same column definitions as a view using the <code class="ph codeph">CREATE TABLE LIKE</code> technique. Although
+ <code class="ph codeph">CREATE TABLE LIKE</code> normally inherits the file format of the original table, a view has no
+ underlying file format, so <code class="ph codeph">CREATE TABLE LIKE <var class="keyword varname">view_name</var></code> produces a text
+ table by default. To specify a different file format, include a <code class="ph codeph">STORED AS
+ <var class="keyword varname">file_format</var></code> clause at the end of the <code class="ph codeph">CREATE TABLE LIKE</code>
+ statement.
+ </p>
+
+ <p class="p">
+ Because <code class="ph codeph">CREATE TABLE ... LIKE</code> only manipulates table metadata, not the
+ physical data of the table, issue <code class="ph codeph">INSERT INTO TABLE</code> statements afterward
+ to copy any data from the original table into the new one, optionally converting the data
+ to a new file format. (For some file formats, Impala can do a <code class="ph codeph">CREATE TABLE ...
+ LIKE</code> to create the table, but Impala cannot insert data in that file format; in
+ these cases, you must load the data in Hive. See
+ <a class="xref" href="impala_file_formats.html#file_formats">How Impala Works with Hadoop File Formats</a> for details.)
+ </p>
+
+ <p class="p" id="create_table__ctas">
+ <strong class="ph b">CREATE TABLE AS SELECT:</strong>
+ </p>
+
+ <p class="p">
+ The <code class="ph codeph">CREATE TABLE AS SELECT</code> syntax is a shorthand notation to create a
+ table based on column definitions from another table, and copy data from the source table
+ to the destination table without issuing any separate <code class="ph codeph">INSERT</code> statement.
+ This idiom is so popular that it has its own acronym, <span class="q">"CTAS"</span>.
+ </p>
+
+ <p class="p">
+ The following examples show how to copy data from a source table <code class="ph codeph">T1</code> to a
+ variety of destination tables, applying various transformations to the table properties,
+ table layout, or the data itself as part of the operation:
+ </p>
+
+<pre class="pre codeblock"><code>
+-- Sample table to be the source of CTAS operations.
+CREATE TABLE t1 (x INT, y STRING);
+INSERT INTO t1 VALUES (1, 'one'), (2, 'two'), (3, 'three');
+
+-- Clone all the columns and data from one table to another.
+CREATE TABLE clone_of_t1 AS SELECT * FROM t1;
++-------------------+
+| summary |
++-------------------+
+| Inserted 3 row(s) |
++-------------------+
+
+-- Clone the columns and data, and convert the data to a different file format.
+CREATE TABLE parquet_version_of_t1 STORED AS PARQUET AS SELECT * FROM t1;
++-------------------+
+| summary |
++-------------------+
+| Inserted 3 row(s) |
++-------------------+
+
+-- Copy only some rows to the new table.
+CREATE TABLE subset_of_t1 AS SELECT * FROM t1 WHERE x >= 2;
++-------------------+
+| summary |
++-------------------+
+| Inserted 2 row(s) |
++-------------------+
+
+-- Same idea as CREATE TABLE LIKE: clone table layout but do not copy any data.
+CREATE TABLE empty_clone_of_t1 AS SELECT * FROM t1 WHERE 1=0;
++-------------------+
+| summary |
++-------------------+
+| Inserted 0 row(s) |
++-------------------+
+
+-- Reorder and rename columns and transform the data.
+CREATE TABLE t5 AS SELECT upper(y) AS s, x+1 AS a, 'Entirely new column' AS n FROM t1;
++-------------------+
+| summary |
++-------------------+
+| Inserted 3 row(s) |
++-------------------+
+SELECT * FROM t5;
++-------+---+---------------------+
+| s | a | n |
++-------+---+---------------------+
+| ONE | 2 | Entirely new column |
+| TWO | 3 | Entirely new column |
+| THREE | 4 | Entirely new column |
++-------+---+---------------------+
+</code></pre>
+
+
+
+
+
+ <p class="p">
+ See <a class="xref" href="impala_select.html#select">SELECT Statement</a> for details about query syntax for the
+ <code class="ph codeph">SELECT</code> portion of a <code class="ph codeph">CREATE TABLE AS SELECT</code> statement.
+ </p>
+
+ <p class="p">
+ The newly created table inherits the column names that you select from the original table,
+ which you can override by specifying column aliases in the query. Any column or table
+ comments from the original table are not carried over to the new table.
+ </p>
+
+ <div class="note note note_note"><span class="note__title notetitle">Note:</span>
+ When using the <code class="ph codeph">STORED AS</code> clause with a <code class="ph codeph">CREATE TABLE AS
+ SELECT</code> statement, the destination table must be a file format that Impala can
+ write to: currently, text or Parquet. You cannot specify an Avro, SequenceFile, or RCFile
+ table as the destination table for a CTAS operation.
+ </div>
+
+ <p class="p">
+ Prior to <span class="keyword">Impala 2.5</span> you could use a partitioned table as the source
+ and copy data from it, but could not specify any partitioning clauses for the new table.
+ In <span class="keyword">Impala 2.5</span> and higher, you can now use the <code class="ph codeph">PARTITIONED
+ BY</code> clause with a <code class="ph codeph">CREATE TABLE AS SELECT</code> statement. The following
+ example demonstrates how you can copy data from an unpartitioned table in a <code class="ph codeph">CREATE
+ TABLE AS SELECT</code> operation, creating a new partitioned table in the process. The
+ main syntax consideration is the column order in the <code class="ph codeph">PARTITIONED BY</code>
+ clause and the select list: the partition key columns must be listed last in the select
+ list, in the same order as in the <code class="ph codeph">PARTITIONED BY</code> clause. Therefore, in
+ this case, the column order in the destination table is different from the source table.
+ You also only specify the column names in the <code class="ph codeph">PARTITIONED BY</code> clause, not
+ the data types or column comments.
+ </p>
+
+<pre class="pre codeblock"><code>
+create table partitions_no (year smallint, month tinyint, s string);
+insert into partitions_no values (2016, 1, 'January 2016'),
+ (2016, 2, 'February 2016'), (2016, 3, 'March 2016');
+
+-- Prove that the source table is not partitioned.
+show partitions partitions_no;
+ERROR: AnalysisException: Table is not partitioned: ctas_partition_by.partitions_no
+
+-- Create new table with partitions based on column values from source table.
+<strong class="ph b">create table partitions_yes partitioned by (year, month)
+ as select s, year, month from partitions_no;</strong>
++-------------------+
+| summary |
++-------------------+
+| Inserted 3 row(s) |
++-------------------+
+
+-- Prove that the destination table is partitioned.
+show partitions partitions_yes;
++-------+-------+-------+--------+------+...
+| year | month | #Rows | #Files | Size |...
++-------+-------+-------+--------+------+...
+| 2016 | 1 | -1 | 1 | 13B |...
+| 2016 | 2 | -1 | 1 | 14B |...
+| 2016 | 3 | -1 | 1 | 11B |...
+| Total | | -1 | 3 | 38B |...
++-------+-------+-------+--------+------+...
+</code></pre>
+
+ <p class="p">
+ The most convenient layout for partitioned tables is with all the partition key columns at
+ the end. The CTAS <code class="ph codeph">PARTITIONED BY</code> syntax requires that column order in the
+ select list, resulting in that same column order in the destination table.
+ </p>
+
+<pre class="pre codeblock"><code>
+describe partitions_no;
++-------+----------+---------+
+| name | type | comment |
++-------+----------+---------+
+| year | smallint | |
+| month | tinyint | |
+| s | string | |
++-------+----------+---------+
+
+-- The CTAS operation forced us to put the partition key columns last.
+-- Having those columns last works better with idioms such as SELECT *
+-- for partitioned tables.
+describe partitions_yes;
++-------+----------+---------+
+| name | type | comment |
++-------+----------+---------+
+| s | string | |
+| year | smallint | |
+| month | tinyint | |
++-------+----------+---------+
+</code></pre>
+
+ <p class="p">
+ Attempting to use a select list with the partition key columns not at the end results in
+ an error due to a column name mismatch:
+ </p>
+
+<pre class="pre codeblock"><code>
+-- We expect this CTAS to fail because non-key column S
+-- comes after key columns YEAR and MONTH in the select list.
+create table partitions_maybe partitioned by (year, month)
+ as select year, month, s from partitions_no;
+ERROR: AnalysisException: Partition column name mismatch: year != month
+</code></pre>
+
+ <p class="p">
+ The preceding examples demonstrated how you can clone all the data in a table, or a
+ subset of the columns and/or rows, or reorder columns, rename them, or construct them out
+ of expressions.
+ </p>
+
+ <p class="p">
+ As part of a CTAS operation, you can convert the data to any file format that Impala can
+ write (currently, <code class="ph codeph">TEXTFILE</code> and <code class="ph codeph">PARQUET</code>). You cannot
+ specify the lower-level properties of a text table, such as the delimiter.
+ </p>
+
+ <p class="p">
+ <strong class="ph b">Sorting considerations:</strong> Although you can specify an <code class="ph codeph">ORDER BY</code> clause in an
+ <code class="ph codeph">INSERT ... SELECT</code> statement, any <code class="ph codeph">ORDER BY</code> clause is ignored and the
+ results are not necessarily sorted. An <code class="ph codeph">INSERT ... SELECT</code> operation potentially creates
+ many different data files, prepared on different data nodes, and therefore the notion of the data being
+ stored in sorted order is impractical.
+ </p>
+
+ <p class="p">
+ <strong class="ph b">CREATE TABLE LIKE PARQUET:</strong>
+ </p>
+
+ <p class="p">
+ The variation <code class="ph codeph">CREATE TABLE ... LIKE PARQUET
+ '<var class="keyword varname">hdfs_path_of_parquet_file</var>'</code> lets you skip the column
+ definitions of the <code class="ph codeph">CREATE TABLE</code> statement. The column names and data
+ types are automatically configured based on the organization of the specified Parquet data
+ file, which must already reside in HDFS. You can use a data file located outside the
+ Impala database directories, or a file from an existing Impala Parquet table; either way,
+ Impala only uses the column definitions from the file and does not use the HDFS location
+ for the <code class="ph codeph">LOCATION</code> attribute of the new table. (Although you can also
+ specify the enclosing directory with the <code class="ph codeph">LOCATION</code> attribute, to both use
+ the same schema as the data file and point the Impala table at the associated directory
+ for querying.)
+ </p>
+
+ <p class="p">
+ The following considerations apply when you use the <code class="ph codeph">CREATE TABLE LIKE
+ PARQUET</code> technique:
+ </p>
+
+ <ul class="ul">
+ <li class="li">
+ Any column comments from the original table are not preserved in the new table. Each
+ column in the new table has a comment stating the low-level Parquet field type used to
+ deduce the appropriate SQL column type.
+ </li>
+
+ <li class="li">
+ If you use a data file from a partitioned Impala table, any partition key columns from
+ the original table are left out of the new table, because they are represented in HDFS
+ directory names rather than stored in the data file. To preserve the partition
+ information, repeat the same <code class="ph codeph">PARTITION</code> clause as in the original
+ <code class="ph codeph">CREATE TABLE</code> statement.
+ </li>
+
+ <li class="li">
+ The file format of the new table defaults to text, as with other kinds of <code class="ph codeph">CREATE
+ TABLE</code> statements. To make the new table also use Parquet format, include the
+ clause <code class="ph codeph">STORED AS PARQUET</code> in the <code class="ph codeph">CREATE TABLE LIKE
+ PARQUET</code> statement.
+ </li>
+
+ <li class="li">
+ If the Parquet data file comes from an existing Impala table, currently, any
+ <code class="ph codeph">TINYINT</code> or <code class="ph codeph">SMALLINT</code> columns are turned into
+ <code class="ph codeph">INT</code> columns in the new table. Internally, Parquet stores such values as
+ 32-bit integers.
+ </li>
+
+ <li class="li">
+ When the destination table uses the Parquet file format, the <code class="ph codeph">CREATE TABLE AS
+ SELECT</code> and <code class="ph codeph">INSERT ... SELECT</code> statements always create at least
+ one data file, even if the <code class="ph codeph">SELECT</code> part of the statement does not match
+ any rows. You can use such an empty Parquet data file as a template for subsequent
+ <code class="ph codeph">CREATE TABLE LIKE PARQUET</code> statements.
+ </li>
+ </ul>
+
+ <p class="p">
+ For more details about creating Parquet tables, and examples of the <code class="ph codeph">CREATE TABLE
+ LIKE PARQUET</code> syntax, see <a class="xref" href="impala_parquet.html#parquet">Using the Parquet File Format with Impala Tables</a>.
+ </p>
+
+ <p class="p">
+ <strong class="ph b">Visibility and Metadata (TBLPROPERTIES and WITH SERDEPROPERTIES clauses):</strong>
+ </p>
+
+ <p class="p">
+ You can associate arbitrary items of metadata with a table by specifying the
+ <code class="ph codeph">TBLPROPERTIES</code> clause. This clause takes a comma-separated list of
+ key-value pairs and stores those items in the metastore database. You can also change the
+ table properties later with an <code class="ph codeph">ALTER TABLE</code> statement. You can observe the
+ table properties for different delimiter and escape characters using the <code class="ph codeph">DESCRIBE
+ FORMATTED</code> command, and change those settings for an existing table with
+ <code class="ph codeph">ALTER TABLE ... SET TBLPROPERTIES</code>.
+ </p>
+
+ <p class="p">
+ You can also associate SerDes properties with the table by specifying key-value pairs
+ through the <code class="ph codeph">WITH SERDEPROPERTIES</code> clause. This metadata is not used by
+ Impala, which has its own built-in serializer and deserializer for the file formats it
+ supports. Particular property values might be needed for Hive compatibility with certain
+ variations of file formats, particularly Avro.
+ </p>
+
+ <p class="p">
+ Some DDL operations that interact with other Hadoop components require specifying
+ particular values in the <code class="ph codeph">SERDEPROPERTIES</code> or
+ <code class="ph codeph">TBLPROPERTIES</code> fields, such as creating an Avro table or an HBase table.
+ (You typically create HBase tables in Hive, because they require additional clauses not
+ currently available in Impala.)
+
+ </p>
+
+ <p class="p">
+ To see the column definitions and column comments for an existing table, for example
+ before issuing a <code class="ph codeph">CREATE TABLE ... LIKE</code> or a <code class="ph codeph">CREATE TABLE ... AS
+ SELECT</code> statement, issue the statement <code class="ph codeph">DESCRIBE
+ <var class="keyword varname">table_name</var></code>. To see even more detail, such as the location of
+ data files and the values for clauses such as <code class="ph codeph">ROW FORMAT</code> and
+ <code class="ph codeph">STORED AS</code>, issue the statement <code class="ph codeph">DESCRIBE FORMATTED
+ <var class="keyword varname">table_name</var></code>. <code class="ph codeph">DESCRIBE FORMATTED</code> is also needed
+ to see any overall table comment (as opposed to individual column comments).
+ </p>
+
+ <p class="p">
+ After creating a table, your <span class="keyword cmdname">impala-shell</span> session or another
+ <span class="keyword cmdname">impala-shell</span> connected to the same node can immediately query that
+ table. There might be a brief interval (one statestore heartbeat) before the table can be
+ queried through a different Impala node. To make the <code class="ph codeph">CREATE TABLE</code>
+ statement return only when the table is recognized by all Impala nodes in the cluster,
+ enable the <code class="ph codeph">SYNC_DDL</code> query option.
+ </p>
+
+ <p class="p">
+ <strong class="ph b">HDFS caching (CACHED IN clause):</strong>
+ </p>
+
+ <p class="p">
+ If you specify the <code class="ph codeph">CACHED IN</code> clause, any existing or future data files in
+ the table directory or the partition subdirectories are designated to be loaded into
+ memory with the HDFS caching mechanism. See
+ <a class="xref" href="impala_perf_hdfs_caching.html#hdfs_caching">Using HDFS Caching with Impala (Impala 2.1 or higher only)</a> for details about using the HDFS
+ caching feature.
+ </p>
+
+ <p class="p">
+ In <span class="keyword">Impala 2.2</span> and higher, the optional <code class="ph codeph">WITH REPLICATION</code> clause
+ for <code class="ph codeph">CREATE TABLE</code> and <code class="ph codeph">ALTER TABLE</code> lets you specify
+ a <dfn class="term">replication factor</dfn>, the number of hosts on which to cache the same data blocks.
+ When Impala processes a cached data block, where the cache replication factor is greater than 1, Impala randomly
+ selects a host that has a cached copy of that data block. This optimization avoids excessive CPU
+ usage on a single host when the same cached data block is processed multiple times.
+ Where practical, specify a value greater than or equal to the HDFS block replication factor.
+ </p>
+
+
+
+ <p class="p">
+ <strong class="ph b">Column order</strong>:
+ </p>
+
+ <p class="p">
+ If you intend to use the table to hold data files produced by some external source,
+ specify the columns in the same order as they appear in the data files.
+ </p>
+
+ <p class="p">
+ If you intend to insert or copy data into the table through Impala, or if you have control
+ over the way externally produced data files are arranged, use your judgment to specify
+ columns in the most convenient order:
+ </p>
+
+ <ul class="ul">
+ <li class="li">
+ <p class="p">
+ If certain columns are often <code class="ph codeph">NULL</code>, specify those columns last. You
+ might produce data files that omit these trailing columns entirely. Impala
+ automatically fills in the <code class="ph codeph">NULL</code> values if so.
+ </p>
+ </li>
+
+ <li class="li">
+ <p class="p">
+ If an unpartitioned table will be used as the source for an <code class="ph codeph">INSERT ...
+ SELECT</code> operation into a partitioned table, specify last in the unpartitioned
+ table any columns that correspond to partition key columns in the partitioned table,
+ and in the same order as the partition key columns are declared in the partitioned
+ table. This technique lets you use <code class="ph codeph">INSERT ... SELECT *</code> when copying
+ data to the partitioned table, rather than specifying each column name individually.
+ </p>
+ </li>
+
+ <li class="li">
+ <p class="p">
+ If you specify columns in an order that you later discover is suboptimal, you can
+ sometimes work around the problem without recreating the table. You can create a view
+ that selects columns from the original table in a permuted order, then do a
+ <code class="ph codeph">SELECT *</code> from the view. When inserting data into a table, you can
+ specify a permuted order for the inserted columns to match the order in the
+ destination table.
+ </p>
+ </li>
+ </ul>
+
+ <p class="p">
+ <strong class="ph b">Hive considerations:</strong>
+ </p>
+
+ <p class="p">
+ Impala queries can make use of metadata about the table and columns, such as the number of
+ rows in a table or the number of different values in a column. Prior to Impala 1.2.2, to
+ create this metadata, you issued the <code class="ph codeph">ANALYZE TABLE</code> statement in Hive to
+ gather this information, after creating the table and loading representative data into it.
+ In Impala 1.2.2 and higher, the <code class="ph codeph">COMPUTE STATS</code> statement produces these
+ statistics within Impala, without needing to use Hive at all.
+ </p>
+
+ <p class="p">
+ <strong class="ph b">HBase considerations:</strong>
+ </p>
+
+ <div class="note note note_note"><span class="note__title notetitle">Note:</span>
+ <p class="p">
+ The Impala <code class="ph codeph">CREATE TABLE</code> statement cannot create an HBase table, because
+ it currently does not support the <code class="ph codeph">STORED BY</code> clause needed for HBase
+ tables. Create such tables in Hive, then query them through Impala. For information on
+ using Impala with HBase tables, see <a class="xref" href="impala_hbase.html#impala_hbase">Using Impala to Query HBase Tables</a>.
+ </p>
+ </div>
+
+ <p class="p">
+ <strong class="ph b">Amazon S3 considerations:</strong>
+ </p>
+
+ <p class="p">
+ To create a table where the data resides in the Amazon Simple Storage Service (S3),
+ specify an <code class="ph codeph">s3a://</code> prefix <code class="ph codeph">LOCATION</code> attribute pointing to
+ the data files in S3.
+ </p>
+
+ <p class="p">
+ In <span class="keyword">Impala 2.6</span> and higher, you can use this special
+ <code class="ph codeph">LOCATION</code> syntax as part of a <code class="ph codeph">CREATE TABLE AS SELECT</code>
+ statement.
+ </p>
+
+ <p class="p">
+ In <span class="keyword">Impala 2.6</span> and higher, Impala DDL statements such as
+ <code class="ph codeph">CREATE DATABASE</code>, <code class="ph codeph">CREATE TABLE</code>, <code class="ph codeph">DROP DATABASE CASCADE</code>,
+ <code class="ph codeph">DROP TABLE</code>, and <code class="ph codeph">ALTER TABLE [ADD|DROP] PARTITION</code> can create or remove folders
+ as needed in the Amazon S3 system. Prior to <span class="keyword">Impala 2.6</span>, you had to create folders yourself and point
+ Impala database, tables, or partitions at them, and manually remove folders when no longer needed.
+ See <a class="xref" href="../shared/../topics/impala_s3.html#s3">Using Impala with the Amazon S3 Filesystem</a> for details about reading and writing S3 data with Impala.
+ </p>
+
+ <p class="p">
+ <strong class="ph b">Sorting considerations:</strong> Although you can specify an <code class="ph codeph">ORDER BY</code> clause in an
+ <code class="ph codeph">INSERT ... SELECT</code> statement, any <code class="ph codeph">ORDER BY</code> clause is ignored and the
+ results are not necessarily sorted. An <code class="ph codeph">INSERT ... SELECT</code> operation potentially creates
+ many different data files, prepared on different data nodes, and therefore the notion of the data being
+ stored in sorted order is impractical.
+ </p>
+
+ <p class="p">
+ <strong class="ph b">HDFS considerations:</strong>
+ </p>
+
+ <p class="p">
+ The <code class="ph codeph">CREATE TABLE</code> statement for an internal table creates a directory in
+ HDFS. The <code class="ph codeph">CREATE EXTERNAL TABLE</code> statement associates the table with an
+ existing HDFS directory, and does not create any new directory in HDFS. To locate the HDFS
+ data directory for a table, issue a <code class="ph codeph">DESCRIBE FORMATTED
+ <var class="keyword varname">table</var></code> statement. To examine the contents of that HDFS
+ directory, use an OS command such as <code class="ph codeph">hdfs dfs -ls
+ hdfs://<var class="keyword varname">path</var></code>, either from the OS command line or through the
+ <code class="ph codeph">shell</code> or <code class="ph codeph">!</code> commands in <span class="keyword cmdname">impala-shell</span>.
+ </p>
+
+ <p class="p">
+ The <code class="ph codeph">CREATE TABLE AS SELECT</code> syntax creates data files under the table data
+ directory to hold any data copied by the <code class="ph codeph">INSERT</code> portion of the statement.
+ (Even if no data is copied, Impala might create one or more empty data files.)
+ </p>
+
+ <p class="p">
+ <strong class="ph b">HDFS permissions:</strong>
+ </p>
+
+ <p class="p">
+ The user ID that the <span class="keyword cmdname">impalad</span> daemon runs under, typically the
+ <code class="ph codeph">impala</code> user, must have both execute and write permission for the database
+ directory where the table is being created.
+ </p>
+
+ <p class="p">
+ <strong class="ph b">Security considerations:</strong>
+ </p>
+
+ <p class="p">
+ If these statements in your environment contain sensitive literal values such as credit card numbers or tax
+ identifiers, Impala can redact this sensitive information when displaying the statements in log files and
+ other administrative contexts. See <span class="xref">the documentation for your Apache Hadoop distribution</span> for details.
+ </p>
+
+ <p class="p">
+ <strong class="ph b">Cancellation:</strong> Certain multi-stage statements (<code class="ph codeph">CREATE TABLE AS SELECT</code> and
+ <code class="ph codeph">COMPUTE STATS</code>) can be cancelled during some stages, when running <code class="ph codeph">INSERT</code>
+ or <code class="ph codeph">SELECT</code> operations internally. To cancel this statement, use Ctrl-C from the
+ <span class="keyword cmdname">impala-shell</span> interpreter, the <span class="ph uicontrol">Cancel</span> button from the
+ <span class="ph uicontrol">Watch</span> page in Hue, or <span class="ph uicontrol">Cancel</span> from the list of
+ in-flight queries (for a particular node) on the <span class="ph uicontrol">Queries</span> tab in the Impala web UI
+ (port 25000).
+ </p>
+
+ <p class="p">
+ <strong class="ph b">Related information:</strong>
+ </p>
+
+ <p class="p">
+ <a class="xref" href="impala_tables.html#tables">Overview of Impala Tables</a>,
+ <a class="xref" href="impala_alter_table.html#alter_table">ALTER TABLE Statement</a>,
+ <a class="xref" href="impala_drop_table.html#drop_table">DROP TABLE Statement</a>,
+ <a class="xref" href="impala_partitioning.html#partitioning">Partitioning for Impala Tables</a>,
+ <a class="xref" href="impala_tables.html#internal_tables">Internal Tables</a>,
+ <a class="xref" href="impala_tables.html#external_tables">External Tables</a>,
+ <a class="xref" href="impala_compute_stats.html#compute_stats">COMPUTE STATS Statement</a>,
+ <a class="xref" href="impala_sync_ddl.html#sync_ddl">SYNC_DDL Query Option</a>, <a class="xref" href="impala_show.html#show_tables">SHOW TABLES Statement</a>,
+ <a class="xref" href="impala_show.html#show_create_table">SHOW CREATE TABLE Statement</a>,
+ <a class="xref" href="impala_describe.html#describe">DESCRIBE Statement</a>
+ </p>
+
+ </div>
+
+<nav role="navigation" class="related-links"><div class="familylinks"><div class="parentlink"><strong>Parent topic:</strong> <a class="link" href="../topics/impala_langref_sql.html">Impala SQL Statements</a></div></div></nav></article></main></body></html>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/75c46918/docs/build/html/topics/impala_create_view.html
----------------------------------------------------------------------
diff --git a/docs/build/html/topics/impala_create_view.html b/docs/build/html/topics/impala_create_view.html
new file mode 100644
index 0000000..3d56969
--- /dev/null
+++ b/docs/build/html/topics/impala_create_view.html
@@ -0,0 +1,194 @@
+<!DOCTYPE html
+ SYSTEM "about:legacy-compat">
+<html lang="en"><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8"><meta charset="UTF-8"><meta name="copyright" content="(C) Copyright 2017"><meta name="DC.rights.owner" content="(C) Copyright 2017"><meta name="DC.Type" content="concept"><meta name="DC.Relation" scheme="URI" content="../topics/impala_langref_sql.html"><meta name="prodname" content="Impala"><meta name="prodname" content="Impala"><meta name="version" content="Impala 2.8.x"><meta name="version" content="Impala 2.8.x"><meta name="DC.Format" content="XHTML"><meta name="DC.Identifier" content="create_view"><link rel="stylesheet" type="text/css" href="../commonltr.css"><title>CREATE VIEW Statement</title></head><body id="create_view"><main role="main"><article role="article" aria-labelledby="ariaid-title1">
+
+ <h1 class="title topictitle1" id="ariaid-title1">CREATE VIEW Statement</h1>
+
+
+
+ <div class="body conbody">
+
+ <p class="p">
+
+ The <code class="ph codeph">CREATE VIEW</code> statement lets you create a shorthand abbreviation for a more complicated
+ query. The base query can involve joins, expressions, reordered columns, column aliases, and other SQL
+ features that can make a query hard to understand or maintain.
+ </p>
+
+ <p class="p">
+ Because a view is purely a logical construct (an alias for a query) with no physical data behind it,
+ <code class="ph codeph">ALTER VIEW</code> only involves changes to metadata in the metastore database, not any data files
+ in HDFS.
+ </p>
+
+ <p class="p">
+ <strong class="ph b">Syntax:</strong>
+ </p>
+
+<pre class="pre codeblock"><code>CREATE VIEW [IF NOT EXISTS] <var class="keyword varname">view_name</var> [(<var class="keyword varname">column_list</var>)]
+ AS <var class="keyword varname">select_statement</var></code></pre>
+
+ <p class="p">
+ <strong class="ph b">Statement type:</strong> DDL
+ </p>
+
+ <p class="p">
+ <strong class="ph b">Usage notes:</strong>
+ </p>
+
+ <p class="p">
+ The <code class="ph codeph">CREATE VIEW</code> statement can be useful in scenarios such as the following:
+ </p>
+
+ <ul class="ul">
+ <li class="li">
+ To turn even the most lengthy and complicated SQL query into a one-liner. You can issue simple queries
+ against the view from applications, scripts, or interactive queries in <span class="keyword cmdname">impala-shell</span>.
+ For example:
+<pre class="pre codeblock"><code>select * from <var class="keyword varname">view_name</var>;
+select * from <var class="keyword varname">view_name</var> order by c1 desc limit 10;</code></pre>
+ The more complicated and hard-to-read the original query, the more benefit there is to simplifying the
+ query using a view.
+ </li>
+
+ <li class="li">
+ To hide the underlying table and column names, to minimize maintenance problems if those names change. In
+ that case, you re-create the view using the new names, and all queries that use the view rather than the
+ underlying tables keep running with no changes.
+ </li>
+
+ <li class="li">
+ To experiment with optimization techniques and make the optimized queries available to all applications.
+ For example, if you find a combination of <code class="ph codeph">WHERE</code> conditions, join order, join hints, and so
+ on that works the best for a class of queries, you can establish a view that incorporates the
+ best-performing techniques. Applications can then make relatively simple queries against the view, without
+ repeating the complicated and optimized logic over and over. If you later find a better way to optimize the
+ original query, when you re-create the view, all the applications immediately take advantage of the
+ optimized base query.
+ </li>
+
+ <li class="li">
+ To simplify a whole class of related queries, especially complicated queries involving joins between
+ multiple tables, complicated expressions in the column list, and other SQL syntax that makes the query
+ difficult to understand and debug. For example, you might create a view that joins several tables, filters
+ using several <code class="ph codeph">WHERE</code> conditions, and selects several columns from the result set.
+ Applications might issue queries against this view that only vary in their <code class="ph codeph">LIMIT</code>,
+ <code class="ph codeph">ORDER BY</code>, and similar simple clauses.
+ </li>
+ </ul>
+
+ <p class="p">
+ For queries that require repeating complicated clauses over and over again, for example in the select list,
+ <code class="ph codeph">ORDER BY</code>, and <code class="ph codeph">GROUP BY</code> clauses, you can use the <code class="ph codeph">WITH</code>
+ clause as an alternative to creating a view.
+ </p>
+
+ <p class="p">
+ <strong class="ph b">Complex type considerations:</strong>
+ </p>
+ <p class="p">
+ For tables containing complex type columns (<code class="ph codeph">ARRAY</code>,
+ <code class="ph codeph">STRUCT</code>, or <code class="ph codeph">MAP</code>), you typically use
+ join queries to refer to the complex values. You can use views to
+ hide the join notation, making such tables seem like traditional denormalized
+ tables, and making those tables queryable by business intelligence tools
+ that do not have built-in support for those complex types.
+ See <a class="xref" href="../shared/../topics/impala_complex_types.html#complex_types_views">Accessing Complex Type Data in Flattened Form Using Views</a> for details.
+ </p>
+ <p class="p">
+ Because you cannot directly issue <code class="ph codeph">SELECT <var class="keyword varname">col_name</var></code>
+ against a column of complex type, you cannot use a view or a <code class="ph codeph">WITH</code>
+ clause to <span class="q">"rename"</span> a column by selecting it with a column alias.
+ </p>
+
+ <p class="p">
+ If you connect to different Impala nodes within an <span class="keyword cmdname">impala-shell</span> session for
+ load-balancing purposes, you can enable the <code class="ph codeph">SYNC_DDL</code> query option to make each DDL
+ statement wait before returning, until the new or changed metadata has been received by all the Impala
+ nodes. See <a class="xref" href="../shared/../topics/impala_sync_ddl.html#sync_ddl">SYNC_DDL Query Option</a> for details.
+ </p>
+
+ <p class="p">
+ <strong class="ph b">Security considerations:</strong>
+ </p>
+ <p class="p">
+ If these statements in your environment contain sensitive literal values such as credit card numbers or tax
+ identifiers, Impala can redact this sensitive information when displaying the statements in log files and
+ other administrative contexts. See <span class="xref">the documentation for your Apache Hadoop distribution</span> for details.
+ </p>
+
+ <p class="p">
+ <strong class="ph b">Cancellation:</strong> Cannot be cancelled.
+ </p>
+
+ <p class="p">
+ <strong class="ph b">HDFS permissions:</strong> This statement does not touch any HDFS files or directories,
+ therefore no HDFS permissions are required.
+ </p>
+
+ <p class="p">
+ <strong class="ph b">Examples:</strong>
+ </p>
+
+
+
+<pre class="pre codeblock"><code>-- Create a view that is exactly the same as the underlying table.
+create view v1 as select * from t1;
+
+-- Create a view that includes only certain columns from the underlying table.
+create view v2 as select c1, c3, c7 from t1;
+
+-- Create a view that filters the values from the underlying table.
+create view v3 as select distinct c1, c3, c7 from t1 where c1 is not null and c5 > 0;
+
+-- Create a view that reorders and renames columns from the underlying table.
+create view v4 as select c4 as last_name, c6 as address, c2 as birth_date from t1;
+
+-- Create a view that runs functions to convert or transform certain columns.
+create view v5 as select c1, cast(c3 as string) c3, concat(c4,c5) c5, trim(c6) c6, "Constant" c8 from t1;
+
+-- Create a view that hides the complexity of a view query.
+create view v6 as select t1.c1, t2.c2 from t1 join t2 on t1.id = t2.id;
+</code></pre>
+
+
+
+ <div class="p">
+        The following example creates a series of views and then drops them. These examples illustrate how views
+        are associated with a particular database, and how both the view definitions and the view names in
+        <code class="ph codeph">CREATE VIEW</code> and <code class="ph codeph">DROP VIEW</code> statements can refer to a view
+        in the current database or use a fully qualified view name.
+<pre class="pre codeblock"><code>
+-- Create and drop a view in the current database.
+CREATE VIEW few_rows_from_t1 AS SELECT * FROM t1 LIMIT 10;
+DROP VIEW few_rows_from_t1;
+
+-- Create and drop a view referencing a table in a different database.
+CREATE VIEW table_from_other_db AS SELECT x FROM db1.foo WHERE x IS NOT NULL;
+DROP VIEW table_from_other_db;
+
+USE db1;
+-- Create a view in a different database.
+CREATE VIEW db2.v1 AS SELECT * FROM db2.foo;
+-- Switch into the other database and drop the view.
+USE db2;
+DROP VIEW v1;
+
+USE db1;
+-- Create a view in a different database.
+CREATE VIEW db2.v1 AS SELECT * FROM db2.foo;
+-- Drop a view in the other database.
+DROP VIEW db2.v1;
+</code></pre>
+ </div>
+
+ <p class="p">
+ <strong class="ph b">Related information:</strong>
+ </p>
+
+ <p class="p">
+ <a class="xref" href="impala_views.html#views">Overview of Impala Views</a>, <a class="xref" href="impala_alter_view.html#alter_view">ALTER VIEW Statement</a>,
+ <a class="xref" href="impala_drop_view.html#drop_view">DROP VIEW Statement</a>
+ </p>
+ </div>
+<nav role="navigation" class="related-links"><div class="familylinks"><div class="parentlink"><strong>Parent topic:</strong> <a class="link" href="../topics/impala_langref_sql.html">Impala SQL Statements</a></div></div></nav></article></main></body></html>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/75c46918/docs/build/html/topics/impala_databases.html
----------------------------------------------------------------------
diff --git a/docs/build/html/topics/impala_databases.html b/docs/build/html/topics/impala_databases.html
new file mode 100644
index 0000000..83e9911
--- /dev/null
+++ b/docs/build/html/topics/impala_databases.html
@@ -0,0 +1,62 @@
+<!DOCTYPE html
+ SYSTEM "about:legacy-compat">
+<html lang="en"><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8"><meta charset="UTF-8"><meta name="copyright" content="(C) Copyright 2017"><meta name="DC.rights.owner" content="(C) Copyright 2017"><meta name="DC.Type" content="concept"><meta name="DC.Relation" scheme="URI" content="../topics/impala_schema_objects.html"><meta name="prodname" content="Impala"><meta name="prodname" content="Impala"><meta name="version" content="Impala 2.8.x"><meta name="version" content="Impala 2.8.x"><meta name="DC.Format" content="XHTML"><meta name="DC.Identifier" content="databases"><link rel="stylesheet" type="text/css" href="../commonltr.css"><title>Overview of Impala Databases</title></head><body id="databases"><main role="main"><article role="article" aria-labelledby="ariaid-title1">
+
+ <h1 class="title topictitle1" id="ariaid-title1">Overview of Impala Databases</h1>
+
+
+
+ <div class="body conbody">
+
+ <p class="p">
+ In Impala, a database is a logical container for a group of tables. Each database defines a separate
+ namespace. Within a database, you can refer to the tables inside it using their unqualified names. Different
+ databases can contain tables with identical names.
+ </p>
+
+ <p class="p">
+ Creating a database is a lightweight operation. There are minimal database-specific properties to configure,
+ only <code class="ph codeph">LOCATION</code> and <code class="ph codeph">COMMENT</code>. There is no <code class="ph codeph">ALTER DATABASE</code> statement.
+ </p>
+
+ <p class="p">
+ Typically, you create a separate database for each project or application, to avoid naming conflicts between
+ tables and to make clear which tables are related to each other. The <code class="ph codeph">USE</code> statement lets
+ you switch between databases. Unqualified references to tables, views, and functions refer to objects
+ within the current database. You can also refer to objects in other databases by using qualified names
+ of the form <code class="ph codeph"><var class="keyword varname">dbname</var>.<var class="keyword varname">object_name</var></code>.
+ </p>
+
+ <p class="p">
+ Each database is physically represented by a directory in HDFS. When you do not specify a <code class="ph codeph">LOCATION</code>
+ attribute, the directory is located in the Impala data directory with the associated tables managed by Impala.
+ When you do specify a <code class="ph codeph">LOCATION</code> attribute, any read and write operations for tables in that
+ database are relative to the specified HDFS directory.
+ </p>
+
+ <p class="p">
+ There is a special database, named <code class="ph codeph">default</code>, where you begin when you connect to Impala.
+ Tables created in <code class="ph codeph">default</code> are physically located one level higher in HDFS than all the
+ user-created databases.
+ </p>
+
+ <div class="p">
+ Impala includes another predefined database, <code class="ph codeph">_impala_builtins</code>, that serves as the location
+ for the <a class="xref" href="../shared/../topics/impala_functions.html#builtins">built-in functions</a>. To see the built-in
+ functions, use a statement like the following:
+<pre class="pre codeblock"><code>show functions in _impala_builtins;
+show functions in _impala_builtins like '*<var class="keyword varname">substring</var>*';
+</code></pre>
+ </div>
+
+ <p class="p">
+ <strong class="ph b">Related statements:</strong>
+ </p>
+
+ <p class="p">
+ <a class="xref" href="impala_create_database.html#create_database">CREATE DATABASE Statement</a>,
+ <a class="xref" href="impala_drop_database.html#drop_database">DROP DATABASE Statement</a>, <a class="xref" href="impala_use.html#use">USE Statement</a>,
+ <a class="xref" href="impala_show.html#show_databases">SHOW DATABASES</a>
+ </p>
+ </div>
+<nav role="navigation" class="related-links"><div class="familylinks"><div class="parentlink"><strong>Parent topic:</strong> <a class="link" href="../topics/impala_schema_objects.html">Impala Schema Objects and Object Names</a></div></div></nav></article></main></body></html>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/75c46918/docs/build/html/topics/impala_datatypes.html
----------------------------------------------------------------------
diff --git a/docs/build/html/topics/impala_datatypes.html b/docs/build/html/topics/impala_datatypes.html
new file mode 100644
index 0000000..dfe9b71
--- /dev/null
+++ b/docs/build/html/topics/impala_datatypes.html
@@ -0,0 +1,33 @@
+<!DOCTYPE html
+ SYSTEM "about:legacy-compat">
+<html lang="en"><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8"><meta charset="UTF-8"><meta name="copyright" content="(C) Copyright 2017"><meta name="DC.rights.owner" content="(C) Copyright 2017"><meta name="DC.Type" content="concept"><meta name="DC.Relation" scheme="URI" content="../topics/impala_langref.html"><meta name="DC.Relation" scheme="URI" content="../topics/impala_array.html"><meta name="DC.Relation" scheme="URI" content="../topics/impala_bigint.html"><meta name="DC.Relation" scheme="URI" content="../topics/impala_boolean.html"><meta name="DC.Relation" scheme="URI" content="../topics/impala_char.html"><meta name="DC.Relation" scheme="URI" content="../topics/impala_decimal.html"><meta name="DC.Relation" scheme="URI" content="../topics/impala_double.html"><meta name="DC.Relation" scheme="URI" content="../topics/impala_float.html"><meta name="DC.Relation" scheme="URI" content="../topics/impala_int.html"><meta name="DC.Relation" scheme="URI" content=".
./topics/impala_map.html"><meta name="DC.Relation" scheme="URI" content="../topics/impala_real.html"><meta name="DC.Relation" scheme="URI" content="../topics/impala_smallint.html"><meta name="DC.Relation" scheme="URI" content="../topics/impala_string.html"><meta name="DC.Relation" scheme="URI" content="../topics/impala_struct.html"><meta name="DC.Relation" scheme="URI" content="../topics/impala_timestamp.html"><meta name="DC.Relation" scheme="URI" content="../topics/impala_tinyint.html"><meta name="DC.Relation" scheme="URI" content="../topics/impala_varchar.html"><meta name="DC.Relation" scheme="URI" content="../topics/impala_complex_types.html"><meta name="prodname" content="Impala"><meta name="prodname" content="Impala"><meta name="version" content="Impala 2.8.x"><meta name="version" content="Impala 2.8.x"><meta name="DC.Format" content="XHTML"><meta name="DC.Identifier" content="datatypes"><link rel="stylesheet" type="text/css" href="../commonltr.css"><title>Data Types</title></h
ead><body id="datatypes"><main role="main"><article role="article" aria-labelledby="ariaid-title1">
+
+ <h1 class="title topictitle1" id="ariaid-title1">Data Types</h1>
+
+
+ <div class="body conbody">
+
+ <p class="p">
+
+ Impala supports a set of data types that you can use for table columns, expression values, and function
+ arguments and return values.
+ </p>
+
+    <div class="note note note_note"><span class="note__title notetitle">Note:</span>
+      In <span class="keyword">Impala 2.3</span> and higher, Impala also supports the complex types
+      <code class="ph codeph">ARRAY</code>, <code class="ph codeph">STRUCT</code>, and <code class="ph codeph">MAP</code>
+      in addition to scalar types. In earlier releases, Impala supported only scalar types, and accessing a table
+      containing any columns with unsupported types causes an error.
+    </div>
+
+ <p class="p toc"></p>
+
+ <p class="p">
+ For the notation to write literals of each of these data types, see
+ <a class="xref" href="impala_literals.html#literals">Literals</a>.
+ </p>
+
+ <p class="p">
+ See <a class="xref" href="impala_langref_unsupported.html#langref_hiveql_delta">SQL Differences Between Impala and Hive</a> for differences between Impala and
+ Hive data types.
+ </p>
+ </div>
+<nav role="navigation" class="related-links"><ul class="ullinks"><li class="link ulchildlink"><strong><a href="../topics/impala_array.html">ARRAY Complex Type (Impala 2.3 or higher only)</a></strong><br></li><li class="link ulchildlink"><strong><a href="../topics/impala_bigint.html">BIGINT Data Type</a></strong><br></li><li class="link ulchildlink"><strong><a href="../topics/impala_boolean.html">BOOLEAN Data Type</a></strong><br></li><li class="link ulchildlink"><strong><a href="../topics/impala_char.html">CHAR Data Type (Impala 2.0 or higher only)</a></strong><br></li><li class="link ulchildlink"><strong><a href="../topics/impala_decimal.html">DECIMAL Data Type (Impala 1.4 or higher only)</a></strong><br></li><li class="link ulchildlink"><strong><a href="../topics/impala_double.html">DOUBLE Data Type</a></strong><br></li><li class="link ulchildlink"><strong><a href="../topics/impala_float.html">FLOAT Data Type</a></strong><br></li><li class="link ulchildlink"><strong><a href="../to
pics/impala_int.html">INT Data Type</a></strong><br></li><li class="link ulchildlink"><strong><a href="../topics/impala_map.html">MAP Complex Type (Impala 2.3 or higher only)</a></strong><br></li><li class="link ulchildlink"><strong><a href="../topics/impala_real.html">REAL Data Type</a></strong><br></li><li class="link ulchildlink"><strong><a href="../topics/impala_smallint.html">SMALLINT Data Type</a></strong><br></li><li class="link ulchildlink"><strong><a href="../topics/impala_string.html">STRING Data Type</a></strong><br></li><li class="link ulchildlink"><strong><a href="../topics/impala_struct.html">STRUCT Complex Type (Impala 2.3 or higher only)</a></strong><br></li><li class="link ulchildlink"><strong><a href="../topics/impala_timestamp.html">TIMESTAMP Data Type</a></strong><br></li><li class="link ulchildlink"><strong><a href="../topics/impala_tinyint.html">TINYINT Data Type</a></strong><br></li><li class="link ulchildlink"><strong><a href="../topics/impala_varchar.html">V
ARCHAR Data Type (Impala 2.0 or higher only)</a></strong><br></li><li class="link ulchildlink"><strong><a href="../topics/impala_complex_types.html">Complex Types (Impala 2.3 or higher only)</a></strong><br></li></ul><div class="familylinks"><div class="parentlink"><strong>Parent topic:</strong> <a class="link" href="../topics/impala_langref.html">Impala SQL Language Reference</a></div></div></nav></article></main></body></html>
\ No newline at end of file