You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by bo...@apache.org on 2024/01/02 14:20:17 UTC
(impala) branch master updated: IMPALA-12653: Update documentation about the UPDATE statement
This is an automated email from the ASF dual-hosted git repository.
boroknagyz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
The following commit(s) were added to refs/heads/master by this push:
new dea8546d8 IMPALA-12653: Update documentation about the UPDATE statement
dea8546d8 is described below
commit dea8546d80323f706d48c5f3ccc310b9ccbd1eae
Author: Zoltan Borok-Nagy <bo...@cloudera.com>
AuthorDate: Tue Dec 19 16:00:59 2023 +0100
IMPALA-12653: Update documentation about the UPDATE statement
This patch adds documentation about the UPDATE statement.
Change-Id: I2a4f3dcdba5faaa7dffda60b8590d09e6a92a165
Reviewed-on: http://gerrit.cloudera.org:8080/20818
Tested-by: Impala Public Jenkins <im...@cloudera.com>
Reviewed-by: Noemi Pap-Takacs <np...@cloudera.com>
Reviewed-by: Andrew Sherman <as...@cloudera.com>
---
docs/impala_keydefs.ditamap | 1 +
docs/topics/impala_iceberg.xml | 32 +++++++++++++++++++++++--
docs/topics/impala_update.xml | 53 +++++++++++++++++++++++-------------------
3 files changed, 60 insertions(+), 26 deletions(-)
diff --git a/docs/impala_keydefs.ditamap b/docs/impala_keydefs.ditamap
index ca1582410..f97d10a6f 100644
--- a/docs/impala_keydefs.ditamap
+++ b/docs/impala_keydefs.ditamap
@@ -10529,6 +10529,7 @@ under the License.
<keydef href="https://issues.apache.org/jira/browse/IMPALA-9999" scope="external" format="html" keys="IMPALA-9999"/>
<!-- Short form of mapping from Impala release to vendor-specific releases, for use in headings. -->
+ <keydef keys="impala44"><topicmeta><keywords><keyword>Impala 4.4</keyword></keywords></topicmeta></keydef>
<keydef keys="impala43"><topicmeta><keywords><keyword>Impala 4.3</keyword></keywords></topicmeta></keydef>
<keydef keys="impala42"><topicmeta><keywords><keyword>Impala 4.2</keyword></keywords></topicmeta></keydef>
<keydef keys="impala41"><topicmeta><keywords><keyword>Impala 4.1</keyword></keywords></topicmeta></keydef>
diff --git a/docs/topics/impala_iceberg.xml b/docs/topics/impala_iceberg.xml
index 66dd7fd8c..fce5aaa76 100644
--- a/docs/topics/impala_iceberg.xml
+++ b/docs/topics/impala_iceberg.xml
@@ -494,8 +494,36 @@ DELETE FROM ice_t where i = 3;
<title>Updating data int Iceberg tables</title>
<conbody>
<p>
- Impala does not yet support the UPDATE statement. One can mimic its behaviour by calling
- DELETE + INSERT statements one after the other, but please note that this creates two transactions.
+ Since <keyword keyref="impala44"/> Impala is able to run <codeph>UPDATE</codeph> statements against
+ Iceberg V2 tables. E.g.:
+ <codeblock>
+UPDATE ice_t SET val = val + 1;
+UPDATE ice_t SET k = 4 WHERE i = 5;
+UPDATE ice_t SET ice_t.k = o.k, ice_t.j = o.j, FROM ice_t, other_table o where ice_t.id = o.id;
+ </codeblock>
+ </p>
+ <p>
+ The UPDATE FROM statement can be used to update a target Iceberg table based on a source table (or view) that doesn't need
+ to be an Iceberg table. If there are multiple matches on the JOIN condition, Impala will raise an error.
+ </p>
+ <p>
+ Limitations:
+ <ul>
+ <li>Only the merge-on-read update mode is supported.</li>
+ <li>Only writes position delete files, i.e. no support for writing equality deletes.</li>
+ <li>Cannot update tables with complex types.</li>
+ <li>
+ Can only write data and delete files in Parquet format. This means if table properties 'write.format.default'
+ and 'write.delete.format.default' are set, their values must be PARQUET.
+ </li>
+ <li>
+ Updating partitioning column with non-constant expression via the UPDATE FROM statement is not allowed.
+ The upcoming MERGE statement will not have this limitation.
+ </li>
+ </ul>
+ </p>
+ <p>
+ More information about the <codeph>UPDATE</codeph> statement can be found at <xref href="impala_update.xml#update"/>.
</p>
</conbody>
</concept>
diff --git a/docs/topics/impala_update.xml b/docs/topics/impala_update.xml
index 0d3250b71..598a81f4e 100644
--- a/docs/topics/impala_update.xml
+++ b/docs/topics/impala_update.xml
@@ -27,6 +27,7 @@ under the License.
<data name="Category" value="Impala"/>
<data name="Category" value="SQL"/>
<data name="Category" value="Kudu"/>
+ <data name="Category" value="Iceberg"/>
<data name="Category" value="ETL"/>
<data name="Category" value="Ingest"/>
<data name="Category" value="DML"/>
@@ -39,8 +40,8 @@ under the License.
<p>
<indexterm audience="hidden">UPDATE statement</indexterm>
- Updates an arbitrary number of rows in a Kudu table.
- This statement only works for Impala tables that use the Kudu storage engine.
+ This statement only works for Kudu and Iceberg tables.
+ Updates an arbitrary number of rows in a target table.
</p>
<p conref="../shared/impala_common.xml#common/syntax_blurb"/>
@@ -68,6 +69,23 @@ UPDATE [<varname>database_name</varname>.]<varname>table_name</varname> SET <var
If the <codeph>WHERE</codeph> clause is omitted, all rows in the table are updated.
</p>
+ <p>
+ The number of affected rows is reported in an <cmdname>impala-shell</cmdname> message
+ and in the query profile.
+ </p>
+
+ <p>
+ The optional <codeph>FROM</codeph> clause lets you restrict the
+ updates to only the rows in the specified table that are part
+ of the result set for a join query. The join clauses can include
+ tables with any format, but the table from which the rows are deleted
+ must be a Kudu or Iceberg table.
+ </p>
+
+
+ <p>
+ <b>Kudu considerations</b>
+ </p>
<p>
The conditions in the <codeph>WHERE</codeph> clause can refer to
any combination of primary key columns or other columns. Referring to
@@ -101,19 +119,6 @@ UPDATE [<varname>database_name</varname>.]<varname>table_name</varname> SET <var
</li>
</ul>
- <p>
- The number of affected rows is reported in an <cmdname>impala-shell</cmdname> message
- and in the query profile.
- </p>
-
- <p>
- The optional <codeph>FROM</codeph> clause lets you restrict the
- updates to only the rows in the specified table that are part
- of the result set for a join query. The join clauses can include
- non-Kudu tables, but the table from which the rows are deleted
- must be a Kudu table.
- </p>
-
<p conref="../shared/impala_common.xml#common/dml_blurb_kudu"/>
<note conref="../shared/impala_common.xml#common/compute_stats_next"/>
@@ -129,16 +134,16 @@ UPDATE [<varname>database_name</varname>.]<varname>table_name</varname> SET <var
-- Set all rows to the same value for column c3.
-- In this case, c1 and c2 are primary key columns
-- and so cannot be updated.
-UPDATE kudu_table SET c3 = 'not applicable';
+UPDATE target_table SET c3 = 'not applicable';
-- Update only the rows that match the condition.
-UPDATE kudu_table SET c3 = NULL WHERE c1 > 100 AND c3 IS NULL;
+UPDATE target_table SET c3 = NULL WHERE c1 > 100 AND c3 IS NULL;
-- Does not update any rows, because the WHERE condition is always false.
-UPDATE kudu_table SET c3 = 'impossible' WHERE 1 = 0;
+UPDATE target_table SET c3 = 'impossible' WHERE 1 = 0;
-- Change the values of multiple columns in a single UPDATE statement.
-UPDATE kudu_table SET c3 = upper(c3), c4 = FALSE, c5 = 0 WHERE c6 = TRUE;
+UPDATE target_table SET c3 = upper(c3), c4 = FALSE, c5 = 0 WHERE c6 = TRUE;
</codeblock>
<p>
@@ -149,22 +154,22 @@ UPDATE kudu_table SET c3 = upper(c3), c4 = FALSE, c5 = 0 WHERE c6 = TRUE;
<codeblock>
-- Uppercase a column value, only for rows that have
-- an ID that matches the value from another table.
-UPDATE kudu_table SET c3 = upper(c3)
- FROM kudu_table JOIN non_kudu_table
- ON kudu_table.id = non_kudu_table.id;
+UPDATE target_table SET c3 = upper(c3)
+ FROM target_table JOIN other_table
+ ON target_table.id = other_table.id;
-- Same effect as previous statement.
-- Assign table aliases in FROM clause, then refer to
-- short names elsewhere in the statement.
UPDATE t1 SET c3 = upper(c3)
- FROM kudu_table t1 JOIN non_kudu_table t2
+ FROM target_table t1 JOIN other_table t2
ON t1.id = t2.id;
-- Same effect as previous statements, but more efficient.
-- Use WHERE clause to skip updating values that are
-- already uppercase.
UPDATE t1 SET c3 = upper(c3)
- FROM kudu_table t1 JOIN non_kudu_table t2
+ FROM target_table t1 JOIN other_table t2
ON t1.id = t2.id
WHERE c3 != upper(c3);
</codeblock>