You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by bo...@apache.org on 2024/01/02 14:20:17 UTC

(impala) branch master updated: IMPALA-12653: Update documentation about the UPDATE statement

This is an automated email from the ASF dual-hosted git repository.

boroknagyz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


The following commit(s) were added to refs/heads/master by this push:
     new dea8546d8 IMPALA-12653: Update documentation about the UPDATE statement
dea8546d8 is described below

commit dea8546d80323f706d48c5f3ccc310b9ccbd1eae
Author: Zoltan Borok-Nagy <bo...@cloudera.com>
AuthorDate: Tue Dec 19 16:00:59 2023 +0100

    IMPALA-12653: Update documentation about the UPDATE statement
    
    This patch adds documentation about the UPDATE statement.
    
    Change-Id: I2a4f3dcdba5faaa7dffda60b8590d09e6a92a165
    Reviewed-on: http://gerrit.cloudera.org:8080/20818
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
    Reviewed-by: Noemi Pap-Takacs <np...@cloudera.com>
    Reviewed-by: Andrew Sherman <as...@cloudera.com>
---
 docs/impala_keydefs.ditamap    |  1 +
 docs/topics/impala_iceberg.xml | 32 +++++++++++++++++++++++--
 docs/topics/impala_update.xml  | 53 +++++++++++++++++++++++-------------------
 3 files changed, 60 insertions(+), 26 deletions(-)

diff --git a/docs/impala_keydefs.ditamap b/docs/impala_keydefs.ditamap
index ca1582410..f97d10a6f 100644
--- a/docs/impala_keydefs.ditamap
+++ b/docs/impala_keydefs.ditamap
@@ -10529,6 +10529,7 @@ under the License.
   <keydef href="https://issues.apache.org/jira/browse/IMPALA-9999" scope="external" format="html" keys="IMPALA-9999"/>
 
 <!-- Short form of mapping from Impala release to vendor-specific releases, for use in headings. -->
+  <keydef keys="impala44"><topicmeta><keywords><keyword>Impala 4.4</keyword></keywords></topicmeta></keydef>
   <keydef keys="impala43"><topicmeta><keywords><keyword>Impala 4.3</keyword></keywords></topicmeta></keydef>
   <keydef keys="impala42"><topicmeta><keywords><keyword>Impala 4.2</keyword></keywords></topicmeta></keydef>
   <keydef keys="impala41"><topicmeta><keywords><keyword>Impala 4.1</keyword></keywords></topicmeta></keydef>
diff --git a/docs/topics/impala_iceberg.xml b/docs/topics/impala_iceberg.xml
index 66dd7fd8c..fce5aaa76 100644
--- a/docs/topics/impala_iceberg.xml
+++ b/docs/topics/impala_iceberg.xml
@@ -494,8 +494,36 @@ DELETE FROM ice_t where i = 3;
     <title>Updating data int Iceberg tables</title>
     <conbody>
       <p>
-        Impala does not yet support the UPDATE statement. One can mimic its behaviour by calling
-        DELETE + INSERT statements one after the other, but please note that this creates two transactions.
+        Since <keyword keyref="impala44"/> Impala is able to run <codeph>UPDATE</codeph> statements against
+        Iceberg V2 tables. E.g.:
+        <codeblock>
+UPDATE ice_t SET val = val + 1;
+UPDATE ice_t SET k = 4 WHERE i = 5;
+UPDATE ice_t SET ice_t.k = o.k, ice_t.j = o.j, FROM ice_t, other_table o where ice_t.id = o.id;
+        </codeblock>
+      </p>
+      <p>
+        The UPDATE FROM statement can be used to update a target Iceberg table based on a source table (or view) that doesn't need
+        to be an Iceberg table. If there are multiple matches on the JOIN condition, Impala will raise an error.
+      </p>
+      <p>
+        Limitations:
+        <ul>
+          <li>Only the merge-on-read update mode is supported.</li>
+          <li>Only writes position delete files, i.e. no support for writing equality deletes.</li>
+          <li>Cannot update tables with complex types.</li>
+          <li>
+            Can only write data and delete files in Parquet format. This means if table properties 'write.format.default'
+            and 'write.delete.format.default' are set, their values must be PARQUET.
+          </li>
+          <li>
+            Updating partitioning column with non-constant expression via the UPDATE FROM statement is not allowed.
+            The upcoming MERGE statement will not have this limitation.
+          </li>
+        </ul>
+      </p>
+      <p>
+        More information about the <codeph>UPDATE</codeph> statement can be found at <xref href="impala_update.xml#update"/>.
       </p>
     </conbody>
   </concept>
diff --git a/docs/topics/impala_update.xml b/docs/topics/impala_update.xml
index 0d3250b71..598a81f4e 100644
--- a/docs/topics/impala_update.xml
+++ b/docs/topics/impala_update.xml
@@ -27,6 +27,7 @@ under the License.
       <data name="Category" value="Impala"/>
       <data name="Category" value="SQL"/>
       <data name="Category" value="Kudu"/>
+      <data name="Category" value="Iceberg"/>
       <data name="Category" value="ETL"/>
       <data name="Category" value="Ingest"/>
       <data name="Category" value="DML"/>
@@ -39,8 +40,8 @@ under the License.
 
     <p>
       <indexterm audience="hidden">UPDATE statement</indexterm>
-      Updates an arbitrary number of rows in a Kudu table.
-      This statement only works for Impala tables that use the Kudu storage engine.
+      This statement only works for Kudu and Iceberg tables.
+      Updates an arbitrary number of rows in a target table.
     </p>
 
     <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
@@ -68,6 +69,23 @@ UPDATE [<varname>database_name</varname>.]<varname>table_name</varname> SET <var
       If the <codeph>WHERE</codeph> clause is omitted, all rows in the table are updated.
     </p>
 
+    <p>
+      The number of affected rows is reported in an <cmdname>impala-shell</cmdname> message
+      and in the query profile.
+    </p>
+
+    <p>
+      The optional <codeph>FROM</codeph> clause lets you restrict the
+      updates to only the rows in the specified table that are part
+      of the result set for a join query. The join clauses can include
+      tables with any format, but the table from which the rows are deleted
+      must be a Kudu or Iceberg table.
+    </p>
+
+
+    <p>
+      <b>Kudu considerations</b>
+    </p>
     <p>
       The conditions in the <codeph>WHERE</codeph> clause can refer to
       any combination of primary key columns or other columns. Referring to
@@ -101,19 +119,6 @@ UPDATE [<varname>database_name</varname>.]<varname>table_name</varname> SET <var
       </li>
     </ul>
 
-    <p>
-      The number of affected rows is reported in an <cmdname>impala-shell</cmdname> message
-      and in the query profile.
-    </p>
-
-    <p>
-      The optional <codeph>FROM</codeph> clause lets you restrict the
-      updates to only the rows in the specified table that are part
-      of the result set for a join query. The join clauses can include
-      non-Kudu tables, but the table from which the rows are deleted
-      must be a Kudu table.
-    </p>
-
     <p conref="../shared/impala_common.xml#common/dml_blurb_kudu"/>
 
     <note conref="../shared/impala_common.xml#common/compute_stats_next"/>
@@ -129,16 +134,16 @@ UPDATE [<varname>database_name</varname>.]<varname>table_name</varname> SET <var
 -- Set all rows to the same value for column c3.
 -- In this case, c1 and c2 are primary key columns
 -- and so cannot be updated.
-UPDATE kudu_table SET c3 = 'not applicable';
+UPDATE target_table SET c3 = 'not applicable';
 
 -- Update only the rows that match the condition.
-UPDATE kudu_table SET c3 = NULL WHERE c1 > 100 AND c3 IS NULL;
+UPDATE target_table SET c3 = NULL WHERE c1 > 100 AND c3 IS NULL;
 
 -- Does not update any rows, because the WHERE condition is always false.
-UPDATE kudu_table SET c3 = 'impossible' WHERE 1 = 0;
+UPDATE target_table SET c3 = 'impossible' WHERE 1 = 0;
 
 -- Change the values of multiple columns in a single UPDATE statement.
-UPDATE kudu_table SET c3 = upper(c3), c4 = FALSE, c5 = 0 WHERE c6 = TRUE;
+UPDATE target_table SET c3 = upper(c3), c4 = FALSE, c5 = 0 WHERE c6 = TRUE;
 </codeblock>
 
     <p>
@@ -149,22 +154,22 @@ UPDATE kudu_table SET c3 = upper(c3), c4 = FALSE, c5 = 0 WHERE c6 = TRUE;
 <codeblock>
 -- Uppercase a column value, only for rows that have
 -- an ID that matches the value from another table.
-UPDATE kudu_table SET c3 = upper(c3)
-  FROM kudu_table JOIN non_kudu_table
-  ON kudu_table.id = non_kudu_table.id;
+UPDATE target_table SET c3 = upper(c3)
+  FROM target_table JOIN other_table
+  ON target_table.id = other_table.id;
 
 -- Same effect as previous statement.
 -- Assign table aliases in FROM clause, then refer to
 -- short names elsewhere in the statement.
 UPDATE t1 SET c3 = upper(c3)
-  FROM kudu_table t1 JOIN non_kudu_table t2
+  FROM target_table t1 JOIN other_table t2
   ON t1.id = t2.id;
 
 -- Same effect as previous statements, but more efficient.
 -- Use WHERE clause to skip updating values that are
 -- already uppercase.
 UPDATE t1 SET c3 = upper(c3)
-  FROM kudu_table t1 JOIN non_kudu_table t2
+  FROM target_table t1 JOIN other_table t2
   ON t1.id = t2.id
   WHERE c3 != upper(c3);
 </codeblock>