You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by jo...@apache.org on 2023/08/17 21:06:23 UTC

[impala] branch master updated (4a5057d27 -> 5d0a2f01a)

This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


    from 4a5057d27 IMPALA-11195: (Addendum) use actual SSL context
     new 12276c79f IMPALA-12335: [DOCS] Add documentation about the DELETE statement
     new 5d0a2f01a IMPALA-12372: Only use -Wno-deprecated-declaration for OpenSSL3

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .clang-tidy                    |  1 +
 be/CMakeLists.txt              | 21 +++++++++-----
 be/src/gutil/CMakeLists.txt    |  2 +-
 docs/impala_keydefs.ditamap    |  1 +
 docs/topics/impala_delete.xml  | 35 +++++++++++++----------
 docs/topics/impala_iceberg.xml | 63 ++++++++++++++++++++++++++++++++++++++----
 6 files changed, 96 insertions(+), 27 deletions(-)


[impala] 02/02: IMPALA-12372: Only use -Wno-deprecated-declaration for OpenSSL3

Posted by jo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 5d0a2f01a52f2660acc1f0f4b3214ca6ecfa66ce
Author: Joe McDonnell <jo...@cloudera.com>
AuthorDate: Wed Aug 16 08:43:00 2023 -0700

    IMPALA-12372: Only use -Wno-deprecated-declaration for OpenSSL3
    
    Redhat 9 and Ubuntu 22.04 both use OpenSSL3, which deprecated
    several APIs that we use. To support those platforms, we added
    the -Wno-deprecated-declarations flag to the build. Historically, the
    Impala build has also specified -Wno-deprecated due to
    use of deprecated headers in gutils. These flags limit our
    ability to notice use of deprecated code in other parts of the
    code.
    
    The code in gutils no longer requires -Wno-deprecated, so
    this removes it completely. Additionally, this limits the
    -Wno-deprecated-declarations flag to machines using
    OpenSSL 3.
    
    Reenabling deprecation warnings also reenables Clang Tidy's
    clang-diagnostic-deprecated enforcement. This is currently
    broken, so this turns off clang-diagnostic-deprecated
    until it can be addressed properly.
    
    Testing:
     - Ran build-all-options on Ubuntu 22 and Ubuntu 16
     - Ran a Rocky 9.2 build
    
    Change-Id: I1b36450d084f342eeab5dac2272580ab6b0c988b
    Reviewed-on: http://gerrit.cloudera.org:8080/20369
    Reviewed-by: Laszlo Gaal <la...@cloudera.com>
    Reviewed-by: Zoltan Borok-Nagy <bo...@cloudera.com>
    Tested-by: Joe McDonnell <jo...@cloudera.com>
---
 .clang-tidy                 |  1 +
 be/CMakeLists.txt           | 21 ++++++++++++++-------
 be/src/gutil/CMakeLists.txt |  2 +-
 3 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/.clang-tidy b/.clang-tidy
index cc70284e2..47bce4d00 100644
--- a/.clang-tidy
+++ b/.clang-tidy
@@ -36,6 +36,7 @@ Checks: "-*,clang*,\
 -clang-diagnostic-class-varargs,\
 -clang-diagnostic-conversion,\
 -clang-diagnostic-covered-switch-default,\
+-clang-diagnostic-deprecated,\
 -clang-diagnostic-disabled-macro-expansion,\
 -clang-diagnostic-documentation-html,\
 -clang-diagnostic-documentation-unknown-command,\
diff --git a/be/CMakeLists.txt b/be/CMakeLists.txt
index 908b34a2d..e15d21c95 100644
--- a/be/CMakeLists.txt
+++ b/be/CMakeLists.txt
@@ -36,10 +36,6 @@ option(BUILD_WITH_NO_TESTS "Do not generate test and benchmark targets" OFF)
 #  -Wno-unknown-pragmas: suppress warnings for unknown (compiler specific) pragmas
 #  -fsigned-char: on aarch64 platform, type of char default is unsigned char, here
 #        set it to signed-char to be compatible with x86-64
-#  -Wno-deprecated: gutil contains deprecated headers
-#  -Wno-deprecated-declarations: OpenSSL3 deprecated various APIs currently used by
-#       Impala, so this disables those warnings until they can be addressed.
-#       See IMPALA-12226.
 #  -Wno-vla: we use C99-style variable-length arrays
 #  -pthread: enable multithreaded malloc
 #  -DBOOST_DATE_TIME_POSIX_TIME_STD_CONFIG: enable nanosecond precision for boost
@@ -51,7 +47,7 @@ SET(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wall -Wno-sign-compare -Wno-unknown-p
 SET(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -fno-strict-aliasing -fno-omit-frame-pointer")
 SET(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -fsigned-char")
 SET(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -std=c++17")
-SET(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-deprecated -Wno-deprecated-declarations -Wno-vla")
+SET(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-vla")
 SET(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -DBOOST_DATE_TIME_POSIX_TIME_STD_CONFIG")
 SET(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -DBOOST_SYSTEM_NO_DEPRECATED")
 SET(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -DBOOST_BIND_GLOBAL_PLACEHOLDERS")
@@ -63,6 +59,12 @@ SET(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -DBOOST_ALLOW_DEPRECATED_HEADERS")
 #      isn't supported (e.g., that might happen in containerized deployments).
 SET(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -DBOOST_UUID_RANDOM_PROVIDER_FORCE_POSIX")
 SET(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -B $ENV{IMPALA_TOOLCHAIN_PACKAGES_HOME}/binutils-$ENV{IMPALA_BINUTILS_VERSION}/bin/")
+#  -Wno-deprecated-declarations: OpenSSL3 deprecated various APIs currently used by
+#      Impala, so this disables those warnings when using OpenSSL3 until they can be
+#      addressed. See IMPALA-12226.
+if (OPENSSL_VERSION VERSION_GREATER_EQUAL 3)
+  SET(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-deprecated-declarations")
+endif()
 IF($ENV{USE_GOLD_LINKER} STREQUAL "true")
   SET(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -fuse-ld=gold")
 ENDIF()
@@ -291,15 +293,20 @@ add_definitions(-DKUDU_HEADERS_USE_RICH_SLICE -DKUDU_HEADERS_NO_STUBS)
 # Set clang flags for cross-compiling to IR.
 # IR_COMPILE is #defined for the cross compile to remove code that bloats the IR.
 # Optimization is omitted and left up to individual uses.
-#  -Wno-deprecated: gutil contains deprecated headers
 #  -Wno-return-type-c-linkage: UDFs return C++ classes but use C linkage to prevent
 #       mangling
 #  -DBOOST_NO_EXCEPTIONS: call a custom error handler for exceptions in codegen'd code.
 set(CLANG_IR_CXX_FLAGS "-emit-llvm" "-c" "-std=c++17" "-DIR_COMPILE" "-DHAVE_INTTYPES_H"
   "-DHAVE_NETINET_IN_H" "-DBOOST_DATE_TIME_POSIX_TIME_STD_CONFIG" "-DBOOST_NO_EXCEPTIONS"
   "-DBOOST_BIND_GLOBAL_PLACEHOLDERS" "-DBOOST_ALLOW_DEPRECATED_HEADERS"
-  "-DKUDU_HEADERS_NO_STUBS" "-fcolor-diagnostics" "-Wno-deprecated"
+  "-DKUDU_HEADERS_NO_STUBS" "-fcolor-diagnostics"
   "-Wno-return-type-c-linkage" "-fsigned-char")
+#  -Wno-deprecated-declarations: OpenSSL3 deprecated various APIs currently used by
+#      Impala, so this disables those warnings when using OpenSSL3 until they can be
+#      addressed. See IMPALA-12226.
+if (OPENSSL_VERSION VERSION_GREATER_EQUAL 3)
+  SET(CLANG_IR_CXX_FLAGS "${CLANG_IR_CXX_FLAGS}" "-Wno-deprecated-declarations")
+endif()
 
 if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
   set(CLANG_IR_CXX_FLAGS "${CLANG_IR_CXX_FLAGS}" "-march=armv8-a+crc"
diff --git a/be/src/gutil/CMakeLists.txt b/be/src/gutil/CMakeLists.txt
index 6bb2ccb5f..7e12e84cb 100644
--- a/be/src/gutil/CMakeLists.txt
+++ b/be/src/gutil/CMakeLists.txt
@@ -67,7 +67,7 @@ ADD_EXPORTABLE_LIBRARY(gutil
   SRCS ${GUTIL_SRCS}
   DEPS ${GUTIL_LIBS}
   # Disable warnings which trigger a lot in the Google code:
-  COMPILE_FLAGS "-funsigned-char -Wno-deprecated -Wno-char-subscripts")
+  COMPILE_FLAGS "-funsigned-char -Wno-char-subscripts")
 
 if (BUILD_WITH_NO_TESTS)
   return()


[impala] 01/02: IMPALA-12335: [DOCS] Add documentation about the DELETE statement

Posted by jo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 12276c79f9975dc63322138ea56290434a49221d
Author: Zoltan Borok-Nagy <bo...@cloudera.com>
AuthorDate: Tue Aug 15 14:47:03 2023 +0200

    IMPALA-12335: [DOCS] Add documentation about the DELETE statement
    
    IMPALA-11877 added support for the DELETE statement for Iceberg
    tables. This patch documents this feature.
    
    Change-Id: If111a7ecd20bda2d4928332ef2ccd905814cb203
    Reviewed-on: http://gerrit.cloudera.org:8080/20361
    Reviewed-by: Zoltan Borok-Nagy <bo...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 docs/impala_keydefs.ditamap    |  1 +
 docs/topics/impala_delete.xml  | 35 +++++++++++++----------
 docs/topics/impala_iceberg.xml | 63 ++++++++++++++++++++++++++++++++++++++----
 3 files changed, 80 insertions(+), 19 deletions(-)

diff --git a/docs/impala_keydefs.ditamap b/docs/impala_keydefs.ditamap
index 737da1089..601d8208a 100644
--- a/docs/impala_keydefs.ditamap
+++ b/docs/impala_keydefs.ditamap
@@ -10529,6 +10529,7 @@ under the License.
   <keydef href="https://issues.apache.org/jira/browse/IMPALA-9999" scope="external" format="html" keys="IMPALA-9999"/>
 
 <!-- Short form of mapping from Impala release to vendor-specific releases, for use in headings. -->
+  <keydef keys="impala43"><topicmeta><keywords><keyword>Impala 4.3</keyword></keywords></topicmeta></keydef>
   <keydef keys="impala42"><topicmeta><keywords><keyword>Impala 4.2</keyword></keywords></topicmeta></keydef>
   <keydef keys="impala41"><topicmeta><keywords><keyword>Impala 4.1</keyword></keywords></topicmeta></keydef>
   <keydef keys="impala40"><topicmeta><keywords><keyword>Impala 4.0</keyword></keywords></topicmeta></keydef>
diff --git a/docs/topics/impala_delete.xml b/docs/topics/impala_delete.xml
index c8591b6bb..5611e9d42 100644
--- a/docs/topics/impala_delete.xml
+++ b/docs/topics/impala_delete.xml
@@ -27,6 +27,7 @@ under the License.
       <data name="Category" value="Impala"/>
       <data name="Category" value="SQL"/>
       <data name="Category" value="Kudu"/>
+      <data name="Category" value="Iceberg"/>
       <data name="Category" value="ETL"/>
       <data name="Category" value="Ingest"/>
       <data name="Category" value="DML"/>
@@ -39,8 +40,8 @@ under the License.
 
     <p>
       <indexterm audience="hidden">DELETE statement</indexterm>
-      Deletes an arbitrary number of rows from a Kudu table.
-      This statement only works for Impala tables that use the Kudu storage engine.
+      Deletes an arbitrary number of rows from a table.
+      This statement only works for Kudu (from <keyword keyref="impala28"/>) and Iceberg tables (from <keyword keyref="impala43"/>).
     </p>
 
     <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
@@ -61,8 +62,8 @@ DELETE <varname>table_ref</varname> FROM [<varname>joined_table_refs</varname>]
     <p>
       The second form evaluates one or more join clauses, and deletes
       all matching rows from one of the tables. The join clauses can
-      include non-Kudu tables, but the table from which the rows
-      are deleted must be a Kudu table. The <codeph>FROM</codeph>
+      include tables of any kind, but the table from which the rows
+      are deleted must be a Kudu or Iceberg table. The <codeph>FROM</codeph>
       keyword is required in this case, to separate the name of
       the table whose rows are being deleted from the table names
       of the join clauses.
@@ -80,7 +81,7 @@ DELETE <varname>table_ref</varname> FROM [<varname>joined_table_refs</varname>]
       The conditions in the <codeph>WHERE</codeph> clause can refer to
       any combination of primary key columns or other columns. Referring to
       primary key columns in the <codeph>WHERE</codeph> clause is more efficient
-      than referring to non-primary key columns.
+      than referring to non-primary key columns (in case of Kudu tables).
     </p>
 
     <p>
@@ -113,6 +114,11 @@ DELETE <varname>table_ref</varname> FROM [<varname>joined_table_refs</varname>]
       </li>
     </ul>
 
+    <p>
+      Iceberg also allows concurrent DELETE operations, in which case the concurrent DELETEs might
+      remove the same rows. This won't corrupt the table as Iceberg allows such behavior.
+    </p>
+
     <p>
       The number of affected rows is reported in an <cmdname>impala-shell</cmdname> message
       and in the query profile.
@@ -132,16 +138,16 @@ DELETE <varname>table_ref</varname> FROM [<varname>joined_table_refs</varname>]
 
 <codeblock>
 -- Deletes all rows. The FROM keyword is optional.
-DELETE FROM kudu_table;
-DELETE kudu_table;
+DELETE FROM table;
+DELETE table;
 
 -- Deletes 0, 1, or more rows.
 -- (If c1 is a single-column primary key, the statement could only
 -- delete 0 or 1 rows.)
-DELETE FROM kudu_table WHERE c1 = 100;
+DELETE FROM table WHERE c1 = 100;
 
 -- Deletes all rows that match all the WHERE conditions.
-DELETE FROM kudu_table WHERE
+DELETE FROM table WHERE
   (c1 > c2 OR c3 IN ('hello','world')) AND c4 IS NOT NULL;
 DELETE FROM t1 WHERE
   (c1 IN (1,2,3) AND c2 > c3) OR c4 IS NOT NULL;
@@ -153,7 +159,7 @@ DELETE FROM t1 WHERE
   c5 IN (SELECT DISTINCT other_col FROM other_table);
 
 -- Does not delete any rows, because the WHERE condition is always false.
-DELETE FROM kudu_table WHERE 1 = 0;
+DELETE FROM table WHERE 1 = 0;
 </codeblock>
 
     <p>
@@ -169,17 +175,18 @@ DELETE t1 FROM t1 JOIN t2 ON t1.x = t2.x;
 DELETE t1 FROM t1 JOIN t2 ON t1.x = t2.x
   WHERE t1.y = FALSE and t2.z > 100;
 
--- Delete from a Kudu table based on a join with a non-Kudu table.
-DELETE t1 FROM kudu_table t1 JOIN non_kudu_table t2 ON t1.x = t2.x;
+-- Delete from a table based on a join with another table.
+DELETE t1 FROM table t1 JOIN other_table t2 ON t1.x = t2.x;
 
--- The tables can be joined in any order as long as the Kudu table
+-- The tables can be joined in any order as long as the Kudu or Iceberg table
 -- is specified as the deletion target.
-DELETE t2 FROM non_kudu_table t1 JOIN kudu_table t2 ON t1.x = t2.x;
+DELETE t2 FROM non_kudu_non_ice_table t1 JOIN kudu_or_ice_table t2 ON t1.x = t2.x;
 </codeblock>
 
     <p conref="../shared/impala_common.xml#common/related_info"/>
 
     <p>
+      <xref href="impala_iceberg.xml#impala_iceberg"/>,
       <xref href="impala_kudu.xml#impala_kudu"/>, <xref href="impala_insert.xml#insert"/>,
       <xref href="impala_update.xml#update"/>, <xref href="impala_upsert.xml#upsert"/>
     </p>
diff --git a/docs/topics/impala_iceberg.xml b/docs/topics/impala_iceberg.xml
index 1504efcdb..66dd7fd8c 100644
--- a/docs/topics/impala_iceberg.xml
+++ b/docs/topics/impala_iceberg.xml
@@ -43,10 +43,6 @@ under the License.
       Iceberg Catalogs (e.g. HiveCatalog, HadoopCatalog). It also supports location-based
       tables (HadoopTables).
     </p>
-    <p>
-      Currently only Iceberg V1 DML operations are allowed, i.e. INSERT INTO /INSERT OVERWRITE.
-      Iceberg V2 operations like row-level modifications (UPDATE, DELETE) are not supported yet.
-    </p>
 
     <p>
       For more information on Iceberg, see <xref keyref="upstream_iceberg_site"/>.
@@ -214,6 +210,37 @@ CREATE TABLE ice_ctas_part_spec PARTITIONED BY SPEC (truncate(3, s)) STORED AS I
     </conbody>
   </concept>
 
+  <concept id="iceberg_v2">
+    <title>Iceberg V2 tables</title>
+    <conbody>
+      <p>
+        Iceberg V2 tables support row-level modifications (DELETE, UPDATE) via "merge-on-read", which means instead
+        of rewriting existing data files, separate so-called delete files are being written that store information
+        about the deleted records. There are two kinds of delete files in Iceberg:
+        <ul>
+          <li>position deletes</li>
+          <li>equality deletes</li>
+        </ul>
+        Impala only supports position delete files. These files contain the file path and file position of the deleted
+        rows.
+      </p>
+      <p>
+        One can create Iceberg V2 tables via the <codeph>CREATE TABLE</codeph> statement by specifying
+        the 'format-version' table property:
+        <codeblock>
+CREATE TABLE ice_v2 (i int) STORED BY ICEBERG TBLPROPERTIES('format-version'='2');
+        </codeblock>
+      </p>
+      <p>
+        It is also possible to upgrade existing Iceberg V1 tables to Iceberg V2 tables. One can use the following
+        <codeph>ALTER TABLE</codeph> statement to do so:
+        <codeblock>
+ALTER TABLE ice_v1_to_v2 SET TBLPROPERTIES('format-version'='2');
+        </codeblock>
+      </p>
+    </conbody>
+  </concept>
+
   <concept id="iceberg_drop">
     <title>Dropping Iceberg tables</title>
     <conbody>
@@ -418,7 +445,7 @@ CREATE TABLE ice_p (i INT, b INT) PARTITIONED BY (p1 INT, p2 STRING) STORED AS I
   </concept>
 
   <concept id="iceberg_inserts">
-    <title>Writing Iceberg tables</title>
+    <title>Inserting data into Iceberg tables</title>
     <conbody>
       <p>
         Impala is also able to insert new data to Iceberg tables. Currently the <codeph>INSERT INTO</codeph>
@@ -447,6 +474,32 @@ INSERT INTO ice_p VALUES (1, 2);
     </conbody>
   </concept>
 
+  <concept id="iceberg_delete">
+    <title>Deleting data from Iceberg tables</title>
+    <conbody>
+      <p>
+        Since <keyword keyref="impala43"/> Impala is able to run <codeph>DELETE</codeph> statements against
+        Iceberg V2 tables. E.g.:
+        <codeblock>
+DELETE FROM ice_t where i = 3;
+        </codeblock>
+      </p>
+      <p>
+        More information about the <codeph>DELETE</codeph> statement can be found at <xref href="impala_delete.xml#delete"/>.
+      </p>
+    </conbody>
+  </concept>
+
+  <concept id="iceberg_update">
+    <title>Updating data in Iceberg tables</title>
+    <conbody>
+      <p>
+        Impala does not yet support the UPDATE statement. One can mimic its behaviour by calling
+        DELETE + INSERT statements one after the other, but please note that this creates two transactions.
+      </p>
+    </conbody>
+  </concept>
+
   <concept id="iceberg_load">
     <title>Loading data into Iceberg tables</title>
     <conbody>