You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by wz...@apache.org on 2021/11/24 06:37:39 UTC

[impala] 03/03: IMPALA-11029: DescriptorTable.copyTupleDescriptor throw exception for Kudu table

This is an automated email from the ASF dual-hosted git repository.

wzhou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit b13a17b9cff1f93c44a3a9143e1ba0769ee125f7
Author: wzhou-code <wz...@cloudera.com>
AuthorDate: Tue Nov 16 22:38:51 2021 -0800

    IMPALA-11029: DescriptorTable.copyTupleDescriptor throw exception for Kudu table
    
    In DescriptorTable.copyTupleDescriptor(), a TupleDescriptor object is
    created with slots copied from the source TupleDescriptor, but its "path"
    member variable is set to null. This makes the table associated with the
    copied TupleDescriptor object be detected as a non-Kudu table, so
    SlotDescriptor.isKuduStringSlot() always returns false for its copied
    slots.
    TupleDescriptor.getSlotSize() adds 4 padding bytes for a Kudu string slot.
    When this function is called to calculate the slot size of string-type
    slots for the copied TupleDescriptor object, the slot size will be 4 bytes
    less than the slot size calculated for the source TupleDescriptor if the
    table associated with the source TupleDescriptor is a Kudu table. This
    causes Preconditions.checkState(d.getByteSize() == src.getByteSize()) to
    fail and throw an exception.
    To fix it, we should copy "path" from source TupleDescriptor for the
    copied TupleDescriptor object before copying slots.
    
    Testing:
     - Added a new test case to verify the bug fix.
     - Passed exhaustive tests.
    
    Change-Id: Ib88e005910134f2b7575c06fa02ce11890418a41
    Reviewed-on: http://gerrit.cloudera.org:8080/18037
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 .../apache/impala/analysis/DescriptorTable.java    |  3 +-
 tests/query_test/test_kudu.py                      | 49 ++++++++++++++++++++++
 2 files changed, 51 insertions(+), 1 deletion(-)

diff --git a/fe/src/main/java/org/apache/impala/analysis/DescriptorTable.java b/fe/src/main/java/org/apache/impala/analysis/DescriptorTable.java
index 765697b..783a3a7 100644
--- a/fe/src/main/java/org/apache/impala/analysis/DescriptorTable.java
+++ b/fe/src/main/java/org/apache/impala/analysis/DescriptorTable.java
@@ -76,8 +76,9 @@ public class DescriptorTable {
   public TupleDescriptor copyTupleDescriptor(TupleId srcId, String debugName) {
     TupleDescriptor d = new TupleDescriptor(tupleIdGenerator_.getNextId(), debugName);
     tupleDescs_.put(d.getId(), d);
-    // create copies of slots
+    // Copy path from source and create copies of slots
     TupleDescriptor src = tupleDescs_.get(srcId);
+    if (src.getPath() != null) d.setPath(src.getPath());
     for (SlotDescriptor slot: src.getSlots()) {
       copySlotDescriptor(d, slot);
     }
diff --git a/tests/query_test/test_kudu.py b/tests/query_test/test_kudu.py
index 5d77540..890fac8 100644
--- a/tests/query_test/test_kudu.py
+++ b/tests/query_test/test_kudu.py
@@ -1565,3 +1565,52 @@ class TestKuduReadTokenSplit(KuduTestSuite):
     assert len(matches.groups()) == 1, plan
     self.client.clear_configuration()
     return int(matches.group(1))
+
+
+@SkipIfHive2.create_external_kudu_table
+class TestKuduInsertWithBufferedTupleDesc(KuduTestSuite):
+  """
+  This test verifies bug fixing for IMPALA-11029.
+  """
+
+  # queries to create Kudu tables.
+  _create_kudu_table_1_query = "CREATE TABLE {0} (id1 INT NOT NULL, " \
+      "agrmt INT NOT NULL, big_id BIGINT NOT NULL, outdated_flag STRING NOT NULL, " \
+      "mod_ts TIMESTAMP NOT NULL, PRIMARY KEY (id1, agrmt)) " \
+      "PARTITION BY HASH (id1) PARTITIONS 2 STORED AS KUDU"
+
+  _create_kudu_table_2_query = "CREATE TABLE {0} (cl_id INT NOT NULL, " \
+      "cl_agrmt INT NOT NULL, outdat STRING NULL, mod_dat TIMESTAMP NULL, " \
+      "PRIMARY KEY (cl_id, cl_agrmt)) " \
+      "PARTITION BY HASH (cl_id) PARTITIONS 2 STORED AS KUDU"
+
+  # query to insert rows to Kudu table.
+  _insert_query = "INSERT INTO {0} (id1, agrmt, big_id, outdated_flag, mod_ts) " \
+      "SELECT i.cl_id, cast(row_number() over(order by null) as int), i.cl_agrmt, 'Y', " \
+      "case when outdat='Y' and i.mod_dat is not null then i.mod_dat else now() end " \
+      "from {1} i left join {0} u on u.big_id=i.cl_agrmt " \
+      "left join (select id1, big_id from {0} group by id1, big_id) uu " \
+      "on uu.big_id=i.cl_agrmt " \
+      "where u.big_id is null"
+
+  @SkipIfKudu.no_hybrid_clock
+  def test_kudu_insert_with_buffered_tuple_desc(self, cursor, kudu_client,
+      unique_database):
+    # Create Kudu tables.
+    table_1_name = "%s.tab1" % unique_database
+    cursor.execute(self._create_kudu_table_1_query.format(table_1_name))
+    assert kudu_client.table_exists(
+        KuduTestSuite.to_kudu_table_name(unique_database, "tab1"))
+    table_2_name = "%s.tab2" % unique_database
+    cursor.execute(self._create_kudu_table_2_query.format(table_2_name))
+    assert kudu_client.table_exists(
+        KuduTestSuite.to_kudu_table_name(unique_database, "tab2"))
+
+    # Insert rows
+    try:
+      cursor.execute(self._insert_query.format(table_1_name, table_2_name))
+      cursor.execute("SELECT * FROM %s" % table_1_name)
+      assert len(cursor.fetchall()) == 0
+    except Exception as e:
+      # Not expect to throw exception like "IllegalStateException: null"
+      assert False, str(e)