You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2023/06/08 10:10:45 UTC

[doris] branch master updated: [Fix](multi-catalog) Fix be crashed when query hive table after schema changed(new column added). (#20537)

This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 4faee4d8fd [Fix](multi-catalog) Fix be crashed when query  hive table after schema changed(new column added). (#20537)
4faee4d8fd is described below

commit 4faee4d8fd72d1edf4bc681abe5c22fb572cfbcb
Author: Qi Chen <ka...@gmail.com>
AuthorDate: Thu Jun 8 18:10:36 2023 +0800

    [Fix](multi-catalog) Fix be crashed when query  hive table after schema changed(new column added). (#20537)
    
    Fix a BE crash when querying a Hive table after its schema has changed (a new column was added).
    
    Regression Test: test_hive_schema_evolution.groovy
---
 be/src/vec/exec/format/orc/vorc_reader.cpp         | 16 +++--
 .../hive/scripts/create_preinstalled_table.hql     | 27 ++++++++
 .../hive/test_hive_schema_evolution.out            | 37 +++++++++++
 .../hive/test_hive_schema_evolution.groovy         | 75 ++++++++++++++++++++++
 4 files changed, 150 insertions(+), 5 deletions(-)

diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp b/be/src/vec/exec/format/orc/vorc_reader.cpp
index 058af519b1..0ba81cf986 100644
--- a/be/src/vec/exec/format/orc/vorc_reader.cpp
+++ b/be/src/vec/exec/format/orc/vorc_reader.cpp
@@ -273,11 +273,17 @@ Status OrcReader::_init_read_columns() {
     for (auto& col_name : _column_names) {
         if (_is_hive) {
             auto iter = _scan_params.slot_name_to_schema_pos.find(col_name);
-            int pos = iter->second;
-            if (_is_acid) {
-                orc_cols_lower_case[ACID_ROW_OFFSET + 1 + pos] = iter->first;
-            } else {
-                orc_cols_lower_case[pos] = iter->first;
+            if (iter != _scan_params.slot_name_to_schema_pos.end()) {
+                int pos = iter->second;
+                if (_is_acid) {
+                    if (ACID_ROW_OFFSET + 1 + pos < orc_cols_lower_case.size()) {
+                        orc_cols_lower_case[ACID_ROW_OFFSET + 1 + pos] = iter->first;
+                    }
+                } else {
+                    if (pos < orc_cols_lower_case.size()) {
+                        orc_cols_lower_case[pos] = iter->first;
+                    }
+                }
             }
         }
         auto iter = std::find(orc_cols_lower_case.begin(), orc_cols_lower_case.end(), col_name);
diff --git a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql
index 86cd6649a7..4c6108e9d4 100644
--- a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql
+++ b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql
@@ -594,4 +594,31 @@ CREATE TABLE `unsupported_type_table`(
   k6 int
 );
 
+CREATE TABLE `schema_evo_test_text`(
+  id int,
+  name string
+)
+ROW FORMAT DELIMITED FIELDS TERMINATED by ',';
+insert into `schema_evo_test_text` select 1, "kaka";
+alter table `schema_evo_test_text` ADD COLUMNS (`ts` timestamp);
+insert into `schema_evo_test_text` select 2, "messi", from_unixtime(to_unix_timestamp('20230101 13:01:03','yyyyMMdd HH:mm:ss'));
+
+CREATE TABLE `schema_evo_test_parquet`(
+  id int,
+  name string
+)
+stored as parquet;
+insert into `schema_evo_test_parquet` select 1, "kaka";
+alter table `schema_evo_test_parquet` ADD COLUMNS (`ts` timestamp);
+insert into `schema_evo_test_parquet` select 2, "messi", from_unixtime(to_unix_timestamp('20230101 13:01:03','yyyyMMdd HH:mm:ss'));
+
+CREATE TABLE `schema_evo_test_orc`(
+  id int,
+  name string
+)
+stored as orc;
+insert into `schema_evo_test_orc` select 1, "kaka";
+alter table `schema_evo_test_orc` ADD COLUMNS (`ts` timestamp);
+insert into `schema_evo_test_orc` select 2, "messi", from_unixtime(to_unix_timestamp('20230101 13:01:03','yyyyMMdd HH:mm:ss'));
+
 show tables;
diff --git a/regression-test/data/external_table_emr_p2/hive/test_hive_schema_evolution.out b/regression-test/data/external_table_emr_p2/hive/test_hive_schema_evolution.out
new file mode 100644
index 0000000000..1cb5cde151
--- /dev/null
+++ b/regression-test/data/external_table_emr_p2/hive/test_hive_schema_evolution.out
@@ -0,0 +1,37 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !q01 --
+1	kaka	\N
+2	messi	2023-01-01T13:01:03
+
+-- !q02 --
+1	kaka	\N
+2	messi	2023-01-01T13:01:03
+
+-- !q03 --
+\N
+2023-01-01T13:01:03
+
+-- !q01 --
+1	kaka	\N
+2	messi	2023-01-01T21:01:03
+
+-- !q02 --
+1	kaka	\N
+2	messi	2023-01-01T21:01:03
+
+-- !q03 --
+\N
+2023-01-01T21:01:03
+
+-- !q01 --
+1	kaka	\N
+2	messi	2023-01-01T13:01:03
+
+-- !q02 --
+1	kaka	\N
+2	messi	2023-01-01T13:01:03
+
+-- !q03 --
+\N
+2023-01-01T13:01:03
+
diff --git a/regression-test/suites/external_table_emr_p2/hive/test_hive_schema_evolution.groovy b/regression-test/suites/external_table_emr_p2/hive/test_hive_schema_evolution.groovy
new file mode 100644
index 0000000000..2cbe589e8b
--- /dev/null
+++ b/regression-test/suites/external_table_emr_p2/hive/test_hive_schema_evolution.groovy
@@ -0,0 +1,75 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_hive_schema_evolution", "p0") {
+    def q_text = {
+        qt_q01 """
+        select * from schema_evo_test_text order by id;
+        """
+        qt_q02 """
+        select id, name, ts from schema_evo_test_text order by id;
+        """
+        qt_q03 """
+        select ts from schema_evo_test_text order by id;
+        """
+    }
+
+    def q_parquet = {
+        qt_q01 """
+        select * from schema_evo_test_parquet order by id;
+        """
+        qt_q02 """
+        select id, name, ts from schema_evo_test_parquet order by id;
+        """
+        qt_q03 """
+        select ts from schema_evo_test_parquet order by id;
+        """
+    }
+
+    def q_orc = {
+        qt_q01 """
+        select * from schema_evo_test_orc order by id;
+        """
+        qt_q02 """
+        select id, name, ts from schema_evo_test_orc order by id;
+        """
+        qt_q03 """
+        select ts from schema_evo_test_orc order by id;
+        """
+    }
+
+    String enabled = context.config.otherConfigs.get("enableHiveTest")
+    if (enabled != null && enabled.equalsIgnoreCase("true")) {
+        try {
+            String hms_port = context.config.otherConfigs.get("hms_port")
+            String catalog_name = "test_hive_schema_evolution"
+            sql """drop catalog if exists ${catalog_name}"""
+            sql """create catalog if not exists ${catalog_name} properties (
+                "type"="hms",
+                'hive.metastore.uris' = 'thrift://127.0.0.1:${hms_port}'
+            );"""
+            sql """use `${catalog_name}`.`default`"""
+
+            q_text()
+            q_parquet()
+            q_orc()
+
+            sql """drop catalog if exists ${catalog_name}"""
+        } finally {
+        }
+    }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org