You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by jr...@apache.org on 2017/03/09 23:42:17 UTC

[2/5] incubator-impala git commit: IMPALA-4615: Fix create_table.sql command order

IMPALA-4615: Fix create_table.sql command order

INSERT OVERWRITE commands in Hive will only affect partitions that Hive
knows about. If an external table gets dropped and recreated, then
'MSCK REPAIR TABLE' needs to be executed to recover any preexisting
partitions. Otherwise, an INSERT OVERWRITE will not remove the data
files in those partitions and will fail to move the new data in place.

More information can be found here:
http://www.ericlin.me/hive-insert-overwrite-does-not-remove-existing-data

I tested the fix by running the following commands, making sure that the
second run of the .sql script completed without errors and validating
the number of lines was correct (10) after both runs.

export JDBC_URL="jdbc:hive2://${HS2_HOST_PORT}/default;"
export HS2_HOST_PORT=localhost:11050

beeline -n $USER -u "${JDBC_URL}" -f ${IMPALA_HOME}/testdata/avro_schema_resolution/create_table.sql
beeline -n $USER -u "${JDBC_URL}" -f ${IMPALA_HOME}/testdata/avro_schema_resolution/create_table.sql

Change-Id: I0f68eeb75ba2f43b96b8f3d82f902e291d3bd396
Reviewed-on: http://gerrit.cloudera.org:8080/6317
Reviewed-by: Lars Volker <lv...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/32d45f42
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/32d45f42
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/32d45f42

Branch: refs/heads/master
Commit: 32d45f42620621ee6d6faa4a7ca2d5c90ad6d8ee
Parents: 7d8acee
Author: Lars Volker <lv...@cloudera.com>
Authored: Wed Mar 8 10:07:46 2017 -0800
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Thu Mar 9 06:00:15 2017 +0000

----------------------------------------------------------------------
 testdata/avro_schema_resolution/create_table.sql | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/32d45f42/testdata/avro_schema_resolution/create_table.sql
----------------------------------------------------------------------
diff --git a/testdata/avro_schema_resolution/create_table.sql b/testdata/avro_schema_resolution/create_table.sql
index 71bff71..b1425e1 100644
--- a/testdata/avro_schema_resolution/create_table.sql
+++ b/testdata/avro_schema_resolution/create_table.sql
@@ -15,6 +15,9 @@
 -- specific language governing permissions and limitations
 -- under the License.
 
+-- NOTE: Queries in this file have to be executed against Hive and some won't work with
+-- Impala due to different type compatibility rules.
+
 USE functional_avro_snap;
 
 DROP TABLE IF EXISTS schema_resolution_test;
@@ -158,6 +161,10 @@ TBLPROPERTIES ('avro.schema.literal'='{
   {"name":"timestamp_col",  "type": "long"}
 ]}');
 
+-- Reload existing partitions from HDFS. Without this, the overwrite will fail to remove
+-- any preexisting data files, which in turn will fail the query.
+MSCK REPAIR TABLE avro_coldef;
+
 INSERT OVERWRITE TABLE avro_coldef PARTITION(year=2014, month=1)
 SELECT bool_col, tinyint_col, smallint_col, int_col, bigint_col,
 float_col, double_col, date_string_col, string_col, timestamp_col
@@ -196,11 +203,16 @@ TBLPROPERTIES ('avro.schema.literal'='{
   {"name":"extra_col",  "type": "string", "default": "null"}
 ]}');
 
+-- Reload existing partitions from HDFS. Without this, the overwrite will fail to remove
+-- any preexisting data files, which in turn will fail the query.
+MSCK REPAIR TABLE avro_extra_coldef;
+
 INSERT OVERWRITE TABLE avro_extra_coldef PARTITION(year=2014, month=2)
 SELECT bool_col, tinyint_col, smallint_col, int_col, bigint_col,
 float_col, double_col, date_string_col, string_col,
 timestamp_col, "avro" AS extra_col FROM
 (select * from functional.alltypes order by id limit 5) a;
 
+-- Reload the partitions for the first table once again. This will make sure that the new
+-- partition from the second insert shows up in the first table, too.
 MSCK REPAIR TABLE avro_coldef;
-MSCK REPAIR TABLE avro_extra_coldef;