You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by he...@apache.org on 2016/12/07 15:23:18 UTC
[3/4] incubator-impala git commit: IMPALA-3314: Fix Avro schema
loading for partitioned tables.
IMPALA-3314: Fix Avro schema loading for partitioned tables.
Bug: Commit 6f31c7 fixed a crash when setting Avro schemas for
tables with storage altered to Avro file format. However, the
fix was incomplete for partitioned/multi-file-format tables since
'hasAvroData_' is not set on all code paths that load
partitioned tables (for example, HdfsTable#loadAllPartitions()).
Fix: Moved the code for setting 'hasAvroData_' to addPartition(),
which is common to all code paths that add new partitions.
Also fixed the test coverage gap by adding a new test for partitioned
tables altered to Avro format.
Change-Id: I7854ff002b2277ec4a5388216218a1d5ad142de8
Reviewed-on: http://gerrit.cloudera.org:8080/5388
Reviewed-by: Alex Behm <al...@cloudera.com>
Tested-by: Internal Jenkins
Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/bb633393
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/bb633393
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/bb633393
Branch: refs/heads/master
Commit: bb633393775691807843a2b6bac28b1750c2c5da
Parents: f83652c
Author: Bharath Vissapragada <bh...@cloudera.com>
Authored: Tue Dec 6 14:48:49 2016 -0800
Committer: Internal Jenkins <cl...@gerrit.cloudera.org>
Committed: Wed Dec 7 09:45:11 2016 +0000
----------------------------------------------------------------------
.../org/apache/impala/catalog/HdfsTable.java | 2 +-
.../queries/QueryTest/avro-schema-changes.test | 38 ++++++++++++++++++++
2 files changed, 39 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/bb633393/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java b/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java
index 386ef79..ae5e811 100644
--- a/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java
+++ b/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java
@@ -894,6 +894,7 @@ public class HdfsTable extends Table {
throw new CatalogException(String.format("Partition %s already exists in table %s",
partition.getPartitionName(), getFullName()));
}
+ if (partition.getFileFormat() == HdfsFileFormat.AVRO) hasAvroData_ = true;
partitionMap_.put(partition.getId(), partition);
totalHdfsBytes_ += partition.getSize();
numHdfsFiles_ += partition.getNumFileDescriptors();
@@ -1430,7 +1431,6 @@ public class HdfsTable extends Table {
// If the partition is null, its HDFS path does not exist, and it was not added to
// this table's partition list. Skip the partition.
if (partition == null) continue;
- if (partition.getFileFormat() == HdfsFileFormat.AVRO) hasAvroData_ = true;
if (msPartition.getParameters() != null) {
partition.setNumRows(getRowCount(msPartition.getParameters()));
}
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/bb633393/testdata/workloads/functional-query/queries/QueryTest/avro-schema-changes.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/avro-schema-changes.test b/testdata/workloads/functional-query/queries/QueryTest/avro-schema-changes.test
index 8233a02..14f0549 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/avro-schema-changes.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/avro-schema-changes.test
@@ -39,6 +39,44 @@ select count(*) from alltypesagg_staleschema
bigint
====
---- QUERY
+# Same as above but for partitioned tables.
+CREATE EXTERNAL TABLE alltypesagg_staleschema_part (
+ id INT,
+ bool_col BOOLEAN,
+ tinyint_col INT,
+ smallint_col INT,
+ int_col INT,
+ bigint_col BIGINT,
+ float_col FLOAT,
+ double_col DOUBLE,
+ date_string_col STRING,
+ string_col STRING,
+ timestamp_col STRING
+) partitioned by (part_col int)
+TBLPROPERTIES ('avro.schema.url'= '$FILESYSTEM_PREFIX/test-warehouse/avro_schemas/functional/alltypesaggmultifilesnopart.json')
+====
+---- QUERY
+alter table alltypesagg_staleschema_part add partition (part_col=1) location '$FILESYSTEM_PREFIX/test-warehouse/alltypesaggmultifilesnopart_avro_snap'
+====
+---- QUERY
+alter table alltypesagg_staleschema_part partition (part_col=1) set fileformat avro
+====
+---- QUERY
+select count(*) from alltypesagg_staleschema_part
+---- CATCH
+Missing Avro schema in scan node. This could be due to stale metadata.
+====
+---- QUERY
+invalidate metadata alltypesagg_staleschema_part
+====
+---- QUERY
+select count(*) from alltypesagg_staleschema_part
+---- RESULTS
+11000
+---- TYPES
+bigint
+====
+---- QUERY
# IMPALA-3092. Create an Avro table without column definitions and add columns via ALTER
# TABLE. Querying the table should work.
CREATE EXTERNAL TABLE avro_alter_table_add_new_column (