You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by he...@apache.org on 2016/12/07 15:23:18 UTC

[3/4] incubator-impala git commit: IMPALA-3314: Fix Avro schema loading for partitioned tables.

IMPALA-3314: Fix Avro schema loading for partitioned tables.

Bug: Commit 6f31c7 fixed a crash when setting Avro schemas for
tables with storage altered to Avro file format. However, the
fix was incomplete for partitioned/multi-file-format tables, since
'hasAvroData_' is not set on all code paths that load the
partitions (for example, HdfsTable#loadAllPartitions()).

Fix: Moved the code for setting 'hasAvroData_' to addPartition(),
which is the common logic for all code paths adding new partitions.
Also fixed the test coverage gap by adding a new test for partitioned
tables altered to Avro format.

Change-Id: I7854ff002b2277ec4a5388216218a1d5ad142de8
Reviewed-on: http://gerrit.cloudera.org:8080/5388
Reviewed-by: Alex Behm <al...@cloudera.com>
Tested-by: Internal Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/bb633393
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/bb633393
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/bb633393

Branch: refs/heads/master
Commit: bb633393775691807843a2b6bac28b1750c2c5da
Parents: f83652c
Author: Bharath Vissapragada <bh...@cloudera.com>
Authored: Tue Dec 6 14:48:49 2016 -0800
Committer: Internal Jenkins <cl...@gerrit.cloudera.org>
Committed: Wed Dec 7 09:45:11 2016 +0000

----------------------------------------------------------------------
 .../org/apache/impala/catalog/HdfsTable.java    |  2 +-
 .../queries/QueryTest/avro-schema-changes.test  | 38 ++++++++++++++++++++
 2 files changed, 39 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/bb633393/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java b/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java
index 386ef79..ae5e811 100644
--- a/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java
+++ b/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java
@@ -894,6 +894,7 @@ public class HdfsTable extends Table {
       throw new CatalogException(String.format("Partition %s already exists in table %s",
           partition.getPartitionName(), getFullName()));
     }
+    if (partition.getFileFormat() == HdfsFileFormat.AVRO) hasAvroData_ = true;
     partitionMap_.put(partition.getId(), partition);
     totalHdfsBytes_ += partition.getSize();
     numHdfsFiles_ += partition.getNumFileDescriptors();
@@ -1430,7 +1431,6 @@ public class HdfsTable extends Table {
       // If the partition is null, its HDFS path does not exist, and it was not added to
       // this table's partition list. Skip the partition.
       if (partition == null) continue;
-      if (partition.getFileFormat() == HdfsFileFormat.AVRO) hasAvroData_ = true;
       if (msPartition.getParameters() != null) {
         partition.setNumRows(getRowCount(msPartition.getParameters()));
       }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/bb633393/testdata/workloads/functional-query/queries/QueryTest/avro-schema-changes.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/avro-schema-changes.test b/testdata/workloads/functional-query/queries/QueryTest/avro-schema-changes.test
index 8233a02..14f0549 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/avro-schema-changes.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/avro-schema-changes.test
@@ -39,6 +39,44 @@ select count(*) from alltypesagg_staleschema
 bigint
 ====
 ---- QUERY
+# Same as above but for partitioned tables.
+CREATE EXTERNAL TABLE alltypesagg_staleschema_part (
+  id INT,
+  bool_col BOOLEAN,
+  tinyint_col INT,
+  smallint_col INT,
+  int_col INT,
+  bigint_col BIGINT,
+  float_col FLOAT,
+  double_col DOUBLE,
+  date_string_col STRING,
+  string_col STRING,
+  timestamp_col STRING
+) partitioned by (part_col int)
+TBLPROPERTIES ('avro.schema.url'= '$FILESYSTEM_PREFIX/test-warehouse/avro_schemas/functional/alltypesaggmultifilesnopart.json')
+====
+---- QUERY
+alter table alltypesagg_staleschema_part add partition (part_col=1) location '$FILESYSTEM_PREFIX/test-warehouse/alltypesaggmultifilesnopart_avro_snap'
+====
+---- QUERY
+alter table alltypesagg_staleschema_part partition (part_col=1) set fileformat avro
+====
+---- QUERY
+select count(*) from alltypesagg_staleschema_part
+---- CATCH
+Missing Avro schema in scan node. This could be due to stale metadata.
+====
+---- QUERY
+invalidate metadata alltypesagg_staleschema_part
+====
+---- QUERY
+select count(*) from alltypesagg_staleschema_part
+---- RESULTS
+11000
+---- TYPES
+bigint
+====
+---- QUERY
 # IMPALA-3092. Create an Avro table without column definitions and add columns via ALTER
 # TABLE. Querying the table should work.
 CREATE EXTERNAL TABLE avro_alter_table_add_new_column (