You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by jo...@apache.org on 2020/10/14 22:48:36 UTC
[impala] 05/06: IMPALA-10159: Supporting ORC file format for
Iceberg table
This is an automated email from the ASF dual-hosted git repository.
joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
commit 0c0985a825fba8d9702639e3e679d2e1b9070fe1
Author: skyyws <sk...@163.com>
AuthorDate: Sun Sep 6 11:22:06 2020 +0800
IMPALA-10159: Supporting ORC file format for Iceberg table
This patch mainly implements querying Iceberg tables with the ORC
file format. We can use the following SQL to create a table with
the ORC file format:
CREATE TABLE default.iceberg_test (
level string,
event_time timestamp,
message string
)
STORED AS ICEBERG
LOCATION 'hdfs://xxx'
TBLPROPERTIES ('iceberg.file_format'='orc', 'iceberg.catalog'='hadoop.tables');
Note, however, that there are still some problems when scanning ORC files
with Timestamp columns; for more details, please refer to IMPALA-9967. We may
add new tests with the Timestamp type after that JIRA is fixed.
Testing:
- Create table tests in functional_schema_template.sql
- Iceberg table create test in test_iceberg.py
- Iceberg table query test in test_scanners.py
Change-Id: Ib579461aa57348c9893a6d26a003a0d812346c4d
Reviewed-on: http://gerrit.cloudera.org:8080/16568
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
.../java/org/apache/impala/util/IcebergUtil.java | 6 +-
...-0982a5d3-48c0-4dd0-ab87-d24190894251-00000.orc | Bin 0 -> 460 bytes
...-dc56d2c8-e285-428d-b81e-f3d07ec53c12-00000.orc | Bin 0 -> 460 bytes
...-d4a071f4-5d57-493e-8d21-8f82655087e4-00000.orc | Bin 0 -> 461 bytes
...-e9fa9638-8693-43dc-b1ba-2d8dc390b585-00000.orc | Bin 0 -> 460 bytes
...-4526ff61-8991-4645-b448-bf3535492111-00000.orc | Bin 0 -> 460 bytes
...-439376ff-3d17-4494-94ba-586fa00caadd-00000.orc | Bin 0 -> 461 bytes
...-fb7663c5-aade-4690-bdfc-2334d605a891-00000.orc | Bin 0 -> 474 bytes
...-fc53a761-1ee2-4ce4-9507-c9917917d2c9-00000.orc | Bin 0 -> 474 bytes
...-8887c762-b6a8-4e2c-b1e0-6069d6141917-00000.orc | Bin 0 -> 474 bytes
...-a85e02aa-ff28-4311-b996-a83f67a5e39b-00000.orc | Bin 0 -> 470 bytes
...-dea5a8ec-ea06-4688-8293-486a62563ef3-00000.orc | Bin 0 -> 474 bytes
...-d182fe7e-de19-4df8-ac19-27bbfe17b5bd-00000.orc | Bin 0 -> 474 bytes
...-d5df20b1-6ce6-486a-b084-ea55b7954431-00000.orc | Bin 0 -> 439 bytes
...-b30cc130-03a1-45e5-b927-d80545593f9a-00000.orc | Bin 0 -> 435 bytes
...-6476cc9e-9b5a-48d3-8cce-ea7ebb2d904f-00000.orc | Bin 0 -> 439 bytes
...-c2ab6dc5-3586-4b45-8a3c-860fdc71980d-00000.orc | Bin 0 -> 439 bytes
...-fd03c61f-8c73-4639-8974-1f6431a30bc5-00000.orc | Bin 0 -> 439 bytes
...-9baec6d2-8e31-4d19-8259-1a9e546aaea5-00000.orc | Bin 0 -> 439 bytes
...-433b1afa-9fee-4581-89fb-105483ebf1a2-00000.orc | Bin 0 -> 439 bytes
...-e0156a2d-4240-4c8e-9724-6ad3231cf178-00000.orc | Bin 0 -> 439 bytes
.../02fb8198-e791-4d89-8afa-c75fb5443346-m0.avro | Bin 0 -> 5018 bytes
...670-1-02fb8198-e791-4d89-8afa-c75fb5443346.avro | Bin 0 -> 2582 bytes
.../metadata/v1.metadata.json | 47 +++++++++++
.../metadata/v2.metadata.json | 66 +++++++++++++++
.../metadata/version-hint.text | 1 +
.../functional/functional_schema_template.sql | 12 +++
.../datasets/functional/schema_constraints.csv | 2 +
.../queries/QueryTest/iceberg-create.test | 51 +++++++++++
.../queries/QueryTest/iceberg-query.test | 94 +++++++++++++++++++++
.../queries/QueryTest/show-create-table.test | 22 +++++
31 files changed, 300 insertions(+), 1 deletion(-)
diff --git a/fe/src/main/java/org/apache/impala/util/IcebergUtil.java b/fe/src/main/java/org/apache/impala/util/IcebergUtil.java
index 79444f1..d5688dc 100644
--- a/fe/src/main/java/org/apache/impala/util/IcebergUtil.java
+++ b/fe/src/main/java/org/apache/impala/util/IcebergUtil.java
@@ -224,7 +224,11 @@ public class IcebergUtil {
* Get TIcebergFileFormat from a string, usually from table properties
*/
public static TIcebergFileFormat getIcebergFileFormat(String format){
- if ("PARQUET".equalsIgnoreCase(format)) return TIcebergFileFormat.PARQUET;
+ if ("PARQUET".equalsIgnoreCase(format)) {
+ return TIcebergFileFormat.PARQUET;
+ } else if ("ORC".equalsIgnoreCase(format)) {
+ return TIcebergFileFormat.ORC;
+ }
return null;
}
diff --git a/testdata/data/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=click/00004-4-0982a5d3-48c0-4dd0-ab87-d24190894251-00000.orc b/testdata/data/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=click/00004-4-0982a5d3-48c0-4dd0-ab87-d24190894251-00000.orc
new file mode 100644
index 0000000..435f1f5
Binary files /dev/null and b/testdata/data/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=click/00004-4-0982a5d3-48c0-4dd0-ab87-d24190894251-00000.orc differ
diff --git a/testdata/data/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=click/00014-14-dc56d2c8-e285-428d-b81e-f3d07ec53c12-00000.orc b/testdata/data/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=click/00014-14-dc56d2c8-e285-428d-b81e-f3d07ec53c12-00000.orc
new file mode 100644
index 0000000..4ca7c90
Binary files /dev/null and b/testdata/data/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=click/00014-14-dc56d2c8-e285-428d-b81e-f3d07ec53c12-00000.orc differ
diff --git a/testdata/data/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=click/00015-15-d4a071f4-5d57-493e-8d21-8f82655087e4-00000.orc b/testdata/data/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=click/00015-15-d4a071f4-5d57-493e-8d21-8f82655087e4-00000.orc
new file mode 100644
index 0000000..3b32dd2
Binary files /dev/null and b/testdata/data/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=click/00015-15-d4a071f4-5d57-493e-8d21-8f82655087e4-00000.orc differ
diff --git a/testdata/data/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=click/00019-19-e9fa9638-8693-43dc-b1ba-2d8dc390b585-00000.orc b/testdata/data/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=click/00019-19-e9fa9638-8693-43dc-b1ba-2d8dc390b585-00000.orc
new file mode 100644
index 0000000..e083888
Binary files /dev/null and b/testdata/data/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=click/00019-19-e9fa9638-8693-43dc-b1ba-2d8dc390b585-00000.orc differ
diff --git a/testdata/data/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=click/00020-20-4526ff61-8991-4645-b448-bf3535492111-00000.orc b/testdata/data/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=click/00020-20-4526ff61-8991-4645-b448-bf3535492111-00000.orc
new file mode 100644
index 0000000..711e00b
Binary files /dev/null and b/testdata/data/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=click/00020-20-4526ff61-8991-4645-b448-bf3535492111-00000.orc differ
diff --git a/testdata/data/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=click/00028-28-439376ff-3d17-4494-94ba-586fa00caadd-00000.orc b/testdata/data/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=click/00028-28-439376ff-3d17-4494-94ba-586fa00caadd-00000.orc
new file mode 100644
index 0000000..866f5df
Binary files /dev/null and b/testdata/data/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=click/00028-28-439376ff-3d17-4494-94ba-586fa00caadd-00000.orc differ
diff --git a/testdata/data/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=download/00003-3-fb7663c5-aade-4690-bdfc-2334d605a891-00000.orc b/testdata/data/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=download/00003-3-fb7663c5-aade-4690-bdfc-2334d605a891-00000.orc
new file mode 100644
index 0000000..2859593
Binary files /dev/null and b/testdata/data/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=download/00003-3-fb7663c5-aade-4690-bdfc-2334d605a891-00000.orc differ
diff --git a/testdata/data/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=download/00007-7-fc53a761-1ee2-4ce4-9507-c9917917d2c9-00000.orc b/testdata/data/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=download/00007-7-fc53a761-1ee2-4ce4-9507-c9917917d2c9-00000.orc
new file mode 100644
index 0000000..b44d885
Binary files /dev/null and b/testdata/data/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=download/00007-7-fc53a761-1ee2-4ce4-9507-c9917917d2c9-00000.orc differ
diff --git a/testdata/data/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=download/00011-11-8887c762-b6a8-4e2c-b1e0-6069d6141917-00000.orc b/testdata/data/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=download/00011-11-8887c762-b6a8-4e2c-b1e0-6069d6141917-00000.orc
new file mode 100644
index 0000000..2687eda
Binary files /dev/null and b/testdata/data/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=download/00011-11-8887c762-b6a8-4e2c-b1e0-6069d6141917-00000.orc differ
diff --git a/testdata/data/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=download/00012-12-a85e02aa-ff28-4311-b996-a83f67a5e39b-00000.orc b/testdata/data/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=download/00012-12-a85e02aa-ff28-4311-b996-a83f67a5e39b-00000.orc
new file mode 100644
index 0000000..c4cf486
Binary files /dev/null and b/testdata/data/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=download/00012-12-a85e02aa-ff28-4311-b996-a83f67a5e39b-00000.orc differ
diff --git a/testdata/data/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=download/00022-22-dea5a8ec-ea06-4688-8293-486a62563ef3-00000.orc b/testdata/data/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=download/00022-22-dea5a8ec-ea06-4688-8293-486a62563ef3-00000.orc
new file mode 100644
index 0000000..86ac4e0
Binary files /dev/null and b/testdata/data/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=download/00022-22-dea5a8ec-ea06-4688-8293-486a62563ef3-00000.orc differ
diff --git a/testdata/data/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=download/00025-25-d182fe7e-de19-4df8-ac19-27bbfe17b5bd-00000.orc b/testdata/data/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=download/00025-25-d182fe7e-de19-4df8-ac19-27bbfe17b5bd-00000.orc
new file mode 100644
index 0000000..38833fc
Binary files /dev/null and b/testdata/data/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=download/00025-25-d182fe7e-de19-4df8-ac19-27bbfe17b5bd-00000.orc differ
diff --git a/testdata/data/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=view/00001-1-d5df20b1-6ce6-486a-b084-ea55b7954431-00000.orc b/testdata/data/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=view/00001-1-d5df20b1-6ce6-486a-b084-ea55b7954431-00000.orc
new file mode 100644
index 0000000..da3d170
Binary files /dev/null and b/testdata/data/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=view/00001-1-d5df20b1-6ce6-486a-b084-ea55b7954431-00000.orc differ
diff --git a/testdata/data/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=view/00006-6-b30cc130-03a1-45e5-b927-d80545593f9a-00000.orc b/testdata/data/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=view/00006-6-b30cc130-03a1-45e5-b927-d80545593f9a-00000.orc
new file mode 100644
index 0000000..1ce1eba
Binary files /dev/null and b/testdata/data/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=view/00006-6-b30cc130-03a1-45e5-b927-d80545593f9a-00000.orc differ
diff --git a/testdata/data/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=view/00009-9-6476cc9e-9b5a-48d3-8cce-ea7ebb2d904f-00000.orc b/testdata/data/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=view/00009-9-6476cc9e-9b5a-48d3-8cce-ea7ebb2d904f-00000.orc
new file mode 100644
index 0000000..c531b59
Binary files /dev/null and b/testdata/data/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=view/00009-9-6476cc9e-9b5a-48d3-8cce-ea7ebb2d904f-00000.orc differ
diff --git a/testdata/data/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=view/00017-17-c2ab6dc5-3586-4b45-8a3c-860fdc71980d-00000.orc b/testdata/data/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=view/00017-17-c2ab6dc5-3586-4b45-8a3c-860fdc71980d-00000.orc
new file mode 100644
index 0000000..fe14583
Binary files /dev/null and b/testdata/data/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=view/00017-17-c2ab6dc5-3586-4b45-8a3c-860fdc71980d-00000.orc differ
diff --git a/testdata/data/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=view/00023-23-fd03c61f-8c73-4639-8974-1f6431a30bc5-00000.orc b/testdata/data/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=view/00023-23-fd03c61f-8c73-4639-8974-1f6431a30bc5-00000.orc
new file mode 100644
index 0000000..1d83368
Binary files /dev/null and b/testdata/data/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=view/00023-23-fd03c61f-8c73-4639-8974-1f6431a30bc5-00000.orc differ
diff --git a/testdata/data/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=view/00027-27-9baec6d2-8e31-4d19-8259-1a9e546aaea5-00000.orc b/testdata/data/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=view/00027-27-9baec6d2-8e31-4d19-8259-1a9e546aaea5-00000.orc
new file mode 100644
index 0000000..baed8f1
Binary files /dev/null and b/testdata/data/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=view/00027-27-9baec6d2-8e31-4d19-8259-1a9e546aaea5-00000.orc differ
diff --git a/testdata/data/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=view/00030-30-433b1afa-9fee-4581-89fb-105483ebf1a2-00000.orc b/testdata/data/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=view/00030-30-433b1afa-9fee-4581-89fb-105483ebf1a2-00000.orc
new file mode 100644
index 0000000..e423097
Binary files /dev/null and b/testdata/data/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=view/00030-30-433b1afa-9fee-4581-89fb-105483ebf1a2-00000.orc differ
diff --git a/testdata/data/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=view/00031-31-e0156a2d-4240-4c8e-9724-6ad3231cf178-00000.orc b/testdata/data/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=view/00031-31-e0156a2d-4240-4c8e-9724-6ad3231cf178-00000.orc
new file mode 100644
index 0000000..c343c76
Binary files /dev/null and b/testdata/data/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=view/00031-31-e0156a2d-4240-4c8e-9724-6ad3231cf178-00000.orc differ
diff --git a/testdata/data/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/metadata/02fb8198-e791-4d89-8afa-c75fb5443346-m0.avro b/testdata/data/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/metadata/02fb8198-e791-4d89-8afa-c75fb5443346-m0.avro
new file mode 100644
index 0000000..f6a6e41
Binary files /dev/null and b/testdata/data/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/metadata/02fb8198-e791-4d89-8afa-c75fb5443346-m0.avro differ
diff --git a/testdata/data/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/metadata/snap-3506237933060603670-1-02fb8198-e791-4d89-8afa-c75fb5443346.avro b/testdata/data/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/metadata/snap-3506237933060603670-1-02fb8198-e791-4d89-8afa-c75fb5443346.avro
new file mode 100644
index 0000000..ae0c45a
Binary files /dev/null and b/testdata/data/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/metadata/snap-3506237933060603670-1-02fb8198-e791-4d89-8afa-c75fb5443346.avro differ
diff --git a/testdata/data/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/metadata/v1.metadata.json b/testdata/data/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/metadata/v1.metadata.json
new file mode 100644
index 0000000..6f69b2d
--- /dev/null
+++ b/testdata/data/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/metadata/v1.metadata.json
@@ -0,0 +1,47 @@
+{
+ "format-version" : 1,
+ "table-uuid" : "ea1384cd-938f-4e85-86be-666434dedaaa",
+ "location" : "/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc",
+ "last-updated-ms" : 1600952811299,
+ "last-column-id" : 3,
+ "schema" : {
+ "type" : "struct",
+ "fields" : [ {
+ "id" : 1,
+ "name" : "id",
+ "required" : false,
+ "type" : "int"
+ }, {
+ "id" : 2,
+ "name" : "user",
+ "required" : true,
+ "type" : "string"
+ }, {
+ "id" : 3,
+ "name" : "action",
+ "required" : true,
+ "type" : "string"
+ } ]
+ },
+ "partition-spec" : [ {
+ "name" : "action",
+ "transform" : "identity",
+ "source-id" : 3,
+ "field-id" : 1000
+ } ],
+ "default-spec-id" : 0,
+ "partition-specs" : [ {
+ "spec-id" : 0,
+ "fields" : [ {
+ "name" : "action",
+ "transform" : "identity",
+ "source-id" : 3,
+ "field-id" : 1000
+ } ]
+ } ],
+ "properties" : { },
+ "current-snapshot-id" : -1,
+ "snapshots" : [ ],
+ "snapshot-log" : [ ],
+ "metadata-log" : [ ]
+}
\ No newline at end of file
diff --git a/testdata/data/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/metadata/v2.metadata.json b/testdata/data/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/metadata/v2.metadata.json
new file mode 100644
index 0000000..53fd9db
--- /dev/null
+++ b/testdata/data/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/metadata/v2.metadata.json
@@ -0,0 +1,66 @@
+{
+ "format-version" : 1,
+ "table-uuid" : "ea1384cd-938f-4e85-86be-666434dedaaa",
+ "location" : "/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc",
+ "last-updated-ms" : 1600952815220,
+ "last-column-id" : 3,
+ "schema" : {
+ "type" : "struct",
+ "fields" : [ {
+ "id" : 1,
+ "name" : "id",
+ "required" : false,
+ "type" : "int"
+ }, {
+ "id" : 2,
+ "name" : "user",
+ "required" : true,
+ "type" : "string"
+ }, {
+ "id" : 3,
+ "name" : "action",
+ "required" : true,
+ "type" : "string"
+ } ]
+ },
+ "partition-spec" : [ {
+ "name" : "action",
+ "transform" : "identity",
+ "source-id" : 3,
+ "field-id" : 1000
+ } ],
+ "default-spec-id" : 0,
+ "partition-specs" : [ {
+ "spec-id" : 0,
+ "fields" : [ {
+ "name" : "action",
+ "transform" : "identity",
+ "source-id" : 3,
+ "field-id" : 1000
+ } ]
+ } ],
+ "properties" : { },
+ "current-snapshot-id" : 3506237933060603670,
+ "snapshots" : [ {
+ "snapshot-id" : 3506237933060603670,
+ "timestamp-ms" : 1600952815220,
+ "summary" : {
+ "operation" : "append",
+ "spark.app.id" : "local-1600952804995",
+ "added-data-files" : "20",
+ "added-records" : "20",
+ "changed-partition-count" : "3",
+ "total-records" : "20",
+ "total-data-files" : "20"
+ },
+ "manifest-list" : "/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/metadata/snap-3506237933060603670-1-02fb8198-e791-4d89-8afa-c75fb5443346.avro"
+ } ],
+ "snapshot-log" : [ {
+ "timestamp-ms" : 1600952815220,
+ "snapshot-id" : 3506237933060603670
+ } ],
+ "metadata-log" : [ {
+ "timestamp-ms" : 1600952811299,
+ "metadata-file" : "/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/metadata/v1.metadata.json"
+ } ]
+}
\ No newline at end of file
diff --git a/testdata/data/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/metadata/version-hint.text b/testdata/data/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/metadata/version-hint.text
new file mode 100644
index 0000000..d8263ee
--- /dev/null
+++ b/testdata/data/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/metadata/version-hint.text
@@ -0,0 +1 @@
+2
\ No newline at end of file
diff --git a/testdata/datasets/functional/functional_schema_template.sql b/testdata/datasets/functional/functional_schema_template.sql
index cd8a0d2..467798f 100644
--- a/testdata/datasets/functional/functional_schema_template.sql
+++ b/testdata/datasets/functional/functional_schema_template.sql
@@ -2973,3 +2973,15 @@ TBLPROPERTIES('iceberg.file_format'='parquet', 'iceberg.catalog'='hadoop.catalog
`hadoop fs -mkdir -p /test-warehouse/iceberg_test/hadoop_catalog && \
hadoop fs -put -f ${IMPALA_HOME}/testdata/data/iceberg_test/hadoop_catalog/hadoop_catalog_test /test-warehouse/iceberg_test/hadoop_catalog/
====
+---- DATASET
+functional
+---- BASE_TABLE_NAME
+iceberg_partitioned_orc_external
+---- CREATE
+CREATE EXTERNAL TABLE IF NOT EXISTS {db_name}{db_suffix}.{table_name}
+STORED AS ICEBERG
+TBLPROPERTIES('iceberg.file_format'='orc', 'iceberg.catalog'='hadoop.catalog', 'iceberg.catalog_location'='/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc', 'iceberg.table_identifier'='functional_parquet.iceberg_partitioned_orc');
+---- DEPENDENT_LOAD
+`hadoop fs -mkdir -p /test-warehouse/iceberg_test/hadoop_catalog && \
+hadoop fs -put -f ${IMPALA_HOME}/testdata/data/iceberg_test/hadoop_catalog/iceberg_partitioned_orc /test-warehouse/iceberg_test/hadoop_catalog/
+====
diff --git a/testdata/datasets/functional/schema_constraints.csv b/testdata/datasets/functional/schema_constraints.csv
index 39b724f..83136ea 100644
--- a/testdata/datasets/functional/schema_constraints.csv
+++ b/testdata/datasets/functional/schema_constraints.csv
@@ -63,6 +63,8 @@ table_name:hudi_non_partitioned, constraint:restrict_to, table_format:parquet/no
table_name:hudi_as_parquet, constraint:restrict_to, table_format:parquet/none/none
table_name:iceberg_partitioned, constraint:restrict_to, table_format:parquet/none/none
table_name:iceberg_non_partitioned, constraint:restrict_to, table_format:parquet/none/none
+table_name:hadoop_catalog_test_external, constraint:restrict_to, table_format:parquet/none/none
+table_name:iceberg_partitioned_orc_external, constraint:restrict_to, table_format:parquet/none/none
# TODO: Support Avro. Data loading currently fails for Avro because complex types
# cannot be converted to the corresponding Avro types yet.
diff --git a/testdata/workloads/functional-query/queries/QueryTest/iceberg-create.test b/testdata/workloads/functional-query/queries/QueryTest/iceberg-create.test
index 24f5239..fd06875 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/iceberg-create.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/iceberg-create.test
@@ -288,3 +288,54 @@ DESCRIBE FORMATTED iceberg_test9;
---- TYPES
string, string, string
====
+---- QUERY
+CREATE TABLE iceberg_test10(
+ level STRING,
+ event_time TIMESTAMP,
+ register_time DATE,
+ message STRING,
+ price DECIMAL(8,1),
+ map_test MAP <STRING, array <STRING>>,
+ struct_test STRUCT <f1: BIGINT, f2: BIGINT>
+)
+PARTITION BY SPEC
+(
+ level IDENTITY,
+ event_time IDENTITY,
+ event_time HOUR,
+ register_time DAY
+)
+STORED AS ICEBERG
+TBLPROPERTIES('iceberg.file_format'='orc','iceberg_catalog'='hadoop.tables',
+'iceberg.catalog_location'='/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test');
+---- RESULTS
+'Table has been created.'
+====
+---- QUERY
+DESCRIBE iceberg_test10;
+---- RESULTS
+'level','string',''
+'event_time','timestamp',''
+'register_time','date',''
+'message','string',''
+'price','decimal(8,1)',''
+'map_test','map<string,array<string>>',''
+'struct_test','struct<\n f1:bigint,\n f2:bigint\n>',''
+---- TYPES
+STRING,STRING,STRING
+====
+---- QUERY
+SHOW PARTITIONS iceberg_test10;
+---- RESULTS
+0,1,1000,'level','IDENTITY'
+0,2,1001,'event_time','IDENTITY'
+0,2,1002,'event_time_hour','HOUR'
+0,3,1003,'register_time_day','DAY'
+---- TYPES
+BIGINT,BIGINT,BIGINT,STRING,STRING
+====
+---- QUERY
+DROP TABLE iceberg_test10;
+---- RESULTS
+'Table has been dropped.'
+====
diff --git a/testdata/workloads/functional-query/queries/QueryTest/iceberg-query.test b/testdata/workloads/functional-query/queries/QueryTest/iceberg-query.test
index 9c94d7f..5081644 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/iceberg-query.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/iceberg-query.test
@@ -282,6 +282,89 @@ Path,Size,Partition
STRING,STRING,STRING
====
---- QUERY
+SELECT count(*) from iceberg_partitioned_orc_external;
+---- TYPES
+bigint
+---- RESULTS
+20
+====
+---- QUERY
+SELECT count(*) from iceberg_partitioned_orc_external
+where id > 10
+---- TYPES
+bigint
+---- RESULTS
+10
+====
+---- QUERY
+SELECT count(*) from iceberg_partitioned_orc_external
+where action <> 'click'
+---- TYPES
+bigint
+---- RESULTS
+14
+====
+---- QUERY
+# Test distinct count
+SELECT count(distinct id),count(distinct action) from iceberg_partitioned_orc_external
+---- TYPES
+bigint,bigint
+---- RESULTS
+20,3
+====
+---- QUERY
+SELECT * from iceberg_partitioned_orc_external
+where id < 5
+ORDER BY id
+---- TYPES
+int,string,string
+---- RESULTS
+1,'Alex','view'
+2,'Lisa','download'
+3,'Alan','click'
+4,'Alex','view'
+====
+---- QUERY
+SHOW PARTITIONS iceberg_partitioned_orc_external
+---- TYPES
+bigint,bigint,bigint,string,string
+---- RESULTS
+0,3,1000,'action','IDENTITY'
+====
+---- QUERY
+SHOW FILES in iceberg_partitioned_orc_external
+---- LABELS
+Path,Size,Partition
+---- RESULTS
+'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=click/00004-4-0982a5d3-48c0-4dd0-ab87-d24190894251-00000.orc',regex:.*,''
+'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=click/00014-14-dc56d2c8-e285-428d-b81e-f3d07ec53c12-00000.orc',regex:.*,''
+'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=click/00015-15-d4a071f4-5d57-493e-8d21-8f82655087e4-00000.orc',regex:.*,''
+'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=click/00019-19-e9fa9638-8693-43dc-b1ba-2d8dc390b585-00000.orc',regex:.*,''
+'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=click/00020-20-4526ff61-8991-4645-b448-bf3535492111-00000.orc',regex:.*,''
+'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=click/00028-28-439376ff-3d17-4494-94ba-586fa00caadd-00000.orc',regex:.*,''
+'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=download/00003-3-fb7663c5-aade-4690-bdfc-2334d605a891-00000.orc',regex:.*,''
+'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=download/00007-7-fc53a761-1ee2-4ce4-9507-c9917917d2c9-00000.orc',regex:.*,''
+'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=download/00011-11-8887c762-b6a8-4e2c-b1e0-6069d6141917-00000.orc',regex:.*,''
+'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=download/00012-12-a85e02aa-ff28-4311-b996-a83f67a5e39b-00000.orc',regex:.*,''
+'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=download/00022-22-dea5a8ec-ea06-4688-8293-486a62563ef3-00000.orc',regex:.*,''
+'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=download/00025-25-d182fe7e-de19-4df8-ac19-27bbfe17b5bd-00000.orc',regex:.*,''
+'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=view/00001-1-d5df20b1-6ce6-486a-b084-ea55b7954431-00000.orc',regex:.*,''
+'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=view/00006-6-b30cc130-03a1-45e5-b927-d80545593f9a-00000.orc',regex:.*,''
+'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=view/00009-9-6476cc9e-9b5a-48d3-8cce-ea7ebb2d904f-00000.orc',regex:.*,''
+'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=view/00017-17-c2ab6dc5-3586-4b45-8a3c-860fdc71980d-00000.orc',regex:.*,''
+'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=view/00023-23-fd03c61f-8c73-4639-8974-1f6431a30bc5-00000.orc',regex:.*,''
+'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=view/00027-27-9baec6d2-8e31-4d19-8259-1a9e546aaea5-00000.orc',regex:.*,''
+'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=view/00030-30-433b1afa-9fee-4581-89fb-105483ebf1a2-00000.orc',regex:.*,''
+'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=view/00031-31-e0156a2d-4240-4c8e-9724-6ad3231cf178-00000.orc',regex:.*,''
+'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/metadata/02fb8198-e791-4d89-8afa-c75fb5443346-m0.avro',regex:.*,''
+'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/metadata/snap-3506237933060603670-1-02fb8198-e791-4d89-8afa-c75fb5443346.avro',regex:.*,''
+'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/metadata/v1.metadata.json',regex:.*,''
+'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/metadata/v2.metadata.json',regex:.*,''
+'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/metadata/version-hint.text',regex:.*,''
+---- TYPES
+STRING,STRING,STRING
+====
+---- QUERY
describe formatted iceberg_partitioned;
---- RESULTS: VERIFY_IS_SUBSET
'Location: ','$NAMENODE/test-warehouse/iceberg_test/iceberg_partitioned','NULL'
@@ -310,3 +393,14 @@ describe formatted hadoop_catalog_test_external;
---- TYPES
string, string, string
====
+---- QUERY
+describe formatted iceberg_partitioned_orc_external;
+---- RESULTS: VERIFY_IS_SUBSET
+'Location: ','$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc','NULL'
+'','iceberg.catalog_location','/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc'
+'','iceberg.table_identifier','functional_parquet.iceberg_partitioned_orc'
+'','iceberg.file_format ','orc '
+'','iceberg.catalog ','hadoop.catalog '
+---- TYPES
+string, string, string
+====
diff --git a/testdata/workloads/functional-query/queries/QueryTest/show-create-table.test b/testdata/workloads/functional-query/queries/QueryTest/show-create-table.test
index 5c649b6..61bd879 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/show-create-table.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/show-create-table.test
@@ -689,3 +689,25 @@ TBLPROPERTIES ('external.table.purge'='TRUE', 'iceberg.catalog'='hadoop.catalog'
'iceberg.file_format'='parquet',
'iceberg.catalog_location'='/test-warehouse/hadoop_catalog_test')
====
+---- CREATE_TABLE
+CREATE TABLE iceberg_test_orc (
+ level STRING
+)
+STORED AS ICEBERG
+TBLPROPERTIES('iceberg.file_format'='orc', 'iceberg.catalog'='hadoop.tables')
+---- RESULTS-HIVE
+CREATE TABLE show_create_table_test_db.iceberg_test_orc (
+ level STRING
+)
+STORED AS ICEBERG
+LOCATION '$$location_uri$$'
+TBLPROPERTIES('iceberg.file_format'='orc', 'iceberg.catalog'='hadoop.tables')
+---- RESULTS-HIVE-3
+CREATE EXTERNAL TABLE show_create_table_test_db.iceberg_test_orc (
+ level STRING
+)
+STORED AS ICEBERG
+LOCATION '$$location_uri$$'
+TBLPROPERTIES ('external.table.purge'='TRUE', 'iceberg.file_format'='orc',
+'iceberg.catalog'='hadoop.tables')
+====