Posted to commits@impala.apache.org by db...@apache.org on 2023/03/07 13:49:04 UTC
[impala] 01/03: IMPALA-11479: Add Java unit tests for IcebergUtil.
This is an automated email from the ASF dual-hosted git repository.
dbecker pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
commit 374d011a7caae3ad91e209f39b4537f9152a8ca2
Author: Andrew Sherman <as...@cloudera.com>
AuthorDate: Fri Aug 5 14:20:21 2022 -0700
IMPALA-11479: Add Java unit tests for IcebergUtil.
This does not test all of IcebergUtil, but it is a start.
Tidy up some IcebergUtil code and fix a few spelling mistakes.
Change-Id: Ib15993a9ed3d5802dda4edb2011a90ead6d06ed4
Reviewed-on: http://gerrit.cloudera.org:8080/19543
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
.../apache/impala/common/TransactionKeepalive.java | 1 -
.../java/org/apache/impala/util/IcebergUtil.java | 113 +++---
.../impala/catalog/local/LocalCatalogTest.java | 2 +-
.../org/apache/impala/util/IcebergUtilTest.java | 400 +++++++++++++++++++++
4 files changed, 460 insertions(+), 56 deletions(-)
diff --git a/fe/src/main/java/org/apache/impala/common/TransactionKeepalive.java b/fe/src/main/java/org/apache/impala/common/TransactionKeepalive.java
index 52dff28fd..126937826 100644
--- a/fe/src/main/java/org/apache/impala/common/TransactionKeepalive.java
+++ b/fe/src/main/java/org/apache/impala/common/TransactionKeepalive.java
@@ -37,7 +37,6 @@ import org.apache.impala.thrift.TQueryCtx;
import org.apache.log4j.Logger;
import com.google.common.base.Preconditions;
-import com.sun.tools.javac.code.Attribute.Array;
/**
* Object of this class creates a daemon thread that periodically heartbeats the
diff --git a/fe/src/main/java/org/apache/impala/util/IcebergUtil.java b/fe/src/main/java/org/apache/impala/util/IcebergUtil.java
index 5dbbf61f2..c6595a968 100644
--- a/fe/src/main/java/org/apache/impala/util/IcebergUtil.java
+++ b/fe/src/main/java/org/apache/impala/util/IcebergUtil.java
@@ -99,10 +99,13 @@ import org.apache.impala.thrift.TIcebergPartitionField;
import org.apache.impala.thrift.TIcebergPartitionSpec;
import org.apache.impala.thrift.TIcebergPartitionTransformType;
+@SuppressWarnings("UnstableApiUsage")
public class IcebergUtil {
private static final int ICEBERG_EPOCH_YEAR = 1970;
private static final int ICEBERG_EPOCH_MONTH = 1;
+ @SuppressWarnings("unused")
private static final int ICEBERG_EPOCH_DAY = 1;
+ @SuppressWarnings("unused")
private static final int ICEBERG_EPOCH_HOUR = 0;
/**
@@ -124,8 +127,7 @@ public class IcebergUtil {
case HIVE_CATALOG: return IcebergHiveCatalog.getInstance();
case HADOOP_CATALOG: return new IcebergHadoopCatalog(location);
case CATALOGS: return IcebergCatalogs.getInstance();
- default: throw new ImpalaRuntimeException (
- "Unexpected catalog type: " + catalog.toString());
+ default: throw new ImpalaRuntimeException("Unexpected catalog type: " + catalog);
}
}
@@ -186,8 +188,7 @@ public class IcebergUtil {
* Get an Iceberg Transaction for 'feTable'; the Transaction is typically used to
* update the Iceberg table schema.
*/
- public static Transaction getIcebergTransaction(FeIcebergTable feTable)
- throws TableLoadingException, ImpalaRuntimeException {
+ public static Transaction getIcebergTransaction(FeIcebergTable feTable) {
return feTable.getIcebergApiTable().newTransaction();
}
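For illustration, a minimal sketch of how the returned Transaction is typically
used to evolve a table schema. The 'feTable' variable is hypothetical; the
schema-update calls are standard Iceberg API:

    Transaction txn = IcebergUtil.getIcebergTransaction(feTable);
    txn.updateSchema()
        .addColumn("new_col", Types.StringType.get())
        .commit();               // stage the schema change on the transaction
    txn.commitTransaction();     // apply all staged changes atomically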
@@ -317,7 +318,7 @@ public class IcebergUtil {
*/
public static TIcebergFileFormat getIcebergFileFormat(
org.apache.hadoop.hive.metastore.api.Table msTable) {
- TIcebergFileFormat fileFormat = null;
+ TIcebergFileFormat fileFormat;
Map<String, String> params = msTable.getParameters();
if (params.containsKey(IcebergTable.ICEBERG_FILE_FORMAT)) {
fileFormat = IcebergUtil.getIcebergFileFormat(
@@ -389,7 +390,7 @@ public class IcebergUtil {
PartitionField field, HashMap<String, Integer> transformParams)
throws TableLoadingException {
String type = field.transform().toString();
- String transformMappingKey = getPartitonTransformMappingKey(field.sourceId(),
+ String transformMappingKey = getPartitionTransformMappingKey(field.sourceId(),
getPartitionTransformType(type));
return getPartitionTransform(type, transformParams.get(transformMappingKey));
}
@@ -405,15 +406,15 @@ public class IcebergUtil {
return getPartitionTransform(transformType, null);
}
- public static TIcebergPartitionTransformType getPartitionTransformType(
+ private static TIcebergPartitionTransformType getPartitionTransformType(
String transformType) throws TableLoadingException {
Preconditions.checkNotNull(transformType);
transformType = transformType.toUpperCase();
if ("IDENTITY".equals(transformType)) {
return TIcebergPartitionTransformType.IDENTITY;
- } else if (transformType != null && transformType.startsWith("BUCKET")) {
+ } else if (transformType.startsWith("BUCKET")) {
return TIcebergPartitionTransformType.BUCKET;
- } else if (transformType != null && transformType.startsWith("TRUNCATE")) {
+ } else if (transformType.startsWith("TRUNCATE")) {
return TIcebergPartitionTransformType.TRUNCATE;
}
switch (transformType) {
@@ -428,8 +429,8 @@ public class IcebergUtil {
}
}
- private static String getPartitonTransformMappingKey(int sourceId,
- TIcebergPartitionTransformType transformType) {
+ private static String getPartitionTransformMappingKey(
+ int sourceId, TIcebergPartitionTransformType transformType) {
return sourceId + "_" + transformType.toString();
}
@@ -438,10 +439,10 @@ public class IcebergUtil {
* PartitionSpec and its transform's parameter. Only Bucket and Truncate transforms
* have a parameter; for other transforms this mapping will have a null. The
* source ID and the transform type are needed together to uniquely identify a specific
- * field in the PartitionSpec. (Unfortunaltely, fieldId is not available in the Visitor
+ * field in the PartitionSpec. (Unfortunately, fieldId is not available in the Visitor
* class below.)
* The reason for implementing the PartitionSpecVisitor below was that Iceberg doesn't
- * expose the interface of the transform types outside of their package and the only
+ * expose the interface of the transform types outside their package and the only
* way to get the transform's parameter is implementing this visitor class.
*/
public static HashMap<String, Integer> getPartitionTransformParams(PartitionSpec spec) {
@@ -449,61 +450,61 @@ public class IcebergUtil {
spec, new PartitionSpecVisitor<Pair<String, Integer>>() {
@Override
public Pair<String, Integer> identity(String sourceName, int sourceId) {
- String mappingKey = getPartitonTransformMappingKey(sourceId,
+ String mappingKey = getPartitionTransformMappingKey(sourceId,
TIcebergPartitionTransformType.IDENTITY);
- return new Pair<String, Integer>(mappingKey, null);
+ return new Pair<>(mappingKey, null);
}
@Override
public Pair<String, Integer> bucket(String sourceName, int sourceId,
int numBuckets) {
- String mappingKey = getPartitonTransformMappingKey(sourceId,
+ String mappingKey = getPartitionTransformMappingKey(sourceId,
TIcebergPartitionTransformType.BUCKET);
- return new Pair<String, Integer>(mappingKey, numBuckets);
+ return new Pair<>(mappingKey, numBuckets);
}
@Override
public Pair<String, Integer> truncate(String sourceName, int sourceId,
int width) {
- String mappingKey = getPartitonTransformMappingKey(sourceId,
+ String mappingKey = getPartitionTransformMappingKey(sourceId,
TIcebergPartitionTransformType.TRUNCATE);
- return new Pair<String, Integer>(mappingKey, width);
+ return new Pair<>(mappingKey, width);
}
@Override
public Pair<String, Integer> year(String sourceName, int sourceId) {
- String mappingKey = getPartitonTransformMappingKey(sourceId,
+ String mappingKey = getPartitionTransformMappingKey(sourceId,
TIcebergPartitionTransformType.YEAR);
- return new Pair<String, Integer>(mappingKey, null);
+ return new Pair<>(mappingKey, null);
}
@Override
public Pair<String, Integer> month(String sourceName, int sourceId) {
- String mappingKey = getPartitonTransformMappingKey(sourceId,
+ String mappingKey = getPartitionTransformMappingKey(sourceId,
TIcebergPartitionTransformType.MONTH);
- return new Pair<String, Integer>(mappingKey, null);
+ return new Pair<>(mappingKey, null);
}
@Override
public Pair<String, Integer> day(String sourceName, int sourceId) {
- String mappingKey = getPartitonTransformMappingKey(sourceId,
+ String mappingKey = getPartitionTransformMappingKey(sourceId,
TIcebergPartitionTransformType.DAY);
- return new Pair<String, Integer>(mappingKey, null);
+ return new Pair<>(mappingKey, null);
}
@Override
public Pair<String, Integer> hour(String sourceName, int sourceId) {
- String mappingKey = getPartitonTransformMappingKey(sourceId,
+ String mappingKey = getPartitionTransformMappingKey(sourceId,
TIcebergPartitionTransformType.HOUR);
- return new Pair<String, Integer>(mappingKey, null);
+ return new Pair<>(mappingKey, null);
}
@Override
public Pair<String, Integer> alwaysNull(int fieldId, String sourceName,
int sourceId) {
- String mappingKey = getPartitonTransformMappingKey(sourceId,
+ String mappingKey = getPartitionTransformMappingKey(sourceId,
TIcebergPartitionTransformType.VOID);
- return new Pair<String, Integer>(mappingKey, null);
+ return new Pair<>(mappingKey, null);
}
});
// Move the content of the List into a HashMap for faster querying in the future.
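To make the mapping key concrete, a hedged example (the schema and field ids are
assumed; the new IcebergUtilTest below exercises exactly this case):

    // Column "i" is assumed to have source/field id 1 in 'schema'.
    PartitionSpec spec = PartitionSpec.builderFor(schema).bucket("i", 128).build();
    HashMap<String, Integer> params = IcebergUtil.getPartitionTransformParams(spec);
    Integer numBuckets = params.get("1_BUCKET");  // 128
    // A non-parameterized transform such as IDENTITY or DAY maps to null instead.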
@@ -539,8 +540,14 @@ public class IcebergUtil {
/**
* Transform TIcebergFileFormat to HdfsFileFormat
*/
+ @SuppressWarnings("unused")
public static HdfsFileFormat toHdfsFileFormat(String format) {
- return HdfsFileFormat.fromThrift(toTHdfsFileFormat(getIcebergFileFormat(format)));
+ TIcebergFileFormat icebergFileFormat = getIcebergFileFormat(format);
+ if (icebergFileFormat == null) {
+ // Can't pass null to toTHdfsFileFormat(), so throw.
+ throw new IllegalArgumentException("unknown table format " + format);
+ }
+ return HdfsFileFormat.fromThrift(toTHdfsFileFormat(icebergFileFormat));
}
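The new contract fails fast on an unrecognized format instead of passing null
into toTHdfsFileFormat(); a short sketch of both paths, mirrored by
testToHdfsFileFormat() in the new test file:

    HdfsFileFormat ok = IcebergUtil.toHdfsFileFormat("PARQUET"); // HdfsFileFormat.PARQUET
    try {
      IcebergUtil.toHdfsFileFormat("unknown");
    } catch (IllegalArgumentException e) {
      // expected: "unknown table format unknown"
    }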
/**
@@ -645,7 +652,6 @@ public class IcebergUtil {
}
@Override
- @SuppressWarnings("unchecked")
public <T> T get(int pos, Class<T> javaClass) {
return javaClass.cast(values[pos]);
}
@@ -741,7 +747,7 @@ public class IcebergUtil {
* return value should be 14.
*/
private static Integer parseYearToTransformYear(String yearStr) {
- Integer year = Integer.valueOf(yearStr);
+ int year = Integer.parseInt(yearStr);
return year - ICEBERG_EPOCH_YEAR;
}
@@ -749,12 +755,11 @@ public class IcebergUtil {
* In the partition path months are represented as <year>-<month>, e.g. 2021-01. We
* need to convert it to a single integer which represents the months from '1970-01'.
*/
- private static Integer parseMonthToTransformMonth(String monthStr)
- throws ImpalaRuntimeException {
+ private static Integer parseMonthToTransformMonth(String monthStr) {
String[] parts = monthStr.split("-", -1);
Preconditions.checkState(parts.length == 2);
- Integer year = Integer.valueOf(parts[0]);
- Integer month = Integer.valueOf(parts[1]);
+ int year = Integer.parseInt(parts[0]);
+ int month = Integer.parseInt(parts[1]);
int years = year - ICEBERG_EPOCH_YEAR;
int months = month - ICEBERG_EPOCH_MONTH;
return years * 12 + months;
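A worked example of the arithmetic above:

    // "2021-01":  years  = 2021 - 1970 = 51
    //             months = 1 - 1       = 0
    //             result = 51 * 12 + 0 = 612 months since 1970-01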
@@ -769,10 +774,10 @@ public class IcebergUtil {
final OffsetDateTime EPOCH = Instant.ofEpochSecond(0).atOffset(ZoneOffset.UTC);
String[] parts = hourStr.split("-", -1);
Preconditions.checkState(parts.length == 4);
- Integer year = Integer.valueOf(parts[0]);
- Integer month = Integer.valueOf(parts[1]);
- Integer day = Integer.valueOf(parts[2]);
- Integer hour = Integer.valueOf(parts[3]);
+ int year = Integer.parseInt(parts[0]);
+ int month = Integer.parseInt(parts[1]);
+ int day = Integer.parseInt(parts[2]);
+ int hour = Integer.parseInt(parts[3]);
OffsetDateTime datetime = OffsetDateTime.of(
LocalDateTime.of(year, month, day, hour, /*minute=*/0),
ZoneOffset.UTC);
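And a worked example for the hour transform (the conversion from 'datetime' to
hours since the epoch happens just below this hunk):

    // "1970-01-02-05" parses to 1970-01-02T05:00 UTC,
    // i.e. 1 * 24 + 5 = 29 hours after 1970-01-01T00:00.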
@@ -812,9 +817,9 @@ public class IcebergUtil {
clevel > IcebergTable.MAX_PARQUET_COMPRESSION_LEVEL) {
errMsg.append("Parquet compression level for Iceberg table should fall in " +
"the range of [")
- .append(String.valueOf(IcebergTable.MIN_PARQUET_COMPRESSION_LEVEL))
+ .append(IcebergTable.MIN_PARQUET_COMPRESSION_LEVEL)
.append("..")
- .append(String.valueOf(IcebergTable.MAX_PARQUET_COMPRESSION_LEVEL))
+ .append(IcebergTable.MAX_PARQUET_COMPRESSION_LEVEL)
.append("]");
return null;
}
@@ -836,9 +841,9 @@ public class IcebergUtil {
rowGroupSize > IcebergTable.MAX_PARQUET_ROW_GROUP_SIZE) {
errMsg.append("Parquet row group size for Iceberg table should ")
.append("fall in the range of [")
- .append(String.valueOf(IcebergTable.MIN_PARQUET_ROW_GROUP_SIZE))
+ .append(IcebergTable.MIN_PARQUET_ROW_GROUP_SIZE)
.append("..")
- .append(String.valueOf(IcebergTable.MAX_PARQUET_ROW_GROUP_SIZE))
+ .append(IcebergTable.MAX_PARQUET_ROW_GROUP_SIZE)
.append("]");
return null;
}
@@ -873,9 +878,9 @@ public class IcebergUtil {
errMsg.append("Parquet ")
.append(descr)
.append(" for Iceberg table should fall in the range of [")
- .append(String.valueOf(IcebergTable.MIN_PARQUET_PAGE_SIZE))
+ .append(IcebergTable.MIN_PARQUET_PAGE_SIZE)
.append("..")
- .append(String.valueOf(IcebergTable.MAX_PARQUET_PAGE_SIZE))
+ .append(IcebergTable.MAX_PARQUET_PAGE_SIZE)
.append("]");
return null;
}
@@ -979,45 +984,45 @@ public class IcebergUtil {
schema, field, new PartitionSpecVisitor<Pair<Byte, Integer>>() {
@Override
public Pair<Byte, Integer> identity(String sourceName, int sourceId) {
- return new Pair<Byte, Integer>(FbIcebergTransformType.IDENTITY, null);
+ return new Pair<>(FbIcebergTransformType.IDENTITY, null);
}
@Override
public Pair<Byte, Integer> bucket(String sourceName, int sourceId,
int numBuckets) {
- return new Pair<Byte, Integer>(FbIcebergTransformType.BUCKET, numBuckets);
+ return new Pair<>(FbIcebergTransformType.BUCKET, numBuckets);
}
@Override
public Pair<Byte, Integer> truncate(String sourceName, int sourceId,
int width) {
- return new Pair<Byte, Integer>(FbIcebergTransformType.TRUNCATE, width);
+ return new Pair<>(FbIcebergTransformType.TRUNCATE, width);
}
@Override
public Pair<Byte, Integer> year(String sourceName, int sourceId) {
- return new Pair<Byte, Integer>(FbIcebergTransformType.YEAR, null);
+ return new Pair<>(FbIcebergTransformType.YEAR, null);
}
@Override
public Pair<Byte, Integer> month(String sourceName, int sourceId) {
- return new Pair<Byte, Integer>(FbIcebergTransformType.MONTH, null);
+ return new Pair<>(FbIcebergTransformType.MONTH, null);
}
@Override
public Pair<Byte, Integer> day(String sourceName, int sourceId) {
- return new Pair<Byte, Integer>(FbIcebergTransformType.DAY, null);
+ return new Pair<>(FbIcebergTransformType.DAY, null);
}
@Override
public Pair<Byte, Integer> hour(String sourceName, int sourceId) {
- return new Pair<Byte, Integer>(FbIcebergTransformType.HOUR, null);
+ return new Pair<>(FbIcebergTransformType.HOUR, null);
}
@Override
public Pair<Byte, Integer> alwaysNull(int fieldId, String sourceName,
int sourceId) {
- return new Pair<Byte, Integer>(FbIcebergTransformType.VOID, null);
+ return new Pair<>(FbIcebergTransformType.VOID, null);
}
});
}
diff --git a/fe/src/test/java/org/apache/impala/catalog/local/LocalCatalogTest.java b/fe/src/test/java/org/apache/impala/catalog/local/LocalCatalogTest.java
index 5f6e78fd2..a2a72ce3c 100644
--- a/fe/src/test/java/org/apache/impala/catalog/local/LocalCatalogTest.java
+++ b/fe/src/test/java/org/apache/impala/catalog/local/LocalCatalogTest.java
@@ -272,7 +272,7 @@ public class LocalCatalogTest {
}
/**
- * This test verifies that the network adresses used by the LocalIcebergTable are
+ * This test verifies that the network addresses used by the LocalIcebergTable are
* the same used by CatalogD.
*/
@Test
diff --git a/fe/src/test/java/org/apache/impala/util/IcebergUtilTest.java b/fe/src/test/java/org/apache/impala/util/IcebergUtilTest.java
new file mode 100644
index 000000000..3a3d0031c
--- /dev/null
+++ b/fe/src/test/java/org/apache/impala/util/IcebergUtilTest.java
@@ -0,0 +1,400 @@
+
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+
+package org.apache.impala.util;
+
+import static org.apache.impala.thrift.TIcebergCatalog.CATALOGS;
+import static org.apache.impala.thrift.TIcebergCatalog.HADOOP_CATALOG;
+import static org.apache.impala.thrift.TIcebergCatalog.HADOOP_TABLES;
+import static org.apache.impala.thrift.TIcebergCatalog.HIVE_CATALOG;
+import static org.apache.impala.util.IcebergUtil.getFilePathHash;
+import static org.apache.impala.util.IcebergUtil.getIcebergFileFormat;
+import static org.apache.impala.util.IcebergUtil.getPartitionTransform;
+import static org.apache.impala.util.IcebergUtil.getPartitionTransformParams;
+import static org.apache.impala.util.IcebergUtil.isPartitionColumn;
+import static org.apache.impala.util.IcebergUtil.toHdfsFileFormat;
+import static org.apache.impala.util.IcebergUtil.toTHdfsFileFormat;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import com.google.common.collect.ImmutableList;
+
+import org.apache.hadoop.hive.metastore.api.Table;
+import org.apache.iceberg.DataFile;
+import org.apache.iceberg.DataFiles;
+import org.apache.iceberg.PartitionSpec;
+import org.apache.iceberg.Schema;
+import org.apache.iceberg.catalog.TableIdentifier;
+import org.apache.iceberg.mr.Catalogs;
+import org.apache.iceberg.types.Types;
+import org.apache.impala.analysis.IcebergPartitionField;
+import org.apache.impala.analysis.IcebergPartitionSpec;
+import org.apache.impala.analysis.IcebergPartitionTransform;
+import org.apache.impala.catalog.HdfsFileFormat;
+import org.apache.impala.catalog.IcebergColumn;
+import org.apache.impala.catalog.IcebergTable;
+import org.apache.impala.catalog.TableLoadingException;
+import org.apache.impala.catalog.Type;
+import org.apache.impala.catalog.iceberg.IcebergCatalog;
+import org.apache.impala.catalog.iceberg.IcebergCatalogs;
+import org.apache.impala.catalog.iceberg.IcebergHadoopCatalog;
+import org.apache.impala.catalog.iceberg.IcebergHadoopTables;
+import org.apache.impala.catalog.iceberg.IcebergHiveCatalog;
+import org.apache.impala.common.AnalysisException;
+import org.apache.impala.common.ImpalaRuntimeException;
+import org.apache.impala.thrift.THdfsFileFormat;
+import org.apache.impala.thrift.TIcebergCatalog;
+import org.apache.impala.thrift.TIcebergFileFormat;
+import org.apache.impala.thrift.TIcebergPartitionTransformType;
+import org.junit.Test;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+
+/**
+ * Unit tests for Iceberg Utilities.
+ */
+public class IcebergUtilTest {
+ /**
+ * Unit test for IcebergUtil.getTIcebergCatalog() and IcebergUtil.getIcebergCatalog().
+ */
+ @Test
+ public void testGetCatalog() throws ImpalaRuntimeException {
+ CatalogMapping[] mappings = new CatalogMapping[] {
+ new CatalogMapping("hadoop.tables", HADOOP_TABLES, IcebergHadoopTables.class),
+ new CatalogMapping("hadoop.catalog", HADOOP_CATALOG, IcebergHadoopCatalog.class),
+ new CatalogMapping("hive.catalog", HIVE_CATALOG, IcebergHiveCatalog.class),
+ new CatalogMapping(null, HIVE_CATALOG, IcebergHiveCatalog.class),
+ new CatalogMapping("other string", CATALOGS, IcebergCatalogs.class),
+ };
+ for (CatalogMapping testValue : mappings) {
+ TIcebergCatalog catalog = IcebergUtil.getTIcebergCatalog(testValue.propertyName);
+ assertEquals("err for " + testValue.propertyName, testValue.catalog, catalog);
+ IcebergCatalog impl = IcebergUtil.getIcebergCatalog(catalog, "location");
+ assertEquals("err for " + testValue.propertyName, testValue.clazz, impl.getClass());
+ }
+ }
+
+ /**
+ * Unit test for IcebergUtil.getIcebergTableIdentifier().
+ */
+ @Test
+ public void testGetIcebergTableIdentifier() {
+ // Test a table with no table properties.
+ Table table = new Table();
+ table.setParameters(new HashMap<>());
+ String tableName = "table_name";
+ table.setTableName(tableName);
+ String dbname = "database_name";
+ table.setDbName(dbname);
+ TableIdentifier icebergTableIdentifier = IcebergUtil.getIcebergTableIdentifier(table);
+ assertEquals(
+ TableIdentifier.parse("database_name.table_name"), icebergTableIdentifier);
+
+ // If iceberg.table_identifier is not set then the value of the "name" property
+ // is used.
+ String nameId = "db.table";
+ table.putToParameters(Catalogs.NAME, nameId);
+ icebergTableIdentifier = IcebergUtil.getIcebergTableIdentifier(table);
+ assertEquals(TableIdentifier.parse(nameId), icebergTableIdentifier);
+
+ // If iceberg.table_identifier is set then that is used.
+ String tableId = "foo.bar";
+ table.putToParameters(IcebergTable.ICEBERG_TABLE_IDENTIFIER, tableId);
+ icebergTableIdentifier = IcebergUtil.getIcebergTableIdentifier(table);
+ assertEquals(TableIdentifier.parse(tableId), icebergTableIdentifier);
+
+ // If iceberg.table_identifier is set to a simple name, the default database is used.
+ table.putToParameters(IcebergTable.ICEBERG_TABLE_IDENTIFIER, "noDatabase");
+ icebergTableIdentifier = IcebergUtil.getIcebergTableIdentifier(table);
+ assertEquals(TableIdentifier.parse("default.noDatabase"), icebergTableIdentifier);
+ }
+
+ /**
+ * Unit test for isHiveCatalog().
+ */
+ @Test
+ public void testIsHiveCatalog() {
+ CatalogType[] catalogTypes = new CatalogType[] {
+ // For hadoop.tables and hadoop.catalog we are not using Hive Catalog.
+ new CatalogType("hadoop.tables", false),
+ new CatalogType("hadoop.catalog", false),
+ // For all other values of ICEBERG_CATALOG the Hive Catalog is used.
+ new CatalogType("hive.catalog", true),
+ new CatalogType(null, true),
+ new CatalogType("other string", true),
+ };
+ for (CatalogType testValue : catalogTypes) {
+ Table table = new Table();
+ table.putToParameters(IcebergTable.ICEBERG_CATALOG, testValue.propertyName);
+ assertEquals("err in " + testValue.propertyName, testValue.isHiveCatalog,
+ IcebergUtil.isHiveCatalog(table));
+ }
+ }
+
+ /**
+ * Unit test for getIcebergFileFormat(), toHdfsFileFormat() and toTHdfsFileFormat().
+ */
+ @Test
+ public void testToHdfsFileFormat() {
+ assertEquals(THdfsFileFormat.ORC, toTHdfsFileFormat(TIcebergFileFormat.ORC));
+ assertEquals(THdfsFileFormat.PARQUET, toTHdfsFileFormat(TIcebergFileFormat.PARQUET));
+ assertEquals(HdfsFileFormat.ORC, toHdfsFileFormat(TIcebergFileFormat.ORC));
+ assertEquals(HdfsFileFormat.PARQUET, toHdfsFileFormat(TIcebergFileFormat.PARQUET));
+ assertEquals(HdfsFileFormat.ORC, toHdfsFileFormat("ORC"));
+ assertEquals(HdfsFileFormat.PARQUET, toHdfsFileFormat("PARQUET"));
+ assertEquals(HdfsFileFormat.PARQUET, toHdfsFileFormat((String) null));
+ try {
+ toHdfsFileFormat("unknown");
+ fail("did not get expected assertion");
+ } catch (IllegalArgumentException e) {
+ // fall through
+ }
+ assertEquals(TIcebergFileFormat.ORC, getIcebergFileFormat("ORC"));
+ assertEquals(TIcebergFileFormat.PARQUET, getIcebergFileFormat("PARQUET"));
+ assertNull(getIcebergFileFormat("unknown"));
+ }
+
+ /**
+ * Unit test for getPartitionTransform().
+ */
+ @Test
+ public void testGetPartitionTransform() {
+ // Case 1
+ // Transforms that work OK.
+ PartitionTransform[] goodTransforms = new PartitionTransform[] {
+ new PartitionTransform("BUCKET", 5),
+ new PartitionTransform("TRUNCATE", 4),
+ new PartitionTransform("HOUR", null),
+ new PartitionTransform("HOURS", null),
+ new PartitionTransform("DAY", null),
+ new PartitionTransform("DAYS", null),
+ new PartitionTransform("MONTH", null),
+ new PartitionTransform("MONTHS", null),
+ new PartitionTransform("YEAR", null),
+ new PartitionTransform("YEARS", null),
+ new PartitionTransform("VOID", null),
+ new PartitionTransform("IDENTITY", null),
+ };
+ for (PartitionTransform partitionTransform : goodTransforms) {
+ IcebergPartitionTransform transform = null;
+ try {
+ transform = getPartitionTransform(
+ partitionTransform.transformName, partitionTransform.parameter);
+ } catch (TableLoadingException t) {
+ fail("Transform " + partitionTransform + " caught unexpected " + t);
+ }
+ assertNotNull(transform);
+ try {
+ transform.analyze(null);
+ } catch (AnalysisException t) {
+ fail("Transform " + partitionTransform + " caught unexpected " + t);
+ }
+ }
+
+ // Case 2
+ // Transforms that get TableLoadingException.
+ PartitionTransform[] tableExceptions = new PartitionTransform[] {
+ new PartitionTransform("JUNK", -5),
+ };
+ for (PartitionTransform partitionTransform : tableExceptions) {
+ try {
+ /* IcebergPartitionTransform transform = */ getPartitionTransform(
+ partitionTransform.transformName, partitionTransform.parameter);
+ fail("Transform " + partitionTransform + " should have got exception");
+ } catch (TableLoadingException t) {
+ // OK, fall through
+ }
+ }
+
+ // Case 3
+ // Transforms that fail analysis.
+ PartitionTransform[] failAnalysis = new PartitionTransform[] {
+ new PartitionTransform("BUCKET", -5),
+ new PartitionTransform("TRUNCATE", -4),
+ };
+ for (PartitionTransform partitionTransform : failAnalysis) {
+ IcebergPartitionTransform transform = null;
+ try {
+ transform = getPartitionTransform(
+ partitionTransform.transformName, partitionTransform.parameter);
+ } catch (TableLoadingException t) {
+ fail("Transform " + partitionTransform + " caught unexpected " + t);
+ }
+ assertNotNull(transform);
+ try {
+ transform.analyze(null);
+ fail("Transform " + partitionTransform + " should have got exception");
+ } catch (AnalysisException t) {
+ // OK, fall through
+ }
+ }
+ }
+
+ /**
+ * Unit test for getFilePathHash().
+ */
+ @Test
+ public void testGetDataFilePathHash() {
+ String hash = getFilePathHash(FILE_A);
+ assertNotNull(hash);
+ String hash2 = getFilePathHash(FILE_A);
+ assertEquals(hash, hash2);
+ }
+
+ /**
+ * Unit test for getPartitionTransformParams().
+ */
+ @Test
+ public void testGetPartitionTransformParams() {
+ int numBuckets = 128;
+ PartitionSpec partitionSpec =
+ PartitionSpec.builderFor(SCHEMA).bucket("i", numBuckets).build();
+ HashMap<String, Integer> partitionTransformParams =
+ getPartitionTransformParams(partitionSpec);
+ assertNotNull(partitionTransformParams);
+ String expectedKey = "1_BUCKET";
+ assertTrue(partitionTransformParams.containsKey(expectedKey));
+ assertEquals(numBuckets, (long) partitionTransformParams.get(expectedKey));
+ }
+
+ /**
+ * Unit test for isPartitionColumn().
+ */
+ @Test
+ public void testIsPartitionColumn() {
+ {
+ // Case 1
+ // No partition fields: isPartitionColumn() should return false.
+ int fieldId = 3;
+ IcebergColumn column =
+ new IcebergColumn("name", Type.BOOLEAN, "comment", 0, fieldId, 5, 0, true);
+ List<IcebergPartitionField> fieldList = new ArrayList<>();
+ IcebergPartitionSpec icebergPartitionSpec = new IcebergPartitionSpec(4, fieldList);
+ assertFalse(isPartitionColumn(column, icebergPartitionSpec));
+ }
+ {
+ // Case 2
+ // A partition field source id matches a column field id: isPartitionColumn() should
+ // return true.
+ int id = 3;
+ IcebergColumn column =
+ new IcebergColumn("name", Type.BOOLEAN, "comment", 0, id, 105, 0, true);
+ IcebergPartitionTransform icebergPartitionTransform =
+ new IcebergPartitionTransform(TIcebergPartitionTransformType.IDENTITY);
+ IcebergPartitionField field =
+ new IcebergPartitionField(id, 106, "name", "name", icebergPartitionTransform);
+ ImmutableList<IcebergPartitionField> fieldList = ImmutableList.of(field);
+ IcebergPartitionSpec icebergPartitionSpec = new IcebergPartitionSpec(4, fieldList);
+ assertTrue(isPartitionColumn(column, icebergPartitionSpec));
+ }
+ {
+ // Case 3
+ // Partition field source id does not match a column field id: isPartitionColumn()
+ // should return false.
+ IcebergColumn column =
+ new IcebergColumn("name", Type.BOOLEAN, "comment", 0, 108, 105, 0, true);
+ IcebergPartitionTransform icebergPartitionTransform =
+ new IcebergPartitionTransform(TIcebergPartitionTransformType.IDENTITY);
+ IcebergPartitionField field =
+ new IcebergPartitionField(107, 106, "name", "name", icebergPartitionTransform);
+ ImmutableList<IcebergPartitionField> fieldList = ImmutableList.of(field);
+ IcebergPartitionSpec icebergPartitionSpec = new IcebergPartitionSpec(4, fieldList);
+ assertFalse(isPartitionColumn(column, icebergPartitionSpec));
+ }
+ }
+
+ /**
+ * Holder class for testing Partition transforms.
+ */
+ static class PartitionTransform {
+ String transformName;
+ Integer parameter;
+
+ PartitionTransform(String transformName, Integer parameter) {
+ this.transformName = transformName;
+ this.parameter = parameter;
+ }
+
+ @Override
+ public String toString() {
+ return "PartitionTransform{"
+ + "transformName='" + transformName + '\'' + ", parameter=" + parameter + '}';
+ }
+ }
+
+ /**
+ * A simple Schema object.
+ */
+ public static final Schema SCHEMA =
+ new Schema(Types.NestedField.required(1, "i", Types.IntegerType.get()),
+ Types.NestedField.required(2, "l", Types.LongType.get()),
+ Types.NestedField.required(3, "id", Types.IntegerType.get()),
+ Types.NestedField.required(4, "data", Types.StringType.get()));
+
+ /**
+ * Partition spec used to create tables.
+ */
+ protected static final PartitionSpec SPEC =
+ PartitionSpec.builderFor(SCHEMA).bucket("data", 10).build();
+
+ /**
+ * A test DataFile.
+ */
+ static final DataFile FILE_A =
+ DataFiles.builder(SPEC)
+ .withPath("/path/to/data-a.parquet")
+ .withFileSizeInBytes(10)
+ .withPartitionPath("data_bucket=0") // Easy way to set partition data for now.
+ .withRecordCount(1)
+ .build();
+
+ /**
+ * Holder class for testing isHiveCatalog().
+ */
+ static class CatalogType {
+ String propertyName;
+ boolean isHiveCatalog;
+
+ CatalogType(String propertyName, boolean isHiveCatalog) {
+ this.propertyName = propertyName;
+ this.isHiveCatalog = isHiveCatalog;
+ }
+ }
+
+ /**
+ * Holder class for test of catalog functions.
+ */
+ static class CatalogMapping {
+ String propertyName;
+ TIcebergCatalog catalog;
+ Class<?> clazz;
+
+ CatalogMapping(String propertyName, TIcebergCatalog catalog, Class<?> clazz) {
+ this.propertyName = propertyName;
+ this.catalog = catalog;
+ this.clazz = clazz;
+ }
+ }
+}
\ No newline at end of file