You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iceberg.apache.org by bl...@apache.org on 2022/05/23 23:55:25 UTC
[iceberg] branch 0.13.x updated: Nessie: Fix NPE while accessing refreshed table in nessie catalog (#4509) (#4840)
This is an automated email from the ASF dual-hosted git repository.
blue pushed a commit to branch 0.13.x
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/0.13.x by this push:
new bcc036ebc9 Nessie: Fix NPE while accessing refreshed table in nessie catalog (#4509) (#4840)
bcc036ebc9 is described below
commit bcc036ebc918d10dab2bed0c025d8e81d3cea467
Author: Eduard Tudenhöfner <et...@gmail.com>
AuthorDate: Tue May 24 01:55:19 2022 +0200
Nessie: Fix NPE while accessing refreshed table in nessie catalog (#4509) (#4840)
Co-authored-by: Ajantha Bhat <aj...@gmail.com>
---
.../java/org/apache/iceberg/TableMetadata.java | 14 ++++++++++-
.../iceberg/nessie/NessieTableOperations.java | 1 +
.../iceberg/nessie/TestBranchVisibility.java | 27 ++++++++++++++++++++++
3 files changed, 41 insertions(+), 1 deletion(-)
diff --git a/core/src/main/java/org/apache/iceberg/TableMetadata.java b/core/src/main/java/org/apache/iceberg/TableMetadata.java
index 164e29505c..2533fa0347 100644
--- a/core/src/main/java/org/apache/iceberg/TableMetadata.java
+++ b/core/src/main/java/org/apache/iceberg/TableMetadata.java
@@ -731,6 +731,7 @@ public class TableMetadata implements Serializable {
public static class Builder {
private final TableMetadata base;
+ private String metadataLocation;
private int formatVersion;
private String uuid;
private Long lastUpdatedMillis;
@@ -795,6 +796,11 @@ public class TableMetadata implements Serializable {
this.sortOrdersById = Maps.newHashMap(base.sortOrdersById);
}
+ public Builder withMetadataLocation(String newMetadataLocation) {
+ this.metadataLocation = newMetadataLocation;
+ return this;
+ }
+
public Builder assignUUID() {
if (uuid == null) {
this.uuid = UUID.randomUUID().toString();
@@ -1009,6 +1015,12 @@ public class TableMetadata implements Serializable {
this.lastUpdatedMillis = System.currentTimeMillis();
}
+ // when associated with a metadata file, table metadata must have no changes so that the metadata matches exactly
+ // what is in the metadata file, which does not store changes. metadata location with changes is inconsistent.
+ Preconditions.checkArgument(
+ changes.size() == 0 || discardChanges || metadataLocation == null,
+ "Cannot set metadata location with changes to table metadata: %s changes", changes.size());
+
Schema schema = schemasById.get(currentSchemaId);
PartitionSpec.checkCompatibility(specsById.get(defaultSpecId), schema);
SortOrder.checkCompatibility(sortOrdersById.get(defaultSortOrderId), schema);
@@ -1018,7 +1030,7 @@ public class TableMetadata implements Serializable {
List<HistoryEntry> newSnapshotLog = updateSnapshotLog(snapshotLog, snapshotsById, currentSnapshotId, changes);
return new TableMetadata(
- null,
+ metadataLocation,
formatVersion,
uuid,
location,
diff --git a/nessie/src/main/java/org/apache/iceberg/nessie/NessieTableOperations.java b/nessie/src/main/java/org/apache/iceberg/nessie/NessieTableOperations.java
index 6b406626c8..3c64c19a27 100644
--- a/nessie/src/main/java/org/apache/iceberg/nessie/NessieTableOperations.java
+++ b/nessie/src/main/java/org/apache/iceberg/nessie/NessieTableOperations.java
@@ -90,6 +90,7 @@ public class NessieTableOperations extends BaseMetastoreTableOperations {
.setCurrentSchema(table.getSchemaId())
.setDefaultSortOrder(table.getSortOrderId())
.setDefaultPartitionSpec(table.getSpecId())
+ .withMetadataLocation(metadataLocation)
.discardChanges()
.build();
}
diff --git a/nessie/src/test/java/org/apache/iceberg/nessie/TestBranchVisibility.java b/nessie/src/test/java/org/apache/iceberg/nessie/TestBranchVisibility.java
index 64bd6660be..3a8b40d7b2 100644
--- a/nessie/src/test/java/org/apache/iceberg/nessie/TestBranchVisibility.java
+++ b/nessie/src/test/java/org/apache/iceberg/nessie/TestBranchVisibility.java
@@ -22,6 +22,7 @@ package org.apache.iceberg.nessie;
import java.util.Collections;
import java.util.Map;
import org.apache.avro.generic.GenericRecordBuilder;
+import org.apache.iceberg.BaseTable;
import org.apache.iceberg.DataFile;
import org.apache.iceberg.Snapshot;
import org.apache.iceberg.Table;
@@ -173,6 +174,32 @@ public class TestBranchVisibility extends BaseTestIceberg {
Assertions.assertThat(metadataOn2).isNotEqualTo(metadataOnTest).isNotEqualTo(metadataOnTest2);
}
+ @Test
+ public void testMetadataLocation() throws Exception {
+ String branch1 = "test";
+ String branch2 = "branch-2";
+
+ // commit on tableIdentifier1 on branch1
+ NessieCatalog catalog = initCatalog(branch1);
+ String metadataLocationOfCommit1 = addRow(catalog, tableIdentifier1, "initial-data",
+ ImmutableMap.of("id0", 4L));
+
+ createBranch(branch2, catalog.currentHash(), branch1);
+ // commit on tableIdentifier1 on branch2
+ catalog = initCatalog(branch2);
+ String metadataLocationOfCommit2 = addRow(catalog, tableIdentifier1, "some-more-data",
+ ImmutableMap.of("id0", 42L));
+ Assertions.assertThat(metadataLocationOfCommit2).isNotNull().isNotEqualTo(metadataLocationOfCommit1);
+
+ catalog = initCatalog(branch1);
+ // load tableIdentifier1 on branch1
+ BaseTable table = (BaseTable) catalog.loadTable(tableIdentifier1);
+ // branch1's tableIdentifier1's metadata location
+ // should be the latest global state (aka commit2 from branch2)
+ Assertions.assertThat(table.operations().current().metadataFileLocation())
+ .isNotNull().isEqualTo(metadataLocationOfCommit2);
+ }
+
/**
* Complex-ish test case that verifies that both the snapshot-ID and schema-ID are properly set
* and retained when working with a mixture of DDLs and DMLs across multiple branches.