You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iceberg.apache.org by bl...@apache.org on 2022/05/23 23:55:25 UTC

[iceberg] branch 0.13.x updated: Nessie: Fix NPE while accessing refreshed table in nessie catalog (#4509) (#4840)

This is an automated email from the ASF dual-hosted git repository.

blue pushed a commit to branch 0.13.x
in repository https://gitbox.apache.org/repos/asf/iceberg.git


The following commit(s) were added to refs/heads/0.13.x by this push:
     new bcc036ebc9 Nessie: Fix NPE while accessing refreshed table in nessie catalog (#4509) (#4840)
bcc036ebc9 is described below

commit bcc036ebc918d10dab2bed0c025d8e81d3cea467
Author: Eduard Tudenhöfner <et...@gmail.com>
AuthorDate: Tue May 24 01:55:19 2022 +0200

    Nessie: Fix NPE while accessing refreshed table in nessie catalog (#4509) (#4840)
    
    Co-authored-by: Ajantha Bhat <aj...@gmail.com>
---
 .../java/org/apache/iceberg/TableMetadata.java     | 14 ++++++++++-
 .../iceberg/nessie/NessieTableOperations.java      |  1 +
 .../iceberg/nessie/TestBranchVisibility.java       | 27 ++++++++++++++++++++++
 3 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/core/src/main/java/org/apache/iceberg/TableMetadata.java b/core/src/main/java/org/apache/iceberg/TableMetadata.java
index 164e29505c..2533fa0347 100644
--- a/core/src/main/java/org/apache/iceberg/TableMetadata.java
+++ b/core/src/main/java/org/apache/iceberg/TableMetadata.java
@@ -731,6 +731,7 @@ public class TableMetadata implements Serializable {
 
   public static class Builder {
     private final TableMetadata base;
+    private String metadataLocation;
     private int formatVersion;
     private String uuid;
     private Long lastUpdatedMillis;
@@ -795,6 +796,11 @@ public class TableMetadata implements Serializable {
       this.sortOrdersById = Maps.newHashMap(base.sortOrdersById);
     }
 
+    public Builder withMetadataLocation(String newMetadataLocation) {
+      this.metadataLocation = newMetadataLocation;
+      return this;
+    }
+
     public Builder assignUUID() {
       if (uuid == null) {
         this.uuid = UUID.randomUUID().toString();
@@ -1009,6 +1015,12 @@ public class TableMetadata implements Serializable {
         this.lastUpdatedMillis = System.currentTimeMillis();
       }
 
+      // when associated with a metadata file, table metadata must have no changes so that the metadata matches exactly
+      // what is in the metadata file, which does not store changes. metadata location with changes is inconsistent.
+      Preconditions.checkArgument(
+          changes.size() == 0 || discardChanges || metadataLocation == null,
+          "Cannot set metadata location with changes to table metadata: %s changes", changes.size());
+
       Schema schema = schemasById.get(currentSchemaId);
       PartitionSpec.checkCompatibility(specsById.get(defaultSpecId), schema);
       SortOrder.checkCompatibility(sortOrdersById.get(defaultSortOrderId), schema);
@@ -1018,7 +1030,7 @@ public class TableMetadata implements Serializable {
       List<HistoryEntry> newSnapshotLog = updateSnapshotLog(snapshotLog, snapshotsById, currentSnapshotId, changes);
 
       return new TableMetadata(
-          null,
+          metadataLocation,
           formatVersion,
           uuid,
           location,
diff --git a/nessie/src/main/java/org/apache/iceberg/nessie/NessieTableOperations.java b/nessie/src/main/java/org/apache/iceberg/nessie/NessieTableOperations.java
index 6b406626c8..3c64c19a27 100644
--- a/nessie/src/main/java/org/apache/iceberg/nessie/NessieTableOperations.java
+++ b/nessie/src/main/java/org/apache/iceberg/nessie/NessieTableOperations.java
@@ -90,6 +90,7 @@ public class NessieTableOperations extends BaseMetastoreTableOperations {
         .setCurrentSchema(table.getSchemaId())
         .setDefaultSortOrder(table.getSortOrderId())
         .setDefaultPartitionSpec(table.getSpecId())
+        .withMetadataLocation(metadataLocation)
         .discardChanges()
         .build();
   }
diff --git a/nessie/src/test/java/org/apache/iceberg/nessie/TestBranchVisibility.java b/nessie/src/test/java/org/apache/iceberg/nessie/TestBranchVisibility.java
index 64bd6660be..3a8b40d7b2 100644
--- a/nessie/src/test/java/org/apache/iceberg/nessie/TestBranchVisibility.java
+++ b/nessie/src/test/java/org/apache/iceberg/nessie/TestBranchVisibility.java
@@ -22,6 +22,7 @@ package org.apache.iceberg.nessie;
 import java.util.Collections;
 import java.util.Map;
 import org.apache.avro.generic.GenericRecordBuilder;
+import org.apache.iceberg.BaseTable;
 import org.apache.iceberg.DataFile;
 import org.apache.iceberg.Snapshot;
 import org.apache.iceberg.Table;
@@ -173,6 +174,32 @@ public class TestBranchVisibility extends BaseTestIceberg {
     Assertions.assertThat(metadataOn2).isNotEqualTo(metadataOnTest).isNotEqualTo(metadataOnTest2);
   }
 
+  @Test
+  public void testMetadataLocation() throws Exception {
+    String branch1 = "test";
+    String branch2 = "branch-2";
+
+    // commit on tableIdentifier1 on branch1
+    NessieCatalog catalog = initCatalog(branch1);
+    String metadataLocationOfCommit1 = addRow(catalog, tableIdentifier1, "initial-data",
+        ImmutableMap.of("id0", 4L));
+
+    createBranch(branch2, catalog.currentHash(), branch1);
+    // commit on tableIdentifier1 on branch2
+    catalog = initCatalog(branch2);
+    String metadataLocationOfCommit2 = addRow(catalog, tableIdentifier1, "some-more-data",
+        ImmutableMap.of("id0", 42L));
+    Assertions.assertThat(metadataLocationOfCommit2).isNotNull().isNotEqualTo(metadataLocationOfCommit1);
+
+    catalog = initCatalog(branch1);
+    // load tableIdentifier1 on branch1
+    BaseTable table = (BaseTable) catalog.loadTable(tableIdentifier1);
+    // branch1's tableIdentifier1's metadata location
+    // should be the latest global state (aka commit2 from branch2)
+    Assertions.assertThat(table.operations().current().metadataFileLocation())
+        .isNotNull().isEqualTo(metadataLocationOfCommit2);
+  }
+
   /**
    * Complex-ish test case that verifies that both the snapshot-ID and schema-ID are properly set
    * and retained when working with a mixture of DDLs and DMLs across multiple branches.