You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iceberg.apache.org by ao...@apache.org on 2020/10/29 18:51:31 UTC

[iceberg] branch master updated: Core: Fix entries metadata tables with delete manifests (#1673)

This is an automated email from the ASF dual-hosted git repository.

aokolnychyi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iceberg.git


The following commit(s) were added to refs/heads/master by this push:
     new 43a7427  Core: Fix entries metadata tables with delete manifests (#1673)
43a7427 is described below

commit 43a7427874ab01c6811979b3b97e6c97fe074184
Author: Russell Spitzer <ru...@GMAIL.COM>
AuthorDate: Thu Oct 29 13:51:18 2020 -0500

    Core: Fix entries metadata tables with delete manifests (#1673)
---
 .../java/org/apache/iceberg/AllEntriesTable.java   |  2 +-
 .../org/apache/iceberg/ManifestEntriesTable.java   | 19 +++++++++----
 .../apache/iceberg/TestEntriesMetadataTable.java   | 33 ++++++++++++++++++++++
 3 files changed, 48 insertions(+), 6 deletions(-)

diff --git a/core/src/main/java/org/apache/iceberg/AllEntriesTable.java b/core/src/main/java/org/apache/iceberg/AllEntriesTable.java
index 7bfbb2c..b6368fa 100644
--- a/core/src/main/java/org/apache/iceberg/AllEntriesTable.java
+++ b/core/src/main/java/org/apache/iceberg/AllEntriesTable.java
@@ -115,7 +115,7 @@ public class AllEntriesTable extends BaseMetadataTable {
       ResidualEvaluator residuals = ResidualEvaluator.unpartitioned(filter);
 
       return CloseableIterable.transform(manifests, manifest -> new ManifestEntriesTable.ManifestReadTask(
-          ops.io(), manifest, fileSchema, schemaString, specString, residuals));
+          ops.io(), manifest, fileSchema, schemaString, specString, residuals, ops.current().specsById()));
     }
   }
 
diff --git a/core/src/main/java/org/apache/iceberg/ManifestEntriesTable.java b/core/src/main/java/org/apache/iceberg/ManifestEntriesTable.java
index 262cd92..33cc668 100644
--- a/core/src/main/java/org/apache/iceberg/ManifestEntriesTable.java
+++ b/core/src/main/java/org/apache/iceberg/ManifestEntriesTable.java
@@ -19,6 +19,7 @@
 
 package org.apache.iceberg;
 
+import java.util.Map;
 import org.apache.iceberg.expressions.Expression;
 import org.apache.iceberg.expressions.Expressions;
 import org.apache.iceberg.expressions.ResidualEvaluator;
@@ -112,7 +113,8 @@ public class ManifestEntriesTable extends BaseMetadataTable {
       ResidualEvaluator residuals = ResidualEvaluator.unpartitioned(filter);
 
       return CloseableIterable.transform(manifests, manifest ->
-          new ManifestReadTask(ops.io(), manifest, fileSchema, schemaString, specString, residuals));
+          new ManifestReadTask(ops.io(), manifest, fileSchema, schemaString, specString, residuals,
+              ops.current().specsById()));
     }
   }
 
@@ -120,20 +122,27 @@ public class ManifestEntriesTable extends BaseMetadataTable {
     private final Schema fileSchema;
     private final FileIO io;
     private final ManifestFile manifest;
+    private final Map<Integer, PartitionSpec> specsById;
 
     ManifestReadTask(FileIO io, ManifestFile manifest, Schema fileSchema, String schemaString,
-                     String specString, ResidualEvaluator residuals) {
+                     String specString, ResidualEvaluator residuals, Map<Integer, PartitionSpec> specsById) {
       super(DataFiles.fromManifest(manifest), null, schemaString, specString, residuals);
       this.fileSchema = fileSchema;
       this.io = io;
       this.manifest = manifest;
+      this.specsById = specsById;
     }
 
     @Override
     public CloseableIterable<StructLike> rows() {
-      return CloseableIterable.transform(
-          ManifestFiles.read(manifest, io).project(fileSchema).entries(),
-          file -> (GenericManifestEntry<?>) file);
+      if (manifest.content() == ManifestContent.DATA) {
+        return CloseableIterable.transform(ManifestFiles.read(manifest, io).project(fileSchema).entries(),
+            file -> (GenericManifestEntry<DataFile>) file);
+      } else {
+        return CloseableIterable.transform(ManifestFiles.readDeleteManifest(manifest, io, specsById)
+                .project(fileSchema).entries(),
+            file -> (GenericManifestEntry<DeleteFile>) file);
+      }
     }
 
     @Override
diff --git a/core/src/test/java/org/apache/iceberg/TestEntriesMetadataTable.java b/core/src/test/java/org/apache/iceberg/TestEntriesMetadataTable.java
index eb2088d..baa90c3 100644
--- a/core/src/test/java/org/apache/iceberg/TestEntriesMetadataTable.java
+++ b/core/src/test/java/org/apache/iceberg/TestEntriesMetadataTable.java
@@ -19,8 +19,11 @@
 
 package org.apache.iceberg;
 
+import java.util.List;
+import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
 import org.apache.iceberg.relocated.com.google.common.collect.Iterables;
 import org.junit.Assert;
+import org.junit.Assume;
 import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;
@@ -131,4 +134,34 @@ public class TestEntriesMetadataTable extends TableTestBase {
     Assert.assertEquals(expectedSplits, Iterables.size(scan.planTasks()));
   }
 
+  @Test
+  public void testEntriesTableWithDeleteManifests() throws Exception {
+    Assume.assumeTrue("Only V2 Tables Support Deletes", formatVersion >= 2);
+    table.newAppend()
+        .appendFile(FILE_A)
+        .appendFile(FILE_B)
+        .commit();
+
+    table.newRowDelta()
+        .addDeletes(FILE_A_DELETES)
+        .commit();
+
+    Table entriesTable = new ManifestEntriesTable(table.ops(), table);
+    TableScan scan = entriesTable.newScan();
+
+    Schema expectedSchema = ManifestEntry.getSchema(table.spec().partitionType());
+
+    assertEquals("A tableScan.select() should prune the schema",
+        expectedSchema.asStruct(),
+        scan.schema().asStruct());
+
+    List<FileScanTask> files = ImmutableList.copyOf(scan.planFiles());
+    Assert.assertEquals("Data file should be the table's manifest",
+        Iterables.getOnlyElement(table.currentSnapshot().dataManifests()).path(), files.get(0).file().path());
+    Assert.assertEquals("Should contain 2 data file records", 2, files.get(0).file().recordCount());
+    Assert.assertEquals("Delete file should be in the table manifest",
+        Iterables.getOnlyElement(table.currentSnapshot().deleteManifests()).path(), files.get(1).file().path());
+    Assert.assertEquals("Should contain 1 delete file record", 1, files.get(1).file().recordCount());
+  }
+
 }