You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iceberg.apache.org by ao...@apache.org on 2020/10/29 18:51:31 UTC
[iceberg] branch master updated: Core: Fix entries metadata tables with delete manifests (#1673)
This is an automated email from the ASF dual-hosted git repository.
aokolnychyi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/master by this push:
new 43a7427 Core: Fix entries metadata tables with delete manifests (#1673)
43a7427 is described below
commit 43a7427874ab01c6811979b3b97e6c97fe074184
Author: Russell Spitzer <ru...@GMAIL.COM>
AuthorDate: Thu Oct 29 13:51:18 2020 -0500
Core: Fix entries metadata tables with delete manifests (#1673)
---
.../java/org/apache/iceberg/AllEntriesTable.java | 2 +-
.../org/apache/iceberg/ManifestEntriesTable.java | 19 +++++++++----
.../apache/iceberg/TestEntriesMetadataTable.java | 33 ++++++++++++++++++++++
3 files changed, 48 insertions(+), 6 deletions(-)
diff --git a/core/src/main/java/org/apache/iceberg/AllEntriesTable.java b/core/src/main/java/org/apache/iceberg/AllEntriesTable.java
index 7bfbb2c..b6368fa 100644
--- a/core/src/main/java/org/apache/iceberg/AllEntriesTable.java
+++ b/core/src/main/java/org/apache/iceberg/AllEntriesTable.java
@@ -115,7 +115,7 @@ public class AllEntriesTable extends BaseMetadataTable {
ResidualEvaluator residuals = ResidualEvaluator.unpartitioned(filter);
return CloseableIterable.transform(manifests, manifest -> new ManifestEntriesTable.ManifestReadTask(
- ops.io(), manifest, fileSchema, schemaString, specString, residuals));
+ ops.io(), manifest, fileSchema, schemaString, specString, residuals, ops.current().specsById()));
}
}
diff --git a/core/src/main/java/org/apache/iceberg/ManifestEntriesTable.java b/core/src/main/java/org/apache/iceberg/ManifestEntriesTable.java
index 262cd92..33cc668 100644
--- a/core/src/main/java/org/apache/iceberg/ManifestEntriesTable.java
+++ b/core/src/main/java/org/apache/iceberg/ManifestEntriesTable.java
@@ -19,6 +19,7 @@
package org.apache.iceberg;
+import java.util.Map;
import org.apache.iceberg.expressions.Expression;
import org.apache.iceberg.expressions.Expressions;
import org.apache.iceberg.expressions.ResidualEvaluator;
@@ -112,7 +113,8 @@ public class ManifestEntriesTable extends BaseMetadataTable {
ResidualEvaluator residuals = ResidualEvaluator.unpartitioned(filter);
return CloseableIterable.transform(manifests, manifest ->
- new ManifestReadTask(ops.io(), manifest, fileSchema, schemaString, specString, residuals));
+ new ManifestReadTask(ops.io(), manifest, fileSchema, schemaString, specString, residuals,
+ ops.current().specsById()));
}
}
@@ -120,20 +122,27 @@ public class ManifestEntriesTable extends BaseMetadataTable {
private final Schema fileSchema;
private final FileIO io;
private final ManifestFile manifest;
+ private final Map<Integer, PartitionSpec> specsById;
ManifestReadTask(FileIO io, ManifestFile manifest, Schema fileSchema, String schemaString,
- String specString, ResidualEvaluator residuals) {
+ String specString, ResidualEvaluator residuals, Map<Integer, PartitionSpec> specsById) {
super(DataFiles.fromManifest(manifest), null, schemaString, specString, residuals);
this.fileSchema = fileSchema;
this.io = io;
this.manifest = manifest;
+ this.specsById = specsById;
}
@Override
public CloseableIterable<StructLike> rows() {
- return CloseableIterable.transform(
- ManifestFiles.read(manifest, io).project(fileSchema).entries(),
- file -> (GenericManifestEntry<?>) file);
+ if (manifest.content() == ManifestContent.DATA) {
+ return CloseableIterable.transform(ManifestFiles.read(manifest, io).project(fileSchema).entries(),
+ file -> (GenericManifestEntry<DataFile>) file);
+ } else {
+ return CloseableIterable.transform(ManifestFiles.readDeleteManifest(manifest, io, specsById)
+ .project(fileSchema).entries(),
+ file -> (GenericManifestEntry<DeleteFile>) file);
+ }
}
@Override
diff --git a/core/src/test/java/org/apache/iceberg/TestEntriesMetadataTable.java b/core/src/test/java/org/apache/iceberg/TestEntriesMetadataTable.java
index eb2088d..baa90c3 100644
--- a/core/src/test/java/org/apache/iceberg/TestEntriesMetadataTable.java
+++ b/core/src/test/java/org/apache/iceberg/TestEntriesMetadataTable.java
@@ -19,8 +19,11 @@
package org.apache.iceberg;
+import java.util.List;
+import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
import org.apache.iceberg.relocated.com.google.common.collect.Iterables;
import org.junit.Assert;
+import org.junit.Assume;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
@@ -131,4 +134,34 @@ public class TestEntriesMetadataTable extends TableTestBase {
Assert.assertEquals(expectedSplits, Iterables.size(scan.planTasks()));
}
+ @Test
+ public void testEntriesTableWithDeleteManifests() throws Exception {
+ Assume.assumeTrue("Only V2 Tables Support Deletes", formatVersion >= 2);
+ table.newAppend()
+ .appendFile(FILE_A)
+ .appendFile(FILE_B)
+ .commit();
+
+ table.newRowDelta()
+ .addDeletes(FILE_A_DELETES)
+ .commit();
+
+ Table entriesTable = new ManifestEntriesTable(table.ops(), table);
+ TableScan scan = entriesTable.newScan();
+
+ Schema expectedSchema = ManifestEntry.getSchema(table.spec().partitionType());
+
+ assertEquals("A tableScan.select() should prune the schema",
+ expectedSchema.asStruct(),
+ scan.schema().asStruct());
+
+ List<FileScanTask> files = ImmutableList.copyOf(scan.planFiles());
+ Assert.assertEquals("Data file should be the table's manifest",
+ Iterables.getOnlyElement(table.currentSnapshot().dataManifests()).path(), files.get(0).file().path());
+ Assert.assertEquals("Should contain 2 data file records", 2, files.get(0).file().recordCount());
+ Assert.assertEquals("Delete file should be in the table manifest",
+ Iterables.getOnlyElement(table.currentSnapshot().deleteManifests()).path(), files.get(1).file().path());
+ Assert.assertEquals("Should contain 1 delete file record", 1, files.get(1).file().recordCount());
+ }
+
}