You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@iceberg.apache.org by GitBox <gi...@apache.org> on 2018/12/13 20:56:20 UTC

[GitHub] rdblue closed pull request #50: Use manifest lists by default and fix tests.

rdblue closed pull request #50: Use manifest lists by default and fix tests.
URL: https://github.com/apache/incubator-iceberg/pull/50
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git a/api/src/main/java/com/netflix/iceberg/Files.java b/api/src/main/java/com/netflix/iceberg/Files.java
index e85825a..197dcc1 100644
--- a/api/src/main/java/com/netflix/iceberg/Files.java
+++ b/api/src/main/java/com/netflix/iceberg/Files.java
@@ -99,6 +99,9 @@ public static InputFile localInput(File file) {
   }
 
   public static InputFile localInput(String file) {
+    if (file.startsWith("file:")) {
+      return localInput(new File(file.replaceFirst("file:", "")));
+    }
     return localInput(new File(file));
   }
 
diff --git a/core/src/main/java/com/netflix/iceberg/BaseSnapshot.java b/core/src/main/java/com/netflix/iceberg/BaseSnapshot.java
index 36a873a..554f24f 100644
--- a/core/src/main/java/com/netflix/iceberg/BaseSnapshot.java
+++ b/core/src/main/java/com/netflix/iceberg/BaseSnapshot.java
@@ -138,7 +138,7 @@ private void cacheChanges() {
 
     // accumulate adds and deletes from all manifests.
     // because manifests can be reused in newer snapshots, filter the changes by snapshot id.
-    for (String manifest : Iterables.transform(manifests, ManifestFile::path)) {
+    for (String manifest : Iterables.transform(manifests(), ManifestFile::path)) {
       try (ManifestReader reader = ManifestReader.read(ops.io().newInputFile(manifest))) {
         for (ManifestEntry add : reader.addedFiles()) {
           if (add.snapshotId() == snapshotId) {
@@ -164,7 +164,7 @@ public String toString() {
     return Objects.toStringHelper(this)
         .add("id", snapshotId)
         .add("timestamp_ms", timestampMillis)
-        .add("manifests", manifests)
+        .add("manifests", manifests())
         .toString();
   }
 }
diff --git a/core/src/main/java/com/netflix/iceberg/TableProperties.java b/core/src/main/java/com/netflix/iceberg/TableProperties.java
index 0d99c7e..69bfcf2 100644
--- a/core/src/main/java/com/netflix/iceberg/TableProperties.java
+++ b/core/src/main/java/com/netflix/iceberg/TableProperties.java
@@ -73,5 +73,5 @@
   public static final String WRITE_NEW_DATA_LOCATION = "write.folder-storage.path";
 
   public static final String MANIFEST_LISTS_ENABLED = "write.manifest-lists.enabled";
-  public static final boolean MANIFEST_LISTS_ENABLED_DEFAULT = false;
+  public static final boolean MANIFEST_LISTS_ENABLED_DEFAULT = true;
 }
diff --git a/core/src/test/java/com/netflix/iceberg/TableTestBase.java b/core/src/test/java/com/netflix/iceberg/TableTestBase.java
index c723daa..010896c 100644
--- a/core/src/test/java/com/netflix/iceberg/TableTestBase.java
+++ b/core/src/test/java/com/netflix/iceberg/TableTestBase.java
@@ -94,13 +94,13 @@ public void cleanupTables() {
     TestTables.clearTables();
   }
 
-  List<File> listMetadataFiles(String ext) {
-    return listMetadataFiles(tableDir, ext);
+  List<File> listManifestFiles() {
+    return listManifestFiles(tableDir);
   }
 
-  List<File> listMetadataFiles(File tableDir, String ext) {
-    return Lists.newArrayList(new File(tableDir, "metadata").listFiles(
-        (dir, name) -> Files.getFileExtension(name).equalsIgnoreCase(ext)));
+  List<File> listManifestFiles(File tableDir) {
+    return Lists.newArrayList(new File(tableDir, "metadata").listFiles((dir, name) ->
+        !name.startsWith("snap") && Files.getFileExtension(name).equalsIgnoreCase("avro")));
   }
 
   private TestTables.TestTable create(Schema schema, PartitionSpec spec) {
diff --git a/core/src/test/java/com/netflix/iceberg/TestCreateTransaction.java b/core/src/test/java/com/netflix/iceberg/TestCreateTransaction.java
index 257f6e3..ffdec59 100644
--- a/core/src/test/java/com/netflix/iceberg/TestCreateTransaction.java
+++ b/core/src/test/java/com/netflix/iceberg/TestCreateTransaction.java
@@ -49,7 +49,7 @@ public void testCreateTransaction() throws IOException {
     Assert.assertEquals("Should have metadata version 0",
         0, (int) TestTables.metadataVersion("test_create"));
     Assert.assertEquals("Should have 0 manifest files",
-        0, listMetadataFiles(tableDir, "avro").size());
+        0, listManifestFiles(tableDir).size());
 
     Assert.assertEquals("Table schema should match with reassigned IDs",
         assignFreshIds(SCHEMA).asStruct(), meta.schema().asStruct());
@@ -86,7 +86,7 @@ public void testCreateAndAppendWithTransaction() throws IOException {
     Assert.assertEquals("Should have metadata version 0",
         0, (int) TestTables.metadataVersion("test_append"));
     Assert.assertEquals("Should have 1 manifest file",
-        1, listMetadataFiles(tableDir, "avro").size());
+        1, listManifestFiles(tableDir).size());
 
     Assert.assertEquals("Table schema should match with reassigned IDs",
         assignFreshIds(SCHEMA).asStruct(), meta.schema().asStruct());
@@ -128,7 +128,7 @@ public void testCreateAndAppendWithTable() throws IOException {
     Assert.assertEquals("Should have metadata version 0",
         0, (int) TestTables.metadataVersion("test_append"));
     Assert.assertEquals("Should have 1 manifest file",
-        1, listMetadataFiles(tableDir, "avro").size());
+        1, listManifestFiles(tableDir).size());
 
     Assert.assertEquals("Table schema should match with reassigned IDs",
         assignFreshIds(SCHEMA).asStruct(), meta.schema().asStruct());
@@ -166,7 +166,7 @@ public void testCreateAndUpdatePropertiesWithTransaction() throws IOException {
     Assert.assertEquals("Should have metadata version 0",
         0, (int) TestTables.metadataVersion("test_properties"));
     Assert.assertEquals("Should have 0 manifest files",
-        0, listMetadataFiles(tableDir, "avro").size());
+        0, listManifestFiles(tableDir).size());
 
     Assert.assertEquals("Table schema should match with reassigned IDs",
         assignFreshIds(SCHEMA).asStruct(), meta.schema().asStruct());
@@ -208,7 +208,7 @@ public void testCreateAndUpdatePropertiesWithTable() throws IOException {
     Assert.assertEquals("Should have metadata version 0",
         0, (int) TestTables.metadataVersion("test_properties"));
     Assert.assertEquals("Should have 0 manifest files",
-        0, listMetadataFiles(tableDir, "avro").size());
+        0, listManifestFiles(tableDir).size());
 
     Assert.assertEquals("Table schema should match with reassigned IDs",
         assignFreshIds(SCHEMA).asStruct(), meta.schema().asStruct());
diff --git a/core/src/test/java/com/netflix/iceberg/TestFastAppend.java b/core/src/test/java/com/netflix/iceberg/TestFastAppend.java
index 4d9e174..02c60ad 100644
--- a/core/src/test/java/com/netflix/iceberg/TestFastAppend.java
+++ b/core/src/test/java/com/netflix/iceberg/TestFastAppend.java
@@ -32,7 +32,7 @@
 
   @Test
   public void testEmptyTableAppend() {
-    Assert.assertEquals("Table should start empty", 0, listMetadataFiles("avro").size());
+    Assert.assertEquals("Table should start empty", 0, listManifestFiles().size());
 
     TableMetadata base = readMetadata();
     Assert.assertNull("Should not have a current snapshot", base.currentSnapshot());
diff --git a/core/src/test/java/com/netflix/iceberg/TestMergeAppend.java b/core/src/test/java/com/netflix/iceberg/TestMergeAppend.java
index 6b78c63..3446c97 100644
--- a/core/src/test/java/com/netflix/iceberg/TestMergeAppend.java
+++ b/core/src/test/java/com/netflix/iceberg/TestMergeAppend.java
@@ -33,7 +33,7 @@
 public class TestMergeAppend extends TableTestBase {
   @Test
   public void testEmptyTableAppend() {
-    Assert.assertEquals("Table should start empty", 0, listMetadataFiles("avro").size());
+    Assert.assertEquals("Table should start empty", 0, listManifestFiles().size());
 
     TableMetadata base = readMetadata();
     Assert.assertNull("Should not have a current snapshot", base.currentSnapshot());
@@ -56,7 +56,7 @@ public void testMergeWithExistingManifest() {
     // merge all manifests for this test
     table.updateProperties().set("commit.manifest.min-count-to-merge", "1").commit();
 
-    Assert.assertEquals("Table should start empty", 0, listMetadataFiles("avro").size());
+    Assert.assertEquals("Table should start empty", 0, listManifestFiles().size());
 
     table.newAppend()
         .appendFile(FILE_A)
@@ -92,7 +92,7 @@ public void testMergeWithExistingManifestAfterDelete() {
     // merge all manifests for this test
     table.updateProperties().set("commit.manifest.min-count-to-merge", "1").commit();
 
-    Assert.assertEquals("Table should start empty", 0, listMetadataFiles("avro").size());
+    Assert.assertEquals("Table should start empty", 0, listManifestFiles().size());
 
     table.newAppend()
         .appendFile(FILE_A)
@@ -145,7 +145,7 @@ public void testMinMergeCount() {
     // only merge when there are at least 4 manifests
     table.updateProperties().set("commit.manifest.min-count-to-merge", "4").commit();
 
-    Assert.assertEquals("Table should start empty", 0, listMetadataFiles("avro").size());
+    Assert.assertEquals("Table should start empty", 0, listManifestFiles().size());
 
     table.newFastAppend()
         .appendFile(FILE_A)
@@ -193,7 +193,7 @@ public void testMergeSizeTargetWithExistingManifest() {
         .set(TableProperties.MANIFEST_TARGET_SIZE_BYTES, "10")
         .commit();
 
-    Assert.assertEquals("Table should start empty", 0, listMetadataFiles("avro").size());
+    Assert.assertEquals("Table should start empty", 0, listManifestFiles().size());
 
     table.newAppend()
         .appendFile(FILE_A)
diff --git a/core/src/test/java/com/netflix/iceberg/TestReplaceFiles.java b/core/src/test/java/com/netflix/iceberg/TestReplaceFiles.java
index 032b680..3cc1d50 100644
--- a/core/src/test/java/com/netflix/iceberg/TestReplaceFiles.java
+++ b/core/src/test/java/com/netflix/iceberg/TestReplaceFiles.java
@@ -36,7 +36,7 @@
 
   @Test
   public void testEmptyTable() {
-    Assert.assertEquals("Table should start empty", 0, listMetadataFiles("avro").size());
+    Assert.assertEquals("Table should start empty", 0, listManifestFiles().size());
 
     TableMetadata base = readMetadata();
     Assert.assertNull("Should not have a current snapshot", base.currentSnapshot());
@@ -51,7 +51,7 @@ public void testEmptyTable() {
 
   @Test
   public void testAddOnly() {
-    Assert.assertEquals("Table should start empty", 0, listMetadataFiles("avro").size());
+    Assert.assertEquals("Table should start empty", 0, listManifestFiles().size());
 
     AssertHelpers.assertThrows("Expected an exception",
         IllegalArgumentException.class,
@@ -63,7 +63,7 @@ public void testAddOnly() {
 
   @Test
   public void testDeleteOnly() {
-    Assert.assertEquals("Table should start empty", 0, listMetadataFiles("avro").size());
+    Assert.assertEquals("Table should start empty", 0, listManifestFiles().size());
 
     AssertHelpers.assertThrows("Expected an exception",
         IllegalArgumentException.class,
@@ -75,7 +75,7 @@ public void testDeleteOnly() {
 
   @Test
   public void testDeleteWithDuplicateEntriesInManifest() {
-    Assert.assertEquals("Table should start empty", 0, listMetadataFiles("avro").size());
+    Assert.assertEquals("Table should start empty", 0, listManifestFiles().size());
 
     table.newAppend()
         .appendFile(FILE_A)
@@ -111,12 +111,12 @@ public void testDeleteWithDuplicateEntriesInManifest() {
         statuses(DELETED, DELETED, EXISTING));
 
     // We should only get the 3 manifests that this test is expected to add.
-    Assert.assertEquals("Only 3 manifests should exist", 3, listMetadataFiles("avro").size());
+    Assert.assertEquals("Only 3 manifests should exist", 3, listManifestFiles().size());
   }
 
   @Test
   public void testAddAndDelete() {
-    Assert.assertEquals("Table should start empty", 0, listMetadataFiles("avro").size());
+    Assert.assertEquals("Table should start empty", 0, listManifestFiles().size());
 
     table.newAppend()
         .appendFile(FILE_A)
@@ -151,7 +151,7 @@ public void testAddAndDelete() {
         statuses(DELETED, EXISTING));
 
     // We should only get the 3 manifests that this test is expected to add.
-    Assert.assertEquals("Only 3 manifests should exist", 3, listMetadataFiles("avro").size());
+    Assert.assertEquals("Only 3 manifests should exist", 3, listManifestFiles().size());
   }
 
   @Test
@@ -182,7 +182,7 @@ public void testFailure() {
     Assert.assertFalse("Should clean up new manifest", new File(manifest2.path()).exists());
 
     // As commit failed all the manifests added with rewrite should be cleaned up
-    Assert.assertEquals("Only 1 manifest should exist", 1, listMetadataFiles("avro").size());
+    Assert.assertEquals("Only 1 manifest should exist", 1, listManifestFiles().size());
   }
 
   @Test
@@ -215,12 +215,12 @@ public void testRecovery() {
         metadata.currentSnapshot().manifests().contains(manifest2));
 
     // 2 manifests added by rewrite and 1 original manifest should be found.
-    Assert.assertEquals("Only 3 manifests should exist", 3, listMetadataFiles("avro").size());
+    Assert.assertEquals("Only 3 manifests should exist", 3, listManifestFiles().size());
   }
 
   @Test
   public void testDeleteNonExistentFile() {
-    Assert.assertEquals("Table should start empty", 0, listMetadataFiles("avro").size());
+    Assert.assertEquals("Table should start empty", 0, listManifestFiles().size());
 
     table.newAppend()
         .appendFile(FILE_A)
@@ -238,12 +238,12 @@ public void testDeleteNonExistentFile() {
             .rewriteFiles(Sets.newSet(FILE_C), Sets.newSet(FILE_D))
             .commit());
 
-    Assert.assertEquals("Only 1 manifests should exist", 1, listMetadataFiles("avro").size());
+    Assert.assertEquals("Only 1 manifests should exist", 1, listManifestFiles().size());
   }
 
   @Test
   public void testAlreadyDeletedFile() {
-    Assert.assertEquals("Table should start empty", 0, listMetadataFiles("avro").size());
+    Assert.assertEquals("Table should start empty", 0, listManifestFiles().size());
 
     table.newAppend()
         .appendFile(FILE_A)
@@ -282,6 +282,6 @@ public void testAlreadyDeletedFile() {
             .rewriteFiles(Sets.newSet(FILE_A), Sets.newSet(FILE_D))
             .commit());
 
-    Assert.assertEquals("Only 3 manifests should exist", 3, listMetadataFiles("avro").size());
+    Assert.assertEquals("Only 3 manifests should exist", 3, listManifestFiles().size());
   }
 }
diff --git a/core/src/test/java/com/netflix/iceberg/TestReplaceTransaction.java b/core/src/test/java/com/netflix/iceberg/TestReplaceTransaction.java
index 4528a4f..2c4e7d9 100644
--- a/core/src/test/java/com/netflix/iceberg/TestReplaceTransaction.java
+++ b/core/src/test/java/com/netflix/iceberg/TestReplaceTransaction.java
@@ -289,7 +289,7 @@ public void testReplaceToCreateAndAppend() throws IOException {
     Assert.assertEquals("Should have metadata version 0",
         0, (int) TestTables.metadataVersion("test_append"));
     Assert.assertEquals("Should have 1 manifest file",
-        1, listMetadataFiles(tableDir, "avro").size());
+        1, listManifestFiles(tableDir).size());
 
     Assert.assertEquals("Table schema should match with reassigned IDs",
         assignFreshIds(SCHEMA).asStruct(), meta.schema().asStruct());
diff --git a/core/src/test/java/com/netflix/iceberg/TestTables.java b/core/src/test/java/com/netflix/iceberg/TestTables.java
index e6aea02..f1dbe4a 100644
--- a/core/src/test/java/com/netflix/iceberg/TestTables.java
+++ b/core/src/test/java/com/netflix/iceberg/TestTables.java
@@ -27,12 +27,11 @@
 import com.netflix.iceberg.io.InputFile;
 import com.netflix.iceberg.io.OutputFile;
 import java.io.File;
-import java.io.IOException;
 import java.util.Map;
 
 import static com.netflix.iceberg.TableMetadata.newTableMetadata;
 
-class TestTables {
+public class TestTables {
   static TestTable create(File temp, String name, Schema schema, PartitionSpec spec) {
     TestTableOperations ops = new TestTableOperations(name, temp);
     if (ops.current() != null) {
@@ -112,7 +111,7 @@ static Integer metadataVersion(String tableName) {
     }
   }
 
-  static class TestTableOperations implements TableOperations {
+  public static class TestTableOperations implements TableOperations {
 
     private final String tableName;
     private final File metadata;
@@ -120,7 +119,7 @@ static Integer metadataVersion(String tableName) {
     private long lastSnapshotId = 0;
     private int failCommits = 0;
 
-    TestTableOperations(String tableName, File location) {
+    public TestTableOperations(String tableName, File location) {
       this.tableName = tableName;
       this.metadata = new File(location, "metadata");
       metadata.mkdirs();
diff --git a/core/src/test/java/com/netflix/iceberg/hadoop/HadoopTableTestBase.java b/core/src/test/java/com/netflix/iceberg/hadoop/HadoopTableTestBase.java
index 31fd28d..683a0b2 100644
--- a/core/src/test/java/com/netflix/iceberg/hadoop/HadoopTableTestBase.java
+++ b/core/src/test/java/com/netflix/iceberg/hadoop/HadoopTableTestBase.java
@@ -29,6 +29,7 @@
 import com.netflix.iceberg.Table;
 import com.netflix.iceberg.TableMetadata;
 import com.netflix.iceberg.TableMetadataParser;
+import com.netflix.iceberg.TestTables;
 import com.netflix.iceberg.types.Types;
 import org.apache.hadoop.conf.Configuration;
 import org.junit.Before;
@@ -114,9 +115,9 @@ public void setupTable() throws Exception {
     this.table = TABLES.create(SCHEMA, SPEC, tableLocation);
   }
 
-  List<File> listMetadataFiles(String ext) {
-    return Lists.newArrayList(metadataDir.listFiles(
-        (dir, name) -> Files.getFileExtension(name).equalsIgnoreCase(ext)));
+  List<File> listManifestFiles() {
+    return Lists.newArrayList(metadataDir.listFiles((dir, name) ->
+        !name.startsWith("snap") && Files.getFileExtension(name).equalsIgnoreCase("avro")));
   }
 
   File version(int i) {
@@ -124,7 +125,8 @@ File version(int i) {
   }
 
   TableMetadata readMetadataVersion(int version) {
-    return TableMetadataParser.read(null, localInput(version(version)));
+    return TableMetadataParser.read(new TestTables.TestTableOperations("table", tableDir),
+        localInput(version(version)));
   }
 
   int readVersionHint() throws IOException {
diff --git a/core/src/test/java/com/netflix/iceberg/hadoop/TestHadoopCommits.java b/core/src/test/java/com/netflix/iceberg/hadoop/TestHadoopCommits.java
index 657abf3..0ee0210 100644
--- a/core/src/test/java/com/netflix/iceberg/hadoop/TestHadoopCommits.java
+++ b/core/src/test/java/com/netflix/iceberg/hadoop/TestHadoopCommits.java
@@ -62,7 +62,7 @@ public void testCreateTable() throws Exception {
     Assert.assertEquals("Should write the current version to the hint file",
         1, readVersionHint());
 
-    List<File> manifests = listMetadataFiles("avro");
+    List<File> manifests = listManifestFiles();
     Assert.assertEquals("Should contain 0 Avro manifest files", 0, manifests.size());
   }
 
@@ -88,7 +88,7 @@ public void testSchemaUpdate() throws Exception {
     List<FileScanTask> tasks = Lists.newArrayList(table.newScan().planFiles());
     Assert.assertEquals("Should not create any scan tasks", 0, tasks.size());
 
-    List<File> manifests = listMetadataFiles("avro");
+    List<File> manifests = listManifestFiles();
     Assert.assertEquals("Should contain 0 Avro manifest files", 0, manifests.size());
   }
 
@@ -108,7 +108,7 @@ public void testFailedCommit() throws Exception {
     AssertHelpers.assertThrows("Should fail to commit change based on v1 when v2 exists",
         CommitFailedException.class, "Version 2 already exists", update::commit);
 
-    List<File> manifests = listMetadataFiles("avro");
+    List<File> manifests = listManifestFiles();
     Assert.assertEquals("Should contain 0 Avro manifest files", 0, manifests.size());
   }
 
@@ -144,7 +144,7 @@ public void testStaleMetadata() throws Exception {
     AssertHelpers.assertThrows("Should fail with stale base metadata",
         CommitFailedException.class, "based on stale table metadata", updateCopy::commit);
 
-    List<File> manifests = listMetadataFiles("avro");
+    List<File> manifests = listManifestFiles();
     Assert.assertEquals("Should contain 0 Avro manifest files", 0, manifests.size());
   }
 
@@ -196,7 +196,7 @@ public void testFastAppend() throws Exception {
     List<FileScanTask> tasks = Lists.newArrayList(table.newScan().planFiles());
     Assert.assertEquals("Should scan 1 file", 1, tasks.size());
 
-    List<File> manifests = listMetadataFiles("avro");
+    List<File> manifests = listManifestFiles();
     Assert.assertEquals("Should contain only one Avro manifest file", 1, manifests.size());
 
     // second append
@@ -213,7 +213,7 @@ public void testFastAppend() throws Exception {
     Assert.assertEquals("Should scan 2 files", 2, tasks.size());
 
     Assert.assertEquals("Should contain 2 Avro manifest files",
-        2, listMetadataFiles("avro").size());
+        2, listManifestFiles().size());
 
     TableMetadata metadata = readMetadataVersion(3);
     Assert.assertEquals("Current snapshot should contain 2 manifests",
@@ -236,7 +236,7 @@ public void testMergeAppend() throws Exception {
     Assert.assertEquals("Should scan 3 files", 3, tasks.size());
 
     Assert.assertEquals("Should contain 3 Avro manifest files",
-        3, listMetadataFiles("avro").size());
+        3, listManifestFiles().size());
 
     TableMetadata metadata = readMetadataVersion(5);
     Assert.assertEquals("Current snapshot should contain 1 merged manifest",


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services