You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iceberg.apache.org by bl...@apache.org on 2019/08/13 17:14:28 UTC
[incubator-iceberg] branch master updated: Fix reading of old .metadata.json.gz files (#371)
This is an automated email from the ASF dual-hosted git repository.
blue pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-iceberg.git
The following commit(s) were added to refs/heads/master by this push:
new 5233fd4 Fix reading of old .metadata.json.gz files (#371)
5233fd4 is described below
commit 5233fd4a2d6cf3b29d1dcfb6095b2955e38cbec5
Author: Xabriel J. Collazo Mojica <xc...@adobe.com>
AuthorDate: Tue Aug 13 10:14:23 2019 -0700
Fix reading of old .metadata.json.gz files (#371)
---
.../org/apache/iceberg/TableMetadataParser.java | 5 +++++
.../iceberg/hadoop/HadoopTableOperations.java | 14 ++++++++++++
.../apache/iceberg/hadoop/HadoopTableTestBase.java | 26 +++++++++++++++++++++-
.../apache/iceberg/hadoop/TestHadoopCommits.java | 26 ++++++++++++++++++++++
4 files changed, 70 insertions(+), 1 deletion(-)
diff --git a/core/src/main/java/org/apache/iceberg/TableMetadataParser.java b/core/src/main/java/org/apache/iceberg/TableMetadataParser.java
index 99bc1d7..03f947d 100644
--- a/core/src/main/java/org/apache/iceberg/TableMetadataParser.java
+++ b/core/src/main/java/org/apache/iceberg/TableMetadataParser.java
@@ -116,6 +116,11 @@ public class TableMetadataParser {
return codec.extension + ".metadata.json";
}
+ public static String getOldFileExtension(Codec codec) {
+ // we have to be backward-compatible with .metadata.json.gz files
+ return ".metadata.json" + codec.extension;
+ }
+
public static String toJson(TableMetadata metadata) {
StringWriter writer = new StringWriter();
try {
diff --git a/core/src/main/java/org/apache/iceberg/hadoop/HadoopTableOperations.java b/core/src/main/java/org/apache/iceberg/hadoop/HadoopTableOperations.java
index 0486bdd..67ae296 100644
--- a/core/src/main/java/org/apache/iceberg/hadoop/HadoopTableOperations.java
+++ b/core/src/main/java/org/apache/iceberg/hadoop/HadoopTableOperations.java
@@ -178,7 +178,17 @@ public class HadoopTableOperations implements TableOperations {
if (fs.exists(metadataFile)) {
return metadataFile;
}
+
+ if (codec.equals(TableMetadataParser.Codec.GZIP)) {
+ // we have to be backward-compatible with .metadata.json.gz files
+ metadataFile = oldMetadataFilePath(metadataVersion, codec);
+ fs = getFileSystem(metadataFile, conf);
+ if (fs.exists(metadataFile)) {
+ return metadataFile;
+ }
+ }
}
+
return null;
}
@@ -186,6 +196,10 @@ public class HadoopTableOperations implements TableOperations {
return metadataPath("v" + metadataVersion + TableMetadataParser.getFileExtension(codec));
}
+ private Path oldMetadataFilePath(int metadataVersion, TableMetadataParser.Codec codec) {
+ return metadataPath("v" + metadataVersion + TableMetadataParser.getOldFileExtension(codec));
+ }
+
private Path metadataPath(String filename) {
return new Path(new Path(location, "metadata"), filename);
}
diff --git a/core/src/test/java/org/apache/iceberg/hadoop/HadoopTableTestBase.java b/core/src/test/java/org/apache/iceberg/hadoop/HadoopTableTestBase.java
index f771df6..11e85b4 100644
--- a/core/src/test/java/org/apache/iceberg/hadoop/HadoopTableTestBase.java
+++ b/core/src/test/java/org/apache/iceberg/hadoop/HadoopTableTestBase.java
@@ -22,9 +22,12 @@ package org.apache.iceberg.hadoop;
import com.google.common.collect.Lists;
import com.google.common.io.Files;
import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.List;
+import java.util.zip.GZIPOutputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.iceberg.DataFile;
import org.apache.iceberg.DataFiles;
@@ -121,7 +124,8 @@ public class HadoopTableTestBase {
}
List<File> listMetadataJsonFiles() {
- return Lists.newArrayList(metadataDir.listFiles((dir, name) -> name.endsWith(".metadata.json")));
+ return Lists.newArrayList(metadataDir.listFiles((dir, name) ->
+ name.endsWith(".metadata.json") || name.endsWith(".metadata.json.gz")));
}
File version(int versionNumber) {
@@ -142,4 +146,24 @@ public class HadoopTableTestBase {
new File(metadataDir, ".version-hint.text.crc").delete();
Files.write(String.valueOf(version), versionHintFile, StandardCharsets.UTF_8);
}
+
+ /*
+ * Rewrites all current metadata files to gz compressed with extension .metadata.json.gz.
+ * Assumes source files are not compressed.
+ */
+ void rewriteMetadataAsGzipWithOldExtension() throws IOException {
+ List<File> metadataJsonFiles = listMetadataJsonFiles();
+ for (File file : metadataJsonFiles) {
+ try (FileInputStream input = new FileInputStream(file)) {
+ try (GZIPOutputStream gzOutput = new GZIPOutputStream(new FileOutputStream(file.getAbsolutePath() + ".gz"))) {
+ int bb;
+ while ((bb = input.read()) != -1) {
+ gzOutput.write(bb);
+ }
+ }
+ }
+ // delete original metadata file
+ file.delete();
+ }
+ }
}
diff --git a/core/src/test/java/org/apache/iceberg/hadoop/TestHadoopCommits.java b/core/src/test/java/org/apache/iceberg/hadoop/TestHadoopCommits.java
index 1c56123..2012122 100644
--- a/core/src/test/java/org/apache/iceberg/hadoop/TestHadoopCommits.java
+++ b/core/src/test/java/org/apache/iceberg/hadoop/TestHadoopCommits.java
@@ -367,4 +367,30 @@ public class TestHadoopCommits extends HadoopTableTestBase {
Set<String> expected = Sets.newHashSet("v1.metadata.json", "v2.metadata.json");
assertEquals("only v1 and v2 metadata.json should exist.", expected, actual);
}
+
+ @Test
+ public void testCanReadOldCompressedManifestFiles() throws Exception {
+ assertTrue("Should create v1 metadata",
+ version(1).exists() && version(1).isFile());
+
+ // do a file append
+ table.newAppend()
+ .appendFile(FILE_A)
+ .commit();
+
+ // since we don't generate old file extensions anymore, let's convert existing metadata to old .metadata.json.gz
+ // to test backwards compatibility
+ rewriteMetadataAsGzipWithOldExtension();
+
+ List<File> metadataFiles = listMetadataJsonFiles();
+
+ assertEquals("Should have two versions", 2, metadataFiles.size());
+ assertTrue("Metadata should be compressed with old format.",
+ metadataFiles.stream().allMatch(f -> f.getName().endsWith(".metadata.json.gz")));
+
+ Table reloaded = TABLES.load(tableLocation);
+
+ List<FileScanTask> tasks = Lists.newArrayList(reloaded.newScan().planFiles());
+ Assert.assertEquals("Should scan 1 files", 1, tasks.size());
+ }
}