You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iceberg.apache.org by ao...@apache.org on 2023/05/09 15:56:58 UTC

[iceberg] branch master updated: Core: Refactor naming in MergingSnapshotProducer (#7564)

This is an automated email from the ASF dual-hosted git repository.

aokolnychyi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iceberg.git


The following commit(s) were added to refs/heads/master by this push:
     new 615ab116a6 Core: Refactor naming in MergingSnapshotProducer (#7564)
615ab116a6 is described below

commit 615ab116a6935d334ae5c6666cfcc0a65eaaa631
Author: Anton Okolnychyi <ao...@apple.com>
AuthorDate: Tue May 9 08:56:51 2023 -0700

    Core: Refactor naming in MergingSnapshotProducer (#7564)
---
 .../org/apache/iceberg/BaseOverwriteFiles.java     |  2 +-
 .../apache/iceberg/MergingSnapshotProducer.java    | 60 +++++++++++++---------
 2 files changed, 36 insertions(+), 26 deletions(-)

diff --git a/core/src/main/java/org/apache/iceberg/BaseOverwriteFiles.java b/core/src/main/java/org/apache/iceberg/BaseOverwriteFiles.java
index b829be7ccf..cd594e2bd0 100644
--- a/core/src/main/java/org/apache/iceberg/BaseOverwriteFiles.java
+++ b/core/src/main/java/org/apache/iceberg/BaseOverwriteFiles.java
@@ -125,7 +125,7 @@ public class BaseOverwriteFiles extends MergingSnapshotProducer<OverwriteFiles>
       StrictMetricsEvaluator metrics =
           new StrictMetricsEvaluator(base.schema(), rowFilter, isCaseSensitive());
 
-      for (DataFile file : addedFiles()) {
+      for (DataFile file : addedDataFiles()) {
         // the real test is that the strict or metrics test matches the file, indicating that all
         // records in the file match the filter. inclusive is used to avoid testing the metrics,
         // which is more complicated
diff --git a/core/src/main/java/org/apache/iceberg/MergingSnapshotProducer.java b/core/src/main/java/org/apache/iceberg/MergingSnapshotProducer.java
index 00e1827818..6ccfa86bf0 100644
--- a/core/src/main/java/org/apache/iceberg/MergingSnapshotProducer.java
+++ b/core/src/main/java/org/apache/iceberg/MergingSnapshotProducer.java
@@ -83,7 +83,7 @@ abstract class MergingSnapshotProducer<ThisT> extends SnapshotProducer<ThisT> {
   private final boolean snapshotIdInheritanceEnabled;
 
   // update data
-  private final List<DataFile> newFiles = Lists.newArrayList();
+  private final List<DataFile> newDataFiles = Lists.newArrayList();
   private Long newDataFilesDataSequenceNumber;
   private final Map<Integer, List<DeleteFile>> newDeleteFilesBySpec = Maps.newHashMap();
   private final List<ManifestFile> appendManifests = Lists.newArrayList();
@@ -93,9 +93,9 @@ abstract class MergingSnapshotProducer<ThisT> extends SnapshotProducer<ThisT> {
   private Expression deleteExpression = Expressions.alwaysFalse();
   private PartitionSpec dataSpec;
 
-  // cache new manifests after writing
-  private ManifestFile cachedNewManifest = null;
-  private boolean hasNewFiles = false;
+  // cache new data manifests after writing
+  private ManifestFile cachedNewDataManifest = null;
+  private boolean hasNewDataFiles = false;
 
   // cache new manifests for delete files
   private final List<ManifestFile> cachedNewDeleteManifests = Lists.newLinkedList();
@@ -157,8 +157,18 @@ abstract class MergingSnapshotProducer<ThisT> extends SnapshotProducer<ThisT> {
     return deleteExpression;
   }
 
+  /**
+   * Returns added data files.
+   *
+   * @deprecated since 1.3.0, will be removed in 1.4.0; use {@link #addedDataFiles()}.
+   */
+  @Deprecated
   protected List<DataFile> addedFiles() {
-    return ImmutableList.copyOf(newFiles);
+    return addedDataFiles();
+  }
+
+  protected List<DataFile> addedDataFiles() {
+    return ImmutableList.copyOf(newDataFiles);
   }
 
   protected void failAnyDelete() {
@@ -218,7 +228,7 @@ abstract class MergingSnapshotProducer<ThisT> extends SnapshotProducer<ThisT> {
   }
 
   protected boolean addsDataFiles() {
-    return newFiles.size() > 0;
+    return newDataFiles.size() > 0;
   }
 
   protected boolean addsDeleteFiles() {
@@ -230,8 +240,8 @@ abstract class MergingSnapshotProducer<ThisT> extends SnapshotProducer<ThisT> {
     Preconditions.checkNotNull(file, "Invalid data file: null");
     setDataSpec(file);
     addedFilesSummary.addedFile(dataSpec(), file);
-    hasNewFiles = true;
-    newFiles.add(file);
+    hasNewDataFiles = true;
+    newDataFiles.add(file);
   }
 
   /** Add a delete file to the new snapshot. */
@@ -847,7 +857,7 @@ abstract class MergingSnapshotProducer<ThisT> extends SnapshotProducer<ThisT> {
                 || manifest.hasExistingFiles()
                 || manifest.snapshotId() == snapshotId();
     Iterable<ManifestFile> unmergedManifests =
-        Iterables.filter(Iterables.concat(prepareNewManifests(), filtered), shouldKeep);
+        Iterables.filter(Iterables.concat(prepareNewDataManifests(), filtered), shouldKeep);
     Iterable<ManifestFile> unmergedDeleteManifests =
         Iterables.filter(Iterables.concat(prepareDeleteManifests(), filteredDeletes), shouldKeep);
 
@@ -886,9 +896,9 @@ abstract class MergingSnapshotProducer<ThisT> extends SnapshotProducer<ThisT> {
   }
 
   private void cleanUncommittedAppends(Set<ManifestFile> committed) {
-    if (cachedNewManifest != null && !committed.contains(cachedNewManifest)) {
-      deleteFile(cachedNewManifest.path());
-      this.cachedNewManifest = null;
+    if (cachedNewDataManifest != null && !committed.contains(cachedNewDataManifest)) {
+      deleteFile(cachedNewDataManifest.path());
+      this.cachedNewDataManifest = null;
     }
 
     ListIterator<ManifestFile> deleteManifestsIterator = cachedNewDeleteManifests.listIterator();
@@ -928,10 +938,10 @@ abstract class MergingSnapshotProducer<ThisT> extends SnapshotProducer<ThisT> {
     cleanUncommittedAppends(committed);
   }
 
-  private Iterable<ManifestFile> prepareNewManifests() {
+  private Iterable<ManifestFile> prepareNewDataManifests() {
     Iterable<ManifestFile> newManifests;
-    if (newFiles.size() > 0) {
-      ManifestFile newManifest = newFilesAsManifest();
+    if (newDataFiles.size() > 0) {
+      ManifestFile newManifest = newDataFilesAsManifest();
       newManifests =
           Iterables.concat(
               ImmutableList.of(newManifest), appendManifests, rewrittenAppendManifests);
@@ -944,33 +954,33 @@ abstract class MergingSnapshotProducer<ThisT> extends SnapshotProducer<ThisT> {
         manifest -> GenericManifestFile.copyOf(manifest).withSnapshotId(snapshotId()).build());
   }
 
-  private ManifestFile newFilesAsManifest() {
-    if (hasNewFiles && cachedNewManifest != null) {
-      deleteFile(cachedNewManifest.path());
-      cachedNewManifest = null;
+  private ManifestFile newDataFilesAsManifest() {
+    if (hasNewDataFiles && cachedNewDataManifest != null) {
+      deleteFile(cachedNewDataManifest.path());
+      cachedNewDataManifest = null;
     }
 
-    if (cachedNewManifest == null) {
+    if (cachedNewDataManifest == null) {
       try {
         ManifestWriter<DataFile> writer = newManifestWriter(dataSpec());
         try {
           if (newDataFilesDataSequenceNumber == null) {
-            writer.addAll(newFiles);
+            writer.addAll(newDataFiles);
           } else {
-            newFiles.forEach(f -> writer.add(f, newDataFilesDataSequenceNumber));
+            newDataFiles.forEach(f -> writer.add(f, newDataFilesDataSequenceNumber));
           }
         } finally {
           writer.close();
         }
 
-        this.cachedNewManifest = writer.toManifestFile();
-        this.hasNewFiles = false;
+        this.cachedNewDataManifest = writer.toManifestFile();
+        this.hasNewDataFiles = false;
       } catch (IOException e) {
         throw new RuntimeIOException(e, "Failed to close manifest writer");
       }
     }
 
-    return cachedNewManifest;
+    return cachedNewDataManifest;
   }
 
   private Iterable<ManifestFile> prepareDeleteManifests() {