You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iceberg.apache.org by dw...@apache.org on 2022/09/30 15:08:00 UTC

[iceberg] branch master updated: API,Core: Add scan planning metrics for indexed/eq/pos delete files (#5809)

This is an automated email from the ASF dual-hosted git repository.

dweeks pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iceberg.git


The following commit(s) were added to refs/heads/master by this push:
     new 9eb5cbd342 API,Core: Add scan planning metrics for indexed/eq/pos delete files (#5809)
9eb5cbd342 is described below

commit 9eb5cbd342a5c72c2a45bad3982b66f012e05f08
Author: Eduard Tudenhöfner <et...@gmail.com>
AuthorDate: Fri Sep 30 17:07:52 2022 +0200

    API,Core: Add scan planning metrics for indexed/eq/pos delete files (#5809)
    
    * API,Core: Add scan planning metrics for indexed/eq/pos delete files
    
    * evaluate delete files only once
---
 .../org/apache/iceberg/metrics/ScanReport.java     | 30 ++++++++++++++++++++++
 .../java/org/apache/iceberg/DeleteFileIndex.java   | 13 ++++++++++
 .../iceberg/metrics/ScanMetricsResultParser.java   | 19 ++++++++++++++
 .../iceberg/TestScanPlanningAndReporting.java      | 30 ++++++++++++++++++++++
 .../metrics/TestScanMetricsResultParser.java       | 21 +++++++++++++++
 .../iceberg/metrics/TestScanReportParser.java      | 21 +++++++++++++++
 6 files changed, 134 insertions(+)

diff --git a/api/src/main/java/org/apache/iceberg/metrics/ScanReport.java b/api/src/main/java/org/apache/iceberg/metrics/ScanReport.java
index 631306b681..42af394cdf 100644
--- a/api/src/main/java/org/apache/iceberg/metrics/ScanReport.java
+++ b/api/src/main/java/org/apache/iceberg/metrics/ScanReport.java
@@ -137,6 +137,15 @@ public interface ScanReport {
     @Nullable
     CounterResult skippedDeleteManifests();
 
+    @Nullable
+    CounterResult indexedDeleteFiles();
+
+    @Nullable
+    CounterResult equalityDeleteFiles();
+
+    @Nullable
+    CounterResult positionalDeleteFiles();
+
     static ScanMetricsResult fromScanMetrics(ScanMetrics scanMetrics) {
       Preconditions.checkArgument(null != scanMetrics, "Invalid scan metrics: null");
       return ImmutableScanMetricsResult.builder()
@@ -154,6 +163,9 @@ public interface ScanReport {
           .skippedDeleteFiles(CounterResult.fromCounter(scanMetrics.skippedDeleteFiles()))
           .scannedDeleteManifests(CounterResult.fromCounter(scanMetrics.scannedDeleteManifests()))
           .skippedDeleteManifests(CounterResult.fromCounter(scanMetrics.skippedDeleteManifests()))
+          .indexedDeleteFiles(CounterResult.fromCounter(scanMetrics.indexedDeleteFiles()))
+          .equalityDeleteFiles(CounterResult.fromCounter(scanMetrics.equalityDeleteFiles()))
+          .positionalDeleteFiles(CounterResult.fromCounter(scanMetrics.positionalDeleteFiles()))
           .build();
     }
   }
@@ -174,6 +186,9 @@ public interface ScanReport {
     public static final String SKIPPED_DELETE_MANIFESTS = "skipped-delete-manifests";
     public static final String SKIPPED_DATA_FILES = "skipped-data-files";
     public static final String SKIPPED_DELETE_FILES = "skipped-delete-files";
+    public static final String INDEXED_DELETE_FILES = "indexed-delete-files";
+    public static final String EQUALITY_DELETE_FILES = "equality-delete-files";
+    public static final String POSITIONAL_DELETE_FILES = "positional-delete-files";
 
     public static ScanMetrics noop() {
       return ScanMetrics.of(MetricsContext.nullMetrics());
@@ -246,6 +261,21 @@ public interface ScanReport {
       return metricsContext().counter(SKIPPED_DELETE_MANIFESTS, MetricsContext.Unit.COUNT);
     }
 
+    @Value.Derived
+    public Counter indexedDeleteFiles() {
+      return metricsContext().counter(INDEXED_DELETE_FILES, MetricsContext.Unit.COUNT);
+    }
+
+    @Value.Derived
+    public Counter equalityDeleteFiles() {
+      return metricsContext().counter(EQUALITY_DELETE_FILES, MetricsContext.Unit.COUNT);
+    }
+
+    @Value.Derived
+    public Counter positionalDeleteFiles() {
+      return metricsContext().counter(POSITIONAL_DELETE_FILES, MetricsContext.Unit.COUNT);
+    }
+
     public static ScanMetrics of(MetricsContext metricsContext) {
       return ImmutableScanMetrics.builder().metricsContext(metricsContext).build();
     }
diff --git a/core/src/main/java/org/apache/iceberg/DeleteFileIndex.java b/core/src/main/java/org/apache/iceberg/DeleteFileIndex.java
index 6871feaa8a..32fee87ae8 100644
--- a/core/src/main/java/org/apache/iceberg/DeleteFileIndex.java
+++ b/core/src/main/java/org/apache/iceberg/DeleteFileIndex.java
@@ -509,6 +509,19 @@ class DeleteFileIndex {
         }
       }
 
+      scanMetrics.indexedDeleteFiles().increment(deleteEntries.size());
+      deleteFilesByPartition
+          .values()
+          .forEach(
+              entry -> {
+                FileContent content = entry.file().content();
+                if (content == FileContent.EQUALITY_DELETES) {
+                  scanMetrics.equalityDeleteFiles().increment();
+                } else if (content == FileContent.POSITION_DELETES) {
+                  scanMetrics.positionalDeleteFiles().increment();
+                }
+              });
+
       return new DeleteFileIndex(
           specsById, globalApplySeqs, globalDeletes, sortedDeletesByPartition);
     }
diff --git a/core/src/main/java/org/apache/iceberg/metrics/ScanMetricsResultParser.java b/core/src/main/java/org/apache/iceberg/metrics/ScanMetricsResultParser.java
index 7ff12b7e24..98ebaf80a6 100644
--- a/core/src/main/java/org/apache/iceberg/metrics/ScanMetricsResultParser.java
+++ b/core/src/main/java/org/apache/iceberg/metrics/ScanMetricsResultParser.java
@@ -108,6 +108,21 @@ class ScanMetricsResultParser {
       CounterResultParser.toJson(metrics.skippedDeleteManifests(), gen);
     }
 
+    if (null != metrics.indexedDeleteFiles()) {
+      gen.writeFieldName(ScanMetrics.INDEXED_DELETE_FILES);
+      CounterResultParser.toJson(metrics.indexedDeleteFiles(), gen);
+    }
+
+    if (null != metrics.equalityDeleteFiles()) {
+      gen.writeFieldName(ScanMetrics.EQUALITY_DELETE_FILES);
+      CounterResultParser.toJson(metrics.equalityDeleteFiles(), gen);
+    }
+
+    if (null != metrics.positionalDeleteFiles()) {
+      gen.writeFieldName(ScanMetrics.POSITIONAL_DELETE_FILES);
+      CounterResultParser.toJson(metrics.positionalDeleteFiles(), gen);
+    }
+
     gen.writeEndObject();
   }
 
@@ -142,6 +157,10 @@ class ScanMetricsResultParser {
             CounterResultParser.fromJson(ScanMetrics.SCANNED_DELETE_MANIFESTS, json))
         .skippedDeleteManifests(
             CounterResultParser.fromJson(ScanMetrics.SKIPPED_DELETE_MANIFESTS, json))
+        .indexedDeleteFiles(CounterResultParser.fromJson(ScanMetrics.INDEXED_DELETE_FILES, json))
+        .equalityDeleteFiles(CounterResultParser.fromJson(ScanMetrics.EQUALITY_DELETE_FILES, json))
+        .positionalDeleteFiles(
+            CounterResultParser.fromJson(ScanMetrics.POSITIONAL_DELETE_FILES, json))
         .build();
   }
 }
diff --git a/core/src/test/java/org/apache/iceberg/TestScanPlanningAndReporting.java b/core/src/test/java/org/apache/iceberg/TestScanPlanningAndReporting.java
index 3ee1b35eef..6ad13b2e29 100644
--- a/core/src/test/java/org/apache/iceberg/TestScanPlanningAndReporting.java
+++ b/core/src/test/java/org/apache/iceberg/TestScanPlanningAndReporting.java
@@ -141,6 +141,9 @@ public class TestScanPlanningAndReporting extends TableTestBase {
     assertThat(result.totalDeleteFileSizeInBytes().value()).isEqualTo(20L);
     assertThat(result.skippedDataFiles().value()).isEqualTo(0);
     assertThat(result.skippedDeleteFiles().value()).isEqualTo(0);
+    assertThat(result.indexedDeleteFiles().value()).isEqualTo(2);
+    assertThat(result.equalityDeleteFiles().value()).isEqualTo(0);
+    assertThat(result.positionalDeleteFiles().value()).isEqualTo(2);
   }
 
   @Test
@@ -212,6 +215,33 @@ public class TestScanPlanningAndReporting extends TableTestBase {
     assertThat(result.totalDeleteManifests().value()).isEqualTo(2);
     assertThat(result.totalFileSizeInBytes().value()).isEqualTo(10L);
     assertThat(result.totalDeleteFileSizeInBytes().value()).isEqualTo(10L);
+    assertThat(result.indexedDeleteFiles().value()).isEqualTo(1);
+    assertThat(result.equalityDeleteFiles().value()).isEqualTo(1);
+    assertThat(result.positionalDeleteFiles().value()).isEqualTo(0);
+  }
+
+  @Test
+  public void scanningWithEqualityAndPositionalDeleteFiles() throws IOException {
+    String tableName = "scan-planning-with-eq-and-pos-delete-files";
+    Table table =
+        TestTables.create(
+            tableDir, tableName, SCHEMA, SPEC, SortOrder.unsorted(), formatVersion, reporter);
+    table.newAppend().appendFile(FILE_A).commit();
+    // FILE_A_DELETES = positionalDelete / FILE_A2_DELETES = equalityDelete
+    table.newRowDelta().addDeletes(FILE_A_DELETES).addDeletes(FILE_A2_DELETES).commit();
+    TableScan tableScan = table.newScan();
+
+    try (CloseableIterable<FileScanTask> fileScanTasks =
+        tableScan.filter(Expressions.equal("data", "6")).planFiles()) {
+      fileScanTasks.forEach(task -> {});
+    }
+
+    ScanReport scanReport = reporter.lastReport();
+    assertThat(scanReport).isNotNull();
+    ScanMetricsResult result = scanReport.scanMetrics();
+    assertThat(result.indexedDeleteFiles().value()).isEqualTo(2);
+    assertThat(result.equalityDeleteFiles().value()).isEqualTo(1);
+    assertThat(result.positionalDeleteFiles().value()).isEqualTo(1);
   }
 
   private static class TestScanReporter implements ScanReporter {
diff --git a/core/src/test/java/org/apache/iceberg/metrics/TestScanMetricsResultParser.java b/core/src/test/java/org/apache/iceberg/metrics/TestScanMetricsResultParser.java
index ffc0a6baf8..06abaceef8 100644
--- a/core/src/test/java/org/apache/iceberg/metrics/TestScanMetricsResultParser.java
+++ b/core/src/test/java/org/apache/iceberg/metrics/TestScanMetricsResultParser.java
@@ -178,6 +178,9 @@ public class TestScanMetricsResultParser {
     scanMetrics.skippedDeleteFiles().increment(3L);
     scanMetrics.scannedDeleteManifests().increment(3L);
     scanMetrics.skippedDeleteManifests().increment(3L);
+    scanMetrics.indexedDeleteFiles().increment(10L);
+    scanMetrics.positionalDeleteFiles().increment(6L);
+    scanMetrics.equalityDeleteFiles().increment(4L);
 
     ScanMetricsResult scanMetricsResult = ScanMetricsResult.fromScanMetrics(scanMetrics);
     Assertions.assertThat(
@@ -195,6 +198,9 @@ public class TestScanMetricsResultParser {
                     + "\"skipped-delete-files\":{\"unit\":\"count\",\"value\":3},"
                     + "\"scanned-delete-manifests\":{\"unit\":\"count\",\"value\":3},"
                     + "\"skipped-delete-manifests\":{\"unit\":\"count\",\"value\":3},"
+                    + "\"indexed-delete-files\":{\"unit\":\"count\",\"value\":10},"
+                    + "\"equality-delete-files\":{\"unit\":\"count\",\"value\":4},"
+                    + "\"positional-delete-files\":{\"unit\":\"count\",\"value\":6},"
                     + "\"extra\": \"value\",\"extra2\":23}"))
         .isEqualTo(scanMetricsResult);
   }
@@ -236,6 +242,9 @@ public class TestScanMetricsResultParser {
     scanMetrics.skippedDeleteFiles().increment(3L);
     scanMetrics.scannedDeleteManifests().increment(3L);
     scanMetrics.skippedDeleteManifests().increment(3L);
+    scanMetrics.indexedDeleteFiles().increment(10L);
+    scanMetrics.positionalDeleteFiles().increment(6L);
+    scanMetrics.equalityDeleteFiles().increment(4L);
 
     ScanMetricsResult scanMetricsResult = ScanMetricsResult.fromScanMetrics(scanMetrics);
 
@@ -293,6 +302,18 @@ public class TestScanMetricsResultParser {
             + "  \"skipped-delete-manifests\" : {\n"
             + "    \"unit\" : \"count\",\n"
             + "    \"value\" : 3\n"
+            + "  },\n"
+            + "  \"indexed-delete-files\" : {\n"
+            + "    \"unit\" : \"count\",\n"
+            + "    \"value\" : 10\n"
+            + "  },\n"
+            + "  \"equality-delete-files\" : {\n"
+            + "    \"unit\" : \"count\",\n"
+            + "    \"value\" : 4\n"
+            + "  },\n"
+            + "  \"positional-delete-files\" : {\n"
+            + "    \"unit\" : \"count\",\n"
+            + "    \"value\" : 6\n"
             + "  }\n"
             + "}";
 
diff --git a/core/src/test/java/org/apache/iceberg/metrics/TestScanReportParser.java b/core/src/test/java/org/apache/iceberg/metrics/TestScanReportParser.java
index fffcdf64ad..8f77be4824 100644
--- a/core/src/test/java/org/apache/iceberg/metrics/TestScanReportParser.java
+++ b/core/src/test/java/org/apache/iceberg/metrics/TestScanReportParser.java
@@ -84,6 +84,9 @@ public class TestScanReportParser {
     scanMetrics.skippedDeleteFiles().increment(3L);
     scanMetrics.scannedDeleteManifests().increment(3L);
     scanMetrics.skippedDeleteManifests().increment(3L);
+    scanMetrics.indexedDeleteFiles().increment(10L);
+    scanMetrics.positionalDeleteFiles().increment(6L);
+    scanMetrics.equalityDeleteFiles().increment(4L);
 
     String tableName = "roundTripTableName";
     Schema projection =
@@ -114,6 +117,9 @@ public class TestScanReportParser {
                     + "\"skipped-delete-files\":{\"unit\":\"count\",\"value\":3},"
                     + "\"scanned-delete-manifests\":{\"unit\":\"count\",\"value\":3},"
                     + "\"skipped-delete-manifests\":{\"unit\":\"count\",\"value\":3},"
+                    + "\"indexed-delete-files\":{\"unit\":\"count\",\"value\":10},"
+                    + "\"equality-delete-files\":{\"unit\":\"count\",\"value\":4},"
+                    + "\"positional-delete-files\":{\"unit\":\"count\",\"value\":6},"
                     + "\"extra-metric\":\"extra-val\"},"
                     + "\"extra\":\"extraVal\"}"))
         .usingRecursiveComparison()
@@ -174,6 +180,9 @@ public class TestScanReportParser {
     scanMetrics.skippedDeleteFiles().increment(3L);
     scanMetrics.scannedDeleteManifests().increment(3L);
     scanMetrics.skippedDeleteManifests().increment(3L);
+    scanMetrics.indexedDeleteFiles().increment(10L);
+    scanMetrics.positionalDeleteFiles().increment(6L);
+    scanMetrics.equalityDeleteFiles().increment(4L);
 
     String tableName = "roundTripTableName";
     Schema projection =
@@ -256,6 +265,18 @@ public class TestScanReportParser {
             + "    \"skipped-delete-manifests\" : {\n"
             + "      \"unit\" : \"count\",\n"
             + "      \"value\" : 3\n"
+            + "    },\n"
+            + "    \"indexed-delete-files\" : {\n"
+            + "      \"unit\" : \"count\",\n"
+            + "      \"value\" : 10\n"
+            + "    },\n"
+            + "    \"equality-delete-files\" : {\n"
+            + "      \"unit\" : \"count\",\n"
+            + "      \"value\" : 4\n"
+            + "    },\n"
+            + "    \"positional-delete-files\" : {\n"
+            + "      \"unit\" : \"count\",\n"
+            + "      \"value\" : 6\n"
             + "    }\n"
             + "  }\n"
             + "}";