You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iceberg.apache.org by dw...@apache.org on 2022/09/30 15:08:00 UTC
[iceberg] branch master updated: API,Core: Add scan planning metrics for indexed/eq/pos delete files (#5809)
This is an automated email from the ASF dual-hosted git repository.
dweeks pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/master by this push:
new 9eb5cbd342 API,Core: Add scan planning metrics for indexed/eq/pos delete files (#5809)
9eb5cbd342 is described below
commit 9eb5cbd342a5c72c2a45bad3982b66f012e05f08
Author: Eduard Tudenhöfner <et...@gmail.com>
AuthorDate: Fri Sep 30 17:07:52 2022 +0200
API,Core: Add scan planning metrics for indexed/eq/pos delete files (#5809)
* API,Core: Add scan planning metrics for indexed/eq/pos delete files
* evaluate delete files only once
---
.../org/apache/iceberg/metrics/ScanReport.java | 30 ++++++++++++++++++++++
.../java/org/apache/iceberg/DeleteFileIndex.java | 13 ++++++++++
.../iceberg/metrics/ScanMetricsResultParser.java | 19 ++++++++++++++
.../iceberg/TestScanPlanningAndReporting.java | 30 ++++++++++++++++++++++
.../metrics/TestScanMetricsResultParser.java | 21 +++++++++++++++
.../iceberg/metrics/TestScanReportParser.java | 21 +++++++++++++++
6 files changed, 134 insertions(+)
diff --git a/api/src/main/java/org/apache/iceberg/metrics/ScanReport.java b/api/src/main/java/org/apache/iceberg/metrics/ScanReport.java
index 631306b681..42af394cdf 100644
--- a/api/src/main/java/org/apache/iceberg/metrics/ScanReport.java
+++ b/api/src/main/java/org/apache/iceberg/metrics/ScanReport.java
@@ -137,6 +137,15 @@ public interface ScanReport {
@Nullable
CounterResult skippedDeleteManifests();
+ @Nullable
+ CounterResult indexedDeleteFiles();
+
+ @Nullable
+ CounterResult equalityDeleteFiles();
+
+ @Nullable
+ CounterResult positionalDeleteFiles();
+
static ScanMetricsResult fromScanMetrics(ScanMetrics scanMetrics) {
Preconditions.checkArgument(null != scanMetrics, "Invalid scan metrics: null");
return ImmutableScanMetricsResult.builder()
@@ -154,6 +163,9 @@ public interface ScanReport {
.skippedDeleteFiles(CounterResult.fromCounter(scanMetrics.skippedDeleteFiles()))
.scannedDeleteManifests(CounterResult.fromCounter(scanMetrics.scannedDeleteManifests()))
.skippedDeleteManifests(CounterResult.fromCounter(scanMetrics.skippedDeleteManifests()))
+ .indexedDeleteFiles(CounterResult.fromCounter(scanMetrics.indexedDeleteFiles()))
+ .equalityDeleteFiles(CounterResult.fromCounter(scanMetrics.equalityDeleteFiles()))
+ .positionalDeleteFiles(CounterResult.fromCounter(scanMetrics.positionalDeleteFiles()))
.build();
}
}
@@ -174,6 +186,9 @@ public interface ScanReport {
public static final String SKIPPED_DELETE_MANIFESTS = "skipped-delete-manifests";
public static final String SKIPPED_DATA_FILES = "skipped-data-files";
public static final String SKIPPED_DELETE_FILES = "skipped-delete-files";
+ public static final String INDEXED_DELETE_FILES = "indexed-delete-files";
+ public static final String EQUALITY_DELETE_FILES = "equality-delete-files";
+ public static final String POSITIONAL_DELETE_FILES = "positional-delete-files";
public static ScanMetrics noop() {
return ScanMetrics.of(MetricsContext.nullMetrics());
@@ -246,6 +261,21 @@ public interface ScanReport {
return metricsContext().counter(SKIPPED_DELETE_MANIFESTS, MetricsContext.Unit.COUNT);
}
+ @Value.Derived
+ public Counter indexedDeleteFiles() {
+ return metricsContext().counter(INDEXED_DELETE_FILES, MetricsContext.Unit.COUNT);
+ }
+
+ @Value.Derived
+ public Counter equalityDeleteFiles() {
+ return metricsContext().counter(EQUALITY_DELETE_FILES, MetricsContext.Unit.COUNT);
+ }
+
+ @Value.Derived
+ public Counter positionalDeleteFiles() {
+ return metricsContext().counter(POSITIONAL_DELETE_FILES, MetricsContext.Unit.COUNT);
+ }
+
public static ScanMetrics of(MetricsContext metricsContext) {
return ImmutableScanMetrics.builder().metricsContext(metricsContext).build();
}
diff --git a/core/src/main/java/org/apache/iceberg/DeleteFileIndex.java b/core/src/main/java/org/apache/iceberg/DeleteFileIndex.java
index 6871feaa8a..32fee87ae8 100644
--- a/core/src/main/java/org/apache/iceberg/DeleteFileIndex.java
+++ b/core/src/main/java/org/apache/iceberg/DeleteFileIndex.java
@@ -509,6 +509,19 @@ class DeleteFileIndex {
}
}
+ scanMetrics.indexedDeleteFiles().increment(deleteEntries.size());
+ deleteFilesByPartition
+ .values()
+ .forEach(
+ entry -> {
+ FileContent content = entry.file().content();
+ if (content == FileContent.EQUALITY_DELETES) {
+ scanMetrics.equalityDeleteFiles().increment();
+ } else if (content == FileContent.POSITION_DELETES) {
+ scanMetrics.positionalDeleteFiles().increment();
+ }
+ });
+
return new DeleteFileIndex(
specsById, globalApplySeqs, globalDeletes, sortedDeletesByPartition);
}
diff --git a/core/src/main/java/org/apache/iceberg/metrics/ScanMetricsResultParser.java b/core/src/main/java/org/apache/iceberg/metrics/ScanMetricsResultParser.java
index 7ff12b7e24..98ebaf80a6 100644
--- a/core/src/main/java/org/apache/iceberg/metrics/ScanMetricsResultParser.java
+++ b/core/src/main/java/org/apache/iceberg/metrics/ScanMetricsResultParser.java
@@ -108,6 +108,21 @@ class ScanMetricsResultParser {
CounterResultParser.toJson(metrics.skippedDeleteManifests(), gen);
}
+ if (null != metrics.indexedDeleteFiles()) {
+ gen.writeFieldName(ScanMetrics.INDEXED_DELETE_FILES);
+ CounterResultParser.toJson(metrics.indexedDeleteFiles(), gen);
+ }
+
+ if (null != metrics.equalityDeleteFiles()) {
+ gen.writeFieldName(ScanMetrics.EQUALITY_DELETE_FILES);
+ CounterResultParser.toJson(metrics.equalityDeleteFiles(), gen);
+ }
+
+ if (null != metrics.positionalDeleteFiles()) {
+ gen.writeFieldName(ScanMetrics.POSITIONAL_DELETE_FILES);
+ CounterResultParser.toJson(metrics.positionalDeleteFiles(), gen);
+ }
+
gen.writeEndObject();
}
@@ -142,6 +157,10 @@ class ScanMetricsResultParser {
CounterResultParser.fromJson(ScanMetrics.SCANNED_DELETE_MANIFESTS, json))
.skippedDeleteManifests(
CounterResultParser.fromJson(ScanMetrics.SKIPPED_DELETE_MANIFESTS, json))
+ .indexedDeleteFiles(CounterResultParser.fromJson(ScanMetrics.INDEXED_DELETE_FILES, json))
+ .equalityDeleteFiles(CounterResultParser.fromJson(ScanMetrics.EQUALITY_DELETE_FILES, json))
+ .positionalDeleteFiles(
+ CounterResultParser.fromJson(ScanMetrics.POSITIONAL_DELETE_FILES, json))
.build();
}
}
diff --git a/core/src/test/java/org/apache/iceberg/TestScanPlanningAndReporting.java b/core/src/test/java/org/apache/iceberg/TestScanPlanningAndReporting.java
index 3ee1b35eef..6ad13b2e29 100644
--- a/core/src/test/java/org/apache/iceberg/TestScanPlanningAndReporting.java
+++ b/core/src/test/java/org/apache/iceberg/TestScanPlanningAndReporting.java
@@ -141,6 +141,9 @@ public class TestScanPlanningAndReporting extends TableTestBase {
assertThat(result.totalDeleteFileSizeInBytes().value()).isEqualTo(20L);
assertThat(result.skippedDataFiles().value()).isEqualTo(0);
assertThat(result.skippedDeleteFiles().value()).isEqualTo(0);
+ assertThat(result.indexedDeleteFiles().value()).isEqualTo(2);
+ assertThat(result.equalityDeleteFiles().value()).isEqualTo(0);
+ assertThat(result.positionalDeleteFiles().value()).isEqualTo(2);
}
@Test
@@ -212,6 +215,33 @@ public class TestScanPlanningAndReporting extends TableTestBase {
assertThat(result.totalDeleteManifests().value()).isEqualTo(2);
assertThat(result.totalFileSizeInBytes().value()).isEqualTo(10L);
assertThat(result.totalDeleteFileSizeInBytes().value()).isEqualTo(10L);
+ assertThat(result.indexedDeleteFiles().value()).isEqualTo(1);
+ assertThat(result.equalityDeleteFiles().value()).isEqualTo(1);
+ assertThat(result.positionalDeleteFiles().value()).isEqualTo(0);
+ }
+
+ @Test
+ public void scanningWithEqualityAndPositionalDeleteFiles() throws IOException {
+ String tableName = "scan-planning-with-eq-and-pos-delete-files";
+ Table table =
+ TestTables.create(
+ tableDir, tableName, SCHEMA, SPEC, SortOrder.unsorted(), formatVersion, reporter);
+ table.newAppend().appendFile(FILE_A).commit();
+ // FILE_A_DELETES = positionalDelete / FILE_A2_DELETES = equalityDelete
+ table.newRowDelta().addDeletes(FILE_A_DELETES).addDeletes(FILE_A2_DELETES).commit();
+ TableScan tableScan = table.newScan();
+
+ try (CloseableIterable<FileScanTask> fileScanTasks =
+ tableScan.filter(Expressions.equal("data", "6")).planFiles()) {
+ fileScanTasks.forEach(task -> {});
+ }
+
+ ScanReport scanReport = reporter.lastReport();
+ assertThat(scanReport).isNotNull();
+ ScanMetricsResult result = scanReport.scanMetrics();
+ assertThat(result.indexedDeleteFiles().value()).isEqualTo(2);
+ assertThat(result.equalityDeleteFiles().value()).isEqualTo(1);
+ assertThat(result.positionalDeleteFiles().value()).isEqualTo(1);
}
private static class TestScanReporter implements ScanReporter {
diff --git a/core/src/test/java/org/apache/iceberg/metrics/TestScanMetricsResultParser.java b/core/src/test/java/org/apache/iceberg/metrics/TestScanMetricsResultParser.java
index ffc0a6baf8..06abaceef8 100644
--- a/core/src/test/java/org/apache/iceberg/metrics/TestScanMetricsResultParser.java
+++ b/core/src/test/java/org/apache/iceberg/metrics/TestScanMetricsResultParser.java
@@ -178,6 +178,9 @@ public class TestScanMetricsResultParser {
scanMetrics.skippedDeleteFiles().increment(3L);
scanMetrics.scannedDeleteManifests().increment(3L);
scanMetrics.skippedDeleteManifests().increment(3L);
+ scanMetrics.indexedDeleteFiles().increment(10L);
+ scanMetrics.positionalDeleteFiles().increment(6L);
+ scanMetrics.equalityDeleteFiles().increment(4L);
ScanMetricsResult scanMetricsResult = ScanMetricsResult.fromScanMetrics(scanMetrics);
Assertions.assertThat(
@@ -195,6 +198,9 @@ public class TestScanMetricsResultParser {
+ "\"skipped-delete-files\":{\"unit\":\"count\",\"value\":3},"
+ "\"scanned-delete-manifests\":{\"unit\":\"count\",\"value\":3},"
+ "\"skipped-delete-manifests\":{\"unit\":\"count\",\"value\":3},"
+ + "\"indexed-delete-files\":{\"unit\":\"count\",\"value\":10},"
+ + "\"equality-delete-files\":{\"unit\":\"count\",\"value\":4},"
+ + "\"positional-delete-files\":{\"unit\":\"count\",\"value\":6},"
+ "\"extra\": \"value\",\"extra2\":23}"))
.isEqualTo(scanMetricsResult);
}
@@ -236,6 +242,9 @@ public class TestScanMetricsResultParser {
scanMetrics.skippedDeleteFiles().increment(3L);
scanMetrics.scannedDeleteManifests().increment(3L);
scanMetrics.skippedDeleteManifests().increment(3L);
+ scanMetrics.indexedDeleteFiles().increment(10L);
+ scanMetrics.positionalDeleteFiles().increment(6L);
+ scanMetrics.equalityDeleteFiles().increment(4L);
ScanMetricsResult scanMetricsResult = ScanMetricsResult.fromScanMetrics(scanMetrics);
@@ -293,6 +302,18 @@ public class TestScanMetricsResultParser {
+ " \"skipped-delete-manifests\" : {\n"
+ " \"unit\" : \"count\",\n"
+ " \"value\" : 3\n"
+ + " },\n"
+ + " \"indexed-delete-files\" : {\n"
+ + " \"unit\" : \"count\",\n"
+ + " \"value\" : 10\n"
+ + " },\n"
+ + " \"equality-delete-files\" : {\n"
+ + " \"unit\" : \"count\",\n"
+ + " \"value\" : 4\n"
+ + " },\n"
+ + " \"positional-delete-files\" : {\n"
+ + " \"unit\" : \"count\",\n"
+ + " \"value\" : 6\n"
+ " }\n"
+ "}";
diff --git a/core/src/test/java/org/apache/iceberg/metrics/TestScanReportParser.java b/core/src/test/java/org/apache/iceberg/metrics/TestScanReportParser.java
index fffcdf64ad..8f77be4824 100644
--- a/core/src/test/java/org/apache/iceberg/metrics/TestScanReportParser.java
+++ b/core/src/test/java/org/apache/iceberg/metrics/TestScanReportParser.java
@@ -84,6 +84,9 @@ public class TestScanReportParser {
scanMetrics.skippedDeleteFiles().increment(3L);
scanMetrics.scannedDeleteManifests().increment(3L);
scanMetrics.skippedDeleteManifests().increment(3L);
+ scanMetrics.indexedDeleteFiles().increment(10L);
+ scanMetrics.positionalDeleteFiles().increment(6L);
+ scanMetrics.equalityDeleteFiles().increment(4L);
String tableName = "roundTripTableName";
Schema projection =
@@ -114,6 +117,9 @@ public class TestScanReportParser {
+ "\"skipped-delete-files\":{\"unit\":\"count\",\"value\":3},"
+ "\"scanned-delete-manifests\":{\"unit\":\"count\",\"value\":3},"
+ "\"skipped-delete-manifests\":{\"unit\":\"count\",\"value\":3},"
+ + "\"indexed-delete-files\":{\"unit\":\"count\",\"value\":10},"
+ + "\"equality-delete-files\":{\"unit\":\"count\",\"value\":4},"
+ + "\"positional-delete-files\":{\"unit\":\"count\",\"value\":6},"
+ "\"extra-metric\":\"extra-val\"},"
+ "\"extra\":\"extraVal\"}"))
.usingRecursiveComparison()
@@ -174,6 +180,9 @@ public class TestScanReportParser {
scanMetrics.skippedDeleteFiles().increment(3L);
scanMetrics.scannedDeleteManifests().increment(3L);
scanMetrics.skippedDeleteManifests().increment(3L);
+ scanMetrics.indexedDeleteFiles().increment(10L);
+ scanMetrics.positionalDeleteFiles().increment(6L);
+ scanMetrics.equalityDeleteFiles().increment(4L);
String tableName = "roundTripTableName";
Schema projection =
@@ -256,6 +265,18 @@ public class TestScanReportParser {
+ " \"skipped-delete-manifests\" : {\n"
+ " \"unit\" : \"count\",\n"
+ " \"value\" : 3\n"
+ + " },\n"
+ + " \"indexed-delete-files\" : {\n"
+ + " \"unit\" : \"count\",\n"
+ + " \"value\" : 10\n"
+ + " },\n"
+ + " \"equality-delete-files\" : {\n"
+ + " \"unit\" : \"count\",\n"
+ + " \"value\" : 4\n"
+ + " },\n"
+ + " \"positional-delete-files\" : {\n"
+ + " \"unit\" : \"count\",\n"
+ + " \"value\" : 6\n"
+ " }\n"
+ " }\n"
+ "}";