You are viewing a plain text version of this content. The canonical link for it was a hyperlink that is not preserved in this plain text version.
Posted to commits@iceberg.apache.org by bl...@apache.org on 2022/08/03 22:44:37 UTC

[iceberg] branch master updated: Core: Simplify scan planning & reporting tests (#5428)

This is an automated email from the ASF dual-hosted git repository.

blue pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iceberg.git


The following commit(s) were added to refs/heads/master by this push:
     new 1b989c86a3 Core: Simplify scan planning & reporting tests (#5428)
1b989c86a3 is described below

commit 1b989c86a3c0da0bbe7d5f385618116cd899bdf2
Author: Eduard Tudenhöfner <et...@gmail.com>
AuthorDate: Thu Aug 4 00:44:32 2022 +0200

    Core: Simplify scan planning & reporting tests (#5428)
---
 .../iceberg/TestScanPlanningAndReporting.java      | 153 +++------------------
 1 file changed, 20 insertions(+), 133 deletions(-)

diff --git a/data/src/test/java/org/apache/iceberg/TestScanPlanningAndReporting.java b/core/src/test/java/org/apache/iceberg/TestScanPlanningAndReporting.java
similarity index 53%
rename from data/src/test/java/org/apache/iceberg/TestScanPlanningAndReporting.java
rename to core/src/test/java/org/apache/iceberg/TestScanPlanningAndReporting.java
index 84e651bcbf..8c3fa181e0 100644
--- a/data/src/test/java/org/apache/iceberg/TestScanPlanningAndReporting.java
+++ b/core/src/test/java/org/apache/iceberg/TestScanPlanningAndReporting.java
@@ -18,28 +18,17 @@
  */
 package org.apache.iceberg;
 
-import static org.apache.iceberg.Files.localInput;
-import static org.apache.iceberg.types.Types.NestedField.required;
 import static org.assertj.core.api.Assertions.assertThat;
-import static org.junit.Assert.assertTrue;
 
-import java.io.File;
 import java.io.IOException;
 import java.time.Duration;
-import java.util.Arrays;
 import java.util.List;
-import org.apache.iceberg.data.GenericRecord;
-import org.apache.iceberg.data.parquet.GenericParquetWriter;
 import org.apache.iceberg.expressions.Expressions;
 import org.apache.iceberg.io.CloseableIterable;
-import org.apache.iceberg.io.FileAppender;
 import org.apache.iceberg.metrics.LoggingScanReporter;
 import org.apache.iceberg.metrics.ScanReport;
 import org.apache.iceberg.metrics.ScanReporter;
-import org.apache.iceberg.parquet.Parquet;
-import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
 import org.apache.iceberg.relocated.com.google.common.collect.Lists;
-import org.apache.iceberg.types.Types;
 import org.junit.Test;
 
 public class TestScanPlanningAndReporting extends TableTestBase {
@@ -51,9 +40,15 @@ public class TestScanPlanningAndReporting extends TableTestBase {
   }
 
   @Test
-  public void testScanPlanningWithReport() throws IOException {
-    String tableName = "simple-scan-planning";
-    Table table = createTableWithCustomRecords(tableName);
+  public void scanningWithMultipleDataManifests() throws IOException {
+    String tableName = "multiple-data-manifests";
+    Table table =
+        TestTables.create(
+            tableDir, tableName, SCHEMA, SPEC, SortOrder.unsorted(), formatVersion, reporter);
+
+    table.newAppend().appendFile(FILE_A).appendFile(FILE_B).commit();
+    table.newAppend().appendFile(FILE_D).commit();
+    table.refresh();
     TableScan tableScan = table.newScan();
 
     // should be 3 files
@@ -65,72 +60,43 @@ public class TestScanPlanningAndReporting extends TableTestBase {
     assertThat(scanReport).isNotNull();
 
     assertThat(scanReport.tableName()).isEqualTo(tableName);
-    assertThat(scanReport.snapshotId()).isEqualTo(1L);
+    assertThat(scanReport.snapshotId()).isEqualTo(2L);
     assertThat(scanReport.filter()).isEqualTo(Expressions.alwaysTrue());
     assertThat(scanReport.scanMetrics().totalPlanningDuration().totalDuration())
         .isGreaterThan(Duration.ZERO);
     assertThat(scanReport.scanMetrics().resultDataFiles().value()).isEqualTo(3);
     assertThat(scanReport.scanMetrics().resultDeleteFiles().value()).isEqualTo(0);
-    assertThat(scanReport.scanMetrics().scannedDataManifests().value()).isEqualTo(1);
+    assertThat(scanReport.scanMetrics().scannedDataManifests().value()).isEqualTo(2);
     assertThat(scanReport.scanMetrics().skippedDataManifests().value()).isEqualTo(0);
-    assertThat(scanReport.scanMetrics().totalDataManifests().value()).isEqualTo(1);
+    assertThat(scanReport.scanMetrics().totalDataManifests().value()).isEqualTo(2);
     assertThat(scanReport.scanMetrics().totalDeleteManifests().value()).isEqualTo(0);
-    assertThat(scanReport.scanMetrics().totalFileSizeInBytes().value()).isEqualTo(1850L);
+    assertThat(scanReport.scanMetrics().totalFileSizeInBytes().value()).isEqualTo(30L);
     assertThat(scanReport.scanMetrics().totalDeleteFileSizeInBytes().value()).isEqualTo(0L);
 
-    // should be 1 file
+    // we should hit only a single data manifest and only a single data file
     try (CloseableIterable<FileScanTask> fileScanTasks =
-        tableScan.filter(Expressions.lessThan("x", "30")).planFiles()) {
+        table.newScan().filter(Expressions.equal("data", "1")).planFiles()) {
       fileScanTasks.forEach(task -> {});
     }
 
     scanReport = reporter.lastReport();
     assertThat(scanReport).isNotNull();
     assertThat(scanReport.tableName()).isEqualTo(tableName);
-    assertThat(scanReport.snapshotId()).isEqualTo(1L);
+    assertThat(scanReport.snapshotId()).isEqualTo(2L);
     assertThat(scanReport.scanMetrics().totalPlanningDuration().totalDuration())
         .isGreaterThan(Duration.ZERO);
     assertThat(scanReport.scanMetrics().resultDataFiles().value()).isEqualTo(1);
     assertThat(scanReport.scanMetrics().resultDeleteFiles().value()).isEqualTo(0);
     assertThat(scanReport.scanMetrics().scannedDataManifests().value()).isEqualTo(1);
-    assertThat(scanReport.scanMetrics().skippedDataManifests().value()).isEqualTo(0);
-    assertThat(scanReport.scanMetrics().totalDataManifests().value()).isEqualTo(1);
+    assertThat(scanReport.scanMetrics().skippedDataManifests().value()).isEqualTo(1);
+    assertThat(scanReport.scanMetrics().totalDataManifests().value()).isEqualTo(2);
     assertThat(scanReport.scanMetrics().totalDeleteManifests().value()).isEqualTo(0);
-    assertThat(scanReport.scanMetrics().totalFileSizeInBytes().value()).isEqualTo(616L);
+    assertThat(scanReport.scanMetrics().totalFileSizeInBytes().value()).isEqualTo(10L);
     assertThat(scanReport.scanMetrics().totalDeleteFileSizeInBytes().value()).isEqualTo(0L);
   }
 
-  private Table createTableWithCustomRecords(String tableName) throws IOException {
-    Schema schema =
-        new Schema(
-            required(1, "id", Types.IntegerType.get()), required(2, "x", Types.StringType.get()));
-
-    Table table =
-        TestTables.create(
-            tableDir,
-            tableName,
-            schema,
-            PartitionSpec.builderFor(schema).build(),
-            SortOrder.unsorted(),
-            formatVersion,
-            reporter);
-    GenericRecord record = GenericRecord.create(schema);
-    record.setField("id", 1);
-    record.setField("x", "23");
-
-    GenericRecord record2 = record.copy(ImmutableMap.of("id", 2, "x", "30"));
-    GenericRecord record3 = record.copy(ImmutableMap.of("id", 3, "x", "45"));
-    GenericRecord record4 = record.copy(ImmutableMap.of("id", 4, "x", "51"));
-
-    DataFile dataFile = writeParquetFile(table, Arrays.asList(record, record3));
-    DataFile dataFile2 = writeParquetFile(table, Arrays.asList(record2));
-    DataFile dataFile3 = writeParquetFile(table, Arrays.asList(record4));
-    table.newFastAppend().appendFile(dataFile).appendFile(dataFile2).appendFile(dataFile3).commit();
-    return table;
-  }
-
   @Test
-  public void deleteScanning() throws IOException {
+  public void scanningWithDeletes() throws IOException {
     Table table =
         TestTables.create(
             tableDir,
@@ -165,85 +131,6 @@ public class TestScanPlanningAndReporting extends TableTestBase {
     assertThat(scanReport.scanMetrics().totalDeleteFileSizeInBytes().value()).isEqualTo(20L);
   }
 
-  @Test
-  public void multipleDataManifests() throws IOException {
-    Table table =
-        TestTables.create(
-            tableDir,
-            "multiple-data-manifests",
-            SCHEMA,
-            SPEC,
-            SortOrder.unsorted(),
-            formatVersion,
-            reporter);
-
-    table.newAppend().appendFile(FILE_A).appendFile(FILE_B).commit();
-    table.newAppend().appendFile(FILE_C).appendFile(FILE_D).commit();
-
-    TableScan tableScan = table.newScan();
-
-    try (CloseableIterable<FileScanTask> fileScanTasks = tableScan.planFiles()) {
-      fileScanTasks.forEach(task -> {});
-    }
-
-    ScanReport scanReport = reporter.lastReport();
-    assertThat(scanReport).isNotNull();
-    assertThat(scanReport.tableName()).isEqualTo("multiple-data-manifests");
-    assertThat(scanReport.snapshotId()).isEqualTo(2L);
-    assertThat(scanReport.scanMetrics().totalPlanningDuration().totalDuration())
-        .isGreaterThan(Duration.ZERO);
-    assertThat(scanReport.scanMetrics().resultDataFiles().value()).isEqualTo(4);
-    assertThat(scanReport.scanMetrics().resultDeleteFiles().value()).isEqualTo(0);
-    assertThat(scanReport.scanMetrics().scannedDataManifests().value()).isEqualTo(2);
-    assertThat(scanReport.scanMetrics().skippedDataManifests().value()).isEqualTo(0);
-    assertThat(scanReport.scanMetrics().totalDataManifests().value()).isEqualTo(2);
-    assertThat(scanReport.scanMetrics().totalDeleteManifests().value()).isEqualTo(0);
-    assertThat(scanReport.scanMetrics().totalFileSizeInBytes().value()).isEqualTo(40L);
-    assertThat(scanReport.scanMetrics().totalDeleteFileSizeInBytes().value()).isEqualTo(0L);
-
-    // we should hit only a single data manifest and only a single data file
-    try (CloseableIterable<FileScanTask> fileScanTasks =
-        tableScan.filter(Expressions.equal("data", "1")).planFiles()) {
-      fileScanTasks.forEach(task -> {});
-    }
-
-    scanReport = reporter.lastReport();
-    assertThat(scanReport).isNotNull();
-    assertThat(scanReport.tableName()).isEqualTo("multiple-data-manifests");
-    assertThat(scanReport.snapshotId()).isEqualTo(2L);
-    assertThat(scanReport.scanMetrics().totalPlanningDuration().totalDuration())
-        .isGreaterThan(Duration.ZERO);
-    assertThat(scanReport.scanMetrics().resultDataFiles().value()).isEqualTo(1);
-    assertThat(scanReport.scanMetrics().resultDeleteFiles().value()).isEqualTo(0);
-    assertThat(scanReport.scanMetrics().scannedDataManifests().value()).isEqualTo(1);
-    assertThat(scanReport.scanMetrics().skippedDataManifests().value()).isEqualTo(1);
-    assertThat(scanReport.scanMetrics().totalDataManifests().value()).isEqualTo(2);
-    assertThat(scanReport.scanMetrics().totalDeleteManifests().value()).isEqualTo(0);
-    assertThat(scanReport.scanMetrics().totalFileSizeInBytes().value()).isEqualTo(10L);
-    assertThat(scanReport.scanMetrics().totalDeleteFileSizeInBytes().value()).isEqualTo(0L);
-  }
-
-  private DataFile writeParquetFile(Table table, List<GenericRecord> records) throws IOException {
-    File parquetFile = temp.newFile();
-    assertTrue(parquetFile.delete());
-    FileAppender<GenericRecord> appender =
-        Parquet.write(Files.localOutput(parquetFile))
-            .schema(table.schema())
-            .createWriterFunc(GenericParquetWriter::buildWriter)
-            .build();
-    try {
-      appender.addAll(records);
-    } finally {
-      appender.close();
-    }
-
-    return DataFiles.builder(table.spec())
-        .withInputFile(localInput(parquetFile))
-        .withMetrics(appender.metrics())
-        .withFormat(FileFormat.PARQUET)
-        .build();
-  }
-
   private static class TestScanReporter implements ScanReporter {
     private final List<ScanReport> reports = Lists.newArrayList();
     // this is mainly so that we see scan reports being logged during tests