You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by ms...@apache.org on 2022/02/23 15:58:21 UTC
svn commit: r1898349 - in /pdfbox/trunk/benchmark: ./ src/ src/main/ src/main/java/ src/main/java/org/ src/main/java/org/apache/ src/main/java/org/apache/pdfbox/ src/main/java/org/apache/pdfbox/benchmark/
Author: msahyoun
Date: Wed Feb 23 15:58:20 2022
New Revision: 1898349
URL: http://svn.apache.org/viewvc?rev=1898349&view=rev
Log:
PDFBOX-5329: initial commit of new benchmark package
Added:
pdfbox/trunk/benchmark/ (with props)
pdfbox/trunk/benchmark/README.md
pdfbox/trunk/benchmark/pom.xml
pdfbox/trunk/benchmark/src/
pdfbox/trunk/benchmark/src/main/
pdfbox/trunk/benchmark/src/main/java/
pdfbox/trunk/benchmark/src/main/java/org/
pdfbox/trunk/benchmark/src/main/java/org/apache/
pdfbox/trunk/benchmark/src/main/java/org/apache/pdfbox/
pdfbox/trunk/benchmark/src/main/java/org/apache/pdfbox/benchmark/
pdfbox/trunk/benchmark/src/main/java/org/apache/pdfbox/benchmark/LoadAndSave.java
pdfbox/trunk/benchmark/src/main/java/org/apache/pdfbox/benchmark/NullOutputStream.java
pdfbox/trunk/benchmark/src/main/java/org/apache/pdfbox/benchmark/Rendering.java
pdfbox/trunk/benchmark/src/main/java/org/apache/pdfbox/benchmark/TextExtraction.java
Propchange: pdfbox/trunk/benchmark/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Wed Feb 23 15:58:20 2022
@@ -0,0 +1 @@
+target
Added: pdfbox/trunk/benchmark/README.md
URL: http://svn.apache.org/viewvc/pdfbox/trunk/benchmark/README.md?rev=1898349&view=auto
==============================================================================
--- pdfbox/trunk/benchmark/README.md (added)
+++ pdfbox/trunk/benchmark/README.md Wed Feb 23 15:58:20 2022
@@ -0,0 +1,81 @@
+<!---
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+--->
+
+# Benchmark module
+
+This module contains benchmarks written using [JMH](https://openjdk.java.net/projects/code-tools/jmh/) from OpenJDK.
+
+## Running benchmarks
+
+The default behavior is to run all benchmarks:
+
+ java -jar target/benchmarks.jar
+
+List all available benchmarks:
+
+ java -jar target/benchmarks.jar -l
+
+Pass a pattern or name after the command to select the benchmarks:
+
+ java -jar target/benchmarks.jar LoadAndSave.loadMediumFile
+
+Check which benchmarks match the provided pattern:
+
+ java -jar target/benchmarks.jar -l LoadAndSave
+
+Run a specific test and override the number of forks, iterations and warm-up iterations to `2`:
+
+ java -jar target/benchmarks.jar -f 2 -i 2 -wi 2 LoadAndSave.loadMediumFile
+
+Get a list of available profilers:
+
+ java -jar target/benchmarks.jar -lprof
+
+The following sections cover async profiler and GC profilers in more detail.
+
+## Using JMH with async-profiler
+
+JMH includes support for [async-profiler](https://github.com/jvm-profiling-tools/async-profiler). After downloading it, run
+JMH using the async-profiler:
+
+ java -jar target/benchmarks.jar -prof async:libPath=/path/to/libasyncProfiler.so
+
+With flame graph output (the semicolon is escaped to ensure it is not treated as a command separator):
+
+ java -jar target/benchmarks.jar -prof async:libPath=/path/to/libasyncProfiler.so\;output=flamegraph
+
+To get help on options to be used for the async-profiler use the following command:
+
+ java -jar target/benchmarks.jar -prof async:help
+
+## Using JMH with GC profiler
+
+To measure the allocation rate run the benchmark with `-prof gc`:
+
+ java -jar target/benchmarks.jar -prof gc
+
+For profiling, the `norm` allocation rates are the important ones, as they measure the allocations per operation rather than the allocations per second, which can increase/decrease with faster/slower code.
+
+## Writing benchmarks
+
+Examples for writing JMH tests are available from the [samples](https://github.com/openjdk/jmh/tree/master/jmh-samples) provided by the JMH project.
+
+Tutorials are available at
+
+ - http://tutorials.jenkov.com/java-performance/jmh.html
+ - https://www.baeldung.com/java-microbenchmark-harness
+ - https://mkyong.com/java/java-jmh-benchmark-tutorial/
\ No newline at end of file
Added: pdfbox/trunk/benchmark/pom.xml
URL: http://svn.apache.org/viewvc/pdfbox/trunk/benchmark/pom.xml?rev=1898349&view=auto
==============================================================================
--- pdfbox/trunk/benchmark/pom.xml (added)
+++ pdfbox/trunk/benchmark/pom.xml Wed Feb 23 15:58:20 2022
@@ -0,0 +1,145 @@
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+
+ <parent>
+ <groupId>org.apache.pdfbox</groupId>
+ <artifactId>pdfbox-parent</artifactId>
+ <version>3.0.0-SNAPSHOT</version>
+ <relativePath>../parent/pom.xml</relativePath>
+ </parent>
+
+ <artifactId>pdfbox-benchmark</artifactId>
+
+ <packaging>jar</packaging>
+ <name>Apache PDFBox Benchmarks</name>
+ <description>
+ The Apache PDFBox library is an open source Java tool for working with PDF documents.
+ This artefact contains JMH benchmarks for Apache PDFBox.
+ </description>
+ <inceptionYear>2002</inceptionYear>
+
+ <dependencies>
+ <dependency>
+ <groupId>org.openjdk.jmh</groupId>
+ <artifactId>jmh-core</artifactId>
+ <version>${jmh.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.openjdk.jmh</groupId>
+ <artifactId>jmh-generator-annprocess</artifactId>
+ <version>${jmh.version}</version>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>pdfbox</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ </dependencies>
+
+ <properties>
+ <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+ <jmh.version>1.33</jmh.version>
+ <uberjar.name>benchmarks</uberjar.name>
+ </properties>
+
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-shade-plugin</artifactId>
+ <version>3.2.1</version>
+ <executions>
+ <execution>
+ <phase>package</phase>
+ <goals>
+ <goal>shade</goal>
+ </goals>
+ <configuration>
+ <finalName>${uberjar.name}</finalName>
+ <transformers>
+ <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
+ <mainClass>org.openjdk.jmh.Main</mainClass>
+ </transformer>
+ <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer" />
+ </transformers>
+ <filters>
+ <filter>
+ <!--
+ Shading signed JARs will fail without this.
+ http://stackoverflow.com/questions/999489/invalid-signature-file-when-attempting-to-run-a-jar
+ -->
+ <artifact>*:*</artifact>
+ <excludes>
+ <exclude>META-INF/*.SF</exclude>
+ <exclude>META-INF/*.DSA</exclude>
+ <exclude>META-INF/*.RSA</exclude>
+ </excludes>
+ </filter>
+ </filters>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <groupId>com.googlecode.maven-download-plugin</groupId>
+ <artifactId>download-maven-plugin</artifactId>
+ <executions>
+ <execution>
+ <id>benchmark-large-file</id>
+ <phase>generate-test-resources</phase>
+ <goals>
+ <goal>wget</goal>
+ </goals>
+ <configuration>
+ <url>https://crossasia-books.ub.uni-heidelberg.de/xasia/reader/download/506/506-42-86246-2-10-20190822.pdf</url>
+ <outputDirectory>${project.build.directory}/pdfs</outputDirectory>
+ <outputFileName>506-42-86246-2-10-20190822.pdf</outputFileName>
+ <sha512>ed2d295d0dfc702174bafd04df79ae4aaf56289f2befc981f217d3b7990e59106f8b7358fe147a9aeaf179dc1f2432c2cc064b0243c91cf18418df59be15bb96</sha512>
+ </configuration>
+ </execution>
+ <execution>
+ <id>benchmark-medium-file</id>
+ <phase>generate-test-resources</phase>
+ <goals>
+ <goal>wget</goal>
+ </goals>
+ <configuration>
+ <url>https://crossasia-books.ub.uni-heidelberg.de/xasia/reader/download/849/849-42-94772-1-10-20210818.pdf</url>
+ <outputDirectory>${project.build.directory}/pdfs</outputDirectory>
+ <outputFileName>849-42-94772-1-10-20210818.pdf</outputFileName>
+ <sha512>78ef8c0f2a3027d44fdfb8afc63ef7dc2cac8ae8f6d35fab4a8782d1c99354a0d944ae9b38026e8a7d82c03142000f78b3715064bd4c52245d7e2feeb241654f</sha512>
+ </configuration>
+ </execution>
+ <execution>
+ <id>pdf32000-2008</id>
+ <phase>generate-test-resources</phase>
+ <goals>
+ <goal>wget</goal>
+ </goals>
+ <configuration>
+ <url>https://www.adobe.com/content/dam/acom/en/devnet/pdf/pdfs/PDF32000_2008.pdf</url>
+ <outputDirectory>${project.build.directory}/pdfs</outputDirectory>
+ <outputFileName>PDF32000_2008.pdf</outputFileName>
+ <sha512>690ce2177154a9526d378b0a6dec48cb2cf648fb7d3f2e43358e43e0b551a1af1b97c68e79b147c70b59c45687e7a98d5858159fca7bb93c3bb419070f7e4dae</sha512>
+ </configuration>
+ </execution>
+ <execution>
+ <id>altona-test-suite</id>
+ <phase>generate-test-resources</phase>
+ <goals>
+ <goal>wget</goal>
+ </goals>
+ <configuration>
+ <url>http://www.eci.org/lib/exe/fetch.php?media=downloads:altona_test_suite:eci_altona-test-suite-v2_technical2_x4.pdf</url>
+ <outputDirectory>${project.build.directory}/pdfs</outputDirectory>
+ <outputFileName>eci_altona-test-suite-v2_technical2_x4.pdf</outputFileName>
+ <sha512>11303a7b9c20f0fb67258715219f8cbdf4d0e52b394a16d21ab0f8517e2cb453337a216d65af35e28fabc56eafc64ed40c1ff4a4d40aef48e66168b9a3d0fc49</sha512>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ </build>
+
+</project>
\ No newline at end of file
Added: pdfbox/trunk/benchmark/src/main/java/org/apache/pdfbox/benchmark/LoadAndSave.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/benchmark/src/main/java/org/apache/pdfbox/benchmark/LoadAndSave.java?rev=1898349&view=auto
==============================================================================
--- pdfbox/trunk/benchmark/src/main/java/org/apache/pdfbox/benchmark/LoadAndSave.java (added)
+++ pdfbox/trunk/benchmark/src/main/java/org/apache/pdfbox/benchmark/LoadAndSave.java Wed Feb 23 15:58:20 2022
@@ -0,0 +1,109 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.benchmark;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.pdfbox.Loader;
+import org.apache.pdfbox.pdfwriter.compress.CompressParameters;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.infra.Blackhole;
+
+/**
+ * JMH benchmarks for loading PDF files and writing them back out.
+ *
+ * <p>Every benchmark reports the average time per invocation in milliseconds. The save
+ * benchmarks load the document inside the measured method, so their timings include the
+ * load as well. All output is written to a {@link NullOutputStream}.
+ *
+ * <p>The input files are read from {@code target/pdfs}, relative to the working directory.
+ */
+public class LoadAndSave {
+
+    static final String MEDIUM_SIZE_TEST_FILE = "target/pdfs/849-42-94772-1-10-20210818.pdf";
+    static final String LARGE_SIZE_TEST_FILE = "target/pdfs/506-42-86246-2-10-20190822.pdf";
+
+    /**
+     * Loads the medium sized test file and closes it again.
+     *
+     * @param blackhole receives the loaded document so the load cannot be optimized away
+     * @throws IOException if the file cannot be read or parsed
+     */
+    @Benchmark
+    @OutputTimeUnit(TimeUnit.MILLISECONDS)
+    @BenchmarkMode(Mode.AverageTime)
+    public void loadMediumFile(Blackhole blackhole) throws IOException {
+        // try-with-resources closes the document even if the benchmark body throws
+        try (PDDocument pdf = Loader.loadPDF(new File(MEDIUM_SIZE_TEST_FILE))) {
+            blackhole.consume(pdf);
+        }
+    }
+
+    /**
+     * Loads the medium sized test file and saves it with the default save settings.
+     *
+     * @param blackhole JMH blackhole; not used by this benchmark
+     * @throws IOException if the file cannot be read, parsed or written
+     */
+    @Benchmark
+    @OutputTimeUnit(TimeUnit.MILLISECONDS)
+    @BenchmarkMode(Mode.AverageTime)
+    public void saveMediumFile(Blackhole blackhole) throws IOException {
+        try (PDDocument pdf = Loader.loadPDF(new File(MEDIUM_SIZE_TEST_FILE))) {
+            pdf.save(new NullOutputStream());
+        }
+    }
+
+    /**
+     * Loads the medium sized test file and writes it using {@code saveIncremental}.
+     *
+     * @param blackhole JMH blackhole; not used by this benchmark
+     * @throws IOException if the file cannot be read, parsed or written
+     */
+    @Benchmark
+    @OutputTimeUnit(TimeUnit.MILLISECONDS)
+    @BenchmarkMode(Mode.AverageTime)
+    public void saveIncrementalMediumFile(Blackhole blackhole) throws IOException {
+        try (PDDocument pdf = Loader.loadPDF(new File(MEDIUM_SIZE_TEST_FILE))) {
+            pdf.saveIncremental(new NullOutputStream());
+        }
+    }
+
+    /**
+     * Loads the medium sized test file and saves it passing
+     * {@link CompressParameters#NO_COMPRESSION}.
+     *
+     * @param blackhole JMH blackhole; not used by this benchmark
+     * @throws IOException if the file cannot be read, parsed or written
+     */
+    @Benchmark
+    @OutputTimeUnit(TimeUnit.MILLISECONDS)
+    @BenchmarkMode(Mode.AverageTime)
+    public void saveNoCompressionMediumFile(Blackhole blackhole) throws IOException {
+        try (PDDocument pdf = Loader.loadPDF(new File(MEDIUM_SIZE_TEST_FILE))) {
+            pdf.save(new NullOutputStream(), CompressParameters.NO_COMPRESSION);
+        }
+    }
+
+    /**
+     * Loads the large test file and closes it again.
+     *
+     * @param blackhole receives the loaded document so the load cannot be optimized away
+     * @throws IOException if the file cannot be read or parsed
+     */
+    @Benchmark
+    @OutputTimeUnit(TimeUnit.MILLISECONDS)
+    @BenchmarkMode(Mode.AverageTime)
+    public void loadLargeFile(Blackhole blackhole) throws IOException {
+        try (PDDocument pdf = Loader.loadPDF(new File(LARGE_SIZE_TEST_FILE))) {
+            blackhole.consume(pdf);
+        }
+    }
+
+    /**
+     * Loads the large test file and saves it with the default save settings.
+     *
+     * @param blackhole JMH blackhole; not used by this benchmark
+     * @throws IOException if the file cannot be read, parsed or written
+     */
+    @Benchmark
+    @OutputTimeUnit(TimeUnit.MILLISECONDS)
+    @BenchmarkMode(Mode.AverageTime)
+    public void saveLargeFile(Blackhole blackhole) throws IOException {
+        try (PDDocument pdf = Loader.loadPDF(new File(LARGE_SIZE_TEST_FILE))) {
+            pdf.save(new NullOutputStream());
+        }
+    }
+
+    /**
+     * Loads the large test file and writes it using {@code saveIncremental}.
+     *
+     * @param blackhole JMH blackhole; not used by this benchmark
+     * @throws IOException if the file cannot be read, parsed or written
+     */
+    @Benchmark
+    @OutputTimeUnit(TimeUnit.MILLISECONDS)
+    @BenchmarkMode(Mode.AverageTime)
+    public void saveIncrementalLargeFile(Blackhole blackhole) throws IOException {
+        try (PDDocument pdf = Loader.loadPDF(new File(LARGE_SIZE_TEST_FILE))) {
+            pdf.saveIncremental(new NullOutputStream());
+        }
+    }
+
+    /**
+     * Loads the large test file and saves it passing
+     * {@link CompressParameters#NO_COMPRESSION}.
+     *
+     * @param blackhole JMH blackhole; not used by this benchmark
+     * @throws IOException if the file cannot be read, parsed or written
+     */
+    @Benchmark
+    @OutputTimeUnit(TimeUnit.MILLISECONDS)
+    @BenchmarkMode(Mode.AverageTime)
+    public void saveNoCompressionLargeFile(Blackhole blackhole) throws IOException {
+        try (PDDocument pdf = Loader.loadPDF(new File(LARGE_SIZE_TEST_FILE))) {
+            pdf.save(new NullOutputStream(), CompressParameters.NO_COMPRESSION);
+        }
+    }
+}
\ No newline at end of file
Added: pdfbox/trunk/benchmark/src/main/java/org/apache/pdfbox/benchmark/NullOutputStream.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/benchmark/src/main/java/org/apache/pdfbox/benchmark/NullOutputStream.java?rev=1898349&view=auto
==============================================================================
--- pdfbox/trunk/benchmark/src/main/java/org/apache/pdfbox/benchmark/NullOutputStream.java (added)
+++ pdfbox/trunk/benchmark/src/main/java/org/apache/pdfbox/benchmark/NullOutputStream.java Wed Feb 23 15:58:20 2022
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.benchmark;
+
+import java.io.IOException;
+import java.io.OutputStream;
+
+/**
+ * An {@link OutputStream} whose write methods do nothing, so everything handed to it is
+ * dropped.
+ */
+public class NullOutputStream extends OutputStream {
+
+    /**
+     * Drops the given byte.
+     *
+     * @param b ignored
+     */
+    @Override
+    public void write(int b) throws IOException {
+        // intentionally empty: the byte is discarded
+    }
+
+    /**
+     * Drops the given array.
+     *
+     * @param b ignored
+     */
+    @Override
+    public void write(byte[] b) throws IOException {
+        // intentionally empty: the bytes are discarded
+    }
+
+    /**
+     * Drops the given array range; the arguments are not inspected.
+     *
+     * @param b ignored
+     * @param off ignored
+     * @param len ignored
+     */
+    @Override
+    public void write(byte[] b, int off, int len) throws IOException {
+        // intentionally empty: the bytes are discarded
+    }
+}
Added: pdfbox/trunk/benchmark/src/main/java/org/apache/pdfbox/benchmark/Rendering.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/benchmark/src/main/java/org/apache/pdfbox/benchmark/Rendering.java?rev=1898349&view=auto
==============================================================================
--- pdfbox/trunk/benchmark/src/main/java/org/apache/pdfbox/benchmark/Rendering.java (added)
+++ pdfbox/trunk/benchmark/src/main/java/org/apache/pdfbox/benchmark/Rendering.java Wed Feb 23 15:58:20 2022
@@ -0,0 +1,146 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.benchmark;
+
+import java.awt.image.BufferedImage;
+import java.io.BufferedOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.concurrent.TimeUnit;
+
+import javax.imageio.ImageIO;
+
+import org.apache.pdfbox.Loader;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.rendering.PDFRenderer;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.infra.Blackhole;
+
+public class Rendering {
+
+ static final String ALTONA_TEST_SUITE = "target/pdfs/altona-test-suite";
+ static final String GHENT_CMYK_X4 = "target/pdfs/Ghent_PDF_Output_Suite_V50_Full/Categories/1-CMYK/Test pages/Ghent_PDF-Output-Test-V50_CMYK_X4.pdf";
+ static final String PDF32000_2008 = "target/pdfs/PDF32000_2008.pdf";
+ static final String RENDER_OUTPUT_DIR = "target/renditions";
+
+ static {
+ System.setProperty("org.apache.commons.logging.Log",
+ "org.apache.commons.logging.impl.NoOpLog");
+ java.util.logging.Logger.getLogger("org.apache").setLevel(java.util.logging.Level.OFF);
+ Path path = Paths.get(RENDER_OUTPUT_DIR);
+ try {
+ Files.createDirectories(path);
+ } catch (IOException e) {
+ // this shouldn't fail and if it does as the
+ // test should be run manually don't care atm
+ }
+ }
+
+ @Benchmark
+ @OutputTimeUnit(TimeUnit.MILLISECONDS)
+ @BenchmarkMode(Mode.AverageTime)
+ public void renderGhentCMYKNoOutput(Blackhole blackhole) throws IOException {
+ PDDocument pdf = Loader.loadPDF(new File(GHENT_CMYK_X4));
+ PDFRenderer renderer = new PDFRenderer(pdf);
+ int numPages = pdf.getNumberOfPages();
+ for (int i = 0; i< numPages; i++)
+ {
+ blackhole.consume(renderer.renderImageWithDPI(i, 600));
+ }
+ pdf.close();
+ }
+
+ @Benchmark
+ @OutputTimeUnit(TimeUnit.MILLISECONDS)
+ @BenchmarkMode(Mode.AverageTime)
+ public void renderGhentCMYK(Blackhole blackhole) throws IOException {
+ PDDocument pdf = Loader.loadPDF(new File(GHENT_CMYK_X4));
+ PDFRenderer renderer = new PDFRenderer(pdf);
+ int numPages = pdf.getNumberOfPages();
+ for (int i = 0; i< numPages; i++)
+ {
+ BufferedImage bi = renderer.renderImageWithDPI(i, 600);
+ ImageIO.write(bi, "PNG", new File(RENDER_OUTPUT_DIR, "ghent-" + i + ".png"));
+ }
+ pdf.close();
+ }
+
+ @Benchmark
+ @OutputTimeUnit(TimeUnit.MILLISECONDS)
+ @BenchmarkMode(Mode.AverageTime)
+ public void renderAltonaNoOutput(Blackhole blackhole) throws IOException {
+ PDDocument pdf = Loader.loadPDF(new File(ALTONA_TEST_SUITE));
+ PDFRenderer renderer = new PDFRenderer(pdf);
+ int numPages = pdf.getNumberOfPages();
+ for (int i = 0; i< numPages; i++)
+ {
+ blackhole.consume(renderer.renderImageWithDPI(i, 600));
+ }
+ pdf.close();
+ }
+
+ @Benchmark
+ @OutputTimeUnit(TimeUnit.MILLISECONDS)
+ @BenchmarkMode(Mode.AverageTime)
+ public void renderAltona(Blackhole blackhole) throws IOException {
+ PDDocument pdf = Loader.loadPDF(new File(ALTONA_TEST_SUITE));
+ PDFRenderer renderer = new PDFRenderer(pdf);
+ int numPages = pdf.getNumberOfPages();
+ for (int i = 0; i< numPages; i++)
+ {
+ BufferedImage bi = renderer.renderImageWithDPI(i, 600);
+ ImageIO.write(bi, "PNG", new File(RENDER_OUTPUT_DIR, "altona-" + i + ".png"));
+ }
+ pdf.close();
+ }
+
+ @Benchmark
+ @OutputTimeUnit(TimeUnit.MILLISECONDS)
+ @BenchmarkMode(Mode.AverageTime)
+ public void renderPDFSpecNoOutput(Blackhole blackhole) throws IOException {
+ PDDocument pdf = Loader.loadPDF(new File(PDF32000_2008));
+ PDFRenderer renderer = new PDFRenderer(pdf);
+ int numPages = pdf.getNumberOfPages();
+ for (int i = 0; i< numPages; i++)
+ {
+ blackhole.consume(renderer.renderImageWithDPI(i, 150));
+ }
+ pdf.close();
+ }
+
+ @Benchmark
+ @OutputTimeUnit(TimeUnit.MILLISECONDS)
+ @BenchmarkMode(Mode.AverageTime)
+ public void renderPDFSpec(Blackhole blackhole) throws IOException {
+ PDDocument pdf = Loader.loadPDF(new File(PDF32000_2008));
+ PDFRenderer renderer = new PDFRenderer(pdf);
+ int numPages = pdf.getNumberOfPages();
+ for (int i = 0; i< numPages; i++)
+ {
+ BufferedImage bi = renderer.renderImageWithDPI(i, 150);
+ ImageIO.write(bi, "PNG", new BufferedOutputStream(new FileOutputStream(new File (RENDER_OUTPUT_DIR, "pdf32000_2008-" + i + ".png"))));
+ }
+ pdf.close();
+ }
+}
\ No newline at end of file
Added: pdfbox/trunk/benchmark/src/main/java/org/apache/pdfbox/benchmark/TextExtraction.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/benchmark/src/main/java/org/apache/pdfbox/benchmark/TextExtraction.java?rev=1898349&view=auto
==============================================================================
--- pdfbox/trunk/benchmark/src/main/java/org/apache/pdfbox/benchmark/TextExtraction.java (added)
+++ pdfbox/trunk/benchmark/src/main/java/org/apache/pdfbox/benchmark/TextExtraction.java Wed Feb 23 15:58:20 2022
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.benchmark;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.pdfbox.Loader;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.text.PDFTextStripper;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.infra.Blackhole;
+
+/**
+ * JMH benchmarks for extracting the text of the PDF 32000-2008 specification with
+ * {@link PDFTextStripper}, once with and once without sorting by position.
+ *
+ * <p>Both benchmarks report the average time per invocation in milliseconds and include
+ * loading the document.
+ */
+public class TextExtraction {
+
+    static final String PDF32000_2008 = "target/pdfs/PDF32000_2008.pdf";
+
+    static {
+        // switch off logging so log output does not distort the measurements
+        System.setProperty("org.apache.commons.logging.Log",
+                "org.apache.commons.logging.impl.NoOpLog");
+        java.util.logging.Logger.getLogger("org.apache").setLevel(java.util.logging.Level.OFF);
+    }
+
+    /**
+     * Extracts the text of the whole document with {@code setSortByPosition(false)}.
+     *
+     * @param blackhole receives the extracted text so the extraction cannot be optimized away
+     * @throws IOException if the file cannot be read or parsed, or the text cannot be extracted
+     */
+    @Benchmark
+    @OutputTimeUnit(TimeUnit.MILLISECONDS)
+    @BenchmarkMode(Mode.AverageTime)
+    public void extractPDFSpecUnsorted(Blackhole blackhole) throws IOException {
+        // try-with-resources closes the document even if the extraction throws
+        try (PDDocument pdf = Loader.loadPDF(new File(PDF32000_2008))) {
+            PDFTextStripper pdfStripper = new PDFTextStripper();
+            pdfStripper.setSortByPosition(false);
+            String parsedText = pdfStripper.getText(pdf);
+            blackhole.consume(parsedText);
+        }
+    }
+
+    /**
+     * Extracts the text of the whole document with {@code setSortByPosition(true)}.
+     *
+     * @param blackhole receives the extracted text so the extraction cannot be optimized away
+     * @throws IOException if the file cannot be read or parsed, or the text cannot be extracted
+     */
+    @Benchmark
+    @OutputTimeUnit(TimeUnit.MILLISECONDS)
+    @BenchmarkMode(Mode.AverageTime)
+    public void extractPDFSpecSorted(Blackhole blackhole) throws IOException {
+        try (PDDocument pdf = Loader.loadPDF(new File(PDF32000_2008))) {
+            PDFTextStripper pdfStripper = new PDFTextStripper();
+            pdfStripper.setSortByPosition(true);
+            String parsedText = pdfStripper.getText(pdf);
+            blackhole.consume(parsedText);
+        }
+    }
+}
\ No newline at end of file