You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by ms...@apache.org on 2022/02/23 15:58:21 UTC

svn commit: r1898349 - in /pdfbox/trunk/benchmark: ./ src/ src/main/ src/main/java/ src/main/java/org/ src/main/java/org/apache/ src/main/java/org/apache/pdfbox/ src/main/java/org/apache/pdfbox/benchmark/

Author: msahyoun
Date: Wed Feb 23 15:58:20 2022
New Revision: 1898349

URL: http://svn.apache.org/viewvc?rev=1898349&view=rev
Log:
PDFBOX-5329: initial commit of new benchmark package

Added:
    pdfbox/trunk/benchmark/   (with props)
    pdfbox/trunk/benchmark/README.md
    pdfbox/trunk/benchmark/pom.xml
    pdfbox/trunk/benchmark/src/
    pdfbox/trunk/benchmark/src/main/
    pdfbox/trunk/benchmark/src/main/java/
    pdfbox/trunk/benchmark/src/main/java/org/
    pdfbox/trunk/benchmark/src/main/java/org/apache/
    pdfbox/trunk/benchmark/src/main/java/org/apache/pdfbox/
    pdfbox/trunk/benchmark/src/main/java/org/apache/pdfbox/benchmark/
    pdfbox/trunk/benchmark/src/main/java/org/apache/pdfbox/benchmark/LoadAndSave.java
    pdfbox/trunk/benchmark/src/main/java/org/apache/pdfbox/benchmark/NullOutputStream.java
    pdfbox/trunk/benchmark/src/main/java/org/apache/pdfbox/benchmark/Rendering.java
    pdfbox/trunk/benchmark/src/main/java/org/apache/pdfbox/benchmark/TextExtraction.java

Propchange: pdfbox/trunk/benchmark/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Wed Feb 23 15:58:20 2022
@@ -0,0 +1 @@
+target

Added: pdfbox/trunk/benchmark/README.md
URL: http://svn.apache.org/viewvc/pdfbox/trunk/benchmark/README.md?rev=1898349&view=auto
==============================================================================
--- pdfbox/trunk/benchmark/README.md (added)
+++ pdfbox/trunk/benchmark/README.md Wed Feb 23 15:58:20 2022
@@ -0,0 +1,81 @@
+<!---
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+--->
+
+# Benchmark module
+
+This module contains benchmarks written using [JMH](https://openjdk.java.net/projects/code-tools/jmh/) from OpenJDK.
+
+## Running benchmarks
+
+The default behavior is to run all benchmarks:
+
+    java -jar target/benchmarks.jar
+
+List all available benchmarks:
+
+    java -jar target/benchmarks.jar -l
+
+Pass a pattern or name after the command to select the benchmarks:
+
+    java -jar target/benchmarks.jar LoadAndSave.loadMediumFile
+
+Check which benchmarks match the provided pattern:
+
+    java -jar target/benchmarks.jar -l LoadAndSave
+
+Run a specific test and override the number of forks, iterations and warm-up iterations, setting each of them to `2`:
+
+    java -jar target/benchmarks.jar  -f 2 -i 2 -wi 2 LoadAndSave.loadMediumFile
+
+Get a list of available profilers:
+
+    java -jar target/benchmarks.jar -lprof
+
+The following sections cover async profiler and GC profilers in more detail.
+
+## Using JMH with async-profiler
+
+JMH supports profiling with [async-profiler](https://github.com/jvm-profiling-tools/async-profiler). After downloading it, run
+JMH with the async-profiler:
+
+    java -jar target/benchmarks.jar -prof async:libPath=/path/to/libasyncProfiler.so
+
+With flame graph output (the semicolon is escaped to ensure it is not treated as a command separator):
+
+    java -jar target/benchmarks.jar -prof async:libPath=/path/to/libasyncProfiler.so\;output=flamegraph
+
+To get help on options to be used for the async-profiler use the following command:
+
+    java -jar target/benchmarks.jar -prof async:help
+
+## Using JMH with GC profiler
+
+To measure the allocation rate run the benchmark with `-prof gc`:
+
+    java -jar target/benchmarks.jar -prof gc
+
+When profiling, the `norm` allocation rates are the important ones, as they measure the allocations per operation rather than the allocations per second, which can increase or decrease with faster or slower code.
+
+## Writing benchmarks
+
+Examples for writing JMH tests are available in the [samples](https://github.com/openjdk/jmh/tree/master/jmh-samples) provided by the JMH project.
+
+Tutorials are available at 
+
+  - http://tutorials.jenkov.com/java-performance/jmh.html
+  - https://www.baeldung.com/java-microbenchmark-harness
+  - https://mkyong.com/java/java-jmh-benchmark-tutorial/
\ No newline at end of file

Added: pdfbox/trunk/benchmark/pom.xml
URL: http://svn.apache.org/viewvc/pdfbox/trunk/benchmark/pom.xml?rev=1898349&view=auto
==============================================================================
--- pdfbox/trunk/benchmark/pom.xml (added)
+++ pdfbox/trunk/benchmark/pom.xml Wed Feb 23 15:58:20 2022
@@ -0,0 +1,145 @@
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+
+    <parent>
+        <groupId>org.apache.pdfbox</groupId>
+        <artifactId>pdfbox-parent</artifactId>
+        <version>3.0.0-SNAPSHOT</version>
+        <relativePath>../parent/pom.xml</relativePath>
+    </parent>
+
+    <artifactId>pdfbox-benchmark</artifactId>
+
+    <packaging>jar</packaging>
+    <name>Apache PDFBox Benchmarks</name>
+    <description>
+        The Apache PDFBox library is an open source Java tool for working with PDF documents.
+        This artefact contains JMH benchmarks for Apache PDFBox.
+      </description>
+    <inceptionYear>2002</inceptionYear>
+
+    <dependencies>
+        <dependency>
+            <groupId>org.openjdk.jmh</groupId>
+            <artifactId>jmh-core</artifactId>
+            <version>${jmh.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>org.openjdk.jmh</groupId>
+            <artifactId>jmh-generator-annprocess</artifactId>
+            <version>${jmh.version}</version>
+            <scope>provided</scope>
+        </dependency>
+		<dependency>
+			<groupId>${project.groupId}</groupId>
+			<artifactId>pdfbox</artifactId>
+			<version>${project.version}</version>
+		</dependency>
+    </dependencies>
+
+    <properties>
+        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+        <jmh.version>1.33</jmh.version>
+        <uberjar.name>benchmarks</uberjar.name>
+    </properties>
+
+    <build>
+        <plugins>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-shade-plugin</artifactId>
+                <version>3.2.1</version>
+                <executions>
+                    <execution>
+                        <phase>package</phase>
+                        <goals>
+                            <goal>shade</goal>
+                        </goals>
+                        <configuration>
+                            <finalName>${uberjar.name}</finalName>
+                            <transformers>
+                                <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
+                                    <mainClass>org.openjdk.jmh.Main</mainClass>
+                                </transformer>
+                                <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer" />
+                            </transformers>
+                            <filters>
+                                <filter>
+                                    <!--
+                                        Shading signed JARs will fail without this.
+                                        http://stackoverflow.com/questions/999489/invalid-signature-file-when-attempting-to-run-a-jar
+                                    -->
+                                    <artifact>*:*</artifact>
+                                    <excludes>
+                                        <exclude>META-INF/*.SF</exclude>
+                                        <exclude>META-INF/*.DSA</exclude>
+                                        <exclude>META-INF/*.RSA</exclude>
+                                    </excludes>
+                                </filter>
+                            </filters>
+                        </configuration>
+                    </execution>
+                </executions>
+            </plugin>
+            <plugin>
+                <groupId>com.googlecode.maven-download-plugin</groupId>
+                <artifactId>download-maven-plugin</artifactId>
+                <executions>
+                    <execution>
+                        <id>benchmark-large-file</id>
+                        <phase>generate-test-resources</phase>
+                        <goals>
+                            <goal>wget</goal>
+                        </goals>
+                        <configuration>
+                            <url>https://crossasia-books.ub.uni-heidelberg.de/xasia/reader/download/506/506-42-86246-2-10-20190822.pdf</url>
+                            <outputDirectory>${project.build.directory}/pdfs</outputDirectory>
+                            <outputFileName>506-42-86246-2-10-20190822.pdf</outputFileName>
+                            <sha512>ed2d295d0dfc702174bafd04df79ae4aaf56289f2befc981f217d3b7990e59106f8b7358fe147a9aeaf179dc1f2432c2cc064b0243c91cf18418df59be15bb96</sha512>
+                        </configuration>
+                    </execution>
+                    <execution>
+                        <id>benchmark-medium-file</id>
+                        <phase>generate-test-resources</phase>
+                        <goals>
+                            <goal>wget</goal>
+                        </goals>
+                        <configuration>
+                            <url>https://crossasia-books.ub.uni-heidelberg.de/xasia/reader/download/849/849-42-94772-1-10-20210818.pdf</url>
+                            <outputDirectory>${project.build.directory}/pdfs</outputDirectory>
+                            <outputFileName>849-42-94772-1-10-20210818.pdf</outputFileName>
+                            <sha512>78ef8c0f2a3027d44fdfb8afc63ef7dc2cac8ae8f6d35fab4a8782d1c99354a0d944ae9b38026e8a7d82c03142000f78b3715064bd4c52245d7e2feeb241654f</sha512>
+                        </configuration>
+                    </execution>
+                    <execution>
+                        <id>pdf32000-2008</id>
+                        <phase>generate-test-resources</phase>
+                        <goals>
+                            <goal>wget</goal>
+                        </goals>
+                        <configuration>
+                            <url>https://www.adobe.com/content/dam/acom/en/devnet/pdf/pdfs/PDF32000_2008.pdf</url>
+                            <outputDirectory>${project.build.directory}/pdfs</outputDirectory>
+                            <outputFileName>PDF32000_2008.pdf</outputFileName>
+                            <sha512>690ce2177154a9526d378b0a6dec48cb2cf648fb7d3f2e43358e43e0b551a1af1b97c68e79b147c70b59c45687e7a98d5858159fca7bb93c3bb419070f7e4dae</sha512>
+                        </configuration>
+                    </execution>
+                    <execution>
+                        <id>altona-test-suite</id>
+                        <phase>generate-test-resources</phase>
+                        <goals>
+                            <goal>wget</goal>
+                        </goals>
+                        <configuration>
+                            <url>http://www.eci.org/lib/exe/fetch.php?media=downloads:altona_test_suite:eci_altona-test-suite-v2_technical2_x4.pdf</url>
+                            <outputDirectory>${project.build.directory}/pdfs</outputDirectory>
+                            <outputFileName>eci_altona-test-suite-v2_technical2_x4.pdf</outputFileName>
+                            <sha512>11303a7b9c20f0fb67258715219f8cbdf4d0e52b394a16d21ab0f8517e2cb453337a216d65af35e28fabc56eafc64ed40c1ff4a4d40aef48e66168b9a3d0fc49</sha512>
+                        </configuration>
+                    </execution>                    
+                </executions>
+            </plugin>
+        </plugins>
+    </build>
+
+</project>
\ No newline at end of file

Added: pdfbox/trunk/benchmark/src/main/java/org/apache/pdfbox/benchmark/LoadAndSave.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/benchmark/src/main/java/org/apache/pdfbox/benchmark/LoadAndSave.java?rev=1898349&view=auto
==============================================================================
--- pdfbox/trunk/benchmark/src/main/java/org/apache/pdfbox/benchmark/LoadAndSave.java (added)
+++ pdfbox/trunk/benchmark/src/main/java/org/apache/pdfbox/benchmark/LoadAndSave.java Wed Feb 23 15:58:20 2022
@@ -0,0 +1,164 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.benchmark;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.pdfbox.Loader;
+import org.apache.pdfbox.pdfwriter.compress.CompressParameters;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.infra.Blackhole;
+
+/**
+ * JMH benchmarks measuring the average time needed to load a PDF document and
+ * to save it again using the save variants offered by {@link PDDocument}.
+ * <p>
+ * Saved output goes to a {@link NullOutputStream}, which discards all bytes.
+ * Every document is opened in a try-with-resources statement so that it is
+ * closed even when loading or saving fails.
+ */
+public class LoadAndSave {
+
+    static final String MEDIUM_SIZE_TEST_FILE = "target/pdfs/849-42-94772-1-10-20210818.pdf";
+    static final String LARGE_SIZE_TEST_FILE = "target/pdfs/506-42-86246-2-10-20190822.pdf";
+
+    /**
+     * Loads the medium sized test file.
+     *
+     * @param blackhole receives the loaded document
+     * @throws IOException if the file cannot be read or parsed
+     */
+    @Benchmark
+    @OutputTimeUnit(TimeUnit.MILLISECONDS)
+    @BenchmarkMode(Mode.AverageTime)
+    public void loadMediumFile(Blackhole blackhole) throws IOException {
+        try (PDDocument pdf = Loader.loadPDF(new File(MEDIUM_SIZE_TEST_FILE))) {
+            blackhole.consume(pdf);
+        }
+    }
+
+    /**
+     * Loads the medium sized test file and saves it with {@code save(OutputStream)}.
+     *
+     * @param blackhole not used by this benchmark
+     * @throws IOException if the file cannot be read, parsed or saved
+     */
+    @Benchmark
+    @OutputTimeUnit(TimeUnit.MILLISECONDS)
+    @BenchmarkMode(Mode.AverageTime)
+    public void saveMediumFile(Blackhole blackhole) throws IOException {
+        try (PDDocument pdf = Loader.loadPDF(new File(MEDIUM_SIZE_TEST_FILE))) {
+            pdf.save(new NullOutputStream());
+        }
+    }
+
+    /**
+     * Loads the medium sized test file and saves it with {@code saveIncremental(OutputStream)}.
+     *
+     * @param blackhole not used by this benchmark
+     * @throws IOException if the file cannot be read, parsed or saved
+     */
+    @Benchmark
+    @OutputTimeUnit(TimeUnit.MILLISECONDS)
+    @BenchmarkMode(Mode.AverageTime)
+    public void saveIncrementalMediumFile(Blackhole blackhole) throws IOException {
+        try (PDDocument pdf = Loader.loadPDF(new File(MEDIUM_SIZE_TEST_FILE))) {
+            pdf.saveIncremental(new NullOutputStream());
+        }
+    }
+
+    /**
+     * Loads the medium sized test file and saves it with {@link CompressParameters#NO_COMPRESSION}.
+     *
+     * @param blackhole not used by this benchmark
+     * @throws IOException if the file cannot be read, parsed or saved
+     */
+    @Benchmark
+    @OutputTimeUnit(TimeUnit.MILLISECONDS)
+    @BenchmarkMode(Mode.AverageTime)
+    public void saveNoCompressionMediumFile(Blackhole blackhole) throws IOException {
+        try (PDDocument pdf = Loader.loadPDF(new File(MEDIUM_SIZE_TEST_FILE))) {
+            pdf.save(new NullOutputStream(), CompressParameters.NO_COMPRESSION);
+        }
+    }
+
+    /**
+     * Loads the large test file.
+     *
+     * @param blackhole receives the loaded document
+     * @throws IOException if the file cannot be read or parsed
+     */
+    @Benchmark
+    @OutputTimeUnit(TimeUnit.MILLISECONDS)
+    @BenchmarkMode(Mode.AverageTime)
+    public void loadLargeFile(Blackhole blackhole) throws IOException {
+        try (PDDocument pdf = Loader.loadPDF(new File(LARGE_SIZE_TEST_FILE))) {
+            blackhole.consume(pdf);
+        }
+    }
+
+    /**
+     * Loads the large test file and saves it with {@code save(OutputStream)}.
+     *
+     * @param blackhole not used by this benchmark
+     * @throws IOException if the file cannot be read, parsed or saved
+     */
+    @Benchmark
+    @OutputTimeUnit(TimeUnit.MILLISECONDS)
+    @BenchmarkMode(Mode.AverageTime)
+    public void saveLargeFile(Blackhole blackhole) throws IOException {
+        try (PDDocument pdf = Loader.loadPDF(new File(LARGE_SIZE_TEST_FILE))) {
+            pdf.save(new NullOutputStream());
+        }
+    }
+
+    /**
+     * Loads the large test file and saves it with {@code saveIncremental(OutputStream)}.
+     *
+     * @param blackhole not used by this benchmark
+     * @throws IOException if the file cannot be read, parsed or saved
+     */
+    @Benchmark
+    @OutputTimeUnit(TimeUnit.MILLISECONDS)
+    @BenchmarkMode(Mode.AverageTime)
+    public void saveIncrementalLargeFile(Blackhole blackhole) throws IOException {
+        try (PDDocument pdf = Loader.loadPDF(new File(LARGE_SIZE_TEST_FILE))) {
+            pdf.saveIncremental(new NullOutputStream());
+        }
+    }
+
+    /**
+     * Loads the large test file and saves it with {@link CompressParameters#NO_COMPRESSION}.
+     *
+     * @param blackhole not used by this benchmark
+     * @throws IOException if the file cannot be read, parsed or saved
+     */
+    @Benchmark
+    @OutputTimeUnit(TimeUnit.MILLISECONDS)
+    @BenchmarkMode(Mode.AverageTime)
+    public void saveNoCompressionLargeFile(Blackhole blackhole) throws IOException {
+        try (PDDocument pdf = Loader.loadPDF(new File(LARGE_SIZE_TEST_FILE))) {
+            pdf.save(new NullOutputStream(), CompressParameters.NO_COMPRESSION);
+        }
+    }
+}
\ No newline at end of file

Added: pdfbox/trunk/benchmark/src/main/java/org/apache/pdfbox/benchmark/NullOutputStream.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/benchmark/src/main/java/org/apache/pdfbox/benchmark/NullOutputStream.java?rev=1898349&view=auto
==============================================================================
--- pdfbox/trunk/benchmark/src/main/java/org/apache/pdfbox/benchmark/NullOutputStream.java (added)
+++ pdfbox/trunk/benchmark/src/main/java/org/apache/pdfbox/benchmark/NullOutputStream.java Wed Feb 23 15:58:20 2022
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.benchmark;
+
+import java.io.IOException;
+import java.io.OutputStream;
+
+/**
+ * An {@link OutputStream} that silently discards everything written to it.
+ * <p>
+ * None of the write methods inspects or stores its arguments.
+ */
+public class NullOutputStream extends OutputStream {
+
+    /**
+     * Discards the given byte.
+     *
+     * @param b ignored
+     * @throws IOException never thrown by this implementation
+     */
+    @Override
+    public void write(int b) throws IOException {
+        // intentionally empty
+    }
+
+    /**
+     * Discards the given bytes.
+     *
+     * @param b ignored
+     * @throws IOException never thrown by this implementation
+     */
+    @Override
+    public void write(byte[] b) throws IOException {
+        // intentionally empty
+    }
+
+    /**
+     * Discards the given range of bytes; the arguments are not validated.
+     *
+     * @param b ignored
+     * @param off ignored
+     * @param len ignored
+     * @throws IOException never thrown by this implementation
+     */
+    @Override
+    public void write(byte[] b, int off, int len) throws IOException {
+        // intentionally empty
+    }
+}

Added: pdfbox/trunk/benchmark/src/main/java/org/apache/pdfbox/benchmark/Rendering.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/benchmark/src/main/java/org/apache/pdfbox/benchmark/Rendering.java?rev=1898349&view=auto
==============================================================================
--- pdfbox/trunk/benchmark/src/main/java/org/apache/pdfbox/benchmark/Rendering.java (added)
+++ pdfbox/trunk/benchmark/src/main/java/org/apache/pdfbox/benchmark/Rendering.java Wed Feb 23 15:58:20 2022
@@ -0,0 +1,192 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.benchmark;
+
+import java.awt.image.BufferedImage;
+import java.io.BufferedOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.concurrent.TimeUnit;
+
+import javax.imageio.ImageIO;
+
+import org.apache.pdfbox.Loader;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.rendering.PDFRenderer;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.infra.Blackhole;
+
+/**
+ * JMH benchmarks measuring the average time needed to render all pages of the
+ * test documents with {@link PDFRenderer}.
+ * <p>
+ * For each document there is a "NoOutput" variant that only hands the rendered
+ * images to the {@link Blackhole} and a variant that additionally writes each
+ * page as PNG file below {@link #RENDER_OUTPUT_DIR}.
+ */
+public class Rendering {
+
+    // must match the outputFileName of the altona-test-suite download in the pom.xml
+    static final String ALTONA_TEST_SUITE = "target/pdfs/eci_altona-test-suite-v2_technical2_x4.pdf";
+    // NOTE(review): the pom.xml of this module shows no download for the Ghent
+    // output suite - confirm how this file is provided before running the benchmark
+    static final String GHENT_CMYK_X4 = "target/pdfs/Ghent_PDF_Output_Suite_V50_Full/Categories/1-CMYK/Test pages/Ghent_PDF-Output-Test-V50_CMYK_X4.pdf";
+    static final String PDF32000_2008 = "target/pdfs/PDF32000_2008.pdf";
+    static final String RENDER_OUTPUT_DIR = "target/renditions";
+
+    static {
+        // silence commons-logging and java.util.logging for org.apache classes
+        System.setProperty("org.apache.commons.logging.Log",
+                     "org.apache.commons.logging.impl.NoOpLog");
+        java.util.logging.Logger.getLogger("org.apache").setLevel(java.util.logging.Level.OFF);
+        Path path = Paths.get(RENDER_OUTPUT_DIR);
+        try {
+            Files.createDirectories(path);
+        } catch (IOException ignored) {
+            // deliberately best effort: the benchmarks are run manually and a
+            // missing output directory should surface once the first PNG is written
+        }
+    }
+
+    /**
+     * Renders all pages of the Ghent CMYK test file at 600 dpi without writing them.
+     *
+     * @param blackhole receives every rendered image
+     * @throws IOException if the document cannot be loaded or rendered
+     */
+    @Benchmark
+    @OutputTimeUnit(TimeUnit.MILLISECONDS)
+    @BenchmarkMode(Mode.AverageTime)
+    public void renderGhentCMYKNoOutput(Blackhole blackhole) throws IOException {
+        try (PDDocument pdf = Loader.loadPDF(new File(GHENT_CMYK_X4))) {
+            PDFRenderer renderer = new PDFRenderer(pdf);
+            int numPages = pdf.getNumberOfPages();
+            for (int i = 0; i < numPages; i++) {
+                blackhole.consume(renderer.renderImageWithDPI(i, 600));
+            }
+        }
+    }
+
+    /**
+     * Renders all pages of the Ghent CMYK test file at 600 dpi and writes them as PNG files.
+     *
+     * @param blackhole not used by this benchmark
+     * @throws IOException if the document cannot be loaded or rendered or a PNG cannot be written
+     */
+    @Benchmark
+    @OutputTimeUnit(TimeUnit.MILLISECONDS)
+    @BenchmarkMode(Mode.AverageTime)
+    public void renderGhentCMYK(Blackhole blackhole) throws IOException {
+        try (PDDocument pdf = Loader.loadPDF(new File(GHENT_CMYK_X4))) {
+            PDFRenderer renderer = new PDFRenderer(pdf);
+            int numPages = pdf.getNumberOfPages();
+            for (int i = 0; i < numPages; i++) {
+                BufferedImage bi = renderer.renderImageWithDPI(i, 600);
+                ImageIO.write(bi, "PNG", new File(RENDER_OUTPUT_DIR, "ghent-" + i + ".png"));
+            }
+        }
+    }
+
+    /**
+     * Renders all pages of the Altona test suite file at 600 dpi without writing them.
+     *
+     * @param blackhole receives every rendered image
+     * @throws IOException if the document cannot be loaded or rendered
+     */
+    @Benchmark
+    @OutputTimeUnit(TimeUnit.MILLISECONDS)
+    @BenchmarkMode(Mode.AverageTime)
+    public void renderAltonaNoOutput(Blackhole blackhole) throws IOException {
+        try (PDDocument pdf = Loader.loadPDF(new File(ALTONA_TEST_SUITE))) {
+            PDFRenderer renderer = new PDFRenderer(pdf);
+            int numPages = pdf.getNumberOfPages();
+            for (int i = 0; i < numPages; i++) {
+                blackhole.consume(renderer.renderImageWithDPI(i, 600));
+            }
+        }
+    }
+
+    /**
+     * Renders all pages of the Altona test suite file at 600 dpi and writes them as PNG files.
+     *
+     * @param blackhole not used by this benchmark
+     * @throws IOException if the document cannot be loaded or rendered or a PNG cannot be written
+     */
+    @Benchmark
+    @OutputTimeUnit(TimeUnit.MILLISECONDS)
+    @BenchmarkMode(Mode.AverageTime)
+    public void renderAltona(Blackhole blackhole) throws IOException {
+        try (PDDocument pdf = Loader.loadPDF(new File(ALTONA_TEST_SUITE))) {
+            PDFRenderer renderer = new PDFRenderer(pdf);
+            int numPages = pdf.getNumberOfPages();
+            for (int i = 0; i < numPages; i++) {
+                BufferedImage bi = renderer.renderImageWithDPI(i, 600);
+                ImageIO.write(bi, "PNG", new File(RENDER_OUTPUT_DIR, "altona-" + i + ".png"));
+            }
+        }
+    }
+
+    /**
+     * Renders all pages of the PDF32000_2008 test file at 150 dpi without writing them.
+     *
+     * @param blackhole receives every rendered image
+     * @throws IOException if the document cannot be loaded or rendered
+     */
+    @Benchmark
+    @OutputTimeUnit(TimeUnit.MILLISECONDS)
+    @BenchmarkMode(Mode.AverageTime)
+    public void renderPDFSpecNoOutput(Blackhole blackhole) throws IOException {
+        try (PDDocument pdf = Loader.loadPDF(new File(PDF32000_2008))) {
+            PDFRenderer renderer = new PDFRenderer(pdf);
+            int numPages = pdf.getNumberOfPages();
+            for (int i = 0; i < numPages; i++) {
+                blackhole.consume(renderer.renderImageWithDPI(i, 150));
+            }
+        }
+    }
+
+    /**
+     * Renders all pages of the PDF32000_2008 test file at 150 dpi and writes them as PNG files.
+     *
+     * @param blackhole not used by this benchmark
+     * @throws IOException if the document cannot be loaded or rendered or a PNG cannot be written
+     */
+    @Benchmark
+    @OutputTimeUnit(TimeUnit.MILLISECONDS)
+    @BenchmarkMode(Mode.AverageTime)
+    public void renderPDFSpec(Blackhole blackhole) throws IOException {
+        try (PDDocument pdf = Loader.loadPDF(new File(PDF32000_2008))) {
+            PDFRenderer renderer = new PDFRenderer(pdf);
+            int numPages = pdf.getNumberOfPages();
+            for (int i = 0; i < numPages; i++) {
+                BufferedImage bi = renderer.renderImageWithDPI(i, 150);
+                File png = new File(RENDER_OUTPUT_DIR, "pdf32000_2008-" + i + ".png");
+                // ImageIO.write does not close the stream it is given, so close it per page
+                try (BufferedOutputStream out = new BufferedOutputStream(new FileOutputStream(png))) {
+                    ImageIO.write(bi, "PNG", out);
+                }
+            }
+        }
+    }
+}
\ No newline at end of file

Added: pdfbox/trunk/benchmark/src/main/java/org/apache/pdfbox/benchmark/TextExtraction.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/benchmark/src/main/java/org/apache/pdfbox/benchmark/TextExtraction.java?rev=1898349&view=auto
==============================================================================
--- pdfbox/trunk/benchmark/src/main/java/org/apache/pdfbox/benchmark/TextExtraction.java (added)
+++ pdfbox/trunk/benchmark/src/main/java/org/apache/pdfbox/benchmark/TextExtraction.java Wed Feb 23 15:58:20 2022
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.benchmark;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.pdfbox.Loader;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.text.PDFTextStripper;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.infra.Blackhole;
+
+/**
+ * JMH benchmarks measuring the average time needed to extract the text of the
+ * PDF32000_2008 test file with {@link PDFTextStripper}.
+ */
+public class TextExtraction {
+
+    static final String PDF32000_2008 = "target/pdfs/PDF32000_2008.pdf";
+
+    static {
+        // silence commons-logging and java.util.logging for org.apache classes,
+        // presumably to keep log output out of the measurements
+        System.setProperty("org.apache.commons.logging.Log",
+                     "org.apache.commons.logging.impl.NoOpLog");
+        java.util.logging.Logger.getLogger("org.apache").setLevel(java.util.logging.Level.OFF);
+    }
+
+    /**
+     * Extracts the text of the whole document with position sorting disabled.
+     *
+     * @param blackhole receives the extracted text
+     * @throws IOException if the document cannot be loaded or its text cannot be extracted
+     */
+    @Benchmark
+    @OutputTimeUnit(TimeUnit.MILLISECONDS)
+    @BenchmarkMode(Mode.AverageTime)
+    public void extractPDFSpecUnsorted(Blackhole blackhole) throws IOException {
+        try (PDDocument pdf = Loader.loadPDF(new File(PDF32000_2008))) {
+            PDFTextStripper pdfStripper = new PDFTextStripper();
+            pdfStripper.setSortByPosition(false);
+            blackhole.consume(pdfStripper.getText(pdf));
+        }
+    }
+
+    /**
+     * Extracts the text of the whole document with position sorting enabled.
+     *
+     * @param blackhole receives the extracted text
+     * @throws IOException if the document cannot be loaded or its text cannot be extracted
+     */
+    @Benchmark
+    @OutputTimeUnit(TimeUnit.MILLISECONDS)
+    @BenchmarkMode(Mode.AverageTime)
+    public void extractPDFSpecSorted(Blackhole blackhole) throws IOException {
+        try (PDDocument pdf = Loader.loadPDF(new File(PDF32000_2008))) {
+            PDFTextStripper pdfStripper = new PDFTextStripper();
+            pdfStripper.setSortByPosition(true);
+            blackhole.consume(pdfStripper.getText(pdf));
+        }
+    }
+}
\ No newline at end of file