You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by ti...@apache.org on 2021/11/14 09:44:33 UTC
svn commit: r1895014 - in /pdfbox/trunk/pdfbox/src: main/java/org/apache/pdfbox/text/ main/java/org/apache/pdfbox/util/ test/java/org/apache/pdfbox/util/
Author: tilman
Date: Sun Nov 14 09:44:32 2021
New Revision: 1895014
URL: http://svn.apache.org/viewvc?rev=1895014&view=rev
Log:
PDFBOX-5308: use MergeSort instead of QuickSort, as suggested by Alistair Oldfield
Added:
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/IterativeMergeSort.java (with props)
pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/util/TestSort.java
- copied, changed from r1894994, pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/util/TestQuickSort.java
Removed:
pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/util/TestQuickSort.java
Modified:
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStripper.java
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStripper.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStripper.java?rev=1895014&r1=1895013&r2=1895014&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStripper.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStripper.java Sun Nov 14 09:44:32 2021
@@ -47,7 +47,7 @@ import org.apache.pdfbox.pdmodel.PDPageT
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineItem;
import org.apache.pdfbox.pdmodel.interactive.pagenavigation.PDThreadBead;
-import org.apache.pdfbox.util.QuickSort;
+import org.apache.pdfbox.util.IterativeMergeSort;
/**
* This class will take a pdf document and strip out all of the text and ignore the formatting and such. Please note; it
@@ -506,7 +506,7 @@ public class PDFTextStripper extends Leg
}
catch (IllegalArgumentException e)
{
- QuickSort.sort(textList, comparator);
+ IterativeMergeSort.sort(textList, comparator);
}
}
Added: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/IterativeMergeSort.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/IterativeMergeSort.java?rev=1895014&view=auto
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/IterativeMergeSort.java (added)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/IterativeMergeSort.java Sun Nov 14 09:44:32 2021
@@ -0,0 +1,146 @@
+package org.apache.pdfbox.util;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.util.Comparator;
+import java.util.List;
+import java.util.ListIterator;
+
+/**
+ * This class provides an iterative (bottom-up) implementation of the MergeSort algorithm
+ * for any generic Java object which implements a {@link Comparator}.
+ *
+ * <p>
+ * This implementation uses an iterative implementation approach over the more
+ * classical recursive approach in order to save the auxiliary space required
+ * by the call stack in recursive implementations.
+ * </p>
+ *
+ * Complexity:
+ * <ul>
+ * <li>Worst case time: O(n log n)</li>
+ * <li>Best case time: O(n log n)</li>
+ * <li>Average case time: O(n log n)</li>
+ * <li>Space: O(n log n)</li>
+ * </ul>
+ *
+ * @author Alistair Oldfield
+ *
+ * @see https://en.wikipedia.org/wiki/Merge_sort
+ *
+ */
+public class IterativeMergeSort
+{
+
+ /**
+ * Sorts this list according to the order induced by the specified
+ * {@link Comparator}.
+ *
+ * @param <T> the class of the objects in the list
+ * @param list the list to be sorted.
+ * @param cmp the comparator to determine the order of the list.
+ *
+ * @see java.util.Collections.sort(List<T>, Comparator<? super T>)
+ *
+ */
+ @SuppressWarnings({ "unchecked", "rawtypes"})
+ public static <T> void sort(List<T> list, Comparator<? super T> cmp)
+ {
+
+ if (list.size() < 2)
+ {
+ return;
+ }
+ Object[] arr = list.toArray();
+ iterativeMergeSort(arr, (Comparator) cmp);
+
+ ListIterator<T> i = list.listIterator();
+ for (Object e : arr)
+ {
+ i.next();
+ i.set((T) e);
+ }
+ }
+
+ /**
+ * Sorts the array using iterative (bottom-up) merge sort.
+ *
+ * @param <T> the class of the objects in the list
+ * @param arr the array of objects to be sorted.
+ * @param cmp the comparator to determine the order of the list.
+ */
+ private static <T> void iterativeMergeSort(T[] arr, Comparator<? super T> cmp)
+ {
+
+ T[] aux = arr.clone();
+
+ for (int blockSize = 1; blockSize < arr.length; blockSize = (blockSize << 1))
+ {
+ for (int start = 0; start < arr.length; start += (blockSize << 1))
+ {
+ merge(arr, aux, start, start + blockSize, start + (blockSize << 1), cmp);
+ }
+ }
+ return;
+ }
+
+ /**
+ * Merges two sorted subarrays arr and aux into the order specified by cmp and places the
+ * ordered result back into into arr array.
+ *
+ * @param <T>
+ * @param arr Array containing source data to be sorted and target for destination data
+ * @param aux Array containing copy of source data to be sorted
+ * @param from Start index of left data run so that Left run is arr[from : mid-1].
+ * @param mid End index of left data run and start index of right run data so that Left run is
+ * arr[from : mid-1] and Right run is arr[mid : to]
+ * @param to End index of right run data so that Right run is arr[mid : to]
+ * @param cmp the comparator to determine the order of the list.
+ */
+ private static <T> void merge(T[] arr, T[] aux, int from, int mid, int to, Comparator<? super T> cmp)
+ {
+ if (mid >= arr.length)
+ {
+ return;
+ }
+ if (to > arr.length)
+ {
+ to = arr.length;
+ }
+ int i = from, j = mid;
+ for (int k = from; k < to; k++)
+ {
+ if (i == mid)
+ {
+ aux[k] = arr[j++];
+ }
+ else if (j == to)
+ {
+ aux[k] = arr[i++];
+ }
+ else if (cmp.compare(arr[j], arr[i]) < 0)
+ {
+ aux[k] = arr[j++];
+ }
+ else
+ {
+ aux[k] = arr[i++];
+ }
+ }
+ System.arraycopy(aux, from, arr, from, to - from);
+ }
+}
Propchange: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/IterativeMergeSort.java
------------------------------------------------------------------------------
svn:eol-style = native
Copied: pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/util/TestSort.java (from r1894994, pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/util/TestQuickSort.java)
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/util/TestSort.java?p2=pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/util/TestSort.java&p1=pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/util/TestQuickSort.java&r1=1894994&r2=1895014&rev=1895014&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/util/TestQuickSort.java (original)
+++ pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/util/TestSort.java Sun Nov 14 09:44:32 2021
@@ -28,14 +28,17 @@ import org.junit.jupiter.api.Test;
*
* @author Uwe Pachler
*/
-class TestQuickSort
+class TestSort
{
<T extends Comparable<T>> void doTest(T[] input, T[] expected)
{
- List<T> list = Arrays.asList(input);
+ List<T> list = Arrays.asList(input.clone());
QuickSort.sort(list);
+ assertTrue(Arrays.equals(list.toArray(new Object[input.length]), expected));
+ list = Arrays.asList(input.clone());
+ IterativeMergeSort.sort(list, Comparable::compareTo);
assertTrue(Arrays.equals(list.toArray(new Object[input.length]), expected));
}