You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by ti...@apache.org on 2021/11/14 09:44:33 UTC

svn commit: r1895014 - in /pdfbox/trunk/pdfbox/src: main/java/org/apache/pdfbox/text/ main/java/org/apache/pdfbox/util/ test/java/org/apache/pdfbox/util/

Author: tilman
Date: Sun Nov 14 09:44:32 2021
New Revision: 1895014

URL: http://svn.apache.org/viewvc?rev=1895014&view=rev
Log:
PDFBOX-5308: use MergeSort instead of QuickSort, as suggested by Alistair Oldfield

Added:
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/IterativeMergeSort.java   (with props)
    pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/util/TestSort.java
      - copied, changed from r1894994, pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/util/TestQuickSort.java
Removed:
    pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/util/TestQuickSort.java
Modified:
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStripper.java

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStripper.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStripper.java?rev=1895014&r1=1895013&r2=1895014&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStripper.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStripper.java Sun Nov 14 09:44:32 2021
@@ -47,7 +47,7 @@ import org.apache.pdfbox.pdmodel.PDPageT
 import org.apache.pdfbox.pdmodel.common.PDRectangle;
 import org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineItem;
 import org.apache.pdfbox.pdmodel.interactive.pagenavigation.PDThreadBead;
-import org.apache.pdfbox.util.QuickSort;
+import org.apache.pdfbox.util.IterativeMergeSort;
 
 /**
  * This class will take a pdf document and strip out all of the text and ignore the formatting and such. Please note; it
@@ -506,7 +506,7 @@ public class PDFTextStripper extends Leg
                 }
                 catch (IllegalArgumentException e)
                 {
-                    QuickSort.sort(textList, comparator);
+                    IterativeMergeSort.sort(textList, comparator);
                 }
             }
 

Added: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/IterativeMergeSort.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/IterativeMergeSort.java?rev=1895014&view=auto
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/IterativeMergeSort.java (added)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/IterativeMergeSort.java Sun Nov 14 09:44:32 2021
@@ -0,0 +1,146 @@
+package org.apache.pdfbox.util;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.util.Comparator;
+import java.util.List;
+import java.util.ListIterator;
+
+/**
+ * This class provides an iterative (bottom-up) implementation of the MergeSort algorithm 
+ * for any generic Java object which implements a {@link Comparator}.
+ * 
+ * <p>
+ * This implementation uses an iterative implementation approach over the more 
+ * classical recursive approach in order to save the auxiliary space required 
+ * by the call stack in recursive implementations.
+ * </p>
+ * 
+ * Complexity:
+ * <ul>
+ *     <li>Worst case time: O(n log n)</li>
+ *     <li>Best case time: O(n log n)</li>
+ *     <li>Average case time: O(n log n)</li>
+ *     <li>Space: O(n log n)</li>
+ * </ul>
+ * 
+ * @author Alistair Oldfield
+ * 
+ * @see https://en.wikipedia.org/wiki/Merge_sort
+ *
+ */
+public class IterativeMergeSort
+{
+
+    /**
+     * Sorts this list according to the order induced by the specified
+     * {@link Comparator}.
+     * 
+     * @param  <T> the class of the objects in the list
+     * @param  list the list to be sorted.
+     * @param  cmp the comparator to determine the order of the list.
+     * 
+     * @see java.util.Collections.sort(List<T>, Comparator<? super T>)
+     * 
+     */
+    @SuppressWarnings({ "unchecked", "rawtypes"})
+    public static <T> void sort(List<T> list, Comparator<? super T> cmp)
+    {
+
+        if (list.size() < 2)
+        {
+            return;
+        }
+        Object[] arr = list.toArray();
+        iterativeMergeSort(arr, (Comparator) cmp);
+
+        ListIterator<T> i = list.listIterator();
+        for (Object e : arr)
+        {
+            i.next();
+            i.set((T) e);
+        }
+    }
+
+    /**
+     * Sorts the array using iterative (bottom-up) merge sort.
+     *
+     * @param <T> the class of the objects in the list
+     * @param arr the array of objects to be sorted.
+     * @param cmp the comparator to determine the order of the list.
+     */
+    private static <T> void iterativeMergeSort(T[] arr, Comparator<? super T> cmp)
+    {
+
+        T[] aux = arr.clone();
+
+        for (int blockSize = 1; blockSize < arr.length; blockSize = (blockSize << 1))
+        {
+            for (int start = 0; start < arr.length; start += (blockSize << 1))
+            {
+                merge(arr, aux, start, start + blockSize, start + (blockSize << 1), cmp);
+            }
+        }
+        return;
+    }
+
+    /**
+     * Merges two sorted subarrays arr and aux into the order specified by cmp and places the
+     * ordered result back into into arr array.
+     *
+     * @param <T>
+     * @param arr Array containing source data to be sorted and target for destination data
+     * @param aux Array containing copy of source data to be sorted
+     * @param from Start index of left data run so that Left run is arr[from : mid-1].
+     * @param mid End index of left data run and start index of right run data so that Left run is
+     * arr[from : mid-1] and Right run is arr[mid : to]
+     * @param to End index of right run data so that Right run is arr[mid : to]
+     * @param cmp the comparator to determine the order of the list.
+     */
+    private static <T> void merge(T[] arr, T[] aux, int from, int mid, int to, Comparator<? super T> cmp)
+    {
+        if (mid >= arr.length)
+        {
+            return;
+        }
+        if (to > arr.length)
+        {
+            to = arr.length;
+        }
+        int i = from, j = mid;
+        for (int k = from; k < to; k++)
+        {
+            if (i == mid)
+            {
+                aux[k] = arr[j++];
+            }
+            else if (j == to)
+            {
+                aux[k] = arr[i++];
+            }
+            else if (cmp.compare(arr[j], arr[i]) < 0)
+            {
+                aux[k] = arr[j++];
+            }
+            else
+            {
+                aux[k] = arr[i++];
+            }
+        }
+        System.arraycopy(aux, from, arr, from, to - from);
+    }
+}

Propchange: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/IterativeMergeSort.java
------------------------------------------------------------------------------
    svn:eol-style = native

Copied: pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/util/TestSort.java (from r1894994, pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/util/TestQuickSort.java)
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/util/TestSort.java?p2=pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/util/TestSort.java&p1=pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/util/TestQuickSort.java&r1=1894994&r2=1895014&rev=1895014&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/util/TestQuickSort.java (original)
+++ pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/util/TestSort.java Sun Nov 14 09:44:32 2021
@@ -28,14 +28,17 @@ import org.junit.jupiter.api.Test;
  *
  * @author Uwe Pachler
  */
-class TestQuickSort
+class TestSort
 {
 
     <T extends Comparable<T>> void doTest(T[] input, T[] expected)
     {
-        List<T> list = Arrays.asList(input);
+        List<T> list = Arrays.asList(input.clone());
         QuickSort.sort(list);
+        assertTrue(Arrays.equals(list.toArray(new Object[input.length]), expected));
 
+        list = Arrays.asList(input.clone());
+        IterativeMergeSort.sort(list, Comparable::compareTo);
         assertTrue(Arrays.equals(list.toArray(new Object[input.length]), expected));
     }