You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2014/10/12 14:56:55 UTC

svn commit: r1631169 - in /pdfbox/trunk/pdfbox/src: main/java/org/apache/pdfbox/util/PDFTextStripper.java main/java/org/apache/pdfbox/util/QuickSort.java test/java/org/apache/pdfbox/util/TestQuickSort.java

Author: lehmi
Date: Sun Oct 12 12:56:54 2014
New Revision: 1631169

URL: http://svn.apache.org/r1631169
Log:
PDFBOX-1512: don't use Collections.sort for JDKs >= 1.7 to avoid an IllegalArgumentException as proposed by Uwe Pachler

Added:
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/QuickSort.java   (with props)
    pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/util/TestQuickSort.java   (with props)
Modified:
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStripper.java

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStripper.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStripper.java?rev=1631169&r1=1631168&r2=1631169&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStripper.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStripper.java Sun Oct 12 12:56:54 2014
@@ -64,6 +64,8 @@ public class PDFTextStripper extends PDF
     private static float DEFAULT_INDENT_THRESHOLD = 2.0f;
     private static float DEFAULT_DROP_THRESHOLD = 2.5f;
 
+    private static final boolean useCustomQuicksort;
+    
     // enable the ability to set the default indent/drop thresholds
     // with -D system properties:
     //    pdftextstripper.indent
@@ -106,6 +108,15 @@ public class PDFTextStripper extends PDF
                 // ignore and use default
             }
         }
+        
+        // check if we need to use the custom quicksort algorithm as a 
+        // workaround to the transitivity issue of TextPositionComparator:
+        // https://issues.apache.org/jira/browse/PDFBOX-1512
+        String[] versionComponents = System.getProperty("java.version").split("\\.");
+        int javaMajorVersion = Integer.parseInt(versionComponents[0]);
+        int javaMinorVersion = Integer.parseInt(versionComponents[1]);
+        boolean is16orLess = javaMajorVersion == 1 && javaMinorVersion <= 6;
+        useCustomQuicksort = !is16orLess;
     }
 
     /**
@@ -474,7 +485,18 @@ public class PDFTextStripper extends PDF
             if (getSortByPosition())
             {
                 TextPositionComparator comparator = new TextPositionComparator();
-                Collections.sort(textList, comparator);
+                				
+                // because the TextPositionComparator is not transitive, but 
+                // JDK7+ enforces transitivity on comparators, we need to use
+                // a custom quicksort implementation (which is slower, unfortunately).
+                if(useCustomQuicksort) 
+                {
+                	QuickSort.sort( textList, comparator );
+                } 
+                else 
+                {
+                	Collections.sort( textList, comparator );
+                }
             }
             Iterator<TextPosition> textIter = textList.iterator();
             // Before we can display the text, we need to do some normalizing.

Added: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/QuickSort.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/QuickSort.java?rev=1631169&view=auto
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/QuickSort.java (added)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/QuickSort.java Sun Oct 12 12:56:54 2014
@@ -0,0 +1,138 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.util;
+
+import java.util.Comparator;
+import java.util.List;
+
+/**
+ * In-place quicksort for lists, see http://de.wikipedia.org/wiki/Quicksort.
+ * <p>
+ * The implementation never validates the comparator, so unlike Collections.sort
+ * on JDK 7 or later it does not throw an IllegalArgumentException when the
+ * comparator is not transitive (PDFBOX-1512).
+ *
+ * @author Uwe Pachler
+ */
+public final class QuickSort
+{
+
+    private QuickSort()
+    {
+        // utility class, not meant to be instantiated
+    }
+
+    /**
+     * Sorts the given list in place using the given comparator. The sort is not
+     * stable. Elements are accessed by index through get/set.
+     *
+     * @param <T> type of the list elements
+     * @param list list to be sorted
+     * @param cmp comparator used to compare the objects within the list
+     */
+    public static <T> void sort(List<T> list, Comparator<? super T> cmp)
+    {
+        int size = list.size();
+        if (size < 2)
+        {
+            return;
+        }
+        quicksort(list, cmp, 0, size - 1);
+    }
+
+    /**
+     * Sorts the given list in place using compareTo as comparator.
+     *
+     * @param <T> type of the list elements
+     * @param list list to be sorted
+     */
+    public static <T extends Comparable<? super T>> void sort(List<T> list)
+    {
+        sort(list, new Comparator<T>()
+        {
+            public int compare(T object1, T object2)
+            {
+                return object1.compareTo(object2);
+            }
+        });
+    }
+
+    /**
+     * Sorts the range [left, right] of the list. Only the smaller partition is
+     * handled recursively, the larger one is handled by the loop, so the recursion
+     * depth grows only logarithmically with the range size, even for sorted input.
+     */
+    private static <T> void quicksort(List<T> list, Comparator<? super T> cmp, int left, int right)
+    {
+        while (left < right)
+        {
+            int splitter = split(list, cmp, left, right);
+            if (splitter - left < right - splitter)
+            {
+                quicksort(list, cmp, left, splitter - 1);
+                left = splitter + 1;
+            }
+            else
+            {
+                quicksort(list, cmp, splitter + 1, right);
+                right = splitter - 1;
+            }
+        }
+    }
+
+    private static <T> void swap(List<T> list, int i, int j)
+    {
+        T tmp = list.get(i);
+        list.set(i, list.get(j));
+        list.set(j, tmp);
+    }
+
+    /**
+     * Partitions the range [left, right] around the pivot list.get(right) and
+     * returns the index that separates the two partitions.
+     */
+    private static <T> int split(List<T> list, Comparator<? super T> cmp, int left, int right)
+    {
+        int i = left;
+        int j = right - 1;
+        T pivot = list.get(right);
+        do
+        {
+            // skip elements on the left that are not greater than the pivot
+            while (i < right && cmp.compare(list.get(i), pivot) <= 0)
+            {
+                ++i;
+            }
+            // skip elements on the right that are not less than the pivot
+            while (j > left && cmp.compare(pivot, list.get(j)) <= 0)
+            {
+                --j;
+            }
+            if (i < j)
+            {
+                swap(list, i, j);
+            }
+        } while (i < j);
+
+        // move the pivot in front of the element i stopped on, if that one is greater
+        if (cmp.compare(pivot, list.get(i)) < 0)
+        {
+            swap(list, i, right);
+        }
+        return i;
+    }
+}

Propchange: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/QuickSort.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/util/TestQuickSort.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/util/TestQuickSort.java?rev=1631169&view=auto
==============================================================================
--- pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/util/TestQuickSort.java (added)
+++ pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/util/TestQuickSort.java Sun Oct 12 12:56:54 2014
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.util;
+
+import java.util.Arrays;
+import java.util.List;
+import junit.framework.TestCase;
+
+/**
+ * Tests for {@link QuickSort}.
+ *
+ * @author Uwe Pachler
+ */
+public class TestQuickSort extends TestCase
+{
+
+    /**
+     * Sorts the input with QuickSort and checks the result against the expected order.
+     *
+     * @param input elements to be sorted
+     * @param expected the elements of input in ascending order
+     */
+    <T extends Comparable<T>> void doTest(T[] input, T[] expected)
+    {
+        // Arrays.asList returns a view, sorting the list also reorders the input
+        // array. The result must therefore be compared to expected, not to input.
+        List<T> list = Arrays.asList(input);
+        QuickSort.sort(list);
+
+        assertEquals(Arrays.asList(expected), list);
+    }
+
+    /**
+     * Test for different cases.
+     */
+    public void testSort()
+    {
+
+        {
+            Integer[] input = new Integer[] { 9, 8, 7, 6, 5, 4, 3, 2, 1 };
+            Integer[] expected = new Integer[] { 1, 2, 3, 4, 5, 6, 7, 8, 9 };
+            doTest(input, expected);
+        }
+
+        {
+            Integer[] input = new Integer[] { 4, 3, 2, 1, 9, 8, 7, 6, 5 };
+            Integer[] expected = new Integer[] { 1, 2, 3, 4, 5, 6, 7, 8, 9 };
+            doTest(input, expected);
+        }
+
+        {
+            Integer[] input = new Integer[] {};
+            Integer[] expected = new Integer[] {};
+            doTest(input, expected);
+        }
+
+        {
+            Integer[] input = new Integer[] { 5 };
+            Integer[] expected = new Integer[] { 5 };
+            doTest(input, expected);
+        }
+
+        {
+            Integer[] input = new Integer[] { 5, 6 };
+            Integer[] expected = new Integer[] { 5, 6 };
+            doTest(input, expected);
+        }
+
+        {
+            Integer[] input = new Integer[] { 6, 5 };
+            Integer[] expected = new Integer[] { 5, 6 };
+            doTest(input, expected);
+        }
+
+        {
+            // duplicates must survive the partitioning
+            Integer[] input = new Integer[] { 3, 1, 3, 2, 1 };
+            Integer[] expected = new Integer[] { 1, 1, 2, 3, 3 };
+            doTest(input, expected);
+        }
+
+    }
+}

Propchange: pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/util/TestQuickSort.java
------------------------------------------------------------------------------
    svn:eol-style = native