You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2014/10/12 14:56:55 UTC
svn commit: r1631169 - in /pdfbox/trunk/pdfbox/src:
main/java/org/apache/pdfbox/util/PDFTextStripper.java
main/java/org/apache/pdfbox/util/QuickSort.java
test/java/org/apache/pdfbox/util/TestQuickSort.java
Author: lehmi
Date: Sun Oct 12 12:56:54 2014
New Revision: 1631169
URL: http://svn.apache.org/r1631169
Log:
PDFBOX-1512: don't use Collections.sort for JDKs >= 1.7 to avoid an IllegalArgumentException as proposed by Uwe Pachler
Added:
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/QuickSort.java (with props)
pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/util/TestQuickSort.java (with props)
Modified:
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStripper.java
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStripper.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStripper.java?rev=1631169&r1=1631168&r2=1631169&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStripper.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStripper.java Sun Oct 12 12:56:54 2014
@@ -64,6 +64,8 @@ public class PDFTextStripper extends PDF
private static float DEFAULT_INDENT_THRESHOLD = 2.0f;
private static float DEFAULT_DROP_THRESHOLD = 2.5f;
+ private static final boolean useCustomQuicksort;
+
// enable the ability to set the default indent/drop thresholds
// with -D system properties:
// pdftextstripper.indent
@@ -106,6 +108,15 @@ public class PDFTextStripper extends PDF
// ignore and use default
}
}
+
+ // check if we need to use the custom quicksort algorithm as a
+ // workaround to the transitivity issue of TextPositionComparator:
+ // https://issues.apache.org/jira/browse/PDFBOX-1512
+ //
+ // Only a legacy "1.x" version string can denote Java 6 or older. Newer
+ // JDKs may report values without a second numeric component ("9", "17",
+ // "9-ea"), so the string is parsed defensively. A missing or unparseable
+ // value selects the custom quicksort, which works on every JDK.
+ String javaVersion = System.getProperty("java.version");
+ boolean is16orLess = false;
+ if (javaVersion != null)
+ {
+     String[] versionComponents = javaVersion.split("\\.");
+     if (versionComponents.length > 1 && "1".equals(versionComponents[0]))
+     {
+         try
+         {
+             is16orLess = Integer.parseInt(versionComponents[1]) <= 6;
+         }
+         catch (NumberFormatException ignored)
+         {
+             // non-numeric minor version: keep the safe default (custom quicksort)
+         }
+     }
+ }
+ useCustomQuicksort = !is16orLess;
}
/**
@@ -474,7 +485,18 @@ public class PDFTextStripper extends PDF
if (getSortByPosition())
{
TextPositionComparator comparator = new TextPositionComparator();
- Collections.sort(textList, comparator);
+
+ // TextPositionComparator is not transitive, and since JDK 7
+ // Collections.sort may throw an IllegalArgumentException when a
+ // comparator violates its contract. To avoid that we use a custom
+ // quicksort implementation (which is slower, unfortunately).
+ if(useCustomQuicksort)
+ {
+ QuickSort.sort( textList, comparator );
+ }
+ else
+ {
+ Collections.sort( textList, comparator );
+ }
}
Iterator<TextPosition> textIter = textList.iterator();
// Before we can display the text, we need to do some normalizing.
Added: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/QuickSort.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/QuickSort.java?rev=1631169&view=auto
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/QuickSort.java (added)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/QuickSort.java Sun Oct 12 12:56:54 2014
@@ -0,0 +1,113 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.util;
+
+import java.util.Comparator;
+import java.util.List;
+
+/**
+ * In-place quicksort for lists, see http://de.wikipedia.org/wiki/Quicksort.
+ *
+ * The last element of each range is used as pivot. The sort is not stable.
+ * The implementation never checks the comparator for consistency, so it does
+ * not raise an exception merely because a comparator is not transitive.
+ *
+ * @author Uwe Pachler
+ */
+public final class QuickSort
+{
+
+    private QuickSort()
+    {
+        // utility class, not meant to be instantiated
+    }
+
+    /**
+     * Sorts the given list in place using the given comparator.
+     *
+     * @param <T> type of the list elements
+     * @param list list to be sorted
+     * @param cmp comparator used to compare the objects within the list
+     */
+    public static <T> void sort(List<T> list, Comparator<? super T> cmp)
+    {
+        int size = list.size();
+        if (size < 2)
+        {
+            // nothing to do for empty or single-element lists
+            return;
+        }
+        quicksort(list, cmp, 0, size - 1);
+    }
+
+    /**
+     * Sorts the given list in place using compareTo as comparator.
+     *
+     * @param <T> type of the list elements
+     * @param list list to be sorted
+     */
+    public static <T extends Comparable<? super T>> void sort(List<T> list)
+    {
+        sort(list, new Comparator<T>()
+        {
+            public int compare(T object1, T object2)
+            {
+                return object1.compareTo(object2);
+            }
+        });
+    }
+
+    /**
+     * Sorts the range [left, right] of the list.
+     *
+     * Only the smaller partition is handled recursively, the larger one is
+     * processed by the loop. This keeps the recursion depth logarithmic even
+     * for already sorted input, where the pivot choice produces maximally
+     * unbalanced partitions. Both partitions are disjoint, so the order in
+     * which they are processed does not influence the result.
+     */
+    private static <T> void quicksort(List<T> list, Comparator<? super T> cmp, int left, int right)
+    {
+        while (left < right)
+        {
+            int splitter = split(list, cmp, left, right);
+            if (splitter - left < right - splitter)
+            {
+                quicksort(list, cmp, left, splitter - 1);
+                left = splitter + 1;
+            }
+            else
+            {
+                quicksort(list, cmp, splitter + 1, right);
+                right = splitter - 1;
+            }
+        }
+    }
+
+    /**
+     * Exchanges the elements at the positions i and j.
+     */
+    private static <T> void swap(List<T> list, int i, int j)
+    {
+        T tmp = list.get(i);
+        list.set(i, list.get(j));
+        list.set(j, tmp);
+    }
+
+    /**
+     * Partitions the range [left, right] around the pivot list.get(right).
+     *
+     * @return the index the caller uses to split the range into
+     * [left, index - 1] and [index + 1, right]
+     */
+    private static <T> int split(List<T> list, Comparator<? super T> cmp, int left, int right)
+    {
+        int i = left;
+        int j = right - 1;
+        T pivot = list.get(right);
+        do
+        {
+            // the index bounds are tested first, so the comparator is only
+            // invoked for positions which are still inside the range
+            while (i < right && cmp.compare(list.get(i), pivot) <= 0)
+            {
+                ++i;
+            }
+            while (j > left && cmp.compare(pivot, list.get(j)) <= 0)
+            {
+                --j;
+            }
+            if (i < j)
+            {
+                swap(list, i, j);
+            }
+
+        } while (i < j);
+
+        // move the pivot to its final position if a larger element was found
+        if (cmp.compare(pivot, list.get(i)) < 0)
+        {
+            swap(list, i, right);
+        }
+        return i;
+    }
+}
Propchange: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/QuickSort.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/util/TestQuickSort.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/util/TestQuickSort.java?rev=1631169&view=auto
==============================================================================
--- pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/util/TestQuickSort.java (added)
+++ pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/util/TestQuickSort.java Sun Oct 12 12:56:54 2014
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.util;
+
+import java.util.Arrays;
+import java.util.List;
+import junit.framework.TestCase;
+
+/**
+ * Tests for {@link QuickSort}.
+ *
+ * @author Uwe Pachler
+ */
+public class TestQuickSort extends TestCase
+{
+
+    /**
+     * Sorts the input and compares the result with the expected order.
+     *
+     * The list is a view of the input array, so the array itself is sorted.
+     * The result must therefore be compared with the expected array, as a
+     * comparison with the input array would always succeed.
+     *
+     * @param input the values to be sorted, modified in place
+     * @param expected the values in the expected order
+     */
+    <T extends Comparable<T>> void doTest(T[] input, T[] expected)
+    {
+        List<T> list = Arrays.asList(input);
+        QuickSort.sort(list);
+
+        assertEquals(Arrays.asList(expected), list);
+    }
+
+    /**
+     * Test for different cases.
+     */
+    public void testSort()
+    {
+        // descending input
+        doTest(new Integer[] { 9, 8, 7, 6, 5, 4, 3, 2, 1 },
+                new Integer[] { 1, 2, 3, 4, 5, 6, 7, 8, 9 });
+
+        // two descending runs
+        doTest(new Integer[] { 4, 3, 2, 1, 9, 8, 7, 6, 5 },
+                new Integer[] { 1, 2, 3, 4, 5, 6, 7, 8, 9 });
+
+        // empty input
+        doTest(new Integer[] {}, new Integer[] {});
+
+        // single element
+        doTest(new Integer[] { 5 }, new Integer[] { 5 });
+
+        // two elements, already sorted
+        doTest(new Integer[] { 5, 6 }, new Integer[] { 5, 6 });
+
+        // two elements, reversed
+        doTest(new Integer[] { 6, 5 }, new Integer[] { 5, 6 });
+
+        // duplicate values
+        doTest(new Integer[] { 3, 1, 3, 2, 1, 2 },
+                new Integer[] { 1, 1, 2, 2, 3, 3 });
+    }
+}
Propchange: pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/util/TestQuickSort.java
------------------------------------------------------------------------------
svn:eol-style = native