You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2014/10/12 15:07:23 UTC

svn commit: r1631170 - in /pdfbox/branches/1.8/pdfbox: ./ src/main/java/org/apache/pdfbox/util/PDFTextStripper.java src/main/java/org/apache/pdfbox/util/QuickSort.java src/test/java/org/apache/pdfbox/util/TestQuickSort.java

Author: lehmi
Date: Sun Oct 12 13:07:22 2014
New Revision: 1631170

URL: http://svn.apache.org/r1631170
Log:
PDFBOX-1512: don't use Collections.sort for JDKs >= 1.7, to avoid an IllegalArgumentException (fix proposed by Uwe Pachler)

Added:
    pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/util/QuickSort.java
      - copied unchanged from r1631169, pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/QuickSort.java
    pdfbox/branches/1.8/pdfbox/src/test/java/org/apache/pdfbox/util/TestQuickSort.java
      - copied unchanged from r1631169, pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/util/TestQuickSort.java
Modified:
    pdfbox/branches/1.8/pdfbox/   (props changed)
    pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStripper.java

Propchange: pdfbox/branches/1.8/pdfbox/
------------------------------------------------------------------------------
--- svn:mergeinfo (added)
+++ svn:mergeinfo Sun Oct 12 13:07:22 2014
@@ -0,0 +1 @@
+/pdfbox/trunk/pdfbox:1460370,1460372,1461796,1465163,1465217,1465266,1465359-1465360,1465366,1467638,1468195,1469999,1470107,1470242,1476795,1477806,1478472,1479136,1479287,1480000,1480030,1484089,1484277,1484547,1485771,1485781,1486337,1486413,1486423,1486440,1487557,1488049,1490022-1490023,1490408,1493503,1494083,1495799,1495802-1495803,1497532,1504210,1504214,1505737,1509187,1512367,1512433,1512661,1515101,1515165,1515905,1517273,1517281,1517288,1519174,1521194,1528826,1528833,1530018,1530740,1535953,1535956,1536136,1536173,1536441,1536463,1537287,1538191,1538203,1538222,1538341,1538371,1538394-1538395,1540801,1540811,1541625,1541714,1541987,1542291,1542780,1544789,1544806,1544818,1544972,1544977,1548387,1548394,1549022,1549025,1549027,1551220,1552521,1552533,1552552,1553017,1553175,1553220,1554632,1554774,1554792,1554845,1555186,1555345,1555550,1557339,1557374,1557546,1557553,1557561,1557793,1558205,1558570,1561095,1561191,1563199,1563210,1563215,1563426,1563429,1564846,1588736,
 1598655,1598885,1599016,1599656,1599786,1601144,1601451,1631169

Modified: pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStripper.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStripper.java?rev=1631170&r1=1631169&r2=1631170&view=diff
==============================================================================
--- pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStripper.java (original)
+++ pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStripper.java Sun Oct 12 13:07:22 2014
@@ -68,6 +68,8 @@ public class PDFTextStripper extends PDF
     private static float DEFAULT_INDENT_THRESHOLD = 2.0f;
     private static float DEFAULT_DROP_THRESHOLD = 2.5f;
 
+    private static final boolean useCustomQuicksort;
+    
     //enable the ability to set the default indent/drop thresholds
     //with -D system properties:
     //    pdftextstripper.indent
@@ -111,6 +113,15 @@ public class PDFTextStripper extends PDF
                 //ignore and use default
             }
         }
+        
+        // check if we need to use the custom quicksort algorithm as a
+        // workaround for the non-transitivity of TextPositionComparator:
+        // https://issues.apache.org/jira/browse/PDFBOX-1512
+        // JDK 9+ reports "9", "9-ea", "11.0.2", ... (no "1." prefix, maybe one component), so
+        // never parse components as ints here: only "1.x" with x <= 6 means JDK 6 or older.
+        String[] v = System.getProperty("java.version").split("\\.");
+        boolean is16orLess = v.length > 1 && "1".equals(v[0]) && v[1].matches("[0-6]");
+        useCustomQuicksort = !is16orLess;
     }
 
     /**
@@ -566,7 +577,17 @@ public class PDFTextStripper extends PDF
             if( getSortByPosition() )
             {
                 TextPositionComparator comparator = new TextPositionComparator();
-                Collections.sort( textList, comparator );
+                // because the TextPositionComparator is not transitive, and
+                // Collections.sort on JDK7+ may throw an IllegalArgumentException for such
+                // comparators, we use a custom quicksort implementation (which is slower, unfortunately).
+                if(useCustomQuicksort) 
+                {
+                	QuickSort.sort( textList, comparator );
+                } 
+                else 
+                {
+                	Collections.sort( textList, comparator );
+                }
             }
             Iterator<TextPosition> textIter = textList.iterator();
             /* Before we can display the text, we need to do some normalizing.