You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by tp...@apache.org on 2015/03/02 21:12:17 UTC

svn commit: r1663415 - /tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java

Author: tpalsulich
Date: Mon Mar  2 20:12:17 2015
New Revision: 1663415

URL: http://svn.apache.org/r1663415
Log:
TIKA-758. Remove the PDFBOX-2160 (paragraph start/end tracking) and PDFBOX-2161 (NullPointerException catch) workarounds in PDF2XHTML.

Modified:
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java?rev=1663415&r1=1663414&r2=1663415&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java Mon Mar  2 20:12:17 2015
@@ -94,10 +94,6 @@ class PDF2XHTML extends PDFTextStripper
      */
     private final static int MAX_ACROFORM_RECURSIONS = 10;
 
-
-    // TODO: remove once PDFBOX-2160 is fixed:
-    private boolean inParagraph = false;
-
     /**
      * This keeps track of the pdf object ids for inline
      * images that have been processed.  If {@link PDFParserConfig#getExtractUniqueInlineImagesOnly()
@@ -399,13 +395,6 @@ class PDF2XHTML extends PDFTextStripper
 
     @Override
     protected void writeParagraphStart() throws IOException {
-        // TODO: remove once PDFBOX-2160 is fixed
-        if (inParagraph) {
-            // Close last paragraph
-            writeParagraphEnd();
-        }
-        assert !inParagraph;
-        inParagraph = true;
         try {
             handler.startElement("p");
         } catch (SAXException e) {
@@ -415,12 +404,6 @@ class PDF2XHTML extends PDFTextStripper
 
     @Override
     protected void writeParagraphEnd() throws IOException {
-        // TODO: remove once PDFBOX-2160 is fixed
-        if (!inParagraph) {
-            writeParagraphStart();
-        }
-        assert inParagraph;
-        inParagraph = false;
         try {
             handler.endElement("p");
         } catch (SAXException e) {
@@ -661,8 +644,6 @@ class PDF2XHTML extends PDFTextStripper
             }
         } catch (IOException e) {
             //swallow
-        } catch (NullPointerException e) {
-            //TODO: remove once PDFBOX-2161 is fixed
         }
 
         if (attrs.getLength() > 0 || sb.length() > 0) {