You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by ja...@apache.org on 2014/06/17 07:10:23 UTC
svn commit: r1603056 - in /pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox: pdmodel/graphics/state/PDTextState.java util/PDFStreamEngine.java util/PDFTextStripper.java util/operator/SetHorizontalTextScaling.java util/operator/ShowTextGlyph.java

Author: jahewson
Date: Tue Jun 17 05:10:23 2014
New Revision: 1603056

URL: http://svn.apache.org/r1603056
Log:
PDFBOX-2145: Clean up PDFStreamEngine and PDFTextStripper

Modified:
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/state/PDTextState.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFStreamEngine.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStripper.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/operator/SetHorizontalTextScaling.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/operator/ShowTextGlyph.java

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/state/PDTextState.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/state/PDTextState.java?rev=1603056&r1=1603055&r2=1603056&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/state/PDTextState.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/state/PDTextState.java Tue Jun 17 05:10:23 2014
@@ -119,7 +119,7 @@ public class PDTextState implements Clon
      *
      * @return The horizontalScaling.
      */
-    public float getHorizontalScalingPercent()
+    public float getHorizontalScaling()
     {
         return horizontalScaling;
     }
@@ -129,7 +129,7 @@ public class PDTextState implements Clon
      *
      * @param value The horizontalScaling.
      */
-    public void setHorizontalScalingPercent(float value)
+    public void setHorizontalScaling(float value)
     {
         horizontalScaling = value;
     }

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFStreamEngine.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFStreamEngine.java?rev=1603056&r1=1603055&r2=1603056&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFStreamEngine.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFStreamEngine.java Tue Jun 17 05:10:23 2014
@@ -42,7 +42,6 @@ import org.apache.pdfbox.pdmodel.font.PD
 import org.apache.pdfbox.pdmodel.font.PDType3Font;
 import org.apache.pdfbox.pdmodel.graphics.state.PDExtendedGraphicsState;
 import org.apache.pdfbox.pdmodel.graphics.state.PDGraphicsState;
-import org.apache.pdfbox.pdmodel.graphics.color.PDColorSpace;
 import org.apache.pdfbox.pdmodel.graphics.PDXObject;
 import org.apache.pdfbox.text.TextPosition;
 import org.apache.pdfbox.util.operator.OperatorProcessor;
@@ -51,6 +50,7 @@ import org.apache.pdfbox.util.operator.P
 /**
  * Processes a PDF content stream and executes certain operations.
  * Provides a callback interface for clients that want to do things with the stream.
+ *
  * {@see org.apache.pdfbox.util.PDFTextStripper}
  * @author Ben Litchfield
  */
@@ -69,9 +69,6 @@ public class PDFStreamEngine
 
     private Stack<PDResources> streamResourcesStack = new Stack<PDResources>();
 
-    private int validCharCnt;
-    private int totalCharCnt;
-
     private int pageRotation;
     private PDRectangle drawingRectangle;
 
@@ -86,8 +83,9 @@ public class PDFStreamEngine
     }
 
     /**
-     * Constructor with engine properties. The property keys are all PDF operators, the values are class names used to
-     * execute those operators. An empty value means that the operator will be silently ignored.
+     * Constructor with engine properties. The property keys are all PDF operators, the values are
+     * class names used to execute those operators. An empty value means that the operator will be
+     * silently ignored.
      * 
      * @param properties The engine properties.
      */
@@ -166,15 +164,13 @@ public class PDFStreamEngine
     }
 
     /**
-     * This method must be called between processing documents. The PDFStreamEngine caches information for the document
-     * between pages and this will release the cached information. This only needs to be called if processing a new
-     * document.
-     *
+     * This method must be called between processing documents. The PDFStreamEngine caches
+     * information for the document between pages and this will release the cached information.
+     * This only needs to be called if processing a new document.
      */
     public void resetEngine()
     {
-        validCharCnt = 0;
-        totalCharCnt = 0;
+        // overridden in subclasses
     }
 
     /**
@@ -201,11 +197,10 @@ public class PDFStreamEngine
      * @param cosStream the Stream to execute.
      * @param drawingSize the size of the page
      * @param rotation the page rotation
-     * 
      * @throws IOException if there is an error accessing the stream.
      */
-    public void processStream(PDResources resources, COSStream cosStream, PDRectangle drawingSize, int rotation)
-            throws IOException
+    public void processStream(PDResources resources, COSStream cosStream, PDRectangle drawingSize,
+                              int rotation) throws IOException
     {
         initStream(drawingSize, rotation);
         processSubStream(resources, cosStream);
@@ -216,7 +211,6 @@ public class PDFStreamEngine
      * 
      * @param resources The resources used when processing the stream.
      * @param cosStream The stream to process.
-     * 
      * @throws IOException If there is an exception while processing the stream.
      */
     public void processSubStream(PDResources resources, COSStream cosStream) throws IOException
@@ -275,8 +269,8 @@ public class PDFStreamEngine
     }
 
     /**
-     * A method provided as an event interface to allow a subclass to perform some specific functionality when text
-     * needs to be processed.
+     * A method provided as an event interface to allow a subclass to perform some specific
+     * functionality when text needs to be processed.
      * 
      * @param text The text to be processed.
      */
@@ -286,37 +280,22 @@ public class PDFStreamEngine
     }
 
     /**
-     * A method provided as an event interface to allow a subclass to perform some specific functionality on the string
-     * encoded by a glyph.
-     * 
-     * @param str The string to be processed.
-     * 
-     * @return the altered string
-     */
-    protected String inspectFontEncoding(String str)
-    {
-        return str;
-    }
-
-    /**
-     * Process encoded text from the PDF Stream. You should override this method if you want to perform an action when
-     * encoded text is being processed.
+     * Process encoded text from the PDF Stream. You should override this method if you want to
+     * perform an action when encoded text is being processed.
      * 
      * @param string The encoded text
-     * 
      * @throws IOException If there is an error processing the string
      */
     public void processEncodedText(byte[] string) throws IOException
     {
-        /*
-         * Note on variable names. There are three different units being used in this code. Character sizes are given in
-         * glyph units, text locations are initially given in text units, and we want to save the data in display units.
-         * The variable names should end with Text or Disp to represent if the values are in text or disp units (no
-         * glyph units are saved).
-         */
+        // Note on variable names. There are three different units being used in this code.
+        // Character sizes are given in glyph units, text locations are initially given in text
+        // units, and we want to save the data in display units. The variable names should end with
+        // Text or Disp to represent if the values are in text or disp units (no glyph units are
+        // saved).
+
         final float fontSizeText = graphicsState.getTextState().getFontSize();
-        final float horizontalScalingText = graphicsState.getTextState().getHorizontalScalingPercent() / 100f;
-        // float verticalScalingText = horizontalScaling;//not sure if this is right but what else to do???
+        final float horizontalScalingText = graphicsState.getTextState().getHorizontalScaling() / 100f;
         final float riseText = graphicsState.getTextState().getRise();
         final float wordSpacingText = graphicsState.getTextState().getWordSpacing();
         final float characterSpacingText = graphicsState.getTextState().getCharacterSpacing();
@@ -327,7 +306,7 @@ public class PDFStreamEngine
         // were a single byte will result in two output characters "fi"
 
         final PDFont font = graphicsState.getTextState().getFont();
-        // all fonts are providing the width/height of a character in thousandths of a unit of text space
+        // all fonts have the width/height of a character in thousandths of a unit of text space
         float fontMatrixXScaling = 1 / 1000f;
         float fontMatrixYScaling = 1 / 1000f;
         float glyphSpaceToTextSpaceFactor = 1 / 1000f;
@@ -344,9 +323,8 @@ public class PDFStreamEngine
         float spaceWidthText = 0;
         try
         {
-            // to avoid crash as described in PDFBOX-614
-            // lets see what the space displacement should be
-            spaceWidthText = (font.getSpaceWidth() * glyphSpaceToTextSpaceFactor);
+            // to avoid crash as described in PDFBOX-614, see what the space displacement should be
+            spaceWidthText = font.getSpaceWidth() * glyphSpaceToTextSpaceFactor;
         }
         catch (Throwable exception)
         {
@@ -355,9 +333,8 @@ public class PDFStreamEngine
 
         if (spaceWidthText == 0)
         {
-            spaceWidthText = (font.getAverageFontWidth() * glyphSpaceToTextSpaceFactor);
-            // The average space width appears to be higher than necessary
-            // so lets make it a little bit smaller.
+            spaceWidthText = font.getAverageFontWidth() * glyphSpaceToTextSpaceFactor;
+            // the average space width appears to be higher than necessary so make it smaller
             spaceWidthText *= .80f;
         }
         if (spaceWidthText == 0)
@@ -380,13 +357,13 @@ public class PDFStreamEngine
         Matrix td = new Matrix();
         Matrix tempMatrix = new Matrix();
 
-        int codeLength = 1;
+        int codeLength;
         for (int i = 0; i < string.length; i += codeLength)
         {
             // Decode the value to a Unicode character
             codeLength = 1;
             String c = font.encode(string, i, codeLength);
-            int[] codePoints = null;
+            int[] codePoints;
             if (c == null && i + 1 < string.length)
             {
                 // maybe a multibyte encoding
@@ -400,19 +377,20 @@ public class PDFStreamEngine
             }
 
             // the space width has to be transformed into display units
-            float spaceWidthDisp = spaceWidthText * fontSizeText * horizontalScalingText * textMatrix.getXScale()
-                    * ctm.getXScale();
+            float spaceWidthDisp = spaceWidthText * fontSizeText * horizontalScalingText *
+                    textMatrix.getXScale()  * ctm.getXScale();
 
-            // todo, handle horizontal displacement
+            // TODO: handle horizontal displacement
             // get the width and height of this character in text units
-            float characterHorizontalDisplacementText = font.getFontWidth(string, i, codeLength);
-            float characterVerticalDisplacementText = font.getFontHeight(string, i, codeLength);
+            float charHorizontalDisplacementText = font.getFontWidth(string, i, codeLength);
+            float charVerticalDisplacementText = font.getFontHeight(string, i, codeLength);
 
             // multiply the width/height with the scaling factor
-            characterHorizontalDisplacementText = characterHorizontalDisplacementText * fontMatrixXScaling;
-            characterVerticalDisplacementText = characterVerticalDisplacementText * fontMatrixYScaling;
+            charHorizontalDisplacementText = charHorizontalDisplacementText * fontMatrixXScaling;
+            charVerticalDisplacementText = charVerticalDisplacementText * fontMatrixYScaling;
 
-            maxVerticalDisplacementText = Math.max(maxVerticalDisplacementText, characterVerticalDisplacementText);
+            maxVerticalDisplacementText = Math.max(maxVerticalDisplacementText,
+                    charVerticalDisplacementText);
 
             // PDF Spec - 5.5.2 Word Spacing
             //
@@ -433,18 +411,19 @@ public class PDFStreamEngine
             // code 32 non-space resulted in errors consistent with this interpretation.
             //
             float spacingText = 0;
-            if ((string[i] == 0x20) && codeLength == 1)
+            if (string[i] == 0x20 && codeLength == 1)
             {
                 spacingText += wordSpacingText;
             }
             textMatrix.multiply(ctm, textXctm);
             // Convert textMatrix to display units
-            // We need to instantiate a new Matrix instance here as it is passed to the TextPosition constructor below.
+            // We need to instantiate a new Matrix instance here as it is passed to the TextPosition
+            // constructor below
             Matrix textMatrixStart = textStateParameters.multiply(textXctm);
 
-            // TODO : tx should be set for horizontal text and ty for vertical text
+            // TODO: tx should be set for horizontal text and ty for vertical text
             // which seems to be specified in the font (not the direction in the matrix).
-            float tx = ((characterHorizontalDisplacementText) * fontSizeText) * horizontalScalingText;
+            float tx = charHorizontalDisplacementText * fontSizeText * horizontalScalingText;
             float ty = 0;
             // reset the matrix instead of creating a new one
             td.reset();
@@ -462,8 +441,8 @@ public class PDFStreamEngine
             final float endYPosition = textMatrixEnd.getYPosition();
 
             // add some spacing to the text matrix (see comment above)
-            tx = ((characterHorizontalDisplacementText) * fontSizeText + characterSpacingText + spacingText)
-                    * horizontalScalingText;
+            tx = (charHorizontalDisplacementText * fontSizeText + characterSpacingText +
+                    spacingText) * horizontalScalingText;
             td.setValue(2, 0, tx);
             td.multiply(textMatrix, textMatrix);
 
@@ -472,27 +451,20 @@ public class PDFStreamEngine
             float startXPosition = textMatrixStart.getXPosition();
             float widthText = endXPosition - startXPosition;
 
-            // there are several cases where one character code will
-            // output multiple characters. For example "fi" or a
-            // glyphname that has no mapping like "visiblespace"
-            if (c != null)
-            {
-                validCharCnt++;
-            }
-            else
+            // PDFBOX-373: Replace a null entry with "?" so it is not printed as "(null)"
+            if (c == null)
             {
-                // PDFBOX-373: Replace a null entry with "?" so it is
-                // not printed as "(null)"
                 c = "?";
             }
-            totalCharCnt++;
 
-            float totalVerticalDisplacementDisp = maxVerticalDisplacementText * fontSizeText * textXctm.getYScale();
+            float totalVerticalDisplacementDisp = maxVerticalDisplacementText * fontSizeText *
+                    textXctm.getYScale();
 
             // process the decoded text
-            processTextPosition(new TextPosition(pageRotation, pageWidth, pageHeight, textMatrixStart, endXPosition,
-                    endYPosition, totalVerticalDisplacementDisp, widthText, spaceWidthDisp, c, codePoints, font,
-                    fontSizeText, (int) (fontSizeText * textMatrix.getXScale())));
+            processTextPosition(new TextPosition(pageRotation, pageWidth, pageHeight,
+                    textMatrixStart, endXPosition, endYPosition, totalVerticalDisplacementDisp,
+                    widthText, spaceWidthDisp, c, codePoints, font, fontSizeText,
+                    (int)(fontSizeText * textMatrix.getXScale())));
         }
     }
 
@@ -501,15 +473,14 @@ public class PDFStreamEngine
      * 
      * @param operation The operation to perform.
      * @param arguments The list of arguments.
-     * 
      * @throws IOException If there is an error processing the operation.
      */
     public void processOperator(String operation, List<COSBase> arguments) throws IOException
     {
         try
         {
-            PDFOperator oper = PDFOperator.getOperator(operation);
-            processOperator(oper, arguments);
+            PDFOperator operator = PDFOperator.getOperator(operation);
+            processOperator(operator, arguments);
         }
         catch (IOException e)
         {
@@ -522,7 +493,6 @@ public class PDFStreamEngine
      * 
      * @param operator The operation to perform.
      * @param arguments The list of arguments.
-     * 
      * @throws IOException If there is an error processing the operation.
      */
     protected void processOperator(PDFOperator operator, List<COSBase> arguments) throws IOException
@@ -545,35 +515,7 @@ public class PDFStreamEngine
     }
 
     /**
-     * Transforms the given point using the current transformation matrix
-     *
-     * @param x x-coordinate of the point to be transformed
-     * @param y y-coordinate of the point to be transformed
-     * @return the transformed point
-     */
-   /* public Point2D.Double transformPoint(double x, double y)
-    {
-        double[] position = { x, y };
-        Matrix ctm = graphicsState.getCurrentTransformationMatrix();
-        ctm.createAffineTransform().transform(position, 0, position, 0, 1);
-        return new Point2D.Double(position[0], position[1]);
-    }*/
-
-    /**
-     * Transforms the given width using the current transformation matrix
-     *
-     * @param width the width to be transformed
-     * @return the transformed width
-     */
-    /*public double transformWidth(double width) {
-        Matrix ctm = graphicsState.getCurrentTransformationMatrix();
-        double x = ctm.getValue(0, 0) + ctm.getValue(1, 0);
-        double y = ctm.getValue(0, 1) + ctm.getValue(1, 1);
-        return width * Math.sqrt(0.5 * (x * x + y * y));
-    }*/
-
-    /**
-     * @return Returns the colorSpaces.
+     * @return Returns the XObjects.
      */
     public Map<String, PDXObject> getXObjects()
     {
@@ -581,14 +523,6 @@ public class PDFStreamEngine
     }
 
     /**
-     * @param value The colorSpaces to set.
-     */
-    public void setColorSpaces(Map<String, PDColorSpace> value)
-    {
-        streamResourcesStack.peek().setColorSpaces(value);
-    }
-
-    /**
      * @return Returns the fonts.
      */
     public Map<String, PDFont> getFonts()
@@ -706,31 +640,10 @@ public class PDFStreamEngine
     }
 
     /**
-     * Get the total number of valid characters in the doc that could be decoded in processEncodedText().
-     * 
-     * @return The number of valid characters.
-     */
-    public int getValidCharCnt()
-    {
-        return validCharCnt;
-    }
-
-    /**
-     * Get the total number of characters in the doc (including ones that could not be mapped).
-     * 
-     * @return The number of characters.
-     */
-    public int getTotalCharCnt()
-    {
-        return totalCharCnt;
-    }
-
-    /**
      * Remove all cached resources.
      */
     public void dispose()
     {
-        resetEngine();
         drawingRectangle = null;
         graphicsState = null;
         textLineMatrix = null;
@@ -750,9 +663,6 @@ public class PDFStreamEngine
             operators.clear();
             operators = null;
         }
-        if (unsupportedOperators != null)
-        {
-            unsupportedOperators.clear();
-        }
+        unsupportedOperators.clear();
     }
 }

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStripper.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStripper.java?rev=1603056&r1=1603055&r2=1603056&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStripper.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStripper.java Tue Jun 17 05:10:23 2014
@@ -53,71 +53,65 @@ import org.apache.pdfbox.text.TextPositi
 /**
  * This class will take a pdf document and strip out all of the text and ignore the
  * formatting and such.  Please note; it is up to clients of this class to verify that
- * a specific user has the correct permissions to extract text from the
- * PDF document.
+ * a specific user has the correct permissions to extract text from the PDF document.
  * 
  * The basic flow of this process is that we get a document and use a series of 
  * processXXX() functions that work on smaller and smaller chunks of the page.  
  * Eventually, we fully process each page and then print it. 
  *
- * @author <a href="mailto:ben@benlitchfield.com">Ben Litchfield</a>
- * 
+ * @author Ben Litchfield
  */
 public class PDFTextStripper extends PDFStreamEngine
 {
-
-    private static final String thisClassName = PDFTextStripper.class.getSimpleName().toLowerCase();
-
     private static float DEFAULT_INDENT_THRESHOLD = 2.0f;
     private static float DEFAULT_DROP_THRESHOLD = 2.5f;
 
-    //enable the ability to set the default indent/drop thresholds
-    //with -D system properties:
+    // enable the ability to set the default indent/drop thresholds
+    // with -D system properties:
     //    pdftextstripper.indent
     //    pdftextstripper.drop
     static
     {
-        String sdrop = null, sindent = null;
+        String strDrop = null, strIndent = null;
         try
         {
-            String prop = thisClassName + ".indent";
-            sindent = System.getProperty(prop);
-            prop = thisClassName + ".drop";
-            sdrop = System.getProperty(prop);
+            String className = PDFTextStripper.class.getSimpleName().toLowerCase();
+            String prop = className + ".indent";
+            strIndent = System.getProperty(prop);
+            prop = className + ".drop";
+            strDrop = System.getProperty(prop);
         }
         catch (SecurityException e)
         {
             // PDFBOX-1946 when run in an applet
             // ignore and use default
         }
-        if (sindent != null && sindent.length() > 0)
+        if (strIndent != null && strIndent.length() > 0)
         {
             try
             {
-                float f = Float.parseFloat(sindent);
-                DEFAULT_INDENT_THRESHOLD = f;
+                DEFAULT_INDENT_THRESHOLD = Float.parseFloat(strIndent);
             }
             catch (NumberFormatException nfe)
             {
-                //ignore and use default
+                // ignore and use default
             }
         }
-        if (sdrop != null && sdrop.length() > 0)
+        if (strDrop != null && strDrop.length() > 0)
         {
             try
             {
-                float f = Float.parseFloat(sdrop);
-                DEFAULT_DROP_THRESHOLD = f;
+                DEFAULT_DROP_THRESHOLD = Float.parseFloat(strDrop);
             }
             catch (NumberFormatException nfe)
             {
-                //ignore and use default
+                // ignore and use default
             }
         }
     }
 
     /**
-     * The platforms line separator.
+     * The platform's line separator.
      */
     protected final String systemLineSeparator = System.getProperty("line.separator"); 
 
@@ -146,12 +140,12 @@ public class PDFTextStripper extends PDF
     private float indentThreshold = DEFAULT_INDENT_THRESHOLD;
     private float dropThreshold = DEFAULT_DROP_THRESHOLD;
 
-    // We will need to estimate where to add spaces.  
-    // These are used to help guess. 
+    // We will need to estimate where to add spaces. These are used to help guess.
     private float spacingTolerance = .5f;
     private float averageCharTolerance = .3f;
 
     private List<PDThreadBead> pageArticles = null;
+
     /**
      * The charactersByArticle is used to extract text by article divisions.  For example
      * a PDF that has two columns like a newspaper, we want to extract the first column and
@@ -172,18 +166,8 @@ public class PDFTextStripper extends PDF
     private Map<String, TreeMap<Float, TreeSet<Float>>> characterListMapping =
         new HashMap<String, TreeMap<Float, TreeSet<Float>>>();
 
-    /**
-     * encoding that text will be written in (or null).
-     */
-    protected String outputEncoding; 
-
-    /**
-     * The document to read.
-     */
+    protected String outputEncoding;
     protected PDDocument document;
-    /**
-     * The stream to write the output to.
-     */
     protected Writer output;
 
     /**
@@ -193,16 +177,14 @@ public class PDFTextStripper extends PDF
     private TextNormalize normalize = null;
 
     /**
-     * True if we started a paragraph but haven't ended it
-     * yet.
+     * True if we started a paragraph but haven't ended it yet.
      */
     private boolean inParagraph;
 
     /**
      * Instantiate a new PDFTextStripper object. This object will load
      * properties from PDFTextStripper.properties and will not do
-     * anything special to convert the text to a more encoding-specific
-     * output.
+     * anything special to convert the text to a more encoding-specific output.
      *
      * @throws IOException If there is an error loading the properties.
      */
@@ -261,25 +243,9 @@ public class PDFTextStripper extends PDF
         return outputStream.toString();
     }
 
-    /**
-     * @deprecated
-     * @see PDFTextStripper#getText( PDDocument )
-     * @param doc The document to extract the text from.
-     * @return The document text.
-     * @throws IOException If there is an error extracting the text.
-     */
-    public String getText( COSDocument doc ) throws IOException
-    {
-        return getText( new PDDocument( doc ) );
-    }
-
-    /**
-     * {@inheritDoc}
-     */
     @Override
     public void resetEngine()
     {
-        super.resetEngine();
         currentPageNo = 0;
         document = null;
         if (charactersByArticle != null)
@@ -323,7 +289,6 @@ public class PDFTextStripper extends PDF
             // password (such a document appears to not be encrypted by
             // someone viewing the document, thus the confusion).  We will
             // attempt to decrypt with the empty password to handle this case.
-            //
             try
             {
                 StandardDecryptionMaterial sdm = new StandardDecryptionMaterial("");
@@ -360,9 +325,9 @@ public class PDFTextStripper extends PDF
                 endBookmarkPageNumber == -1 && endBookmark != null &&
                 startBookmark.getCOSObject() == endBookmark.getCOSObject() )
         {
-            //this is a special case where both the start and end bookmark
-            //are the same but point to nothing.  In this case
-            //we will not extract any text.
+            // this is a special case where both the start and end bookmark
+            // are the same but point to nothing.  In this case
+            // we will not extract any text.
             startBookmarkPageNumber = 0;
             endBookmarkPageNumber = 0;
         }
@@ -380,19 +345,20 @@ public class PDFTextStripper extends PDF
         }
     }
 
-    private int getPageNumber( PDOutlineItem bookmark, List<COSObjectable> allPages ) throws IOException
+    private int getPageNumber( PDOutlineItem bookmark, List<COSObjectable> allPages )
+            throws IOException
     {
         int pageNumber = -1;
         PDPage page = bookmark.findDestinationPage( document );
         if( page != null )
         {
-            pageNumber = allPages.indexOf( page )+1;//use one based indexing
+            pageNumber = allPages.indexOf( page ) + 1; // use one based indexing
         }
         return pageNumber;
     }
 
     /**
-     * This method is available for subclasses of this class.  It will be called before processing
+     * This method is available for subclasses of this class. It will be called before processing
      * of the document start.
      *
      * @param pdf The PDF document that is being processed.
@@ -404,7 +370,7 @@ public class PDFTextStripper extends PDF
     }
 
     /**
-     * This method is available for subclasses of this class.  It will be called after processing
+     * This method is available for subclasses of this class. It will be called after processing
      * of the document finishes.
      *
      * @param pdf The PDF document that is being processed.
@@ -442,7 +408,7 @@ public class PDFTextStripper extends PDF
             {
                 if( numberOfArticleSections < originalSize )
                 {
-                    ((List<TextPosition>)charactersByArticle.get( i )).clear();
+                    charactersByArticle.get( i ).clear();
                 }
                 else
                 {
@@ -505,7 +471,7 @@ public class PDFTextStripper extends PDF
      */
     protected void startPage( PDPage page ) throws IOException
     {
-        //default is to do nothing.
+        // default is to do nothing
     }
 
     /**
@@ -518,15 +484,15 @@ public class PDFTextStripper extends PDF
      */
     protected void endPage( PDPage page ) throws IOException
     {
-        //default is to do nothing
+        // default is to do nothing
     }
 
-    private static final float ENDOFLASTTEXTX_RESET_VALUE = -1;
-    private static final float MAXYFORLINE_RESET_VALUE = -Float.MAX_VALUE;
-    private static final float EXPECTEDSTARTOFNEXTWORDX_RESET_VALUE = -Float.MAX_VALUE;
-    private static final float MAXHEIGHTFORLINE_RESET_VALUE = -1;
-    private static final float MINYTOPFORLINE_RESET_VALUE = Float.MAX_VALUE;
-    private static final float LASTWORDSPACING_RESET_VALUE = -1;
+    private static final float END_OF_LAST_TEXT_X_RESET_VALUE = -1;
+    private static final float MAX_Y_FOR_LINE_RESET_VALUE = -Float.MAX_VALUE;
+    private static final float EXPECTED_START_OF_NEXT_WORD_X_RESET_VALUE = -Float.MAX_VALUE;
+    private static final float MAX_HEIGHT_FOR_LINE_RESET_VALUE = -1;
+    private static final float MIN_Y_TOP_FOR_LINE_RESET_VALUE = Float.MAX_VALUE;
+    private static final float LAST_WORD_SPACING_RESET_VALUE = -1;
 
     /**
      * This will print the text of the processed page to "output".
@@ -538,16 +504,16 @@ public class PDFTextStripper extends PDF
      */
     protected void writePage() throws IOException
     {
-        float maxYForLine = MAXYFORLINE_RESET_VALUE;
-        float minYTopForLine = MINYTOPFORLINE_RESET_VALUE;
-        float endOfLastTextX = ENDOFLASTTEXTX_RESET_VALUE;
-        float lastWordSpacing = LASTWORDSPACING_RESET_VALUE;
-        float maxHeightForLine = MAXHEIGHTFORLINE_RESET_VALUE;
+        float maxYForLine = MAX_Y_FOR_LINE_RESET_VALUE;
+        float minYTopForLine = MIN_Y_TOP_FOR_LINE_RESET_VALUE;
+        float endOfLastTextX = END_OF_LAST_TEXT_X_RESET_VALUE;
+        float lastWordSpacing = LAST_WORD_SPACING_RESET_VALUE;
+        float maxHeightForLine = MAX_HEIGHT_FOR_LINE_RESET_VALUE;
         PositionWrapper lastPosition = null;
         PositionWrapper lastLineStartPosition = null;
 
-        boolean startOfPage = true;//flag to indicate start of page
-        boolean startOfArticle = true;
+        boolean startOfPage = true; // flag to indicate start of page
+        boolean startOfArticle;
         if(charactersByArticle.size() > 0) 
         { 
             writePageStart();
@@ -562,80 +528,80 @@ public class PDFTextStripper extends PDF
                 Collections.sort( textList, comparator );
             }
             Iterator<TextPosition> textIter = textList.iterator();
-            /* Before we can display the text, we need to do some normalizing.
-             * Arabic and Hebrew text is right to left and is typically stored
-             * in its logical format, which means that the rightmost character is
-             * stored first, followed by the second character from the right etc.
-             * However, PDF stores the text in presentation form, which is left to
-             * right.  We need to do some normalization to convert the PDF data to
-             * the proper logical output format.
-             *
-             * Note that if we did not sort the text, then the output of reversing the
-             * text is undefined and can sometimes produce worse output then not trying
-             * to reverse the order.  Sorting should be done for these languages.
-             * */
-
-            /* First step is to determine if we have any right to left text, and
-             * if so, is it dominant. */
-            int ltrCnt = 0;
-            int rtlCnt = 0;
+            // Before we can display the text, we need to do some normalizing.
+            // Arabic and Hebrew text is right to left and is typically stored
+            // in its logical format, which means that the rightmost character is
+            // stored first, followed by the second character from the right etc.
+            // However, PDF stores the text in presentation form, which is left to
+            // right.  We need to do some normalization to convert the PDF data to
+            // the proper logical output format.
+            //
+            // Note that if we did not sort the text, then the output of reversing the
+            // text is undefined and can sometimes produce worse output than not trying
+            // to reverse the order. Sorting should be done for these languages.
+
+            // First step is to determine if we have any right to left text, and
+            // if so, is it dominant.
+            int ltrCount = 0;
+            int rtlCount = 0;
 
             while( textIter.hasNext() )
             {
-                TextPosition position = (TextPosition)textIter.next();
+                TextPosition position = textIter.next();
                 String stringValue = position.getCharacter();
                 for (int a = 0; a < stringValue.length(); a++)
                 {
                     byte dir = Character.getDirectionality(stringValue.charAt(a));
-                    if ((dir == Character.DIRECTIONALITY_LEFT_TO_RIGHT ) ||
-                            (dir == Character.DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING) ||
-                            (dir == Character.DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE ))
+                    if (dir == Character.DIRECTIONALITY_LEFT_TO_RIGHT ||
+                        dir == Character.DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING ||
+                        dir == Character.DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE)
                     {
-                        ltrCnt++;
+                        ltrCount++;
                     }
-                    else if ((dir == Character.DIRECTIONALITY_RIGHT_TO_LEFT ) ||
-                            (dir == Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC) ||
-                            (dir == Character.DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING) ||
-                            (dir == Character.DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE ))
+                    else if (dir == Character.DIRECTIONALITY_RIGHT_TO_LEFT ||
+                             dir == Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC ||
+                             dir == Character.DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING ||
+                             dir == Character.DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE)
                     {
-                        rtlCnt++;
+                        rtlCount++;
                     }
                 }
             }
             // choose the dominant direction
-            boolean isRtlDominant = rtlCnt > ltrCnt;
+            boolean isRtlDominant = rtlCount > ltrCount;
 
             startArticle(!isRtlDominant);
             startOfArticle = true;
             // we will later use this to skip reordering
-            boolean hasRtl = rtlCnt > 0;
+            boolean hasRtl = rtlCount > 0;
 
-            /* Now cycle through to print the text.
-             * We queue up a line at a time before we print so that we can convert
-             * the line from presentation form to logical form (if needed). 
-             */
+            // Now cycle through to print the text.
+            // We queue up a line at a time before we print so that we can convert
+            // the line from presentation form to logical form (if needed).
             List<TextPosition> line = new ArrayList<TextPosition>();
 
             textIter = textList.iterator();    // start from the beginning again
-            /* PDF files don't always store spaces. We will need to guess where we should add
-             * spaces based on the distances between TextPositions. Historically, this was done
-             * based on the size of the space character provided by the font. In general, this worked
-             * but there were cases where it did not work. Calculating the average character width
-             * and using that as a metric works better in some cases but fails in some cases where the
-             * spacing worked. So we use both. NOTE: Adobe reader also fails on some of these examples.
-             */
-            //Keeps track of the previous average character width
+            // PDF files don't always store spaces. We will need to guess where we should add
+            // spaces based on the distances between TextPositions. Historically, this was done
+            // based on the size of the space character provided by the font. In general, this
+            // worked but there were cases where it did not work. Calculating the average character
+            // width and using that as a metric works better in some cases but fails in some cases
+            // where the spacing worked. So we use both. NOTE: Adobe reader also fails on some of
+            // these examples.
+
+            // Keeps track of the previous average character width
             float previousAveCharWidth = -1;
             while( textIter.hasNext() )
             {
-                TextPosition position = (TextPosition)textIter.next();
+                TextPosition position = textIter.next();
                 PositionWrapper current = new PositionWrapper(position);
                 String characterValue = position.getCharacter();
 
                 //Resets the average character width when we see a change in font
                 // or a change in the font size
-                if(lastPosition != null && ((position.getFont() != lastPosition.getTextPosition().getFont())
-                        || (position.getFontSize() != lastPosition.getTextPosition().getFontSize())))
+                if(lastPosition != null &&
+                        (position.getFont() != lastPosition.getTextPosition().getFont() ||
+                         position.getFontSize() != lastPosition.getTextPosition().getFontSize()))
                 {
                     previousAveCharWidth = -1;
                 }
@@ -645,8 +611,8 @@ public class PDFTextStripper extends PDF
                 float positionWidth;
                 float positionHeight;
 
-                /* If we are sorting, then we need to use the text direction
-                 * adjusted coordinates, because they were used in the sorting. */
+                // If we are sorting, then we need to use the text direction
+                // adjusted coordinates, because they were used in the sorting.
                 if (getSortByPosition())
                 {
                     positionX = position.getXDirAdj();
@@ -665,11 +631,11 @@ public class PDFTextStripper extends PDF
                 //The current amount of characters in a word
                 int wordCharCount = position.getIndividualWidths().length;
 
-                /* Estimate the expected width of the space based on the
-                 * space character with some margin. */
+                // Estimate the expected width of the space based on the
+                // space character with some margin.
                 float wordSpacing = position.getWidthOfSpace();
-                float deltaSpace = 0;
-                if ((wordSpacing == 0) || (wordSpacing == Float.NaN))
+                float deltaSpace;
+                if (wordSpacing == 0 || wordSpacing == Float.NaN)
                 {
                     deltaSpace = Float.MAX_VALUE;
                 }
@@ -677,34 +643,33 @@ public class PDFTextStripper extends PDF
                 {
                     if( lastWordSpacing < 0 )
                     {
-                        deltaSpace = (wordSpacing * getSpacingTolerance());
+                        deltaSpace = wordSpacing * getSpacingTolerance();
                     }
                     else
                     {
-                        deltaSpace = (((wordSpacing+lastWordSpacing)/2f)* getSpacingTolerance());
+                        deltaSpace = (wordSpacing + lastWordSpacing) / 2f * getSpacingTolerance();
                     }
                 }
 
-                /* Estimate the expected width of the space based on the
-                 * average character width with some margin. This calculation does not
-                 * make a true average (average of averages) but we found that it gave the
-                 * best results after numerous experiments. Based on experiments we also found that
-                 * .3 worked well. */
+                // Estimate the expected width of the space based on the average character width
+                // with some margin. This calculation does not make a true average (average of
+                // averages) but we found that it gave the best results after numerous experiments.
+                // Based on experiments we also found that .3 worked well.
                 float averageCharWidth = -1;
                 if(previousAveCharWidth < 0)
                 {
-                    averageCharWidth = (positionWidth/wordCharCount);
+                    averageCharWidth = positionWidth / wordCharCount;
                 }
                 else
                 {
-                    averageCharWidth = (previousAveCharWidth + (positionWidth/wordCharCount))/2f;
+                    averageCharWidth = (previousAveCharWidth + positionWidth / wordCharCount) / 2f;
                 }
-                float deltaCharWidth = (averageCharWidth * getAverageCharTolerance());
+                float deltaCharWidth = averageCharWidth * getAverageCharTolerance();
 
-                //Compares the values obtained by the average method and the wordSpacing method and picks
-                //the smaller number.
-                float expectedStartOfNextWordX = EXPECTEDSTARTOFNEXTWORDX_RESET_VALUE;
-                if(endOfLastTextX != ENDOFLASTTEXTX_RESET_VALUE)
+                // Compares the values obtained by the average method and the wordSpacing method
+                // and picks the smaller number.
+                float expectedStartOfNextWordX = EXPECTED_START_OF_NEXT_WORD_X_RESET_VALUE;
+                if(endOfLastTextX != END_OF_LAST_TEXT_X_RESET_VALUE)
                 {
                     if(deltaCharWidth > deltaSpace)
                     {
@@ -729,25 +694,27 @@ public class PDFTextStripper extends PDF
                     // Text must overlap with the last rendered baseline text by at least
                     // a small amount in order to be considered as being on the same line.
 
-                    /* XXX BC: In theory, this check should really check if the next char is in full range
-                     * seen in this line. This is what I tried to do with minYTopForLine, but this caused a lot
-                     * of regression test failures.  So, I'm leaving it be for now. */
+                    // XXX BC: In theory, this check should really check if the next char is in
+                    // full range seen in this line. This is what I tried to do with minYTopForLine,
+                    // but this caused a lot of regression test failures.  So, I'm leaving it be for
+                    // now
                     if(!overlap(positionY, positionHeight, maxYForLine, maxHeightForLine))
                     {
                         writeLine(normalize(line,isRtlDominant,hasRtl),isRtlDominant);
                         line.clear();
                         lastLineStartPosition = 
-                            handleLineSeparation(current, lastPosition, lastLineStartPosition, maxHeightForLine);
-                        endOfLastTextX = ENDOFLASTTEXTX_RESET_VALUE;
-                        expectedStartOfNextWordX = EXPECTEDSTARTOFNEXTWORDX_RESET_VALUE;
-                        maxYForLine = MAXYFORLINE_RESET_VALUE;
-                        maxHeightForLine = MAXHEIGHTFORLINE_RESET_VALUE;
-                        minYTopForLine = MINYTOPFORLINE_RESET_VALUE;
+                            handleLineSeparation(current, lastPosition, lastLineStartPosition,
+                                    maxHeightForLine);
+                        endOfLastTextX = END_OF_LAST_TEXT_X_RESET_VALUE;
+                        expectedStartOfNextWordX = EXPECTED_START_OF_NEXT_WORD_X_RESET_VALUE;
+                        maxYForLine = MAX_Y_FOR_LINE_RESET_VALUE;
+                        maxHeightForLine = MAX_HEIGHT_FOR_LINE_RESET_VALUE;
+                        minYTopForLine = MIN_Y_TOP_FOR_LINE_RESET_VALUE;
                     }
-                    //Test if our TextPosition starts after a new word would be expected to start.
-                    if (expectedStartOfNextWordX != EXPECTEDSTARTOFNEXTWORDX_RESET_VALUE 
+                    // test if our TextPosition starts after a new word would be expected to start
+                    if (expectedStartOfNextWordX != EXPECTED_START_OF_NEXT_WORD_X_RESET_VALUE
                             && expectedStartOfNextWordX < positionX &&
-                            //only bother adding a space if the last character was not a space
+                            // only bother adding a space if the last character was not a space
                              lastPosition.getTextPosition().getCharacter() != null &&
                             !lastPosition.getTextPosition().getCharacter().endsWith( " " ) )
                     {
@@ -797,8 +764,8 @@ public class PDFTextStripper extends PDF
 
     private boolean overlap( float y1, float height1, float y2, float height2 )
     {
-        return within( y1, y2, .1f) || (y2 <= y1 && y2 >= y1-height1) ||
-        (y1 <= y2 && y1 >= y2-height2);
+        return within( y1, y2, .1f) || y2 <= y1 && y2 >= y1 - height1 ||
+                y1 <= y2 && y1 >= y2 - height2;
     }
 
     /**
@@ -847,8 +814,8 @@ public class PDFTextStripper extends PDF
     }
 
     /**
-     * Write a Java string to the output stream. The default implementation will ignore the <code>textPositions</code>
-     * and just calls {@link #writeString(String)}.
+     * Write a Java string to the output stream. The default implementation will ignore the
+     * <code>textPositions</code> and just calls {@link #writeString(String)}.
      *
      * @param text The text to write to the stream.
      * @param textPositions The TextPositions belonging to the text.
@@ -899,7 +866,8 @@ public class PDFTextStripper extends PDF
             String textCharacter = text.getCharacter();
             float textX = text.getX();
             float textY = text.getY();
-            TreeMap<Float, TreeSet<Float>> sameTextCharacters = characterListMapping.get( textCharacter );
+            TreeMap<Float, TreeSet<Float>> sameTextCharacters =
+                    characterListMapping.get( textCharacter );
             if( sameTextCharacters == null )
             {
                 sameTextCharacters = new TreeMap<Float, TreeSet<Float>>();
@@ -915,9 +883,8 @@ public class PDFTextStripper extends PDF
             // the TJ just backs up to compensate after each character).  Also, we subtract
             // an amount to allow for kerning (a percentage of the width of the last
             // character).
-            //
             boolean suppressCharacter = false;
-            float tolerance = (text.getWidth()/textCharacter.length())/3.0f;
+            float tolerance = text.getWidth()/textCharacter.length() / 3.0f;
             
             SortedMap<Float, TreeSet<Float>> xMatches =
                 sameTextCharacters.subMap(textX - tolerance, textX + tolerance );
@@ -945,8 +912,7 @@ public class PDFTextStripper extends PDF
         }
         if( showCharacter )
         {
-            //if we are showing the character then we need to determine which
-            //article it belongs to.
+            // if we are showing the character then we need to determine which article it belongs to
             int foundArticleDivisionIndex = -1;
             int notFoundButFirstLeftAndAboveArticleDivisionIndex = -1;
             int notFoundButFirstLeftArticleDivisionIndex = -1;
@@ -957,7 +923,7 @@ public class PDFTextStripper extends PDF
             {
                 for( int i=0; i<pageArticles.size() && foundArticleDivisionIndex == -1; i++ )
                 {
-                    PDThreadBead bead = (PDThreadBead)pageArticles.get( i );
+                    PDThreadBead bead = pageArticles.get( i );
                     if( bead != null )
                     {
                         PDRectangle rect = bead.getRectangle();
@@ -1014,31 +980,30 @@ public class PDFTextStripper extends PDF
                 articleDivisionIndex = charactersByArticle.size()-1;
             }
 
-            List<TextPosition> textList = (List<TextPosition>) charactersByArticle.get( articleDivisionIndex );
+            List<TextPosition> textList = charactersByArticle.get( articleDivisionIndex );
 
-            /* In the wild, some PDF encoded documents put diacritics (accents on
-             * top of characters) into a separate Tj element.  When displaying them
-             * graphically, the two chunks get overlayed.  With text output though,
-             * we need to do the overlay. This code recombines the diacritic with
-             * its associated character if the two are consecutive.
-             */ 
+            // In the wild, some PDF encoded documents put diacritics (accents on
+            // top of characters) into a separate Tj element.  When displaying them
+            // graphically, the two chunks get overlaid.  With text output though,
+            // we need to do the overlay. This code recombines the diacritic with
+            // its associated character if the two are consecutive.
             if(textList.isEmpty())
             {
                 textList.add(text);
             }
             else
             {
-                /* test if we overlap the previous entry.  
-                 * Note that we are making an assumption that we need to only look back
-                 * one TextPosition to find what we are overlapping.  
-                 * This may not always be true. */
-                TextPosition previousTextPosition = (TextPosition)textList.get(textList.size()-1);
+                // test if we overlap the previous entry.
+                // Note that we are making an assumption that we need to only look back
+                // one TextPosition to find what we are overlapping.
+                // This may not always be true.
+                TextPosition previousTextPosition = textList.get(textList.size()-1);
                 if(text.isDiacritic() && previousTextPosition.contains(text))
                 {
                     previousTextPosition.mergeDiacritic(text, normalize);
                 }
-                /* If the previous TextPosition was the diacritic, merge it into this
-                 * one and remove it from the list. */
+                // If the previous TextPosition was the diacritic, merge it into this
+                // one and remove it from the list.
                 else if(previousTextPosition.isDiacritic() && text.contains(previousTextPosition))
                 {
                     text.mergeDiacritic(previousTextPosition, normalize);
@@ -1231,7 +1196,8 @@ public class PDFTextStripper extends PDF
     }
 
     /**
-     * Set if the text stripper should group the text output by a list of beads.  The default value is true!
+     * Set if the text stripper should group the text output by a list of beads.
+     * The default value is true!
      *
      * @param aShouldSeparateByBeads The new grouping of beads.
      */
@@ -1241,7 +1207,7 @@ public class PDFTextStripper extends PDF
     }
 
     /**
-     * Get the bookmark where text extraction should end, inclusive.  Default is null.
+     * Get the bookmark where text extraction should end, inclusive. Default is null.
      *
      * @return The ending bookmark.
      */
@@ -1546,57 +1512,20 @@ public class PDFTextStripper extends PDF
         articleEnd = articleEndValue;
     }
 
-
-    /**
-     * Reverse characters of a compound Arabic glyph.
-     * When getSortByPosition() is true, inspect the sequence encoded
-     * by one glyph. If the glyph encodes two or more Arabic characters,
-     * reverse these characters from a logical order to a visual order.
-     * This ensures that the bidirectional algorithm that runs later will
-     * convert them back to a logical order.
-     * 
-     * @param str a string obtained from font.encoding()
-     * 
-     * @return the reversed string
-     */
-    @Override
-    public String inspectFontEncoding(String str)
-    {
-        if (!sortByPosition || str == null || str.length() < 2)
-        {
-            return str;
-        }
-        for (int i = 0; i < str.length(); ++i)
-        {
-            if (Character.getDirectionality(str.charAt(i))
-                    != Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC)
-            {
-                return str;
-            }
-        }
-        StringBuilder reversed = new StringBuilder(str.length());
-        for (int i = str.length() - 1; i >= 0; --i)
-        {
-            reversed.append(str.charAt(i));
-        }
-        return reversed.toString();
-    }
-
     /**
      * handles the line separator for a new line given
      * the specified current and previous TextPositions.
      * @param current the current text position
      * @param lastPosition the previous text position
-     * @param lastLineStartPosition the last text position that followed a line
-     *        separator.
+     * @param lastLineStartPosition the last text position that followed a line separator.
      * @param maxHeightForLine max height for positions since lastLineStartPosition
      * @return start position of the last line
      * @throws IOException if something went wrong
      */
     protected PositionWrapper handleLineSeparation(PositionWrapper current,
-            PositionWrapper lastPosition, PositionWrapper lastLineStartPosition, float maxHeightForLine)
-            throws IOException 
-            {
+            PositionWrapper lastPosition, PositionWrapper lastLineStartPosition,
+            float maxHeightForLine) throws IOException
+    {
         current.setLineStart();
         isParagraphSeparation(current, lastPosition, lastLineStartPosition, maxHeightForLine);
         lastLineStartPosition = current;
@@ -1645,7 +1574,8 @@ public class PDFTextStripper extends PDF
      * @param maxHeightForLine max height for text positions since lasLineStartPosition.
      */
     protected void isParagraphSeparation(PositionWrapper position,  
-            PositionWrapper lastPosition, PositionWrapper lastLineStartPosition, float maxHeightForLine)
+            PositionWrapper lastPosition, PositionWrapper lastLineStartPosition,
+            float maxHeightForLine)
     {
         boolean result = false;
         if(lastLineStartPosition == null) 
@@ -1656,15 +1586,15 @@ public class PDFTextStripper extends PDF
         {
             float yGap = Math.abs(position.getTextPosition().getYDirAdj()-
                     lastPosition.getTextPosition().getYDirAdj());
-            float xGap = (position.getTextPosition().getXDirAdj()-
-                    lastLineStartPosition.getTextPosition().getXDirAdj());//do we need to flip this for rtl?
-            if(yGap > (getDropThreshold()*maxHeightForLine))
+            float xGap = position.getTextPosition().getXDirAdj()-
+                    lastLineStartPosition.getTextPosition().getXDirAdj();//do we need to flip this for rtl?
+            if(yGap > getDropThreshold()*maxHeightForLine)
             {
-                        result = true;
+                result = true;
             }
-            else if(xGap > (getIndentThreshold()*position.getTextPosition().getWidthOfSpace()))
+            else if(xGap > getIndentThreshold()*position.getTextPosition().getWidthOfSpace())
             {
-                //text is indented, but try to screen for hanging indent
+                // text is indented, but try to screen for hanging indent
                 if(!lastLineStartPosition.isParagraphStart())
                 {
                      result = true;
@@ -1676,24 +1606,24 @@ public class PDFTextStripper extends PDF
             }
             else if(xGap < -position.getTextPosition().getWidthOfSpace())
             {
-                //text is left of previous line. Was it a hanging indent?
+                // text is left of previous line. Was it a hanging indent?
                 if(!lastLineStartPosition.isParagraphStart())
                 {
-                            result = true;
+                    result = true;
                 }
             }
-            else if(Math.abs(xGap) < (0.25 * position.getTextPosition().getWidth()))
+            else if(Math.abs(xGap) < 0.25 * position.getTextPosition().getWidth())
             {
-                //current horizontal position is within 1/4 a char of the last
-                //linestart.  We'll treat them as lined up.
+                // current horizontal position is within 1/4 a char of the last
+                // linestart. We'll treat them as lined up.
                 if(lastLineStartPosition.isHangingIndent())
                 {
                     position.setHangingIndent();
                 }
                 else if(lastLineStartPosition.isParagraphStart())
                 {
-                    //check to see if the previous line looks like
-                    //any of a number of standard list item formats
+                    // check to see if the previous line looks like
+                    // any of a number of standard list item formats
                     Pattern liPattern = matchListItemPattern(lastLineStartPosition);
                     if(liPattern!=null)
                     {
@@ -1801,7 +1731,6 @@ public class PDFTextStripper extends PDF
             "[a-z]\\)",
             "[IVXL]+\\.",
             "[ivxl]+\\.",
-
     };
 
     private List<Pattern> listOfPatterns = null;
@@ -1863,7 +1792,7 @@ public class PDFTextStripper extends PDF
      * @param patterns list of patterns
      * @return matching pattern
      */
-    protected static final Pattern matchPattern(String string, List<Pattern> patterns)
+    protected static Pattern matchPattern(String string, List<Pattern> patterns)
     {
         Pattern matchedPattern = null;
         for(Pattern p : patterns)
@@ -1882,7 +1811,8 @@ public class PDFTextStripper extends PDF
      * @param isRtlDominant determines if rtl or ltl is dominant
      * @throws IOException if something went wrong
      */
-    private void writeLine(List<WordWithTextPositions> line, boolean isRtlDominant) throws IOException
+    private void writeLine(List<WordWithTextPositions> line, boolean isRtlDominant)
+            throws IOException
     {
         int numberOfStrings = line.size();
         for(int i=0; i<numberOfStrings; i++)
@@ -1903,7 +1833,8 @@ public class PDFTextStripper extends PDF
      * @param hasRtl determines if lines contains rtl formatted text(parts)
      * @return a list of strings, one string for every word
      */
-    private List<WordWithTextPositions> normalize(List<TextPosition> line, boolean isRtlDominant, boolean hasRtl)
+    private List<WordWithTextPositions> normalize(List<TextPosition> line, boolean isRtlDominant,
+                                                  boolean hasRtl)
     {
         LinkedList<WordWithTextPositions> normalized = new LinkedList<WordWithTextPositions>();
         StringBuilder lineBuilder = new StringBuilder();
@@ -1932,8 +1863,8 @@ public class PDFTextStripper extends PDF
     }
 
     /**
-     * Used within {@link #normalize(List, boolean, boolean)} to create a single {@link WordWithTextPositions}
-     * entry.
+     * Used within {@link #normalize(List, boolean, boolean)} to create a single
+     * {@link WordWithTextPositions} entry.
      */
     private WordWithTextPositions createWord(String word, List<TextPosition> wordPositions)
     {
@@ -1949,7 +1880,8 @@ public class PDFTextStripper extends PDF
     {
         if (text instanceof WordSeparator) 
         {
-            normalized.add(createWord(lineBuilder.toString(), new ArrayList<TextPosition>(wordPositions)));
+            normalized.add(createWord(lineBuilder.toString(),
+                    new ArrayList<TextPosition>(wordPositions)));
             lineBuilder = new StringBuilder();
             wordPositions.clear();
         }
@@ -1962,10 +1894,7 @@ public class PDFTextStripper extends PDF
     }
 
     /**
-     * internal marker class.  Used as a place holder in
-     * a line of TextPositions.
-     * @author ME21969
-     *
+     * internal marker class. Used as a place holder in a line of TextPositions.
      */
     private static final class WordSeparator extends TextPosition
     {

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/operator/SetHorizontalTextScaling.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/operator/SetHorizontalTextScaling.java?rev=1603056&r1=1603055&r2=1603056&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/operator/SetHorizontalTextScaling.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/operator/SetHorizontalTextScaling.java Tue Jun 17 05:10:23 2014
@@ -43,6 +43,6 @@ public class SetHorizontalTextScaling ex
     public void process(PDFOperator operator, List<COSBase> arguments) throws IOException
     {
         COSNumber scaling = (COSNumber)arguments.get(0);
-        context.getGraphicsState().getTextState().setHorizontalScalingPercent( scaling.floatValue() );
+        context.getGraphicsState().getTextState().setHorizontalScaling(scaling.floatValue());
     }
 }

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/operator/ShowTextGlyph.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/operator/ShowTextGlyph.java?rev=1603056&r1=1603055&r2=1603056&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/operator/ShowTextGlyph.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/operator/ShowTextGlyph.java Tue Jun 17 05:10:23 2014
@@ -43,7 +43,7 @@ public class ShowTextGlyph extends Opera
         COSArray array = (COSArray)arguments.get( 0 );
         int arraySize = array.size();
         float fontsize = context.getGraphicsState().getTextState().getFontSize();
-        float horizontalScaling = context.getGraphicsState().getTextState().getHorizontalScalingPercent()/100;
+        float horizontalScaling = context.getGraphicsState().getTextState().getHorizontalScaling()/100;
         for( int i=0; i<arraySize; i++ )
         {
             COSBase next = array.get( i );