You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2016/01/19 19:48:42 UTC

svn commit: r1725590 - in /pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/cmap: CMap.java CodespaceRange.java

Author: lehmi
Date: Tue Jan 19 18:48:41 2016
New Revision: 1725590

URL: http://svn.apache.org/viewvc?rev=1725590&view=rev
Log:
PDFBOX-3120: fixed codespace range matching algorithm

Modified:
    pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/cmap/CMap.java
    pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/cmap/CodespaceRange.java

Modified: pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/cmap/CMap.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/cmap/CMap.java?rev=1725590&r1=1725589&r2=1725590&view=diff
==============================================================================
--- pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/cmap/CMap.java (original)
+++ pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/cmap/CMap.java Tue Jan 19 18:48:41 2016
@@ -103,10 +103,6 @@ public class CMap
      */
     public int readCode(InputStream in) throws IOException
     {
-        // save the position in the string
-        in.mark(maxCodeLength);
-
-        // mapping algorithm
         byte[] bytes = new byte[maxCodeLength];
         in.read(bytes,0,minCodeLength);
         for (int i = minCodeLength-1; i < maxCodeLength; i++)
@@ -124,58 +120,11 @@ public class CMap
                 bytes[byteCount] = (byte)in.read();
             }
         }
-
-        // reset to the original position in the string
-        in.reset();
-
-        // modified mapping algorithm
-        for (int i = 0; i < maxCodeLength; i++)
-        {
-            final byte curByte = (byte)in.read(); 
-            bytes[i] = curByte;
-            final int byteCount = i + 1;
-            CodespaceRange match = null;
-            CodespaceRange shortest = null;
-            for (CodespaceRange range : codespaceRanges)
-            {
-                if (range.isPartialMatch(curByte, i))
-                {
-                    if (match == null)
-                    {
-                        match = range;
-                    }
-                    else if (range.getStart().length < match.getStart().length)
-                    {
-                        // for multiple matches, choose the codespace with the shortest codes
-                        match = range;
-                    }
-                }
-
-                // find shortest range
-                if (shortest == null || range.getStart().length < shortest.getStart().length)
-                {
-                    shortest = range;
-                }
-            }
-
-            // if there are no matches, the range with the shortest codes is chosen
-            if (match == null)
-            {
-                match = shortest;
-            }
-
-            // we're done when we have enough bytes for the matched range
-            if (match != null && match.getStart().length == byteCount)
-            {
-                return toInt(bytes, byteCount);
-            }
-        }
-
         throw new IOException("CMap is invalid");
     }
 
     /**
-     * Returns an int for the given a byte array
+     * Returns an int for the given byte array
      */
     private int toInt(byte[] data, int dataLen)
     {

Modified: pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/cmap/CodespaceRange.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/cmap/CodespaceRange.java?rev=1725590&r1=1725589&r2=1725590&view=diff
==============================================================================
--- pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/cmap/CodespaceRange.java (original)
+++ pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/cmap/CodespaceRange.java Tue Jan 19 18:48:41 2016
@@ -26,6 +26,8 @@ public class CodespaceRange
 {
     private byte[] start;
     private byte[] end;
+    private int startInt;
+    private int endInt;
     private int codeLength = 0;
     
     /**
@@ -61,6 +63,7 @@ public class CodespaceRange
     void setEnd(byte[] endBytes)
     {
         end = endBytes;
+        endInt = toInt(endBytes, endBytes.length);
     }
 
     /** Getter for property start.
@@ -80,6 +83,7 @@ public class CodespaceRange
     {
         start = startBytes;
         codeLength = start.length;
+        startInt = toInt(startBytes, startBytes.length);
     }
 
     /**
@@ -91,42 +95,31 @@ public class CodespaceRange
     }
 
     /**
-     * Returns true if the given code bytes match this codespace range.
+     * Returns an int for the given byte array
      */
-    public boolean isFullMatch(byte[] code, int codeLen)
+    private int toInt(byte[] data, int dataLen)
     {
-        // code must be the same length as the bounding codes
-        if (codeLen == codeLength)
+        int code = 0;
+        for (int i = 0; i < dataLen; ++i)
         {
-            // each of it bytes must lie between the corresponding bytes of the upper & lower bounds
-            for (int i = 0; i < codeLen; i++)
-            {
-                int startNum = start[i] & 0xff;
-                int endNum = end[i] & 0xff;
-                int codeNum = code[i] & 0xff;
-
-                if (codeNum > endNum || codeNum < startNum)
-                {
-                    return false;
-                }
-            }
-            return true;
+            code <<= 8;
+            code |= (data[i] + 256) % 256;
         }
-        return false;
+        return code;
     }
-    
     /**
-     * Returns true if the given byte matches the byte at the given index of this codespace range.
+     * Returns true if the given code bytes match this codespace range.
      */
-    public boolean isPartialMatch(byte b, int index)
+    public boolean isFullMatch(byte[] code, int codeLen)
     {
-        if (index == codeLength)
+        // code must be the same length as the bounding codes
+        if (codeLen == codeLength)
         {
-            return false;
+            int value = toInt(code, codeLen);
+            if (value >= startInt && value <=endInt)
+                return true;
         }
-        int startNum = start[index] & 0xff;
-        int endNum = end[index] & 0xff;
-        int codeNum = b & 0xff;
-        return !(codeNum > endNum || codeNum < startNum);
+        return false;
     }
+    
 }