You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2016/01/19 19:48:42 UTC
svn commit: r1725590 - in
/pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/cmap: CMap.java
CodespaceRange.java
Author: lehmi
Date: Tue Jan 19 18:48:41 2016
New Revision: 1725590
URL: http://svn.apache.org/viewvc?rev=1725590&view=rev
Log:
PDFBOX-3120: fixed codespace range matching algorithm
Modified:
pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/cmap/CMap.java
pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/cmap/CodespaceRange.java
Modified: pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/cmap/CMap.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/cmap/CMap.java?rev=1725590&r1=1725589&r2=1725590&view=diff
==============================================================================
--- pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/cmap/CMap.java (original)
+++ pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/cmap/CMap.java Tue Jan 19 18:48:41 2016
@@ -103,10 +103,6 @@ public class CMap
*/
public int readCode(InputStream in) throws IOException
{
- // save the position in the string
- in.mark(maxCodeLength);
-
- // mapping algorithm
byte[] bytes = new byte[maxCodeLength];
in.read(bytes,0,minCodeLength);
for (int i = minCodeLength-1; i < maxCodeLength; i++)
@@ -124,58 +120,11 @@ public class CMap
bytes[byteCount] = (byte)in.read();
}
}
-
- // reset to the original position in the string
- in.reset();
-
- // modified mapping algorithm
- for (int i = 0; i < maxCodeLength; i++)
- {
- final byte curByte = (byte)in.read();
- bytes[i] = curByte;
- final int byteCount = i + 1;
- CodespaceRange match = null;
- CodespaceRange shortest = null;
- for (CodespaceRange range : codespaceRanges)
- {
- if (range.isPartialMatch(curByte, i))
- {
- if (match == null)
- {
- match = range;
- }
- else if (range.getStart().length < match.getStart().length)
- {
- // for multiple matches, choose the codespace with the shortest codes
- match = range;
- }
- }
-
- // find shortest range
- if (shortest == null || range.getStart().length < shortest.getStart().length)
- {
- shortest = range;
- }
- }
-
- // if there are no matches, the range with the shortest codes is chosen
- if (match == null)
- {
- match = shortest;
- }
-
- // we're done when we have enough bytes for the matched range
- if (match != null && match.getStart().length == byteCount)
- {
- return toInt(bytes, byteCount);
- }
- }
-
throw new IOException("CMap is invalid");
}
/**
- * Returns an int for the given a byte array
+ * Returns an int for the given byte array
*/
private int toInt(byte[] data, int dataLen)
{
Modified: pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/cmap/CodespaceRange.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/cmap/CodespaceRange.java?rev=1725590&r1=1725589&r2=1725590&view=diff
==============================================================================
--- pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/cmap/CodespaceRange.java (original)
+++ pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/cmap/CodespaceRange.java Tue Jan 19 18:48:41 2016
@@ -26,6 +26,8 @@ public class CodespaceRange
{
private byte[] start;
private byte[] end;
+ private int startInt;
+ private int endInt;
private int codeLength = 0;
/**
@@ -61,6 +63,7 @@ public class CodespaceRange
void setEnd(byte[] endBytes)
{
end = endBytes;
+ endInt = toInt(endBytes, endBytes.length);
}
/** Getter for property start.
@@ -80,6 +83,7 @@ public class CodespaceRange
{
start = startBytes;
codeLength = start.length;
+ startInt = toInt(startBytes, startBytes.length);
}
/**
@@ -91,42 +95,31 @@ public class CodespaceRange
}
/**
- * Returns true if the given code bytes match this codespace range.
+ * Returns an int for the given byte array
*/
- public boolean isFullMatch(byte[] code, int codeLen)
+ private int toInt(byte[] data, int dataLen)
{
- // code must be the same length as the bounding codes
- if (codeLen == codeLength)
+ int code = 0;
+ for (int i = 0; i < dataLen; ++i)
{
- // each of it bytes must lie between the corresponding bytes of the upper & lower bounds
- for (int i = 0; i < codeLen; i++)
- {
- int startNum = start[i] & 0xff;
- int endNum = end[i] & 0xff;
- int codeNum = code[i] & 0xff;
-
- if (codeNum > endNum || codeNum < startNum)
- {
- return false;
- }
- }
- return true;
+ code <<= 8;
+ code |= (data[i] + 256) % 256;
}
- return false;
+ return code;
}
-
/**
- * Returns true if the given byte matches the byte at the given index of this codespace range.
+ * Returns true if the given code has this range's code length and its int value lies within [startInt, endInt].
*/
- public boolean isPartialMatch(byte b, int index)
+ public boolean isFullMatch(byte[] code, int codeLen)
{
- if (index == codeLength)
+ // code must be the same length as the bounding codes
+ if (codeLen == codeLength)
{
- return false;
+ int value = toInt(code, codeLen);
+ if (value >= startInt && value <= endInt) // both bounds must hold; with "||" nearly every code of this length matched
+ return true;
}
- int startNum = start[index] & 0xff;
- int endNum = end[index] & 0xff;
- int codeNum = b & 0xff;
- return !(codeNum > endNum || codeNum < startNum);
+ return false;
}
+
}