You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2014/11/23 17:45:52 UTC

svn commit: r1641219 - in /pdfbox/branches/1.8/fontbox/src: main/java/org/apache/fontbox/cmap/CMapParser.java test/java/org/apache/fontbox/cmap/TestCMapParser.java test/resources/cmap/CMapNoWhitespace

Author: lehmi
Date: Sun Nov 23 16:45:51 2014
New Revision: 1641219

URL: http://svn.apache.org/r1641219
Log:
PDFBOX-1648: improve the CMapParser to detect missing delimiters as proposed by May Gilead and Kevin Pearcey

Added:
    pdfbox/branches/1.8/fontbox/src/test/resources/cmap/CMapNoWhitespace
Modified:
    pdfbox/branches/1.8/fontbox/src/main/java/org/apache/fontbox/cmap/CMapParser.java
    pdfbox/branches/1.8/fontbox/src/test/java/org/apache/fontbox/cmap/TestCMapParser.java

Modified: pdfbox/branches/1.8/fontbox/src/main/java/org/apache/fontbox/cmap/CMapParser.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/1.8/fontbox/src/main/java/org/apache/fontbox/cmap/CMapParser.java?rev=1641219&r1=1641218&r2=1641219&view=diff
==============================================================================
--- pdfbox/branches/1.8/fontbox/src/main/java/org/apache/fontbox/cmap/CMapParser.java (original)
+++ pdfbox/branches/1.8/fontbox/src/main/java/org/apache/fontbox/cmap/CMapParser.java Sun Nov 23 16:45:51 2014
@@ -32,7 +32,7 @@ import org.apache.fontbox.util.ResourceL
 /**
  * This will parse a CMap stream.
  *
- * @author <a href="mailto:ben@benlitchfield.com">Ben Litchfield</a>
+ * @author Ben Litchfield
  * 
  */
 public class CMapParser
@@ -526,11 +526,15 @@ public class CMapParser
             StringBuffer buffer = new StringBuffer();
             int stringByte = is.read();
 
-            while (!isWhitespaceOrEOF(stringByte))
+            while (!isWhitespaceOrEOF(stringByte) && !isDelimiter(stringByte))
             {
                 buffer.append((char) stringByte);
                 stringByte = is.read();
             }
+            if (isDelimiter( stringByte)) 
+            {
+                is.unread(stringByte);
+            }
             retval = new LiteralName(buffer.toString());
             break;
         }
@@ -579,12 +583,12 @@ public class CMapParser
 
             // newline separator may be missing in malformed CMap files
             // see PDFBOX-2035
-            while (!isWhitespaceOrEOF(nextByte) && nextByte != '<')
+            while (!isWhitespaceOrEOF(nextByte) && !isDelimiter(nextByte) && !Character.isDigit(nextByte))
             {
                 buffer.append((char) nextByte);
                 nextByte = is.read();
             }
-            if (nextByte == '<')
+            if (isDelimiter(nextByte) || Character.isDigit(nextByte))
             {
                 is.unread(nextByte);
             }
@@ -611,6 +615,27 @@ public class CMapParser
         return aByte == -1 || aByte == 0x20 || aByte == 0x0D || aByte == 0x0A;
     }
 
+    /** Is this a standard PDF delimiter character? */
+    private boolean isDelimiter(int aByte) 
+    {
+        switch (aByte) 
+        {
+            case '(':
+            case ')':
+            case '<':
+            case '>':
+            case '[':
+            case ']':
+            case '{':
+            case '}':
+            case '/':
+            case '%':
+                return true;
+            default:
+                return false;
+        }
+    }
+
     private void increment(byte[] data)
     {
         increment(data, data.length - 1);

Modified: pdfbox/branches/1.8/fontbox/src/test/java/org/apache/fontbox/cmap/TestCMapParser.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/1.8/fontbox/src/test/java/org/apache/fontbox/cmap/TestCMapParser.java?rev=1641219&r1=1641218&r2=1641219&view=diff
==============================================================================
--- pdfbox/branches/1.8/fontbox/src/test/java/org/apache/fontbox/cmap/TestCMapParser.java (original)
+++ pdfbox/branches/1.8/fontbox/src/test/java/org/apache/fontbox/cmap/TestCMapParser.java Sun Nov 23 16:45:51 2014
@@ -80,4 +80,20 @@ public class TestCMapParser extends Test
         assertEquals("CID 520 from cidchar <0208> 520", strCID3, cMap.lookupCID(cid3));
     }
 
+    /**
+     * Tests the parser against a valid, but poorly formatted CMap file.
+     * @throws IOException If something went wrong
+     */
+    public void testParserWithPoorWhitespace() throws IOException 
+    {
+        final String resourceDir= "src/test/resources/cmap";
+        File inDir = new File(resourceDir);
+
+        CMapParser parser = new CMapParser();
+        CMap cMap = parser.parse( resourceDir, new FileInputStream(new File(inDir,"CMapNoWhitespace")));
+
+        assertNotNull("Failed to parse nasty CMap file", cMap);
+    }
+
+
 }

Added: pdfbox/branches/1.8/fontbox/src/test/resources/cmap/CMapNoWhitespace
URL: http://svn.apache.org/viewvc/pdfbox/branches/1.8/fontbox/src/test/resources/cmap/CMapNoWhitespace?rev=1641219&view=auto
==============================================================================
--- pdfbox/branches/1.8/fontbox/src/test/resources/cmap/CMapNoWhitespace (added)
+++ pdfbox/branches/1.8/fontbox/src/test/resources/cmap/CMapNoWhitespace Sun Nov 23 16:45:51 2014
@@ -0,0 +1,32 @@
+%% Licensed to the Apache Software Foundation (ASF) under one or more
+%% contributor license agreements.  See the NOTICE file distributed with
+%% this work for additional information regarding copyright ownership.
+%% The ASF licenses this file to You under the Apache License, Version 2.0
+%% (the "License"); you may not use this file except in compliance with
+%% the License.  You may obtain a copy of the License at
+%%
+%%       http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%%
+%% This file isn't a valid CMap file, but it contains everything that
+%% is needed to test the CMapParser
+%%
+%%EndComments
+
+/CIDInit /ProcSet findresource begin 12 dict begin begincmap /CIDSystemInfo <</Registry (DDACTR+F1+0) /Ordering (0) /Supplement 0 >> def/CMapName /DDACTR+F1+0 def
+/CMapType 2 def1 begincodespacerange <0000> <FFFF> endcodespacerange7 beginbfchar<0003> <0020>
+<0011> <002e>
+<001a> <0037>
+<001b> <0038>
+<0026> <0043>
+<0083> <00b0>
+<00ed> <2212>
+endbfcharendcmap CMapName currentdict /CMap defineresource pop end end
+
+%%EndResource
+%%EOF