You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by ti...@apache.org on 2017/10/10 15:44:12 UTC

svn commit: r1811718 - in /pdfbox/branches/2.0/pdfbox/src: main/java/org/apache/pdfbox/pdmodel/interactive/action/PDActionURI.java test/java/org/apache/pdfbox/pdmodel/interactive/action/PDActionURITest.java

Author: tilman
Date: Tue Oct 10 15:44:12 2017
New Revision: 1811718

URL: http://svn.apache.org/viewvc?rev=1811718&view=rev
Log:
PDFBOX-3958: support UTF-16

Modified:
    pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/action/PDActionURI.java
    pdfbox/branches/2.0/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/action/PDActionURITest.java

Modified: pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/action/PDActionURI.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/action/PDActionURI.java?rev=1811718&r1=1811717&r2=1811718&view=diff
==============================================================================
--- pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/action/PDActionURI.java (original)
+++ pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/action/PDActionURI.java Tue Oct 10 15:44:12 2017
@@ -82,7 +82,7 @@ public class PDActionURI extends PDActio
 
     /**
      * This will get the uniform resource identifier to resolve. It should be encoded in 7-bit
-     * ASCII, but UTF-8 is supported too.
+     * ASCII, but UTF-8 and UTF-16 are supported too.
      *
      * @return The URI entry of the specific URI action dictionary or null if there isn't any.
      */
@@ -91,7 +91,21 @@ public class PDActionURI extends PDActio
         COSBase base = action.getDictionaryObject(COSName.URI);
         if (base instanceof COSString)
         {
-            return new String(((COSString) base).getBytes(), Charsets.UTF_8);
+            byte[] bytes = ((COSString) base).getBytes();
+            if (bytes.length >= 2)
+            {
+                // UTF-16 (BE)
+                if ((bytes[0] & 0xFF) == 0xFE && (bytes[1] & 0xFF) == 0xFF)
+                {
+                    return action.getString(COSName.URI);
+                }
+                // UTF-16 (LE)
+                if ((bytes[0] & 0xFF) == 0xFF && (bytes[1] & 0xFF) == 0xFE)
+                {
+                    return action.getString(COSName.URI);
+                }
+            }
+            return new String(bytes, Charsets.UTF_8);
         }
         return null;
     }

Modified: pdfbox/branches/2.0/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/action/PDActionURITest.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/2.0/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/action/PDActionURITest.java?rev=1811718&r1=1811717&r2=1811718&view=diff
==============================================================================
--- pdfbox/branches/2.0/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/action/PDActionURITest.java (original)
+++ pdfbox/branches/2.0/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/action/PDActionURITest.java Tue Oct 10 15:44:12 2017
@@ -15,6 +15,9 @@
  */
 package org.apache.pdfbox.pdmodel.interactive.action;
 
+import java.io.IOException;
+import org.apache.pdfbox.cos.COSName;
+import org.apache.pdfbox.cos.COSString;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertNull;
 import org.junit.Test;
@@ -37,4 +40,45 @@ public class PDActionURITest
         actionURI.setURI("http://経営承継.com/");
         assertEquals("http://経営承継.com/", actionURI.getURI());
     }
+
+    /**
+     * PDFBOX-3958: Check that URIs encoded in UTF-16 (BE) are also supported.
+     *
+     * @throws IOException
+     */
+    @Test
+    public void testUTF16BEURI() throws IOException
+    {
+        PDActionURI actionURI = new PDActionURI();
+        
+        // found in govdocs file 534948.pdf
+        COSString utf16URI = COSString.parseHex("FEFF0068007400740070003A002F002F00770077"
+                + "0077002E006E00610070002E006500640075002F0063006100740061006C006F006700"
+                + "2F00310031003100340030002E00680074006D006C");
+        actionURI.getCOSObject().setItem(COSName.URI, utf16URI);
+        assertEquals("http://www.nap.edu/catalog/11140.html", actionURI.getURI());
+    }
+
+    /**
+     * PDFBOX-3958: Check that URIs encoded in UTF-16 (LE) are also supported.
+     * 
+     * @throws IOException
+     */
+    @Test
+    public void testUTF16LEURI() throws IOException
+    {
+        PDActionURI actionURI = new PDActionURI();
+        
+        COSString utf16URI = COSString.parseHex("FFFE68007400740070003A00");
+        actionURI.getCOSObject().setItem(COSName.URI, utf16URI);
+        assertEquals("http:", actionURI.getURI());
+    }
+
+    @Test
+    public void testUTF7URI()
+    {
+        PDActionURI actionURI = new PDActionURI();
+        actionURI.setURI("http://pdfbox.apache.org/");
+        assertEquals("http://pdfbox.apache.org/", actionURI.getURI());
+    }
 }