You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by ja...@apache.org on 2014/12/12 05:39:38 UTC
svn commit: r1644828 [1/2] - in /pdfbox/trunk:
pdfbox/src/main/java/org/apache/pdfbox/contentstream/
pdfbox/src/main/java/org/apache/pdfbox/cos/
pdfbox/src/main/java/org/apache/pdfbox/encoding/
pdfbox/src/main/java/org/apache/pdfbox/filter/ pdfbox/src/...
Author: jahewson
Date: Fri Dec 12 04:39:37 2014
New Revision: 1644828
URL: http://svn.apache.org/r1644828
Log:
PDFBOX-1242: Remove usage of COSString with content streams
Added:
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/PDFDocEncoding.java (with props)
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/Charsets.java (with props)
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/Hex.java (with props)
Removed:
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/persistence/util/COSHEXTable.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/StringUtil.java
pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/encoding/
Modified:
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/contentstream/PDFStreamEngine.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDocument.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSName.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSString.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/filter/ASCIIHexFilter.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/ConformingPDFParser.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFObjectStreamParser.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFXrefStreamParser.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/COSStandardOutputStream.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/COSWriter.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/ContentStreamWriter.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/edit/PDPageContentStream.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/encryption/PDEncryption.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/encryption/SecurityHandler.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/encryption/StandardSecurityHandler.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fdf/FDFDictionary.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/PDSignature.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/documentnavigation/destination/PDNamedDestination.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PDAppearanceString.java
pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/cos/TestCOSString.java
pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java
pdfbox/trunk/preflight/src/test/java/org/apache/pdfbox/preflight/utils/TestCOSUtils.java
pdfbox/trunk/tools/src/main/java/org/apache/pdfbox/tools/ExtractText.java
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/contentstream/PDFStreamEngine.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/contentstream/PDFStreamEngine.java?rev=1644828&r1=1644827&r2=1644828&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/contentstream/PDFStreamEngine.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/contentstream/PDFStreamEngine.java Fri Dec 12 04:39:37 2014
@@ -79,9 +79,6 @@ public class PDFStreamEngine
private boolean isProcessingPage;
private Matrix initialMatrix;
- // skip malformed or otherwise unparseable input where possible
- private boolean forceParsing;
-
/**
* Creates a new PDFStreamEngine.
*/
@@ -90,26 +87,6 @@ public class PDFStreamEngine
}
/**
- * Indicates if force parsing is activated.
- *
- * @return true if force parsing is active
- */
- public boolean isForceParsing()
- {
- return forceParsing;
- }
-
- /**
- * Enable/Disable force parsing.
- *
- * @param forceParsingValue true activates force parsing
- */
- public void setForceParsing(boolean forceParsingValue)
- {
- forceParsing = forceParsingValue;
- }
-
- /**
* Register a custom operator processor with the engine.
*
* @param operator The operator as a string.
@@ -473,7 +450,7 @@ public class PDFStreamEngine
private void processStreamOperators(PDContentStream contentStream) throws IOException
{
List<COSBase> arguments = new ArrayList<COSBase>();
- PDFStreamParser parser = new PDFStreamParser(contentStream.getContentStream(), forceParsing);
+ PDFStreamParser parser = new PDFStreamParser(contentStream.getContentStream());
try
{
Iterator<Object> iter = parser.getTokenIterator();
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDocument.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDocument.java?rev=1644828&r1=1644827&r2=1644828&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDocument.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDocument.java Fri Dec 12 04:39:37 2014
@@ -91,34 +91,16 @@ public class COSDocument extends COSBase
private final File scratchDirectory;
private final boolean useScratchFile;
-
- /**
- * Flag to skip malformed or otherwise unparseable input where possible.
- */
- private final boolean forceParsing;
-
- /**
- * Constructor.
- *
- * @param forceParsingValue flag to skip malformed or otherwise unparseable
- * document content where possible
- */
- public COSDocument(boolean forceParsingValue)
- {
- this(null, forceParsingValue, false);
- }
/**
* Constructor.
*
- * @param forceParsingValue flag to skip malformed or otherwise unparseable
- * document content where possible
* @param useScratchFiles enables the usage of a scratch file if set to true
*
*/
- public COSDocument(boolean forceParsingValue, boolean useScratchFiles)
+ public COSDocument(boolean useScratchFiles)
{
- this(null, forceParsingValue, useScratchFiles);
+ this(null, useScratchFiles);
}
/**
@@ -128,14 +110,11 @@ public class COSDocument extends COSBase
*
* @param scratchDir directory for the temporary file,
* or <code>null</code> to use the system default
- * @param forceParsingValue flag to skip malformed or otherwise unparseable
- * document content where possible
* @param useScratchFiles enables the usage of a scratch file if set to true
*
*/
- public COSDocument(File scratchDir, boolean forceParsingValue, boolean useScratchFiles)
+ public COSDocument(File scratchDir, boolean useScratchFiles)
{
- forceParsing = forceParsingValue;
scratchDirectory = scratchDir;
useScratchFile = useScratchFiles;
}
@@ -145,7 +124,7 @@ public class COSDocument extends COSBase
*/
public COSDocument()
{
- this(false, false);
+ this(false);
}
/**
@@ -620,8 +599,7 @@ public class COSDocument extends COSBase
for( COSObject objStream : getObjectsByType( COSName.OBJ_STM ) )
{
COSStream stream = (COSStream)objStream.getObject();
- PDFObjectStreamParser parser =
- new PDFObjectStreamParser(stream, this, forceParsing);
+ PDFObjectStreamParser parser = new PDFObjectStreamParser(stream, this);
parser.parse();
for( COSObject next : parser.getObjects() )
{
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSName.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSName.java?rev=1644828&r1=1644827&r2=1644828&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSName.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSName.java Fri Dec 12 04:39:37 2014
@@ -21,11 +21,12 @@ import java.io.OutputStream;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
+import org.apache.pdfbox.util.Charsets;
-import org.apache.pdfbox.persistence.util.COSHEXTable;
/**
- * A PDF named object.
+ * A PDF Name object.
+ *
* @author Ben Litchfield
*/
public final class COSName extends COSBase implements Comparable<COSName>
@@ -37,12 +38,6 @@ public final class COSName extends COSBa
// hey are already defined as static constants and don't need to be synchronized
private static Map<String, COSName> commonNameMap = new HashMap<String, COSName>();
- /** The prefix to a PDF name. */
- public static final byte[] NAME_PREFIX = new byte[] { 47 }; // The / character
-
- /** The escape character for a name. */
- public static final byte[] NAME_ESCAPE = new byte[] { 35 }; // The # character
-
//
// IMPORTANT: this list is *alphabetized* and does not need any JavaDoc
//
@@ -578,15 +573,9 @@ public final class COSName extends COSBa
}
@Override
- public boolean equals(Object o)
+ public boolean equals(Object object)
{
- boolean retval = this == o;
- if (!retval && o instanceof COSName)
- {
- COSName other = (COSName) o;
- retval = name == other.name || name.equals(other.name);
- }
- return retval;
+ return object instanceof COSName && name.equals(((COSName) object).name);
}
@Override
@@ -624,39 +613,37 @@ public final class COSName extends COSBa
*/
public void writePDF(OutputStream output) throws IOException
{
- output.write(NAME_PREFIX);
- byte[] bytes = getName().getBytes("ISO-8859-1");
- for (int i = 0; i < bytes.length; i++)
+ output.write('/');
+ byte[] bytes = getName().getBytes(Charsets.US_ASCII);
+ for (byte b : bytes)
{
- int current = (bytes[i] + 256) % 256;
+ int current = (b + 256) % 256;
- // Be more restrictive than the PDF spec, "Name Objects"
- // see PDFBOX-2073
- if ((current >= 'A' && current <= 'Z')
- || (current >= 'a' && current <= 'z')
- || (current >= '0' && current <= '9')
- || current == '+'
- || current == '-'
- || current == '_'
- || current == '@'
- || current == '*'
- || current == '$'
- || current == ';'
- || current == '.')
+ // be more restrictive than the PDF spec, "Name Objects", see PDFBOX-2073
+ if (current >= 'A' && current <= 'Z' ||
+ current >= 'a' && current <= 'z' ||
+ current >= '0' && current <= '9' ||
+ current == '+' ||
+ current == '-' ||
+ current == '_' ||
+ current == '@' ||
+ current == '*' ||
+ current == '$' ||
+ current == ';' ||
+ current == '.')
{
output.write(current);
}
else
{
- output.write(NAME_ESCAPE);
- output.write(COSHEXTable.TABLE[current]);
+ output.write('#');
+ output.write(String.format("%02X", current).getBytes(Charsets.US_ASCII));
}
}
}
/**
- * Not usually needed except if resources need to be reclaimed in a long running process. Patch provided by
- * flester@GMail.com incorporated 5/23/08, Danielwilson@users.SourceForge.net
+ * Not usually needed except if resources need to be reclaimed in a long running process.
*/
public static synchronized void clearResources()
{
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSString.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSString.java?rev=1644828&r1=1644827&r2=1644828&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSString.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSString.java Fri Dec 12 04:39:37 2014
@@ -18,221 +18,70 @@ package org.apache.pdfbox.cos;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
-import java.io.OutputStream;
-import java.nio.charset.Charset;
+import java.util.Arrays;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
-import org.apache.pdfbox.encoding.PDFDocEncodingCharset;
-import org.apache.pdfbox.persistence.util.COSHEXTable;
+import org.apache.pdfbox.util.Charsets;
+import org.apache.pdfbox.util.Hex;
/**
- * This represents a string object in a PDF document.
+ * A string object, which may be a text string, a PDFDocEncoded string, ASCII string, or byte string.
+ *
+ * <p>Text strings are used for character strings that contain information intended to be
+ * human-readable, such as text annotations, bookmark names, article names, document information,
+ * and so forth.
+ *
+ * <p>PDFDocEncoded strings are used for characters that are represented in a single byte.
+ *
+ * <p>ASCII strings are used for characters that are represented in a single byte using ASCII
+ * encoding.
+ *
+ * <p>Byte strings are used for binary data represented as a series of bytes, but the encoding is
+ * not known. The bytes of the string need not represent characters.
*
- * @author <a href="mailto:ben@benlitchfield.com">Ben Litchfield</a>
- * @version $Revision: 1.30 $
+ * @author Ben Litchfield
+ * @author John Hewson
*/
-public class COSString extends COSBase
+public final class COSString extends COSBase
{
-
- /**
- * Log instance.
- */
private static final Log LOG = LogFactory.getLog(COSString.class);
- /**
- * One of the open string tokens.
- */
- public static final byte[] STRING_OPEN = new byte[] { 40 }; // "(".getBytes();
- /**
- * One of the close string tokens.
- */
- public static final byte[] STRING_CLOSE = new byte[] { 41 }; // ")".getBytes( "ISO-8859-1" );
- /**
- * One of the open string tokens.
- */
- public static final byte[] HEX_STRING_OPEN = new byte[] { 60 }; // "<".getBytes( "ISO-8859-1" );
- /**
- * One of the close string tokens.
- */
- public static final byte[] HEX_STRING_CLOSE = new byte[] { 62 }; // ">".getBytes( "ISO-8859-1" );
- /**
- * the escape character in strings.
- */
- public static final byte[] ESCAPE = new byte[] { 92 }; // "\\".getBytes( "ISO-8859-1" );
-
- /**
- * CR escape characters.
- */
- public static final byte[] CR_ESCAPE = new byte[] { 92, 114 }; // "\\r".getBytes( "ISO-8859-1" );
- /**
- * LF escape characters.
- */
- public static final byte[] LF_ESCAPE = new byte[] { 92, 110 }; // "\\n".getBytes( "ISO-8859-1" );
- /**
- * HT escape characters.
- */
- public static final byte[] HT_ESCAPE = new byte[] { 92, 116 }; // "\\t".getBytes( "ISO-8859-1" );
- /**
- * BS escape characters.
- */
- public static final byte[] BS_ESCAPE = new byte[] { 92, 98 }; // "\\b".getBytes( "ISO-8859-1" );
- /**
- * FF escape characters.
- */
- public static final byte[] FF_ESCAPE = new byte[] { 92, 102 }; // "\\f".getBytes( "ISO-8859-1" );
-
- private ByteArrayOutputStream out = null;
- private String str = null;
-
- /**
- * Forces the string to be serialized in hex form but not literal form, the default is to stream in literal form.
- */
- private boolean forceHexForm = false;
-
- /**
- * Constructor.
- */
- public COSString()
- {
- out = new ByteArrayOutputStream();
- }
-
- /**
- * Explicit constructor for ease of manual PDF construction.
- *
- * @param value
- * The string value of the object.
- */
- public COSString(String value)
- {
- try
- {
- boolean unicode16 = false;
- char[] chars = value.toCharArray();
- int length = chars.length;
- for (int i = 0; i < length; i++)
- {
- if (chars[i] > 255)
- {
- unicode16 = true;
- break;
- }
- }
- if (unicode16)
- {
- byte[] data = value.getBytes("UTF-16BE");
- out = new ByteArrayOutputStream(data.length + 2);
- out.write(0xFE);
- out.write(0xFF);
- out.write(data);
- }
- else
- {
- byte[] data = value.getBytes("ISO-8859-1");
- out = new ByteArrayOutputStream(data.length);
- out.write(data);
- }
- }
- catch (IOException ignore)
- {
- LOG.error(ignore,ignore);
- // should never happen
- }
- }
-
- /**
- * Explicit constructor for ease of manual PDF construction.
- *
- * @param value
- * The string value of the object.
- */
- public COSString(byte[] value)
- {
- try
- {
- out = new ByteArrayOutputStream(value.length);
- out.write(value);
- }
- catch (IOException ignore)
- {
- LOG.error(ignore,ignore);
- // should never happen
- }
- }
-
- /**
- * Forces the string to be written in literal form instead of hexadecimal form.
- *
- * @param v
- * if v is true the string will be written in literal form, otherwise it will be written in hexa if
- * necessary.
- */
-
- public void setForceLiteralForm(boolean v)
- {
- forceHexForm = !v;
- }
-
- /**
- * Forces the string to be written in hexadecimal form instead of literal form.
- *
- * @param v
- * if v is true the string will be written in hexadecimal form otherwise it will be written in literal if
- * necessary.
- */
-
- public void setForceHexForm(boolean v)
- {
- forceHexForm = v;
- }
+ // legacy behaviour for old PDFParser
+ public static final boolean FORCE_PARSING =
+ Boolean.getBoolean("org.apache.pdfbox.forceParsing");
/**
* This will create a COS string from a string of hex characters.
- *
- * @param hex
- * A hex string.
+ *
+ * @param hex A hex string.
* @return A cos string with the hex characters converted to their actual bytes.
- * @throws IOException
- * If there is an error with the hex string.
+ * @throws IOException If there is an error with the hex string.
*/
- public static COSString createFromHexString(String hex) throws IOException
+ public static COSString parseHex(String hex) throws IOException
{
- return createFromHexString(hex, false);
- }
-
- /**
- * Creates a COS string from a string of hex characters, optionally ignoring malformed input.
- *
- * @param hex
- * A hex string.
- * @param force
- * flag to ignore malformed input
- * @return A cos string with the hex characters converted to their actual bytes.
- * @throws IOException
- * If there is an error with the hex string.
- */
- public static COSString createFromHexString(String hex, boolean force) throws IOException
- {
- COSString retval = new COSString();
+ ByteArrayOutputStream bytes = new ByteArrayOutputStream();
StringBuilder hexBuffer = new StringBuilder(hex.trim());
+
// if odd number then the last hex digit is assumed to be 0
if (hexBuffer.length() % 2 != 0)
{
hexBuffer.append('0');
}
+
int length = hexBuffer.length();
for (int i = 0; i < length; i += 2)
{
try
{
- retval.append(Integer.parseInt(hexBuffer.substring(i, i + 2), 16));
+ bytes.write(Integer.parseInt(hexBuffer.substring(i, i + 2), 16));
}
catch (NumberFormatException e)
{
- if (force)
+ if (FORCE_PARSING)
{
- retval.append('?');
+ LOG.warn("Encountered a malformed hex string");
+ bytes.write('?'); // todo: what does Acrobat do? Any example PDFs?
}
else
{
@@ -240,201 +89,154 @@ public class COSString extends COSBase
}
}
}
- return retval;
+
+ return new COSString(bytes.toByteArray());
}
+ private byte[] bytes;
+ private boolean forceHexForm;
+
/**
- * This will take this string and create a hex representation of the bytes that make the string.
- *
- * @return A hex string representing the bytes in this string.
+ * Creates a new PDF string from a byte array. This method can be used to read a string from
+ * an existing PDF file, or to create a new byte string.
+ *
+ * @param bytes The raw bytes of the PDF text string or byte string.
*/
- public String getHexString()
+ public COSString(byte[] bytes)
{
- StringBuilder retval = new StringBuilder(out.size() * 2);
- byte[] data = getBytes();
- int length = data.length;
- for (int i = 0; i < length; i++)
- {
- retval.append(COSHEXTable.HEX_TABLE[(data[i] + 256) % 256]);
- }
-
- return retval.toString();
+ setValue(bytes);
}
/**
- * This will get the string that this object wraps.
- *
- * @return The wrapped string.
+ * Creates a new <i>text string</i> from a Java String.
+ *
+ * @param text The string value of the object.
*/
- public String getString()
+ public COSString(String text)
{
- if (this.str != null)
+ // check whether the string uses only characters available in PDFDocEncoding
+ boolean isOnlyPDFDocEncoding = true;
+ for (char c : text.toCharArray())
+ {
+ if (!PDFDocEncoding.containsChar(c))
+ {
+ isOnlyPDFDocEncoding = false;
+ break;
+ }
+ }
+
+ if (isOnlyPDFDocEncoding)
{
- return this.str;
+ // PDFDocEncoded string
+ bytes = PDFDocEncoding.getBytes(text);
}
- String retval;
- Charset charset = PDFDocEncodingCharset.INSTANCE;
- byte[] data = getBytes();
- int start = 0;
- if (data.length > 2)
+ else
{
- if (data[0] == (byte) 0xFF && data[1] == (byte) 0xFE)
+ // UTF-16BE encoded string with a leading byte order marker
+ byte[] data = text.getBytes(Charsets.UTF_16BE);
+ ByteArrayOutputStream out = new ByteArrayOutputStream(data.length + 2);
+ out.write(0xFE); // BOM
+ out.write(0xFF); // BOM
+ try
{
- charset = Charset.forName("UTF-16LE");
- start = 2;
+ out.write(data);
}
- else if (data[0] == (byte) 0xFE && data[1] == (byte) 0xFF)
+ catch (IOException e)
{
- charset = Charset.forName("UTF-16BE");
- start = 2;
+ // should never happen
+ throw new RuntimeException(e);
}
+ bytes = out.toByteArray();
}
+ }
- retval = new String(data, start, data.length - start, charset);
- str = retval;
- return retval;
+ /**
+ * Sets the raw value of this string.
+ *
+ * @param value The raw bytes of the PDF text string or byte string.
+ */
+ public void setValue(byte[] value)
+ {
+ bytes = value.clone();
}
/**
- * This will append a byte[] to the string.
- *
- * @param data
- * The byte[] to add to this string.
- *
- * @throws IOException
- * If an IO error occurs while writing the byte.
+ * Sets whether or not the string is forced to be written in hex form.
+ * This is needed when signing PDF files.
+ *
+ * @param value True to force hex.
*/
- public void append(byte[] data) throws IOException
+ public void setForceHexForm(boolean value)
{
- out.write(data);
- this.str = null;
+ this.forceHexForm = value;
}
/**
- * This will append a byte to the string.
- *
- * @param in
- * The byte to add to this string.
- *
- * @throws IOException
- * If an IO error occurs while writing the byte.
+ * Returns true if the string is to be written in hex form.
*/
- public void append(int in) throws IOException
+ public boolean getForceHexForm()
{
- out.write(in);
- this.str = null;
+ return forceHexForm;
}
/**
- * This will reset the internal buffer.
+ * Returns the content of this string as a PDF <i>text string</i>.
*/
- public void reset()
+ public String getString()
{
- out.reset();
- this.str = null;
+ // text string - BOM indicates Unicode
+ if (bytes.length > 2)
+ {
+ if ((bytes[0] & 0xff) == 0xFE && (bytes[1] & 0xff) == 0xFF)
+ {
+ // UTF-16BE
+ return new String(bytes, 2, bytes.length - 2, Charsets.UTF_16BE);
+ }
+ else if ((bytes[0] & 0xff) == 0xFF && (bytes[1] & 0xff) == 0xFE)
+ {
+ // UTF-16LE - not in the PDF spec!
+ return new String(bytes, 2, bytes.length - 2, Charsets.UTF_16LE);
+ }
+ }
+
+ // otherwise use PDFDocEncoding
+ return PDFDocEncoding.toString(bytes);
}
/**
- * This will get the bytes of the string.
- *
- * @return A byte array that represents the string.
+ * Returns the content of this string as a PDF <i>ASCII string</i>.
*/
- public byte[] getBytes()
+ public String getASCII()
{
- return out.toByteArray();
+ // ASCII string
+ return new String(bytes, Charsets.US_ASCII);
}
/**
- * {@inheritDoc}
+ * Returns the raw bytes of the string. Best used with a PDF <i>byte string</i>.
*/
- @Override
- public String toString()
+ public byte[] getBytes()
{
- return "COSString{" + this.getString() + "}";
+ return bytes;
}
/**
- * This will output this string as a PDF object.
- *
- * @param output
- * The stream to write to.
- * @throws IOException
- * If there is an error writing to the stream.
- */
- public void writePDF(OutputStream output) throws IOException
- {
- boolean outsideASCII = false;
- // Lets first check if we need to escape this string.
- byte[] bytes = getBytes();
- int length = bytes.length;
- for (int i = 0; i < length && !outsideASCII; i++)
- {
- // if the byte is negative then it is an eight bit byte and is
- // outside the ASCII range.
- outsideASCII = bytes[i] < 0;
- }
- if (!outsideASCII && !forceHexForm)
- {
- output.write(STRING_OPEN);
- for (int i = 0; i < length; i++)
- {
- int b = (bytes[i] + 256) % 256;
- switch (b)
- {
- case '(':
- case ')':
- case '\\':
- {
- output.write(ESCAPE);
- output.write((byte) b);
- break;
- }
- case 10: // LF
- {
- output.write(LF_ESCAPE);
- break;
- }
- case 13: // CR
- {
- output.write(CR_ESCAPE);
- break;
- }
- case '\t':
- {
- output.write(HT_ESCAPE);
- break;
- }
- case '\b':
- {
- output.write(BS_ESCAPE);
- break;
- }
- case '\f':
- {
- output.write(FF_ESCAPE);
- break;
- }
- default:
- {
- output.write((byte) b);
- }
- }
- }
- output.write(STRING_CLOSE);
- }
- else
+ * This will take this string and create a hex representation of the bytes that make the string.
+ *
+ * @return A hex string representing the bytes in this string.
+ */
+ public String toHexString()
+ {
+ StringBuilder sb = new StringBuilder(bytes.length * 2);
+ for (byte b : bytes)
{
- output.write(HEX_STRING_OPEN);
- for (int i = 0; i < length; i++)
- {
- output.write(COSHEXTable.TABLE[(bytes[i] + 256) % 256]);
- }
- output.write(HEX_STRING_CLOSE);
+ sb.append(Hex.getString(b));
}
+ return sb.toString();
}
/**
- * visitor pattern double dispatch method.
+ * Visitor pattern double dispatch method.
*
* @param visitor The object to notify when visiting this object.
* @return any object, depending on the visitor implementation, or null
@@ -446,27 +248,22 @@ public class COSString extends COSBase
return visitor.visitFromString(this);
}
- /**
- * {@inheritDoc}
- */
@Override
public boolean equals(Object obj)
{
if (obj instanceof COSString)
{
COSString strObj = (COSString) obj;
- return this.getString().equals(strObj.getString()) && this.forceHexForm == strObj.forceHexForm;
+ return getString().equals(strObj.getString()) &&
+ forceHexForm == strObj.forceHexForm;
}
return false;
}
- /**
- * {@inheritDoc}
- */
@Override
public int hashCode()
{
- int result = getString().hashCode();
- return result += forceHexForm ? 17 : 0;
+ int result = Arrays.hashCode(bytes);
+ return result + (forceHexForm ? 17 : 0);
}
}
Added: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/PDFDocEncoding.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/PDFDocEncoding.java?rev=1644828&view=auto
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/PDFDocEncoding.java (added)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/PDFDocEncoding.java Fri Dec 12 04:39:37 2014
@@ -0,0 +1,150 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.pdfbox.cos;
+
+import java.io.ByteArrayOutputStream;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * The "PDFDocEncoding" encoding. Note that this is *not* a Type 1 font encoding, it is used only
+ * within PDF "text strings".
+ */
+class PDFDocEncoding
+{
+    private static final char REPLACEMENT_CHARACTER = '\uFFFD';
+
+    private static final int[] codeToUni;                    // code (0-255) -> Unicode
+    private static final Map<Character, Integer> uniToCode;  // Unicode -> code
+
+    static
+    {
+        codeToUni = new int[256];
+        uniToCode = new HashMap<Character, Integer>(256);
+
+        // initialize with basically ISO-8859-1
+        for (int i = 0; i < 256; i++)
+        {
+            set(i, (char)i);
+        }
+
+        // then do all deviations (based on the table in ISO 32000-1:2008)
+        // block 1
+        set(0x18, '\u02D8'); // BREVE
+        set(0x19, '\u02C7'); // CARON
+        set(0x1A, '\u02C6'); // MODIFIER LETTER CIRCUMFLEX ACCENT
+        set(0x1B, '\u02D9'); // DOT ABOVE
+        set(0x1C, '\u02DD'); // DOUBLE ACUTE ACCENT
+        set(0x1D, '\u02DB'); // OGONEK
+        set(0x1E, '\u02DA'); // RING ABOVE
+        set(0x1F, '\u02DC'); // SMALL TILDE
+        // block 2
+        set(0x7F, REPLACEMENT_CHARACTER); // undefined
+        set(0x80, '\u2022'); // BULLET
+        set(0x81, '\u2020'); // DAGGER
+        set(0x82, '\u2021'); // DOUBLE DAGGER
+        set(0x83, '\u2026'); // HORIZONTAL ELLIPSIS
+        set(0x84, '\u2014'); // EM DASH
+        set(0x85, '\u2013'); // EN DASH
+        set(0x86, '\u0192'); // LATIN SMALL LETTER SCRIPT F
+        set(0x87, '\u2044'); // FRACTION SLASH (solidus)
+        set(0x88, '\u2039'); // SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+        set(0x89, '\u203A'); // SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+        set(0x8A, '\u2212'); // MINUS SIGN
+        set(0x8B, '\u2030'); // PER MILLE SIGN
+        set(0x8C, '\u201E'); // DOUBLE LOW-9 QUOTATION MARK (quotedblbase)
+        set(0x8D, '\u201C'); // LEFT DOUBLE QUOTATION MARK (quotedblleft)
+        set(0x8E, '\u201D'); // RIGHT DOUBLE QUOTATION MARK (quotedblright)
+        set(0x8F, '\u2018'); // LEFT SINGLE QUOTATION MARK (quoteleft)
+        set(0x90, '\u2019'); // RIGHT SINGLE QUOTATION MARK (quoteright)
+        set(0x91, '\u201A'); // SINGLE LOW-9 QUOTATION MARK (quotesinglbase)
+        set(0x92, '\u2122'); // TRADE MARK SIGN
+        set(0x93, '\uFB01'); // LATIN SMALL LIGATURE FI
+        set(0x94, '\uFB02'); // LATIN SMALL LIGATURE FL
+        set(0x95, '\u0141'); // LATIN CAPITAL LETTER L WITH STROKE
+        set(0x96, '\u0152'); // LATIN CAPITAL LIGATURE OE
+        set(0x97, '\u0160'); // LATIN CAPITAL LETTER S WITH CARON
+        set(0x98, '\u0178'); // LATIN CAPITAL LETTER Y WITH DIAERESIS
+        set(0x99, '\u017D'); // LATIN CAPITAL LETTER Z WITH CARON
+        set(0x9A, '\u0131'); // LATIN SMALL LETTER DOTLESS I
+        set(0x9B, '\u0142'); // LATIN SMALL LETTER L WITH STROKE
+        set(0x9C, '\u0153'); // LATIN SMALL LIGATURE OE
+        set(0x9D, '\u0161'); // LATIN SMALL LETTER S WITH CARON
+        set(0x9E, '\u017E'); // LATIN SMALL LETTER Z WITH CARON
+        set(0x9F, REPLACEMENT_CHARACTER); // undefined
+        set(0xA0, '\u20AC'); // EURO SIGN
+    }
+
+    /** Maps code to unicode and back, removing the stale reverse entry of the code's old mapping. */
+    private static void set(int code, char unicode)
+    {
+        char previous = (char)codeToUni[code];
+        Integer previousCode = uniToCode.get(previous);
+        if (previousCode != null && previousCode == code)
+        {
+            // else a remapped ISO-8859-1 default would still look encodable
+            uniToCode.remove(previous);
+        }
+        codeToUni[code] = unicode;
+        uniToCode.put(unicode, code);
+    }
+
+    /**
+     * Returns the string representation of the given PDFDocEncoded bytes.
+     *
+     * @param bytes PDFDocEncoded bytes
+     * @return the decoded text; the undefined codes 0x7F and 0x9F decode to U+FFFD
+     */
+    public static String toString(byte[] bytes)
+    {
+        StringBuilder sb = new StringBuilder(bytes.length);
+        for (byte b : bytes)
+        {
+            sb.append((char)codeToUni[b & 0xff]); // masked byte is always a valid index
+        }
+        return sb.toString();
+    }
+
+    /**
+     * Returns the given string encoded with PDFDocEncoding.
+     *
+     * @param text the text to encode
+     * @return one byte per char; chars not available in PDFDocEncoding are written as 0
+     */
+    public static byte[] getBytes(String text)
+    {
+        ByteArrayOutputStream out = new ByteArrayOutputStream(text.length());
+        for (char c : text.toCharArray())
+        {
+            Integer code = uniToCode.get(c);
+            out.write(code == null ? 0 : code); // the code, not the UTF-16 low byte
+        }
+        return out.toByteArray();
+    }
+
+    /**
+     * Returns true if the given character is available in PDFDocEncoding.
+     *
+     * @param character UTF-16 character
+     * @return true if the character can be encoded as a single PDFDocEncoding byte
+     */
+    public static boolean containsChar(char character)
+    {
+        return uniToCode.containsKey(character);
+    }
+}
Propchange: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/PDFDocEncoding.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/filter/ASCIIHexFilter.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/filter/ASCIIHexFilter.java?rev=1644828&r1=1644827&r2=1644828&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/filter/ASCIIHexFilter.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/filter/ASCIIHexFilter.java Fri Dec 12 04:39:37 2014
@@ -23,11 +23,11 @@ import java.io.OutputStream;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.pdfbox.cos.COSDictionary;
-
-import org.apache.pdfbox.persistence.util.COSHEXTable;
+import org.apache.pdfbox.util.Hex;
/**
* Decodes data encoded in an ASCII hexadecimal form, reproducing the original binary data.
+ *
* @author Ben Litchfield
*/
final class ASCIIHexFilter extends Filter
@@ -106,7 +106,7 @@ final class ASCIIHexFilter extends Filte
private boolean isEOD(int c)
{
- return (c == 62); // '>' - EOD
+ return c == '>';
}
@Override
@@ -116,8 +116,7 @@ final class ASCIIHexFilter extends Filte
int byteRead;
while ((byteRead = input.read()) != -1)
{
- int value = (byteRead + 256) % 256;
- encoded.write(COSHEXTable.TABLE[value]);
+ encoded.write(Hex.getBytes((byte)byteRead));
}
encoded.flush();
}
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java?rev=1644828&r1=1644827&r2=1644828&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java Fri Dec 12 04:39:37 2014
@@ -124,12 +124,6 @@ public abstract class BaseParser
private static final String NULL = "null";
/**
- * Default value of the {@link #forceParsing} flag.
- */
- public static final boolean FORCE_PARSING =
- Boolean.getBoolean("org.apache.pdfbox.forceParsing");
-
- /**
* This is the stream that will be read from.
*/
protected PushBackInputStream pdfSource;
@@ -140,29 +134,19 @@ public abstract class BaseParser
protected COSDocument document;
/**
- * Flag to skip malformed or otherwise unparseable input where possible.
- */
- protected final boolean forceParsing;
-
- /**
* Default constructor.
*/
public BaseParser()
{
- this.forceParsing = FORCE_PARSING;
}
/**
* Constructor.
*
- * @since Apache PDFBox 1.3.0
* @param input The input stream to read the data from.
- * @param forceParsingValue flag to skip malformed or otherwise unparseable
- * input where possible
* @throws IOException If there is an error reading the input stream.
*/
- public BaseParser(InputStream input, boolean forceParsingValue)
- throws IOException
+ public BaseParser(InputStream input) throws IOException
{
int pushbacksize = 65536;
try
@@ -177,18 +161,6 @@ public abstract class BaseParser
}
this.pdfSource = new PushBackInputStream(
new BufferedInputStream(input, 16384), pushbacksize);
- this.forceParsing = forceParsingValue;
- }
-
- /**
- * Constructor.
- *
- * @param input The input stream to read the data from.
- * @throws IOException If there is an error reading the input stream.
- */
- public BaseParser(InputStream input) throws IOException
- {
- this(input, FORCE_PARSING);
}
/**
@@ -471,19 +443,19 @@ public abstract class BaseParser
{
length = ( (COSNumber) streamLength).intValue();
}
-// commented out next chunk since for the sequentially working PDFParser
-// we do not know if length object is redefined later on and the currently
-// read indirect object might be obsolete (e.g. not referenced in xref table);
-// this would result in reading wrong number of bytes;
-// Thus the only reliable information is a direct length.
-// This exclusion shouldn't harm much since in case of indirect objects they will
-// typically be defined after the stream object, thus keeping the directly
-// provided length will fix most cases
-// else if ( ( streamLength instanceof COSObject ) &&
-// ( ( (COSObject) streamLength ).getObject() instanceof COSNumber ) )
-// {
-// length = ( (COSNumber) ( (COSObject) streamLength ).getObject() ).intValue();
-// }
+ // commented out next chunk since for the sequentially working PDFParser
+ // we do not know if length object is redefined later on and the currently
+ // read indirect object might be obsolete (e.g. not referenced in xref table);
+ // this would result in reading wrong number of bytes;
+ // Thus the only reliable information is a direct length.
+ // This exclusion shouldn't harm much since in case of indirect objects they will
+ // typically be defined after the stream object, thus keeping the directly
+ // provided length will fix most cases
+ // else if ( ( streamLength instanceof COSObject ) &&
+ // ( ( (COSObject) streamLength ).getObject() instanceof COSNumber ) )
+ // {
+ // length = ( (COSNumber) ( (COSObject) streamLength ).getObject() ).intValue();
+ // }
if ( length == -1 )
{
@@ -820,7 +792,7 @@ public abstract class BaseParser
protected COSString parseCOSString() throws IOException
{
char nextChar = (char)pdfSource.read();
- COSString retval = new COSString();
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
char openBrace;
char closeBrace;
if( nextChar == '(' )
@@ -854,13 +826,13 @@ public abstract class BaseParser
braces = checkForMissingCloseParen(braces);
if( braces != 0 )
{
- retval.append( ch );
+ out.write(ch);
}
}
else if( ch == openBrace )
{
braces++;
- retval.append( ch );
+ out.write(ch);
}
else if( ch == '\\' )
{
@@ -869,35 +841,35 @@ public abstract class BaseParser
switch(next)
{
case 'n':
- retval.append( '\n' );
+ out.write('\n');
break;
case 'r':
- retval.append( '\r' );
+ out.write('\r');
break;
case 't':
- retval.append( '\t' );
+ out.write('\t');
break;
case 'b':
- retval.append( '\b' );
+ out.write('\b');
break;
case 'f':
- retval.append( '\f' );
+ out.write('\f');
break;
case ')':
// PDFBox 276 /Title (c:\)
braces = checkForMissingCloseParen(braces);
if( braces != 0 )
{
- retval.append( next );
+ out.write(next);
}
else
{
- retval.append('\\');
+ out.write('\\');
}
break;
case '(':
case '\\':
- retval.append( next );
+ out.write(next);
break;
case 10:
case 13:
@@ -950,20 +922,20 @@ public abstract class BaseParser
{
throw new IOException( "Error: Expected octal character, actual='" + octal + "'" );
}
- retval.append( character );
+ out.write(character);
break;
}
default:
{
// dropping the backslash
// see 7.3.4.2 Literal Strings for further information
- retval.append( next );
+ out.write(next);
}
}
}
else
{
- retval.append( ch );
+ out.write(ch);
}
if (nextc != -2)
{
@@ -978,7 +950,7 @@ public abstract class BaseParser
{
pdfSource.unread(c);
}
- return retval;
+ return new COSString(out.toByteArray());
}
/**
@@ -1044,7 +1016,7 @@ public abstract class BaseParser
break;
}
}
- return COSString.createFromHexString( sBuf.toString(), forceParsing );
+ return COSString.parseHex(sBuf.toString());
}
/**
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/ConformingPDFParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/ConformingPDFParser.java?rev=1644828&r1=1644827&r2=1644828&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/ConformingPDFParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/ConformingPDFParser.java Fri Dec 12 04:39:37 2014
@@ -32,7 +32,6 @@ import org.apache.pdfbox.cos.COSInteger;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSNumber;
import org.apache.pdfbox.cos.COSObject;
-import org.apache.pdfbox.cos.COSStream;
import org.apache.pdfbox.cos.COSString;
import org.apache.pdfbox.cos.COSUnread;
import org.apache.pdfbox.io.RandomAccess;
@@ -297,7 +296,7 @@ public class ConformingPDFParser extends
protected COSBase processCosObject(String string) throws IOException {
if(string != null && string.endsWith(">")) {
// string of hex codes
- return COSString.createFromHexString(string.replaceAll("^<", "").replaceAll(">$", ""));
+ return COSString.parseHex(string.replaceAll("^<", "").replaceAll(">$", ""));
}
return null;
}
@@ -322,12 +321,12 @@ public class ConformingPDFParser extends
lastSection = lastSection.replaceAll("]$", "");
while(!lastSection.startsWith("[")) {
if(lastSection.matches("^\\s*<.*>\\s*$")) // it's a hex string
- array.add(COSString.createFromHexString(lastSection.replaceAll("^\\s*<", "").replaceAll(">\\s*$", "")));
+ array.add(COSString.parseHex(lastSection.replaceAll("^\\s*<", "").replaceAll(">\\s*$", "")));
lastSection = readBackwardUntilWhitespace();
}
lastSection = lastSection.replaceAll("^\\[", "");
if(lastSection.matches("^\\s*<.*>\\s*$")) // it's a hex string
- array.add(COSString.createFromHexString(lastSection.replaceAll("^\\s*<", "").replaceAll(">\\s*$", "")));
+ array.add(COSString.parseHex(lastSection.replaceAll("^\\s*<", "").replaceAll(">\\s*$", "")));
obj = array;
} else if(lastSection != null && lastSection.endsWith(">")) {
// string of hex codes
@@ -516,7 +515,7 @@ public class ConformingPDFParser extends
sb.append((char)singleByte);
singleByte = readByte();
}
- return new COSString(sb.toString());
+ return new COSString(sb.toString()); // fixme: should be calling COSString(byte[])
} else {
throw new RuntimeException("Not yet implemented: " + string
+ " loation=" + this.currentOffset);
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java?rev=1644828&r1=1644827&r2=1644828&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java Fri Dec 12 04:39:37 2014
@@ -311,7 +311,7 @@ public class NonSequentialPDFParser exte
+ " does not contain an integer value, but: '" + eofLookupRangeStr + "'");
}
}
- setDocument(new COSDocument(false, useScratchFiles));
+ setDocument(new COSDocument(useScratchFiles));
pdfSource = new PushBackInputStream(raStream, 4096);
}
@@ -1582,7 +1582,7 @@ public class NonSequentialPDFParser exte
{
// parse object stream
PDFObjectStreamParser parser = new PDFObjectStreamParser(
- (COSStream) objstmBaseObj, document, forceParsing);
+ (COSStream) objstmBaseObj, document);
parser.parse();
// get set of object numbers referenced for this object
@@ -1611,7 +1611,7 @@ public class NonSequentialPDFParser exte
/**
*
* @param dict the dictionary to be decrypted
- * @param the object number
+ * @param objNr the object number
* @param objGenNr the object generation number
* @throws IOException ff something went wrong
*/
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFObjectStreamParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFObjectStreamParser.java?rev=1644828&r1=1644827&r2=1644828&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFObjectStreamParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFObjectStreamParser.java Fri Dec 12 04:39:37 2014
@@ -50,37 +50,18 @@ public class PDFObjectStreamParser exten
/**
* Constructor.
*
- * @since Apache PDFBox 1.3.0
* @param strm The stream to parse.
* @param doc The document for the current parsing.
- * @param forceParsing flag to skip malformed or otherwise unparseable
- * input where possible
* @throws IOException If there is an error initializing the stream.
*/
- public PDFObjectStreamParser(
- COSStream strm, COSDocument doc, boolean forceParsing)
- throws IOException
+ public PDFObjectStreamParser(COSStream strm, COSDocument doc) throws IOException
{
- super(strm.getUnfilteredStream(), forceParsing);
+ super(strm.getUnfilteredStream());
setDocument(doc);
stream = strm;
}
/**
- * Constructor.
- *
- * @param strm The stream to parse.
- * @param doc The document for the current parsing.
- *
- * @throws IOException If there is an error initializing the stream.
- */
- public PDFObjectStreamParser(COSStream strm, COSDocument doc)
- throws IOException
- {
- this(strm, doc, FORCE_PARSING);
- }
-
- /**
* This will parse the tokens in the stream. This will close the
* stream when it is finished parsing.
*
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java?rev=1644828&r1=1644827&r2=1644828&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java Fri Dec 12 04:39:37 2014
@@ -63,7 +63,14 @@ public class PDFParser extends BaseParse
private static final String PDF_DEFAULT_VERSION = "1.4";
private static final String FDF_DEFAULT_VERSION = "1.0";
-
+
+ // deprecated functionality from BaseParser:
+ /**
+ * Default value of the {@link #forceParsing} flag.
+ */
+ public static final boolean FORCE_PARSING =
+ Boolean.getBoolean("org.apache.pdfbox.forceParsing");
+
/**
* A list of duplicate objects found when Parsing the PDF
* File.
@@ -84,17 +91,21 @@ public class PDFParser extends BaseParse
* Temp file directory.
*/
private File tempDirectory = null;
-
private final boolean useScratchFile;
/**
+ * Flag to skip malformed or otherwise unparseable input where possible.
+ */
+ protected final boolean forceParsing;
+
+ /**
* Constructor.
*
* @param input The input stream that contains the PDF document.
*
* @throws IOException If there is an error initializing the stream.
*/
- public PDFParser( InputStream input ) throws IOException
+ public PDFParser( InputStream input ) throws IOException
{
this(input, FORCE_PARSING);
}
@@ -108,7 +119,7 @@ public class PDFParser extends BaseParse
*
* @throws IOException If there is an error initializing the stream.
*/
- public PDFParser(InputStream input, boolean force) throws IOException
+ public PDFParser(InputStream input, boolean force) throws IOException
{
this(input, force, false);
}
@@ -123,9 +134,10 @@ public class PDFParser extends BaseParse
*
* @throws IOException If there is an error initializing the stream.
*/
- public PDFParser(InputStream input, boolean force, boolean useScratchFiles) throws IOException
+ public PDFParser(InputStream input, boolean force, boolean useScratchFiles) throws IOException
{
- super(input, force);
+ super(input);
+ forceParsing = force;
useScratchFile = useScratchFiles;
}
@@ -168,11 +180,11 @@ public class PDFParser extends BaseParse
{
if( tempDirectory != null )
{
- document = new COSDocument( tempDirectory, forceParsing, true );
+ document = new COSDocument( tempDirectory, true );
}
else if(useScratchFile)
{
- document = new COSDocument( null, forceParsing, true );
+ document = new COSDocument( null, true );
}
else
{
@@ -369,7 +381,7 @@ public class PDFParser extends BaseParse
throw new IOException( "Error: Header doesn't contain versioninfo" );
}
- //sometimes there are some garbage bytes in the header before the header
+ //sometimes there are some garbage bytes in the header before the header
//actually starts, so lets try to find the header first.
int headerStart = header.indexOf( PDF_HEADER );
if (headerStart == -1)
@@ -965,7 +977,7 @@ public class PDFParser extends BaseParse
xrefTrailerResolver.setTrailer( stream );
}
PDFXrefStreamParser parser =
- new PDFXrefStreamParser( stream, document, forceParsing, xrefTrailerResolver );
+ new PDFXrefStreamParser( stream, document, xrefTrailerResolver );
parser.parse();
}
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java?rev=1644828&r1=1644827&r2=1644828&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java Fri Dec 12 04:39:37 2014
@@ -57,29 +57,12 @@ public class PDFStreamParser extends Bas
/**
* Constructor that takes a stream to parse.
*
- * @since Apache PDFBox 1.3.0
* @param stream The stream to read data from.
- * @param forceParsing flag to skip malformed or otherwise unparseable
- * input where possible
* @throws IOException If there is an error reading from the stream.
*/
- public PDFStreamParser(InputStream stream, boolean forceParsing)
- throws IOException
+ public PDFStreamParser(InputStream stream) throws IOException
{
- super(stream, forceParsing);
- }
-
- /**
- * Constructor that takes a stream to parse.
- *
- * @param stream The stream to read data from.
- *
- * @throws IOException If there is an error reading from the stream.
- */
- public PDFStreamParser(InputStream stream)
- throws IOException
- {
- this(stream, FORCE_PARSING);
+ super(stream);
}
/**
@@ -95,21 +78,6 @@ public class PDFStreamParser extends Bas
}
/**
- * Constructor.
- *
- * @since Apache PDFBox 1.3.0
- * @param stream The stream to parse.
- * @param forceParsing flag to skip malformed or otherwise unparseable
- * input where possible
- * @throws IOException If there is an error initializing the stream.
- */
- public PDFStreamParser(COSStream stream, boolean forceParsing)
- throws IOException
- {
- this(stream.getUnfilteredStream(), forceParsing);
- }
-
- /**
* Constructor.
*
* @param stream The stream to parse.
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFXrefStreamParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFXrefStreamParser.java?rev=1644828&r1=1644827&r2=1644828&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFXrefStreamParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFXrefStreamParser.java Fri Dec 12 04:39:37 2014
@@ -44,21 +44,16 @@ public class PDFXrefStreamParser extends
/**
* Constructor.
*
- * @since 1.3.0
* @param strm The stream to parse.
* @param doc The document for the current parsing.
- * @param forceParsing flag to skip malformed or otherwise unparseable
- * input where possible
* @param resolver resolver to read the xref/trailer information
*
* @throws IOException If there is an error initializing the stream.
*/
- public PDFXrefStreamParser(
- COSStream strm, COSDocument doc, boolean forceParsing,
- XrefTrailerResolver resolver )
+ public PDFXrefStreamParser(COSStream strm, COSDocument doc, XrefTrailerResolver resolver )
throws IOException
{
- super(strm.getUnfilteredStream(), forceParsing);
+ super(strm.getUnfilteredStream());
setDocument(doc);
stream = strm;
this.xrefTrailerResolver = resolver;
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/COSStandardOutputStream.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/COSStandardOutputStream.java?rev=1644828&r1=1644827&r2=1644828&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/COSStandardOutputStream.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/COSStandardOutputStream.java Fri Dec 12 04:39:37 2014
@@ -20,8 +20,6 @@ import java.io.FilterOutputStream;
import java.io.IOException;
import java.io.OutputStream;
-import org.apache.pdfbox.util.StringUtil;
-
/**
* simple output stream with some minor features for generating "pretty" PDF files.
*
@@ -32,17 +30,17 @@ public class COSStandardOutputStream ext
/**
* To be used when 2 byte sequence is enforced.
*/
- public static final byte[] CRLF = StringUtil.getBytes("\r\n");
+ public static final byte[] CRLF = { '\r', '\n' };
/**
* Line feed character.
*/
- public static final byte[] LF = StringUtil.getBytes("\n");
+ public static final byte[] LF = { '\n' };
/**
* standard line separator.
*/
- public static final byte[] EOL = StringUtil.getBytes("\n");
+ public static final byte[] EOL = { '\n' };
// current byte position in the output stream
private long position = 0;
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/COSWriter.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/COSWriter.java?rev=1644828&r1=1644827&r2=1644828&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/COSWriter.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/COSWriter.java Fri Dec 12 04:39:37 2014
@@ -60,48 +60,40 @@ import org.apache.pdfbox.pdmodel.PDDocum
import org.apache.pdfbox.pdmodel.encryption.SecurityHandler;
import org.apache.pdfbox.pdmodel.interactive.digitalsignature.SignatureInterface;
import org.apache.pdfbox.persistence.util.COSObjectKey;
-import org.apache.pdfbox.util.StringUtil;
+import org.apache.pdfbox.util.Charsets;
+import org.apache.pdfbox.util.Hex;
/**
- * this class acts on a in-memory representation of a pdf document.
- *
- * todo no support for incremental updates
- * todo single xref section only
- * todo no linearization
+ * This class acts on an in-memory representation of a PDF document.
*
* @author Michael Traut
- * @author <a href="mailto:ben@benlitchfield.com">Ben Litchfield</a>
- *
+ * @author Ben Litchfield
*/
public class COSWriter implements ICOSVisitor, Closeable
{
-
- /**
- * Log instance.
- */
private static final Log LOG = LogFactory.getLog(COSWriter.class);
/**
* The dictionary open token.
*/
- public static final byte[] DICT_OPEN = StringUtil.getBytes("<<");
+ public static final byte[] DICT_OPEN = "<<".getBytes(Charsets.US_ASCII);
/**
* The dictionary close token.
*/
- public static final byte[] DICT_CLOSE = StringUtil.getBytes(">>");
+ public static final byte[] DICT_CLOSE = ">>".getBytes(Charsets.US_ASCII);
/**
* space character.
*/
- public static final byte[] SPACE = StringUtil.getBytes(" ");
+ public static final byte[] SPACE = { ' ' };
/**
* The start to a PDF comment.
*/
- public static final byte[] COMMENT = StringUtil.getBytes("%");
+ public static final byte[] COMMENT = { '%' };
/**
* The output version of the PDF.
*/
- public static final byte[] VERSION = StringUtil.getBytes("PDF-1.4");
+ public static final byte[] VERSION = "PDF-1.4".getBytes(Charsets.US_ASCII);
/**
* Garbage bytes used to create the PDF header.
*/
@@ -109,57 +101,57 @@ public class COSWriter implements ICOSVi
/**
* The EOF constant.
*/
- public static final byte[] EOF = StringUtil.getBytes("%%EOF");
+ public static final byte[] EOF = "%%EOF".getBytes(Charsets.US_ASCII);
// pdf tokens
/**
* The reference token.
*/
- public static final byte[] REFERENCE = StringUtil.getBytes("R");
+ public static final byte[] REFERENCE = "R".getBytes(Charsets.US_ASCII);
/**
* The XREF token.
*/
- public static final byte[] XREF = StringUtil.getBytes("xref");
+ public static final byte[] XREF = "xref".getBytes(Charsets.US_ASCII);
/**
* The xref free token.
*/
- public static final byte[] XREF_FREE = StringUtil.getBytes("f");
+ public static final byte[] XREF_FREE = "f".getBytes(Charsets.US_ASCII);
/**
* The xref used token.
*/
- public static final byte[] XREF_USED = StringUtil.getBytes("n");
+ public static final byte[] XREF_USED = "n".getBytes(Charsets.US_ASCII);
/**
* The trailer token.
*/
- public static final byte[] TRAILER = StringUtil.getBytes("trailer");
+ public static final byte[] TRAILER = "trailer".getBytes(Charsets.US_ASCII);
/**
* The start xref token.
*/
- public static final byte[] STARTXREF = StringUtil.getBytes("startxref");
+ public static final byte[] STARTXREF = "startxref".getBytes(Charsets.US_ASCII);
/**
* The starting object token.
*/
- public static final byte[] OBJ = StringUtil.getBytes("obj");
+ public static final byte[] OBJ = "obj".getBytes(Charsets.US_ASCII);
/**
* The end object token.
*/
- public static final byte[] ENDOBJ = StringUtil.getBytes("endobj");
+ public static final byte[] ENDOBJ = "endobj".getBytes(Charsets.US_ASCII);
/**
* The array open token.
*/
- public static final byte[] ARRAY_OPEN = StringUtil.getBytes("[");
+ public static final byte[] ARRAY_OPEN = "[".getBytes(Charsets.US_ASCII);
/**
* The array close token.
*/
- public static final byte[] ARRAY_CLOSE = StringUtil.getBytes("]");
+ public static final byte[] ARRAY_CLOSE = "]".getBytes(Charsets.US_ASCII);
/**
* The open stream token.
*/
- public static final byte[] STREAM = StringUtil.getBytes("stream");
+ public static final byte[] STREAM = "stream".getBytes(Charsets.US_ASCII);
/**
* The close stream token.
*/
- public static final byte[] ENDSTREAM = StringUtil.getBytes("endstream");
+ public static final byte[] ENDSTREAM = "endstream".getBytes(Charsets.US_ASCII);
private NumberFormat formatXrefOffset = new DecimalFormat("0000000000");
@@ -594,11 +586,8 @@ public class COSWriter implements ICOSVi
}
/**
- * write the x ref section for the pdf file
- *
- * currently, the pdf is reconstructed from the scratch, so we write a single section
- *
- * todo support for incremental writing?
+ * Write the xref section for the PDF file.
+ * Currently, the PDF is rebuilt from scratch, so we write a single section.
*
* @param doc The document to write the xref from.
*
@@ -798,7 +787,7 @@ public class COSWriter implements ICOSVi
// sign the bytes
SignatureInterface signatureInterface = doc.getSignatureInterface();
byte[] sign = signatureInterface.sign(signStream);
- String signature = new COSString(sign).getHexString();
+ String signature = new COSString(sign).toHexString();
// substract 2 bytes because of the enclosing "<>"
if (signature.length() > signatureLength - 2)
{
@@ -961,12 +950,6 @@ public class COSWriter implements ICOSVi
{
COSNull.NULL.accept( this );
}
- else if( current instanceof COSString )
- {
- COSString copy = new COSString();
- copy.append(((COSString)current).getBytes());
- copy.accept(this);
- }
else
{
current.accept(this);
@@ -1260,7 +1243,7 @@ public class COSWriter implements ICOSVi
@Override
public Object visitFromString(COSString obj) throws IOException
{
- if(willEncrypt)
+ if (willEncrypt)
{
document.getEncryption().getSecurityHandler().encryptString(
obj,
@@ -1268,7 +1251,7 @@ public class COSWriter implements ICOSVi
currentObjectKey.getGeneration());
}
- obj.writePDF( getStandardOutput() );
+ COSWriter.writeString(obj, getStandardOutput());
return null;
}
@@ -1378,4 +1361,99 @@ public class COSWriter implements ICOSVi
}
cosDoc.accept(this);
}
+
+ /**
+ * This will output the given byte string as a PDF object.
+ * @param string The string to write; its bytes and its force-hex flag are used.
+ * @param output The stream to write to.
+ * @throws IOException If there is an error writing to the stream.
+ */
+ public static void writeString(COSString string, OutputStream output) throws IOException
+ {
+ writeString(string.getBytes(), string.getForceHexForm(), output);
+ }
+
+ /**
+ * This will output the given text/byte string as a PDF object.
+ * @param bytes The raw bytes of the string; hex form is not forced for them.
+ * @param output The stream to write to.
+ * @throws IOException If there is an error writing to the stream.
+ */
+ public static void writeString(byte[] bytes, OutputStream output) throws IOException
+ {
+ writeString(bytes, false, output);
+ }
+
+ /**
+ * This will output the given text/byte string as a PDF object.
+ * @param bytes The raw bytes of the string.
+ * @param forceHex If true, the hex form is written even when all bytes are ASCII.
+ * @param output The stream to write to.
+ * @throws IOException If there is an error writing to the stream.
+ */
+ private static void writeString(byte[] bytes, boolean forceHex, OutputStream output) throws IOException
+ {
+ // check for non-ASCII characters
+ boolean isASCII = true;
+ for (byte b : bytes)
+ {
+ // if the byte is negative then it is an eight bit byte and is outside the ASCII range
+ if (b < 0)
+ {
+ isASCII = false;
+ break;
+ }
+ }
+
+ if (isASCII && !forceHex)
+ {
+ // write ASCII string
+ output.write('(');
+ for (byte b : bytes)
+ {
+ switch (b)
+ {
+ case '(':
+ case ')':
+ case '\\':
+ output.write('\\');
+ output.write(b);
+ break;
+ case '\r':
+ output.write('\\');
+ output.write('r'); // escape letter: a raw CR after a backslash is a line continuation and is dropped on re-parse
+ break;
+ case '\n':
+ output.write('\\');
+ output.write('n'); // escape letter, for the same reason as CR
+ break;
+ case '\t':
+ output.write('\\');
+ output.write('t'); // the parser maps the escape letters n, r, t, b, f back to the control bytes
+ break;
+ case '\b':
+ output.write('\\');
+ output.write('b');
+ break;
+ case '\f':
+ output.write('\\');
+ output.write('f');
+ break;
+ default:
+ output.write(b);
+ }
+ }
+ output.write(')');
+ }
+ else
+ {
+ // write hex string
+ output.write('<');
+ for (byte b : bytes)
+ {
+ output.write(Hex.getBytes(b));
+ }
+ output.write('>');
+ }
+ }
}
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/ContentStreamWriter.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/ContentStreamWriter.java?rev=1644828&r1=1644827&r2=1644828&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/ContentStreamWriter.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/ContentStreamWriter.java Fri Dec 12 04:39:37 2014
@@ -86,7 +86,7 @@ public class ContentStreamWriter
{
if( o instanceof COSString )
{
- ((COSString)o).writePDF( output );
+ COSWriter.writeString((COSString)o, output);
}
else if( o instanceof COSFloat )
{
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/edit/PDPageContentStream.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/edit/PDPageContentStream.java?rev=1644828&r1=1644827&r2=1644828&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/edit/PDPageContentStream.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/edit/PDPageContentStream.java Fri Dec 12 04:39:37 2014
@@ -20,11 +20,10 @@ import java.awt.Color;
import java.awt.color.ColorSpace;
import java.awt.geom.AffineTransform;
import java.awt.geom.PathIterator;
-import java.io.ByteArrayOutputStream;
import java.io.Closeable;
import java.io.IOException;
import java.io.OutputStream;
-import java.io.UnsupportedEncodingException;
+import java.nio.charset.Charset;
import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.List;
@@ -37,7 +36,7 @@ import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSInteger;
import org.apache.pdfbox.cos.COSName;
-import org.apache.pdfbox.cos.COSString;
+import org.apache.pdfbox.pdfwriter.COSWriter;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDResources;
@@ -54,100 +53,91 @@ import org.apache.pdfbox.pdmodel.graphic
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
import org.apache.pdfbox.pdmodel.graphics.PDXObject;
import org.apache.pdfbox.pdmodel.graphics.image.PDInlineImage;
+import org.apache.pdfbox.util.Charsets;
/**
- * This class is a convenience for creating page content streams. You MUST
- * call close() when you are finished with this object.
+ * This class is a convenience for creating page content streams. You MUST call close() when you
+ * are finished with this object.
*
- * @author <a href="mailto:ben@benlitchfield.com">Ben Litchfield</a>
+ * @author Ben Litchfield
*
*/
public class PDPageContentStream implements Closeable
{
- /**
- * Log instance.
- */
private static final Log LOG = LogFactory.getLog(PDPageContentStream.class);
+ private static byte[] toAscii(final String text)
+ {
+ return text.getBytes(Charsets.US_ASCII);
+ }
+
+ private static final byte[] BEGIN_TEXT = toAscii("BT\n");
+ private static final byte[] END_TEXT = toAscii("ET\n");
+ private static final byte[] SET_FONT = toAscii("Tf\n");
+ private static final byte[] MOVE_TEXT_POSITION = toAscii("Td\n");
+ private static final byte[] SET_TEXT_MATRIX = toAscii("Tm\n");
+ private static final byte[] SHOW_TEXT = toAscii("Tj\n");
+
+ private static final byte[] SAVE_GRAPHICS_STATE = toAscii("q\n");
+ private static final byte[] RESTORE_GRAPHICS_STATE = toAscii("Q\n");
+ private static final byte[] CONCATENATE_MATRIX = toAscii("cm\n");
+ private static final byte[] XOBJECT_DO = toAscii("Do\n");
+ private static final byte[] RG_STROKING = toAscii("RG\n");
+ private static final byte[] RG_NON_STROKING = toAscii("rg\n");
+ private static final byte[] K_STROKING = toAscii("K\n");
+ private static final byte[] K_NON_STROKING = toAscii("k\n");
+ private static final byte[] G_STROKING = toAscii("G\n");
+ private static final byte[] G_NON_STROKING = toAscii("g\n");
+ private static final byte[] RECTANGLE = toAscii("re\n");
+ private static final byte[] FILL_NON_ZERO = toAscii("f\n");
+ private static final byte[] FILL_EVEN_ODD = toAscii("f*\n");
+ private static final byte[] LINE_TO = toAscii("l\n");
+ private static final byte[] MOVE_TO = toAscii("m\n");
+ private static final byte[] CLOSE_STROKE = toAscii("s\n");
+ private static final byte[] STROKE = toAscii("S\n");
+ private static final byte[] LINE_WIDTH = toAscii("w\n");
+ private static final byte[] LINE_JOIN_STYLE = toAscii("j\n");
+ private static final byte[] LINE_CAP_STYLE = toAscii("J\n");
+ private static final byte[] LINE_DASH_PATTERN = toAscii("d\n");
+ private static final byte[] CLOSE_SUBPATH = toAscii("h\n");
+ private static final byte[] CLIP_PATH_NON_ZERO = toAscii("W\n");
+ private static final byte[] CLIP_PATH_EVEN_ODD = toAscii("W*\n");
+ private static final byte[] NOP = toAscii("n\n");
+ private static final byte[] BEZIER_312 = toAscii("c\n");
+ private static final byte[] BEZIER_32 = toAscii("v\n");
+ private static final byte[] BEZIER_313 = toAscii("y\n");
+
+ private static final byte[] BMC = toAscii("BMC\n");
+ private static final byte[] BDC = toAscii("BDC\n");
+ private static final byte[] EMC = toAscii("EMC\n");
+
+ private static final byte[] SET_STROKING_COLORSPACE = toAscii("CS\n");
+ private static final byte[] SET_NON_STROKING_COLORSPACE = toAscii("cs\n");
+
+ private static final byte[] SET_STROKING_COLOR_SIMPLE = toAscii("SC\n");
+ private static final byte[] SET_STROKING_COLOR_COMPLEX = toAscii("SCN\n");
+ private static final byte[] SET_NON_STROKING_COLOR_SIMPLE = toAscii("sc\n");
+ private static final byte[] SET_NON_STROKING_COLOR_COMPLEX = toAscii("scn\n");
+
+ private static final byte[] OPENING_BRACKET = toAscii("[");
+ private static final byte[] CLOSING_BRACKET = toAscii("]");
+ private static final byte[] NEWLINE = toAscii("\n");
+
+ private static final int SPACE = 32;
+
+ // instance variables
private OutputStream output;
- private boolean inTextMode = false;
private PDResources resources;
+ private boolean inTextMode = false;
private PDColorSpace currentStrokingColorSpace = PDDeviceGray.INSTANCE;
private PDColorSpace currentNonStrokingColorSpace = PDDeviceGray.INSTANCE;
// cached storage component for getting color values
- private float[] colorComponents = new float[4];
-
- private NumberFormat formatDecimal = NumberFormat.getNumberInstance(Locale.US);
-
- private static final String ISO8859 = "ISO-8859-1";
+ private final float[] colorComponents = new float[4];
- private static byte[] getISOBytes(final String s)
- {
- try
- {
- return s.getBytes(ISO8859);
- }
- catch (final UnsupportedEncodingException ex)
- {
- throw new IllegalStateException(ex);
- }
- }
-
- private static final byte[] BEGIN_TEXT = getISOBytes("BT\n");
- private static final byte[] END_TEXT = getISOBytes("ET\n");
- private static final byte[] SET_FONT = getISOBytes("Tf\n");
- private static final byte[] MOVE_TEXT_POSITION = getISOBytes("Td\n");
- private static final byte[] SET_TEXT_MATRIX = getISOBytes("Tm\n");
- private static final byte[] SHOW_TEXT = getISOBytes("Tj\n");
-
- private static final byte[] SAVE_GRAPHICS_STATE = getISOBytes("q\n");
- private static final byte[] RESTORE_GRAPHICS_STATE = getISOBytes("Q\n");
- private static final byte[] CONCATENATE_MATRIX = getISOBytes("cm\n");
- private static final byte[] XOBJECT_DO = getISOBytes("Do\n");
- private static final byte[] RG_STROKING = getISOBytes("RG\n");
- private static final byte[] RG_NON_STROKING = getISOBytes("rg\n");
- private static final byte[] K_STROKING = getISOBytes("K\n");
- private static final byte[] K_NON_STROKING = getISOBytes("k\n");
- private static final byte[] G_STROKING = getISOBytes("G\n");
- private static final byte[] G_NON_STROKING = getISOBytes("g\n");
- private static final byte[] RECTANGLE = getISOBytes("re\n");
- private static final byte[] FILL_NON_ZERO = getISOBytes("f\n");
- private static final byte[] FILL_EVEN_ODD = getISOBytes("f*\n");
- private static final byte[] LINE_TO = getISOBytes("l\n");
- private static final byte[] MOVE_TO = getISOBytes("m\n");
- private static final byte[] CLOSE_STROKE = getISOBytes("s\n");
- private static final byte[] STROKE = getISOBytes("S\n");
- private static final byte[] LINE_WIDTH = getISOBytes("w\n");
- private static final byte[] LINE_JOIN_STYLE = getISOBytes("j\n");
- private static final byte[] LINE_CAP_STYLE = getISOBytes("J\n");
- private static final byte[] LINE_DASH_PATTERN = getISOBytes("d\n");
- private static final byte[] CLOSE_SUBPATH = getISOBytes("h\n");
- private static final byte[] CLIP_PATH_NON_ZERO = getISOBytes("W\n");
- private static final byte[] CLIP_PATH_EVEN_ODD = getISOBytes("W*\n");
- private static final byte[] NOP = getISOBytes("n\n");
- private static final byte[] BEZIER_312 = getISOBytes("c\n");
- private static final byte[] BEZIER_32 = getISOBytes("v\n");
- private static final byte[] BEZIER_313 = getISOBytes("y\n");
-
- private static final byte[] BMC = getISOBytes("BMC\n");
- private static final byte[] BDC = getISOBytes("BDC\n");
- private static final byte[] EMC = getISOBytes("EMC\n");
-
- private static final byte[] SET_STROKING_COLORSPACE = getISOBytes("CS\n");
- private static final byte[] SET_NON_STROKING_COLORSPACE = getISOBytes("cs\n");
-
- private static final byte[] SET_STROKING_COLOR_SIMPLE = getISOBytes("SC\n");
- private static final byte[] SET_STROKING_COLOR_COMPLEX = getISOBytes("SCN\n");
- private static final byte[] SET_NON_STROKING_COLOR_SIMPLE = getISOBytes("sc\n");
- private static final byte[] SET_NON_STROKING_COLOR_COMPLEX = getISOBytes("scn\n");
-
- private static final byte[] OPENING_BRACKET = getISOBytes("[");
- private static final byte[] CLOSING_BRACKET = getISOBytes("]");
- private static final byte[] NEWLINE = getISOBytes("\n");
-
- private static final int SPACE = 32;
+ // number format
+ private final NumberFormat formatDecimal = NumberFormat.getNumberInstance(Locale.US);
/**
* Create a new PDPage content stream.
@@ -631,10 +621,7 @@ public class PDPageContentStream impleme
{
throw new IOException("Error: must call beginText() before drawString");
}
- COSString string = new COSString(text);
- ByteArrayOutputStream buffer = new ByteArrayOutputStream();
- string.writePDF(buffer);
- appendRawCommands(buffer.toByteArray());
+ COSWriter.writeString(text.getBytes(Charset.forName("ISO-8859-1")), output); // todo: use font's encoding
appendRawCommands(SPACE);
appendRawCommands(SHOW_TEXT);
}
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/encryption/PDEncryption.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/encryption/PDEncryption.java?rev=1644828&r1=1644827&r2=1644828&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/encryption/PDEncryption.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/encryption/PDEncryption.java Fri Dec 12 04:39:37 2014
@@ -258,9 +258,7 @@ public class PDEncryption
*/
public void setOwnerKey(byte[] o) throws IOException
{
- COSString owner = new COSString();
- owner.append( o );
- dictionary.setItem(COSName.O, owner);
+ dictionary.setItem(COSName.O, new COSString(o));
}
/**
@@ -290,9 +288,7 @@ public class PDEncryption
*/
public void setUserKey(byte[] u) throws IOException
{
- COSString user = new COSString();
- user.append( u );
- dictionary.setItem(COSName.U, user);
+ dictionary.setItem(COSName.U, new COSString(u));
}
/**
@@ -322,9 +318,7 @@ public class PDEncryption
*/
public void setOwnerEncryptionKey(byte[] oe) throws IOException
{
- COSString ownerEncryptionKey = new COSString();
- ownerEncryptionKey.append(oe);
- dictionary.setItem( COSName.OE, ownerEncryptionKey );
+ dictionary.setItem( COSName.OE, new COSString(oe) );
}
/**
@@ -354,9 +348,7 @@ public class PDEncryption
*/
public void setUserEncryptionKey(byte[] ue) throws IOException
{
- COSString userEncryptionKey = new COSString();
- userEncryptionKey.append(ue);
- dictionary.setItem( COSName.UE, userEncryptionKey );
+ dictionary.setItem( COSName.UE, new COSString(ue) );
}
/**
@@ -427,9 +419,7 @@ public class PDEncryption
COSArray array = new COSArray();
for (byte[] recipient : recipients)
{
- COSString recip = new COSString();
- recip.append(recipient);
- recip.setForceLiteralForm(true);
+ COSString recip = new COSString(recipient);
array.add(recip);
}
dictionary.setItem(COSName.RECIPIENTS, array);
@@ -579,9 +569,7 @@ public class PDEncryption
*/
public void setPerms(byte[] perms) throws IOException
{
- COSString user = new COSString();
- user.append( perms );
- dictionary.setItem( COSName.PERMS, user );
+ dictionary.setItem( COSName.PERMS, new COSString(perms) );
}
/**
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/encryption/SecurityHandler.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/encryption/SecurityHandler.java?rev=1644828&r1=1644827&r2=1644828&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/encryption/SecurityHandler.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/encryption/SecurityHandler.java Fri Dec 12 04:39:37 2014
@@ -501,8 +501,7 @@ public abstract class SecurityHandler
ByteArrayInputStream data = new ByteArrayInputStream(string.getBytes());
ByteArrayOutputStream buffer = new ByteArrayOutputStream();
encryptData(objNum, genNum, data, buffer, true /* decrypt */);
- string.reset();
- string.append(buffer.toByteArray());
+ string.setValue(buffer.toByteArray());
}
/**
@@ -519,8 +518,7 @@ public abstract class SecurityHandler
ByteArrayInputStream data = new ByteArrayInputStream(string.getBytes());
ByteArrayOutputStream buffer = new ByteArrayOutputStream();
encryptData(objNum, genNum, data, buffer, false /* decrypt */);
- string.reset();
- string.append(buffer.toByteArray());
+ string.setValue(buffer.toByteArray());
}
/**