You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2009/08/10 21:34:46 UTC
svn commit: r802910 - in
/incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox: ./ cos/ encoding/
encoding/conversion/ examples/pdmodel/ examples/signature/ examples/util/
exceptions/
Author: lehmi
Date: Mon Aug 10 19:34:46 2009
New Revision: 802910
URL: http://svn.apache.org/viewvc?rev=802910&view=rev
Log:
PDFBOX-464: reestablish checkstyle compliance
Modified:
incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/ConvertColorspace.java
incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/Decrypt.java
incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/Encrypt.java
incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/ExtractImages.java
incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/ExtractText.java
incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/PDFMerger.java
incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/PDFSplit.java
incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/cos/COSArray.java
incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/cos/COSDocument.java
incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/cos/COSName.java
incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/cos/COSString.java
incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/encoding/Encoding.java
incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/encoding/conversion/CJKConverter.java
incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/encoding/conversion/CJKEncoding.java
incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/encoding/conversion/CMapSubstitution.java
incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/encoding/conversion/EncodingConversionManager.java
incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/encoding/conversion/EncodingConverter.java
incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/examples/pdmodel/AddImageToPDF.java
incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/examples/pdmodel/AddMetadataFromDocInfo.java
incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/examples/pdmodel/GoToSecondBookmarkOnOpen.java
incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/examples/signature/ShowSignature.java
incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/examples/util/PrintTextLocations.java
incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/exceptions/LoggingObject.java
Modified: incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/ConvertColorspace.java
URL: http://svn.apache.org/viewvc/incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/ConvertColorspace.java?rev=802910&r1=802909&r2=802910&view=diff
==============================================================================
--- incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/ConvertColorspace.java (original)
+++ incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/ConvertColorspace.java Mon Aug 10 19:34:46 2009
@@ -414,7 +414,8 @@
" The supported equiv colorspaces are RGB and CMYK.\n" +
" RGB color values are integers between 0 and 255" +
" CMYK color values are integer between 0 and 100.\n" +
- " Example: java org.apache.pdfbox.ConvertColorspace -equiv RGB:(255,0,0)=CMYK(0,99,100,0) input.pdf output.pdf\n" +
+ " Example: java org.apache.pdfbox.ConvertColorspace -equiv RGB:(255,0,0)=CMYK(0,99,100,0)" +
+ " input.pdf output.pdf\n" +
" <PDF Input file> The PDF document to use\n" +
" <PDF Output file> The PDF file to write the result to. Must be different of input file\n"
);
Modified: incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/Decrypt.java
URL: http://svn.apache.org/viewvc/incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/Decrypt.java?rev=802910&r1=802909&r2=802910&view=diff
==============================================================================
--- incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/Decrypt.java (original)
+++ incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/Decrypt.java Mon Aug 10 19:34:46 2009
@@ -41,7 +41,9 @@
private static final String PASSWORD = "-password";
private static final String KEYSTORE = "-keyStore";
-
+ private Decrypt()
+ {
+ }
/**
* This is the entry point for the application.
*
Modified: incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/Encrypt.java
URL: http://svn.apache.org/viewvc/incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/Encrypt.java?rev=802910&r1=802909&r2=802910&view=diff
==============================================================================
--- incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/Encrypt.java (original)
+++ incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/Encrypt.java Mon Aug 10 19:34:46 2009
@@ -36,6 +36,9 @@
*/
public class Encrypt
{
+ private Encrypt()
+ {
+ }
/**
* This is the entry point for the application.
Modified: incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/ExtractImages.java
URL: http://svn.apache.org/viewvc/incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/ExtractImages.java?rev=802910&r1=802909&r2=802910&view=diff
==============================================================================
--- incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/ExtractImages.java (original)
+++ incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/ExtractImages.java Mon Aug 10 19:34:46 2009
@@ -44,6 +44,10 @@
private static final String PASSWORD = "-password";
private static final String PREFIX = "-prefix";
+ private ExtractImages()
+ {
+ }
+
/**
* This is the entry point for the application.
*
Modified: incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/ExtractText.java
URL: http://svn.apache.org/viewvc/incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/ExtractText.java?rev=802910&r1=802909&r2=802910&view=diff
==============================================================================
--- incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/ExtractText.java (original)
+++ incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/ExtractText.java Mon Aug 10 19:34:46 2009
@@ -191,7 +191,9 @@
}
if ((encoding == null) && (toHTML))
+ {
encoding = "UTF-8";
+ }
if( toConsole )
{
Modified: incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/PDFMerger.java
URL: http://svn.apache.org/viewvc/incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/PDFMerger.java?rev=802910&r1=802909&r2=802910&view=diff
==============================================================================
--- incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/PDFMerger.java (original)
+++ incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/PDFMerger.java Mon Aug 10 19:34:46 2009
@@ -27,6 +27,10 @@
*/
public class PDFMerger
{
+
+ private PDFMerger()
+ {
+ }
/**
* Infamous main method.
*
Modified: incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/PDFSplit.java
URL: http://svn.apache.org/viewvc/incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/PDFSplit.java?rev=802910&r1=802909&r2=802910&view=diff
==============================================================================
--- incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/PDFSplit.java (original)
+++ incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/PDFSplit.java Mon Aug 10 19:34:46 2009
@@ -46,6 +46,9 @@
private static final String PASSWORD = "-password";
private static final String SPLIT = "-split";
+ private PDFSplit()
+ {
+ }
/**
* Infamous main method.
*
Modified: incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/cos/COSArray.java
URL: http://svn.apache.org/viewvc/incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/cos/COSArray.java?rev=802910&r1=802909&r2=802910&view=diff
==============================================================================
--- incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/cos/COSArray.java (original)
+++ incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/cos/COSArray.java Mon Aug 10 19:34:46 2009
@@ -479,15 +479,18 @@
}
}
- /*
- Return contents of COSArray as a Java List
- */
+ /**
+ * Return contents of COSArray as a Java List.
+ *
+ * @return the COSArray as List
+ */
public List toList()
{
ArrayList retList = new ArrayList(size());
for (int i = 0; i < size(); i++)
- retList.add(get(i));
-
+ {
+ retList.add(get(i));
+ }
return retList;
}
}
Modified: incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/cos/COSDocument.java
URL: http://svn.apache.org/viewvc/incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/cos/COSDocument.java?rev=802910&r1=802909&r2=802910&view=diff
==============================================================================
--- incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/cos/COSDocument.java (original)
+++ incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/cos/COSDocument.java Mon Aug 10 19:34:46 2009
@@ -52,7 +52,7 @@
private Map objectPool = new HashMap();
/**
- * Maps object and generation ids to object byte offsets
+ * Maps object and generation ids to object byte offsets.
*/
private Map xrefTable = new HashMap();
@@ -124,6 +124,7 @@
* @param type The type of the object.
*
* @return This will return an object with the specified type.
+ * @throws IOException If there is an error getting the object
*/
public COSObject getObjectByType( String type ) throws IOException
{
@@ -136,6 +137,7 @@
* @param type The type of the object.
*
* @return This will return an object with the specified type.
+ * @throws IOException If there is an error getting the object
*/
public COSObject getObjectByType( COSName type ) throws IOException
{
@@ -148,17 +150,19 @@
COSBase realObject = object.getObject();
if( realObject instanceof COSDictionary )
{
- try{
- COSDictionary dic = (COSDictionary)realObject;
- COSName objectType = (COSName)dic.getItem( COSName.TYPE );
- if( objectType != null && objectType.equals( type ) )
- {
- retval = object;
- }
- }catch (ClassCastException e){
- logger().warning(e.toString() + "\n at\n" + FullStackTrace(e));
- }
-
+ try
+ {
+ COSDictionary dic = (COSDictionary)realObject;
+ COSName objectType = (COSName)dic.getItem( COSName.TYPE );
+ if( objectType != null && objectType.equals( type ) )
+ {
+ retval = object;
+ }
+ }
+ catch (ClassCastException e)
+ {
+ logger().warning(e.toString() + "\n at\n" + FullStackTrace(e));
+ }
}
}
return retval;
@@ -170,6 +174,7 @@
* @param type The type of the object.
*
* @return This will return an object with the specified type.
+ * @throws IOException If there is an error getting the object
*/
public List getObjectsByType( String type ) throws IOException
{
@@ -182,6 +187,7 @@
* @param type The type of the object.
*
* @return This will return an object with the specified type.
+ * @throws IOException If there is an error getting the object
*/
public List getObjectsByType( COSName type ) throws IOException
{
@@ -194,16 +200,19 @@
COSBase realObject = object.getObject();
if( realObject instanceof COSDictionary )
{
- try{
- COSDictionary dic = (COSDictionary)realObject;
- COSName objectType = (COSName)dic.getItem( COSName.TYPE );
- if( objectType != null && objectType.equals( type ) )
- {
- retval.add( object );
- }
- }catch (ClassCastException e){
- logger().warning(e.toString() + "\n at\n" + FullStackTrace(e));
- }
+ try
+ {
+ COSDictionary dic = (COSDictionary)realObject;
+ COSName objectType = (COSName)dic.getItem( COSName.TYPE );
+ if( objectType != null && objectType.equals( type ) )
+ {
+ retval.add( object );
+ }
+ }
+ catch (ClassCastException e)
+ {
+ logger().warning(e.toString() + "\n at\n" + FullStackTrace(e));
+ }
}
}
return retval;
@@ -467,18 +476,20 @@
* Used to populate the XRef HashMap. Will add an Xreftable entry
* that maps ObjectKeys to byte offsets in the file.
* @param objKey The objkey, with id and gen numbers
- * @param currOffset The byte offset in this file
+ * @param offset The byte offset in this file
*/
- public void setXRef(COSObjectKey objKey, int offset) {
+ public void setXRef(COSObjectKey objKey, int offset)
+ {
xrefTable.put(objKey, new Integer(offset));
}
/**
* Returns the xrefTable which is a mapping of ObjectKeys
* to byte offsets in the file.
- * @return
+ * @return mapping of ObjectsKeys to byte offsets
*/
- public Map getXrefTable(){
+ public Map getXrefTable()
+ {
return xrefTable;
}
@@ -489,17 +500,18 @@
*
* @throws IOException if there is an error parsing the stream
*/
- public void parseXrefStreams() throws IOException {
- COSDictionary trailer = new COSDictionary();
+ public void parseXrefStreams() throws IOException
+ {
+ COSDictionary trailerDict = new COSDictionary();
Iterator xrefIter = getObjectsByType( "XRef" ).iterator();
while( xrefIter.hasNext() )
{
COSObject xrefStream = (COSObject)xrefIter.next();
COSStream stream = (COSStream)xrefStream.getObject();
- trailer.addAll(stream);
+ trailerDict.addAll(stream);
PDFXrefStreamParser parser = new PDFXrefStreamParser(stream, this);
parser.parse();
}
- setTrailer( trailer );
+ setTrailer( trailerDict );
}
}
Modified: incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/cos/COSName.java
URL: http://svn.apache.org/viewvc/incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/cos/COSName.java?rev=802910&r1=802909&r2=802910&view=diff
==============================================================================
--- incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/cos/COSName.java (original)
+++ incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/cos/COSName.java Mon Aug 10 19:34:46 2009
@@ -46,7 +46,7 @@
* All common COSName values are stored in a simple HashMap. They are already defined as
* static constants and don't need to be synchronized for multithreaded environments.
*/
- private static Map commonNameMap = new HashMap() ;
+ private static Map commonNameMap = new HashMap();
/**
@@ -522,17 +522,17 @@
COSName name = null;
if( aName != null )
{
- // Is it a common COSName ??
+ // Is it a common COSName ??
name = (COSName)commonNameMap.get( aName );
if( name == null )
{
- // It seems to be a document specific COSName
- name = (COSName)nameMap.get( aName );
- if( name == null )
- {
- //name is added to the synchronized map in the constructor
- name = new COSName( aName, false );
- }
+ // It seems to be a document specific COSName
+ name = (COSName)nameMap.get( aName );
+ if( name == null )
+ {
+ //name is added to the synchronized map in the constructor
+ name = new COSName( aName, false );
+ }
}
}
return name;
@@ -543,15 +543,20 @@
* that are created.
*
* @param aName The name of the COSName object.
- * @param staticValue Indicates if the COSName object is static so that it can be stored in the HashMap without synchronizing.
+ * @param staticValue Indicates if the COSName object is static so that it can
+ * be stored in the HashMap without synchronizing.
*/
private COSName( String aName, boolean staticValue )
{
name = aName;
if ( staticValue )
- commonNameMap.put( aName, this);
+ {
+ commonNameMap.put( aName, this);
+ }
else
- nameMap.put( aName, this );
+ {
+ nameMap.put( aName, this );
+ }
hashCode = name.hashCode();
}
@@ -563,7 +568,7 @@
*/
private COSName( String aName )
{
- this( aName, true );
+ this( aName, true );
}
/**
Modified: incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/cos/COSString.java
URL: http://svn.apache.org/viewvc/incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/cos/COSString.java?rev=802910&r1=802909&r2=802910&view=diff
==============================================================================
--- incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/cos/COSString.java (original)
+++ incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/cos/COSString.java Mon Aug 10 19:34:46 2009
@@ -219,10 +219,10 @@
*/
public String getString()
{
- if (this.str != null)
- {
- return this.str;
- }
+ if (this.str != null)
+ {
+ return this.str;
+ }
String retval;
String encoding = "ISO-8859-1";
byte[] data = getBytes();
@@ -403,13 +403,12 @@
*/
public boolean equals(Object obj)
{
- if (obj instanceof COSString)
- {
- obj = ((COSString) obj).getString();
+ if (obj instanceof COSString)
+ {
+ obj = ((COSString) obj).getString();
return this.getString().equals(obj);
- }
- else
- return false;
+ }
+ return false;
}
/**
Modified: incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/encoding/Encoding.java
URL: http://svn.apache.org/viewvc/incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/encoding/Encoding.java?rev=802910&r1=802909&r2=802910&view=diff
==============================================================================
--- incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/encoding/Encoding.java (original)
+++ incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/encoding/Encoding.java Mon Aug 10 19:34:46 2009
@@ -59,9 +59,11 @@
// Load an external glyph list file that user can give as JVM property
String location = System.getProperty("glyphlist_ext");
- if(location != null){
+ if(location != null)
+ {
File external = new File(location);
- if(external.exists()){
+ if(external.exists())
+ {
loadGlyphList(location);
}
}
@@ -84,10 +86,11 @@
/**
* Loads a glyph list from a given location and populates the NAME_TO_CHARACTER hashmap
- * for character lookups
+ * for character lookups.
* @param location - The string location of the glyphlist file
*/
- private static void loadGlyphList(String location){
+ private static void loadGlyphList(String location)
+ {
BufferedReader glyphStream = null;
try
{
@@ -248,7 +251,8 @@
String nameStr = baseName.getName();
// test if we have a suffix and if so remove it
- if ( nameStr.indexOf('.') > 0 ) {
+ if ( nameStr.indexOf('.') > 0 )
+ {
nameStr = nameStr.substring( 0, nameStr.indexOf('.') );
baseName = COSName.getPDFName( nameStr );
}
@@ -263,20 +267,25 @@
{
StringBuffer uniStr = new StringBuffer();
- for ( int chPos = 3; chPos + 4 <= nameStr.length(); chPos += 4 ) {
-
- try {
-
+ for ( int chPos = 3; chPos + 4 <= nameStr.length(); chPos += 4 )
+ {
+ try
+ {
int characterCode = Integer.parseInt( nameStr.substring( chPos, chPos + 4), 16 );
if ( ( characterCode > 0xD7FF ) && ( characterCode < 0xE000 ) )
+ {
Logger.getLogger(Encoding.class.getName()).log( Level.WARNING,
"Unicode character name with not allowed code area: " +
nameStr );
+ }
else
+ {
uniStr.append( (char) characterCode );
-
- } catch (NumberFormatException nfe) {
+ }
+ }
+ catch (NumberFormatException nfe)
+ {
Logger.getLogger(Encoding.class.getName()).log( Level.WARNING,
"Not a number in Unicode character name: " +
nameStr );
@@ -284,7 +293,8 @@
}
character = uniStr.toString();
}
- else {
+ else
+ {
character = nameStr;
}
}
Modified: incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/encoding/conversion/CJKConverter.java
URL: http://svn.apache.org/viewvc/incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/encoding/conversion/CJKConverter.java?rev=802910&r1=802909&r2=802910&view=diff
==============================================================================
--- incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/encoding/conversion/CJKConverter.java (original)
+++ incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/encoding/conversion/CJKConverter.java Mon Aug 10 19:34:46 2009
@@ -22,77 +22,92 @@
/**
- * CJKConverter converts encodings defined in CJKEncodings
+ * CJKConverter converts encodings defined in CJKEncodings.
*
- * @auther pinxue <http://www.pinxue.net>, Holly Lee <holly.lee (at) gmail.com>
+ * @author Pin Xue (http://www.pinxue.net), Holly Lee (holly.lee (at) gmail.com)
+ * @version $Revision: 1.0 $
*/
-class CJKConverter implements EncodingConverter
+public class CJKConverter implements EncodingConverter
{
- /** The encoding */
- private String _encoding = null;
- /** The java charset name */
- private String _charset = null;
-
-
- /**
- * Constructs a CJKConverter from a PDF encoding name
- */
- public CJKConverter(String encoding)
- {
- _encoding = encoding;
- _charset = CJKEncodings.getCharset(encoding);
- }
-
- /**
- * Convert a string. It occurs when a cmap lookup returned
- * converted bytes successfully, but we still need to convert its
- * encoding. The parameter s is constructs as one byte or a UTF-16BE
- * encoded string.
- *
- * Note: pdfbox set string to UTF-16BE charset before calling into
- * this.
- */
- public String convertString(String s)
- {
- if ( s.length() == 1 )
- return s;
-
- if ( _charset.equalsIgnoreCase("UTF-16BE") )
- return s;
-
- try {
- return new String(s.getBytes("UTF-16BE"), _charset);
- }
- catch ( UnsupportedEncodingException uee ) {
- return s;
- }
- }
-
- /**
- * Convert bytes to a string. We just convert bytes within
- * coderange defined in CMap.
- *
- * @return Converted string.
- */
- public String convertBytes(byte [] c, int offset, int length, CMap cmap)
- {
- if ( cmap != null ) {
-
- try {
- if ( cmap.isInCodeSpaceRanges(c, offset, length) )
- return new String(c, offset, length, _charset);
- else
- return null;
-
- }
- catch ( UnsupportedEncodingException uee ) {
- return new String(c, offset, length);
- }
-
- }
-
- // No cmap?
- return null;
- }
+ // The encoding
+ private String encodingName = null;
+ // The java charset name
+ private String charsetName = null;
+
+
+ /**
+ * Constructs a CJKConverter from a PDF encoding name.
+ *
+ * @param encoding the encoding to be used
+ */
+ public CJKConverter(String encoding)
+ {
+ encodingName = encoding;
+ charsetName = CJKEncodings.getCharset(encoding);
+ }
+
+ /**
+ * Convert a string. It occurs when a cmap lookup returned
+ * converted bytes successfully, but we still need to convert its
+ * encoding. The parameter s is constructs as one byte or a UTF-16BE
+ * encoded string.
+ *
+ * Note: pdfbox set string to UTF-16BE charset before calling into
+ * this.
+ *
+ * {@inheritDoc}
+ */
+ public String convertString(String s)
+ {
+ if ( s.length() == 1 )
+ {
+ return s;
+ }
+
+ if ( charsetName.equalsIgnoreCase("UTF-16BE") )
+ {
+ return s;
+ }
+
+ try
+ {
+ return new String(s.getBytes("UTF-16BE"), charsetName);
+ }
+ catch ( UnsupportedEncodingException uee )
+ {
+ return s;
+ }
+ }
+
+ /**
+ * Convert bytes to a string. We just convert bytes within
+ * coderange defined in CMap.
+ *
+ * {@inheritDoc}
+ */
+ public String convertBytes(byte [] c, int offset, int length, CMap cmap)
+ {
+ if ( cmap != null )
+ {
+ try
+ {
+ if ( cmap.isInCodeSpaceRanges(c, offset, length) )
+ {
+ return new String(c, offset, length, charsetName);
+ }
+ else
+ {
+ return null;
+ }
+
+ }
+ catch ( UnsupportedEncodingException uee )
+ {
+ return new String(c, offset, length);
+ }
+ }
+ // No cmap?
+ return null;
+ }
}
Modified: incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/encoding/conversion/CJKEncoding.java
URL: http://svn.apache.org/viewvc/incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/encoding/conversion/CJKEncoding.java?rev=802910&r1=802909&r2=802910&view=diff
==============================================================================
--- incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/encoding/conversion/CJKEncoding.java (original)
+++ incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/encoding/conversion/CJKEncoding.java Mon Aug 10 19:34:46 2009
@@ -20,7 +20,7 @@
import java.util.Iterator;
/**
- * This class represents PDF encoding name to Java charset name mapping
+ * This class represents PDF encoding name to Java charset name mapping.
*
* @author Pin Xue (http://www.pinxue.net), Holly Lee (holly.lee (at) gmail.com)
* @version $Revision: 1.0 $
@@ -28,78 +28,149 @@
class CJKEncodings
{
// Mapping: PDF encoding name -> Java (IANA) charset name
- private static HashMap _mapping = new HashMap();
+ private static HashMap charsetMapping = new HashMap();
+
+ private CJKEncodings()
+ {
+ }
static
{
// Chinese (Simplified)
- _mapping.put("GB-EUC-H", "GB2312"); // Microsoft Code Page 936 (lfCharSet 0x86), GB 2312-80 character set, EUC-CN encoding
- _mapping.put("GB-EUC-V", "GB2312"); // Vertical version of GB-EUC-H
- _mapping.put("GBpc-EUC-H", "GB2312"); // Mac OS, GB 2312-80 character set, EUC-CN encoding, Script Manager code 19
- _mapping.put("GBpc-EUC-V", "GB2312"); // Vertical version of GBpc-EUC-H
- _mapping.put("GBK-EUC-H", "GBK"); // Microsoft Code Page 936 (lfCharSet 0x86), GBK character set, GBK encoding
- _mapping.put("GBK-EUC-V", "GBK"); // Vertical version of GBK-EUC-H
- _mapping.put("GBKp-EUC-H", "GBK"); // Same as GBK-EUC-H but replaces half-width Latin characters with proportional forms and maps character code 0x24 to a dollar sign ($) instead of a yuan symbol (¥)
- _mapping.put("GBKp-EUC-V", "GBK"); // Vertical version of GBKp-EUC-H
- _mapping.put("GBK2K-H", "GB18030"); // GB 18030-2000 character set, mixed 1-, 2-, and 4-byte encoding
- _mapping.put("GBK2K-V", "GB18030"); // Vertical version of GBK2K-H
- _mapping.put("UniGB-UCS2-H", "ISO-10646-UCS-2"); // Unicode (UCS-2) encoding for the Adobe-GB1 character collection
- _mapping.put("UniGB-UCS2-V", "ISO-10646-UCS-2"); // Vertical version of UniGB-UCS2-H
- _mapping.put("UniGB-UTF16-H", "UTF-16BE"); // Unicode (UTF-16BE) encoding for the Adobe-GB1 character collection; contains mappings for all characters in the GB18030-2000 character set
- _mapping.put("UniGB-UTF16-V", "UTF-16BE"); // Vertical version of UniGB-UTF16-H
+ // Microsoft Code Page 936 (lfCharSet 0x86), GB 2312-80 character set, EUC-CN encoding
+ charsetMapping.put("GB-EUC-H", "GB2312");
+ // Vertical version of GB-EUC-H
+ charsetMapping.put("GB-EUC-V", "GB2312");
+ // Mac OS, GB 2312-80 character set, EUC-CN encoding, Script Manager code 19
+ charsetMapping.put("GBpc-EUC-H", "GB2312");
+ // Vertical version of GBpc-EUC-H
+ charsetMapping.put("GBpc-EUC-V", "GB2312");
+ // Microsoft Code Page 936 (lfCharSet 0x86), GBK character set, GBK encoding
+ charsetMapping.put("GBK-EUC-H", "GBK");
+ // Vertical version of GBK-EUC-H
+ charsetMapping.put("GBK-EUC-V", "GBK");
+ // Same as GBK-EUC-H but replaces half-width Latin characters with proportional
+ // forms and maps character code 0x24 to a dollar sign ($) instead of a yuan symbol (¥)
+ charsetMapping.put("GBKp-EUC-H", "GBK");
+ // Vertical version of GBKp-EUC-H
+ charsetMapping.put("GBKp-EUC-V", "GBK");
+ // GB 18030-2000 character set, mixed 1-, 2-, and 4-byte encoding
+ charsetMapping.put("GBK2K-H", "GB18030");
+ // Vertical version of GBK2K-H
+ charsetMapping.put("GBK2K-V", "GB18030");
+ // Unicode (UCS-2) encoding for the Adobe-GB1 character collection
+ charsetMapping.put("UniGB-UCS2-H", "ISO-10646-UCS-2");
+ // Vertical version of UniGB-UCS2-H
+ charsetMapping.put("UniGB-UCS2-V", "ISO-10646-UCS-2");
+ // Unicode (UTF-16BE) encoding for the Adobe-GB1 character collection; contains mappings
+ // for all characters in the GB18030-2000 character set
+ charsetMapping.put("UniGB-UTF16-H", "UTF-16BE");
+ // Vertical version of UniGB-UTF16-H
+ charsetMapping.put("UniGB-UTF16-V", "UTF-16BE");
// Chinese (Traditional)
- _mapping.put("B5pc-H", "BIG5"); // Mac OS, Big Five character set, Big Five encoding, Script Manager code 2
- _mapping.put("B5pc-V", "BIG5"); // Vertical version of B5pc-H
- _mapping.put("HKscs-B5-H", "Big5-HKSCS"); // Hong Kong SCS, an extension to the Big Five character set and encoding
- _mapping.put("HKscs-B5-V", "Big5-HKSCS"); // Vertical version of HKscs-B5-H
- _mapping.put("ETen-B5-H", "BIG5"); // Microsoft Code Page 950 (lfCharSet 0x88), Big Five character set with ETen extensions
- _mapping.put("ETen-B5-V", "BIG5"); // Vertical version of ETen-B5-H
- _mapping.put("ETenms-B5-H", "BIG5"); // Same as ETen-B5-H but replaces half-width Latin characters with proportional forms
- _mapping.put("ETenms-B5-V", "BIG5"); // Vertical version of ETenms-B5-H
- _mapping.put("CNS-EUC-H", "HZ"); // CNS 11643-1992 character set, EUC-TW encoding
- _mapping.put("CNS-EUC-V", "HZ"); // Vertical version of CNS-EUC-H
- _mapping.put("UniCNS-UCS2-H", "ISO-10646-UCS-2"); // Unicode (UCS-2) encoding for the Adobe-CNS1 character collection
- _mapping.put("UniCNS-UCS2-V", "ISO-10646-UCS-2"); // Vertical version of UniCNS-UCS2-H
- _mapping.put("UniCNS-UTF16-H", "UTF-16BE"); // Unicode (UTF-16BE) encoding for the Adobe-CNS1 character collection; contains mappings for all the characters in the HKSCS-2001 character set and contains both 2- and 4- byte character codes
- _mapping.put("UniCNS-UTF16-V", "UTF-16BE"); // Vertical version of UniCNS-UTF16-H
+ // Mac OS, Big Five character set, Big Five encoding, Script Manager code 2
+ charsetMapping.put("B5pc-H", "BIG5");
+ // Vertical version of B5pc-H
+ charsetMapping.put("B5pc-V", "BIG5");
+ // Hong Kong SCS, an extension to the Big Five character set and encoding
+ charsetMapping.put("HKscs-B5-H", "Big5-HKSCS");
+ // Vertical version of HKscs-B5-H
+ charsetMapping.put("HKscs-B5-V", "Big5-HKSCS");
+ // Microsoft Code Page 950 (lfCharSet 0x88), Big Five character set with ETen extensions
+ charsetMapping.put("ETen-B5-H", "BIG5");
+ // Vertical version of ETen-B5-H
+ charsetMapping.put("ETen-B5-V", "BIG5");
+ // Same as ETen-B5-H but replaces half-width Latin characters with proportional forms
+ charsetMapping.put("ETenms-B5-H", "BIG5");
+ // Vertical version of ETenms-B5-H
+ charsetMapping.put("ETenms-B5-V", "BIG5");
+ // CNS 11643-1992 character set, EUC-TW encoding
+ charsetMapping.put("CNS-EUC-H", "HZ");
+ // Vertical version of CNS-EUC-H
+ charsetMapping.put("CNS-EUC-V", "HZ");
+ // Unicode (UCS-2) encoding for the Adobe-CNS1 character collection
+ charsetMapping.put("UniCNS-UCS2-H", "ISO-10646-UCS-2");
+ // Vertical version of UniCNS-UCS2-H
+ charsetMapping.put("UniCNS-UCS2-V", "ISO-10646-UCS-2");
+ // Unicode (UTF-16BE) encoding for the Adobe-CNS1 character collection;
+ // contains mappings for all the characters in the HKSCS-2001 character set and
+ // contains both 2- and 4- byte character codes
+ charsetMapping.put("UniCNS-UTF16-H", "UTF-16BE");
+ // Vertical version of UniCNS-UTF16-H
+ charsetMapping.put("UniCNS-UTF16-V", "UTF-16BE");
//Japanese
- _mapping.put("83pv-RKSJ-H", "JIS"); // Mac OS, JIS X 0208 character set with KanjiTalk6 extensions, Shift-JIS encoding, Script Manager code 1
- _mapping.put("90ms-RKSJ-H", "JIS"); // Microsoft Code Page 932 (lfCharSet 0x80), JIS X 0208 character set with NEC and IBM- extensions
- _mapping.put("90ms-RKSJ-V", "JIS"); // Vertical version of 90ms-RKSJ-H
- _mapping.put("90msp-RKSJ-H", "JIS"); // Same as 90ms-RKSJ-H but replaces half-width Latin characters with proportional forms
- _mapping.put("90msp-RKSJ-V", "JIS"); // Vertical version of 90msp-RKSJ-H
- _mapping.put("90pv-RKSJ-H", "JIS"); // Mac OS, JIS X 0208 character set with KanjiTalk7 extensions, Shift-JIS encoding, Script Manager code 1
- _mapping.put("Add-RKSJ-H", "JIS"); // JIS X 0208 character set with Fujitsu FMR extensions, Shift-JIS encoding
- _mapping.put("Add-RKSJ-V", "JIS"); // Vertical version of Add-RKSJ-H
- _mapping.put("EUC-H", "JIS"); // JIS X 0208 character set, EUC-JP encoding
- _mapping.put("EUC-V", "JIS"); // Vertical version of EUC-H
- _mapping.put("Ext-RKSJ-H", "JIS"); // JIS C 6226 (JIS78) character set with NEC extensions, Shift-JIS encoding
- _mapping.put("Ext-RKSJ-V", "JIS"); // Vertical version of Ext-RKSJ-H
- _mapping.put("H", "JIS"); // JIS X 0208 character set, ISO-2022-JP encoding
- _mapping.put("V", "JIS"); // Vertical version of H
- _mapping.put("UniJIS-UCS2-H", "ISO-10646-UCS-2"); // Unicode (UCS-2) encoding for the Adobe-Japan1 character collection
- _mapping.put("UniJIS-UCS2-V", "ISO-10646-UCS-2"); // Vertical version of UniJIS-UCS2-H
- _mapping.put("UniJIS-UCS2-HW-H", "ISO-10646-UCS-2"); // Same as UniJIS-UCS2-H but replaces proportional Latin characters with half-width forms
- _mapping.put("UniJIS-UCS2-HW-V", "ISO-10646-UCS-2"); // Vertical version of UniJIS-UCS2-HW-H
- _mapping.put("UniJIS-UTF16-H", "UTF-16BE"); // Unicode (UTF-16BE) encoding for the Adobe-Japan1 character collection; contains mappings for all characters in the JIS X 0213:1000 character set
- _mapping.put("UniJIS-UTF16-V", "UTF-16BE"); // Vertical version of UniJIS-UTF16-H
- _mapping.put("Identity-H", "JIS"); // JIS X 0208 character set, ISO-2022-JP encoding
- _mapping.put("Identity-V", "JIS"); // Vertical version of H
+ // Mac OS, JIS X 0208 character set with KanjiTalk6 extensions, Shift-JIS encoding, Script Manager code 1
+ charsetMapping.put("83pv-RKSJ-H", "JIS");
+ // Microsoft Code Page 932 (lfCharSet 0x80), JIS X 0208 character set with NEC and IBM- extensions
+ charsetMapping.put("90ms-RKSJ-H", "JIS");
+ // Vertical version of 90ms-RKSJ-H
+ charsetMapping.put("90ms-RKSJ-V", "JIS");
+ // Same as 90ms-RKSJ-H but replaces half-width Latin characters with proportional forms
+ charsetMapping.put("90msp-RKSJ-H", "JIS");
+ // Vertical version of 90msp-RKSJ-H
+ charsetMapping.put("90msp-RKSJ-V", "JIS");
+ // Mac OS, JIS X 0208 character set with KanjiTalk7 extensions, Shift-JIS encoding, Script Manager code 1
+ charsetMapping.put("90pv-RKSJ-H", "JIS");
+ // JIS X 0208 character set with Fujitsu FMR extensions, Shift-JIS encoding
+ charsetMapping.put("Add-RKSJ-H", "JIS");
+ // Vertical version of Add-RKSJ-H
+ charsetMapping.put("Add-RKSJ-V", "JIS");
+ // JIS X 0208 character set, EUC-JP encoding
+ charsetMapping.put("EUC-H", "JIS");
+ // Vertical version of EUC-H
+ charsetMapping.put("EUC-V", "JIS");
+ // JIS C 6226 (JIS78) character set with NEC extensions, Shift-JIS encoding
+ charsetMapping.put("Ext-RKSJ-H", "JIS");
+ // Vertical version of Ext-RKSJ-H
+ charsetMapping.put("Ext-RKSJ-V", "JIS");
+ // JIS X 0208 character set, ISO-2022-JP encoding
+ charsetMapping.put("H", "JIS");
+ // Vertical version of H
+ charsetMapping.put("V", "JIS");
+ // Unicode (UCS-2) encoding for the Adobe-Japan1 character collection
+ charsetMapping.put("UniJIS-UCS2-H", "ISO-10646-UCS-2");
+ // Vertical version of UniJIS-UCS2-H
+ charsetMapping.put("UniJIS-UCS2-V", "ISO-10646-UCS-2");
+ // Same as UniJIS-UCS2-H but replaces proportional Latin characters with half-width forms
+ charsetMapping.put("UniJIS-UCS2-HW-H", "ISO-10646-UCS-2");
+ // Vertical version of UniJIS-UCS2-HW-H
+ charsetMapping.put("UniJIS-UCS2-HW-V", "ISO-10646-UCS-2");
+ // Unicode (UTF-16BE) encoding for the Adobe-Japan1 character collection;
+ // contains mappings for all characters in the JIS X 0213:1000 character set
+ charsetMapping.put("UniJIS-UTF16-H", "UTF-16BE");
+ // Vertical version of UniJIS-UTF16-H
+ charsetMapping.put("UniJIS-UTF16-V", "UTF-16BE");
+ // JIS X 0208 character set, ISO-2022-JP encoding
+ charsetMapping.put("Identity-H", "JIS");
+ // Vertical version of Identity-H
+ charsetMapping.put("Identity-V", "JIS");
//Korean
- _mapping.put("KSC-EUC-H", "KSC"); // KS X 1001:1992 character set, EUC-KR encoding
- _mapping.put("KSC-EUC-V", "KSC"); // Vertical version of KSC-EUC-H
- _mapping.put("KSCms-UHC-H", "KSC"); // Microsoft Code Page 949 (lfCharSet 0x81), KS X 1001:1992 character set plus 8822.putitional hangul, Unified Hangul Code (UHC) encoding
- _mapping.put("KSCms-UHC-V", "KSC"); // Vertical version of KSCms-UHC-H
- _mapping.put("KSCms-UHC-HW-H", "KSC"); // Same as KSCms-UHC-H but replaces proportional Latin characters with half-width forms
- _mapping.put("KSCms-UHC-HW-V", "KSC"); // Vertical version of KSCms-UHC-HW-H
- _mapping.put("KSCpc-EUC-H", "KSC"); // Mac OS, KS X 1001:1992 character set with Mac OS KH extensions, Script Manager Code 3
- _mapping.put("UniKS-UCS2-H", "ISO-10646-UCS-2"); // Unicode (UCS-2) encoding for the Adobe-Korea1 character collection
- _mapping.put("UniKS-UCS2-V", "ISO-10646-UCS-2"); // Vertical version of UniKS-UCS2-H
- _mapping.put("UniKS-UTF16-H", "UTF-16BE"); // Unicode (UTF-16BE) encoding for the Adobe-Korea1 character collection
- _mapping.put("UniKS-UTF16-V", "UTF-16BE"); // Vertical version of UniKS-UTF16-H
+ // KS X 1001:1992 character set, EUC-KR encoding
+ charsetMapping.put("KSC-EUC-H", "KSC");
+ // Vertical version of KSC-EUC-H
+ charsetMapping.put("KSC-EUC-V", "KSC");
+ // Microsoft Code Page 949 (lfCharSet 0x81), KS X 1001:1992 character set
+ // plus 8822 additional hangul, Unified Hangul Code (UHC) encoding
+ charsetMapping.put("KSCms-UHC-H", "KSC");
+ // Vertical version of KSCms-UHC-H
+ charsetMapping.put("KSCms-UHC-V", "KSC");
+ // Same as KSCms-UHC-H but replaces proportional Latin characters with half-width forms
+ charsetMapping.put("KSCms-UHC-HW-H", "KSC");
+ // Vertical version of KSCms-UHC-HW-H
+ charsetMapping.put("KSCms-UHC-HW-V", "KSC");
+ // Mac OS, KS X 1001:1992 character set with Mac OS KH extensions, Script Manager Code 3
+ charsetMapping.put("KSCpc-EUC-H", "KSC");
+ // Unicode (UCS-2) encoding for the Adobe-Korea1 character collection
+ charsetMapping.put("UniKS-UCS2-H", "ISO-10646-UCS-2");
+ // Vertical version of UniKS-UCS2-H
+ charsetMapping.put("UniKS-UCS2-V", "ISO-10646-UCS-2");
+ // Unicode (UTF-16BE) encoding for the Adobe-Korea1 character collection
+ charsetMapping.put("UniKS-UTF16-H", "UTF-16BE");
+ // Vertical version of UniKS-UTF16-H
+ charsetMapping.put("UniKS-UTF16-V", "UTF-16BE");
}
@@ -112,17 +183,18 @@
public static final String getCharset( String encoding )
{
if ( encoding.startsWith("COSName"))
+ {
encoding = encoding.substring(8, encoding.length()-1);
-
- return (String)(_mapping.get(encoding));
+ }
+ return (String)(charsetMapping.get(encoding));
}
/**
- * Return an iterator to iterate through all encodings
+ * Return an iterator to iterate through all encodings.
*/
public static final Iterator getEncodingIterator()
{
- return _mapping.keySet().iterator();
+ return charsetMapping.keySet().iterator();
}
}
Modified: incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/encoding/conversion/CMapSubstitution.java
URL: http://svn.apache.org/viewvc/incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/encoding/conversion/CMapSubstitution.java?rev=802910&r1=802909&r2=802910&view=diff
==============================================================================
--- incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/encoding/conversion/CMapSubstitution.java (original)
+++ incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/encoding/conversion/CMapSubstitution.java Mon Aug 10 19:34:46 2009
@@ -19,51 +19,65 @@
import java.util.HashMap;
/**
- * This class provides a mapping from char code to unicode mapping files used for CJK-encoding
+ * This class provides a mapping from char code to unicode mapping files used for CJK-encoding.
* @author Andreas Lehmkühler
+ * @version $Revision: 1.0 $
*
*/
-public class CMapSubstitution {
+public class CMapSubstitution
+{
- private static HashMap cmapSubstitutions = new HashMap();
+ private static HashMap cmapSubstitutions = new HashMap();
- static {
-
- // I don't know if these mappings are complete. Perhaps there
- // has to be added still one or more
-
- // chinese simplified
- cmapSubstitutions.put( "Adobe-GB1-4", "Adobe-GB1-UCS2" );
- cmapSubstitutions.put( "GBK-EUC-H", "GBK-EUC-UCS2" );
- cmapSubstitutions.put( "GBK-EUC-V", "GBK-EUC-UCS2" );
- cmapSubstitutions.put( "GBpc-EUC-H", "GBpc-EUC-UCS2C" );
- cmapSubstitutions.put( "GBpc-EUC-V", "GBpc-EUC-UCS2C" );
-
- // chinese traditional
- cmapSubstitutions.put( "Adobe-CNS1-4", "Adobe-CNS1-UCS2" );
- cmapSubstitutions.put( "B5pc-H", "B5pc-UCS2" );
- cmapSubstitutions.put( "B5pc-V", "B5pc-UCS2" );
- cmapSubstitutions.put( "ETen-B5-H", "ETen-B5-UCS2" );
- cmapSubstitutions.put( "ETen-B5-V", "ETen-B5-UCS2" );
- cmapSubstitutions.put( "ETenms-B5-H", "ETen-B5-UCS2" );
- cmapSubstitutions.put( "ETenms-B5-V", "ETen-B5-UCS2" );
-
- // japanese
- cmapSubstitutions.put( "90ms-RKSJ-H", "90ms-RKSJ-UCS2" );
- cmapSubstitutions.put( "90ms-RKSJ-V", "90ms-RKSJ-UCS2" );
- cmapSubstitutions.put( "90msp-RKSJ-H", "90ms-RKSJ-UCS2" );
- cmapSubstitutions.put( "90msp-RKSJ-V", "90ms-RKSJ-UCS2" );
- cmapSubstitutions.put( "90pv-RKSJ-H", "90pv-RKSJ-UCS2");
- cmapSubstitutions.put( "UniJIS-UCS2-HW-H", "UniJIS-UCS2-H" );
- cmapSubstitutions.put( "Adobe-Japan1-4", "Adobe-Japan1-UCS2");
- cmapSubstitutions.put( "Identity-H", "Adobe-Japan1-UCS2");
-
- }
-
- public static String substituteCMap(String cmapName) {
- if (cmapSubstitutions.containsKey(cmapName))
- return (String)cmapSubstitutions.get(cmapName);
- return cmapName;
- }
+ private CMapSubstitution()
+ {
+ }
+
+ static
+ {
+ // I don't know if these mappings are complete. Perhaps one or more
+ // still have to be added.
+
+ // chinese simplified
+ cmapSubstitutions.put( "Adobe-GB1-4", "Adobe-GB1-UCS2" );
+ cmapSubstitutions.put( "GBK-EUC-H", "GBK-EUC-UCS2" );
+ cmapSubstitutions.put( "GBK-EUC-V", "GBK-EUC-UCS2" );
+ cmapSubstitutions.put( "GBpc-EUC-H", "GBpc-EUC-UCS2C" );
+ cmapSubstitutions.put( "GBpc-EUC-V", "GBpc-EUC-UCS2C" );
+
+ // chinese traditional
+ cmapSubstitutions.put( "Adobe-CNS1-4", "Adobe-CNS1-UCS2" );
+ cmapSubstitutions.put( "B5pc-H", "B5pc-UCS2" );
+ cmapSubstitutions.put( "B5pc-V", "B5pc-UCS2" );
+ cmapSubstitutions.put( "ETen-B5-H", "ETen-B5-UCS2" );
+ cmapSubstitutions.put( "ETen-B5-V", "ETen-B5-UCS2" );
+ cmapSubstitutions.put( "ETenms-B5-H", "ETen-B5-UCS2" );
+ cmapSubstitutions.put( "ETenms-B5-V", "ETen-B5-UCS2" );
+
+ // japanese
+ cmapSubstitutions.put( "90ms-RKSJ-H", "90ms-RKSJ-UCS2" );
+ cmapSubstitutions.put( "90ms-RKSJ-V", "90ms-RKSJ-UCS2" );
+ cmapSubstitutions.put( "90msp-RKSJ-H", "90ms-RKSJ-UCS2" );
+ cmapSubstitutions.put( "90msp-RKSJ-V", "90ms-RKSJ-UCS2" );
+ cmapSubstitutions.put( "90pv-RKSJ-H", "90pv-RKSJ-UCS2");
+ cmapSubstitutions.put( "UniJIS-UCS2-HW-H", "UniJIS-UCS2-H" );
+ cmapSubstitutions.put( "Adobe-Japan1-4", "Adobe-Japan1-UCS2");
+ cmapSubstitutions.put( "Identity-H", "Adobe-Japan1-UCS2");
+
+ }
+
+ /**
+ *
+ * @param cmapName The name of a cmap for which we have to find a possible substitution
+ * @return the substitution for the given cmap name
+ */
+ public static String substituteCMap(String cmapName)
+ {
+ if (cmapSubstitutions.containsKey(cmapName))
+ {
+ return (String)cmapSubstitutions.get(cmapName);
+ }
+ return cmapName;
+ }
}
Modified: incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/encoding/conversion/EncodingConversionManager.java
URL: http://svn.apache.org/viewvc/incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/encoding/conversion/EncodingConversionManager.java?rev=802910&r1=802909&r2=802910&view=diff
==============================================================================
--- incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/encoding/conversion/EncodingConversionManager.java (original)
+++ incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/encoding/conversion/EncodingConversionManager.java Mon Aug 10 19:34:46 2009
@@ -24,39 +24,49 @@
* and respective EncodingConverter instance. Those PDF encoding name like
* GBK-EUC-H should be converted to java charset name before constructing a
* java string instance
+ *
+ * @author Pin Xue (http://www.pinxue.net), Holly Lee (holly.lee (at) gmail.com)
+ * @version $Revision: 1.0 $
*/
public class EncodingConversionManager
{
/**
- * Mapping from PDF encoding name to EncodingConverter instance
+ * Mapping from PDF encoding name to EncodingConverter instance.
*/
- private static HashMap _encodingMap = new HashMap();
+ private static HashMap encodingMap = new HashMap();
- /**
- * Initialize the encodingMap before anything calls us
- */
+ private EncodingConversionManager()
+ {
+ }
+
+ /**
+ * Initialize the encodingMap before anything calls us.
+ */
static {
- // Add CJK encodings to map
- Iterator it = CJKEncodings.getEncodingIterator();
+ // Add CJK encodings to map
+ Iterator it = CJKEncodings.getEncodingIterator();
+
+ while ( it.hasNext() )
+ {
+ String encodingName = (String)(it.next());
+ encodingMap.put(encodingName, new CJKConverter(encodingName));
+ }
+ // If there are any other encoding conversions, please add them here.
- while ( it.hasNext() ) {
- String encodingName = (String)(it.next());
- _encodingMap.put(encodingName, new CJKConverter(encodingName));
- }
-
- // If there is any other encoding conversions, please add it here.
-
- }
-
- /**
- * Get converter from given encoding name. If no converted defined,
- * a null is returned
- */
- public static final EncodingConverter getConverter(String encoding)
- {
- return (EncodingConverter)(_encodingMap.get(encoding));
- }
+ }
+
+ /**
+ * Get the converter for the given encoding name. If no converter is defined,
+ * null is returned.
+ *
+ * @param encoding search for a converter for the given encoding name
+ * @return the converter for the given encoding name
+ */
+ public static final EncodingConverter getConverter(String encoding)
+ {
+ return (EncodingConverter)(encodingMap.get(encoding));
+ }
}
Modified: incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/encoding/conversion/EncodingConverter.java
URL: http://svn.apache.org/viewvc/incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/encoding/conversion/EncodingConverter.java?rev=802910&r1=802909&r2=802910&view=diff
==============================================================================
--- incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/encoding/conversion/EncodingConverter.java (original)
+++ incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/encoding/conversion/EncodingConverter.java Mon Aug 10 19:34:46 2009
@@ -22,16 +22,28 @@
* EncodingConverter converts string or characters in one encoding, which is specified in PDF
* file, to another string with respective java charset. The mapping from
* PDF encoding name to java charset name is maintained by EncodingConversionManager
+
+ * @author Pin Xue (http://www.pinxue.net), Holly Lee (holly.lee (at) gmail.com)
+ * @version $Revision: 1.0 $
*/
public interface EncodingConverter
{
/**
- * Convert a string
+ * Convert a string.
+ *
+ * @param s the string to be converted
+ * @return the converted string
*/
public String convertString(String s);
- /**
- * Convert bytes to a string
- */
- public String convertBytes(byte [] c, int offset, int length, CMap cmap);
+ /**
+ * Convert bytes to a string.
+ *
+ * @param c the byte array to be converted
+ * @param offset the starting offset of the array
+ * @param length the number of bytes
+ * @param cmap the cmap to be used for conversion
+ * @return the converted string
+ */
+ public String convertBytes(byte [] c, int offset, int length, CMap cmap);
}
Modified: incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/examples/pdmodel/AddImageToPDF.java
URL: http://svn.apache.org/viewvc/incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/examples/pdmodel/AddImageToPDF.java?rev=802910&r1=802909&r2=802910&view=diff
==============================================================================
--- incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/examples/pdmodel/AddImageToPDF.java (original)
+++ incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/examples/pdmodel/AddImageToPDF.java Mon Aug 10 19:34:46 2009
@@ -54,7 +54,8 @@
* @throws IOException If there is an error writing the data.
* @throws COSVisitorException If there is an error writing the PDF.
*/
- public void createPDFFromImage( String inputFile, String image, String outputFile ) throws IOException, COSVisitorException
+ public void createPDFFromImage( String inputFile, String image, String outputFile )
+ throws IOException, COSVisitorException
{
// the document
PDDocument doc = null;
Modified: incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/examples/pdmodel/AddMetadataFromDocInfo.java
URL: http://svn.apache.org/viewvc/incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/examples/pdmodel/AddMetadataFromDocInfo.java?rev=802910&r1=802909&r2=802910&view=diff
==============================================================================
--- incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/examples/pdmodel/AddMetadataFromDocInfo.java (original)
+++ incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/examples/pdmodel/AddMetadataFromDocInfo.java Mon Aug 10 19:34:46 2009
@@ -109,6 +109,7 @@
*/
private static void usage()
{
- System.err.println( "Usage: java org.apache.pdfbox.examples.pdmodel.AddMetadataFromDocInfo <input-pdf> <output-pdf>" );
+ System.err.println( "Usage: java org.apache.pdfbox.examples.pdmodel.AddMetadataFromDocInfo " +
+ "<input-pdf> <output-pdf>" );
}
}
Modified: incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/examples/pdmodel/GoToSecondBookmarkOnOpen.java
URL: http://svn.apache.org/viewvc/incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/examples/pdmodel/GoToSecondBookmarkOnOpen.java?rev=802910&r1=802909&r2=802910&view=diff
==============================================================================
--- incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/examples/pdmodel/GoToSecondBookmarkOnOpen.java (original)
+++ incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/examples/pdmodel/GoToSecondBookmarkOnOpen.java Mon Aug 10 19:34:46 2009
@@ -98,6 +98,7 @@
*/
private static void usage()
{
- System.err.println( "Usage: java org.apache.pdfbox.examples.pdmodel.GoToSecondBookmarkOnOpen <input-pdf> <output-pdf>" );
+ System.err.println( "Usage: java org.apache.pdfbox.examples.pdmodel.GoToSecondBookmarkOnOpen " +
+ "<input-pdf> <output-pdf>" );
}
}
Modified: incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/examples/signature/ShowSignature.java
URL: http://svn.apache.org/viewvc/incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/examples/signature/ShowSignature.java?rev=802910&r1=802909&r2=802910&view=diff
==============================================================================
--- incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/examples/signature/ShowSignature.java (original)
+++ incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/examples/signature/ShowSignature.java Mon Aug 10 19:34:46 2009
@@ -42,6 +42,9 @@
public class ShowSignature
{
+ private ShowSignature()
+ {
+ }
/**
* This is the entry point for the application.
*
Modified: incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/examples/util/PrintTextLocations.java
URL: http://svn.apache.org/viewvc/incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/examples/util/PrintTextLocations.java?rev=802910&r1=802909&r2=802910&view=diff
==============================================================================
--- incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/examples/util/PrintTextLocations.java (original)
+++ incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/examples/util/PrintTextLocations.java Mon Aug 10 19:34:46 2009
@@ -105,7 +105,7 @@
/**
* A method provided as an event interface to allow a subclass to perform
- * some specific functionality when text needs to be processed
+ * some specific functionality when text needs to be processed.
*
* @param text The text to be processed
*/
Modified: incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/exceptions/LoggingObject.java
URL: http://svn.apache.org/viewvc/incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/exceptions/LoggingObject.java?rev=802910&r1=802909&r2=802910&view=diff
==============================================================================
--- incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/exceptions/LoggingObject.java (original)
+++ incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/exceptions/LoggingObject.java Mon Aug 10 19:34:46 2009
@@ -28,17 +28,15 @@
public abstract class LoggingObject
{
private static Logger logger_;//dwilson 3/15/07
-
- protected Logger logger() throws IOException //dwilson 3/15/07
- {//I would like to just bury any error here ... but unfortunately an error would result in a Null Reference Exception
- //Therefore, I might as well throw the original error.
-
- //http://www.rgagnon.com/javadetails/java-0501.html
- if (logger_ == null){
- FileHandler fh = new FileHandler("PDFBox.log", true);
- fh.setFormatter(new SimpleFormatter());
- logger_ = Logger.getLogger("TestLog");
- logger_.addHandler(fh);
+
+ static
+ {
+ try
+ {
+ FileHandler fh = new FileHandler("PDFBox.log", true);
+ fh.setFormatter(new SimpleFormatter());
+ logger_ = Logger.getLogger("TestLog");
+ logger_.addHandler(fh);
/*Set the log level here.
The lower your logging level, the more stuff will be logged.
@@ -56,27 +54,36 @@
I recommend INFO for debug builds and either SEVERE or OFF for production builds.
*/
logger_.setLevel(Level.WARNING);
- }
-
- return logger_;
+// logger_.setLevel(Level.INFO);
+ }
+ catch (IOException exception)
+ {
+ System.err.println("Error while opening the logfile:");
+ exception.printStackTrace();
+ }
+ }
+ protected Logger logger() throws IOException //dwilson 3/15/07
+ {
+ return logger_;
}
protected static String FullStackTrace(Throwable e){
- String sRet;
int i;
StackTraceElement [] L;
- sRet = new String();
+ StringBuffer sRet = new StringBuffer();
L = e.getStackTrace();
- for (i=0; i<L.length; i++){
- sRet = sRet + (L[i].toString())+ "\n";
+ for (i=0; i<L.length; i++)
+ {
+ sRet.append((L[i].toString())).append("\n");
}
- if (e.getCause() != null){
- sRet = sRet + "Caused By \n\t" + e.getCause().getMessage();
- sRet = sRet + FullStackTrace(e.getCause());
+ if (e.getCause() != null)
+ {
+ sRet.append("Caused By \n\t").append(e.getCause().getMessage());
+ sRet.append(FullStackTrace(e.getCause()));
}
- return sRet;
+ return sRet.toString();
}
}