You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2013/07/17 19:30:38 UTC
svn commit: r1504209 [1/2] - in /pdfbox/trunk/pdfbox: ./
src/main/java/org/apache/pdfbox/encoding/
src/main/resources/org/apache/pdfbox/resources/
Author: lehmi
Date: Wed Jul 17 17:30:38 2013
New Revision: 1504209
URL: http://svn.apache.org/r1504209
Log:
PDFBOX-1665: replace glyphlist.txt with our own implementation
Added:
pdfbox/trunk/pdfbox/src/main/resources/org/apache/pdfbox/resources/additional_glyphlist.properties
- copied, changed from r1498073, pdfbox/trunk/pdfbox/src/main/resources/org/apache/pdfbox/resources/additional_glyphlist.txt
pdfbox/trunk/pdfbox/src/main/resources/org/apache/pdfbox/resources/glyphlist.properties (with props)
Removed:
pdfbox/trunk/pdfbox/src/main/resources/org/apache/pdfbox/resources/additional_glyphlist.txt
Modified:
pdfbox/trunk/pdfbox/build.xml
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/Encoding.java
Modified: pdfbox/trunk/pdfbox/build.xml
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/build.xml?rev=1504209&r1=1504208&r2=1504209&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/build.xml (original)
+++ pdfbox/trunk/pdfbox/build.xml Wed Jul 17 17:30:38 2013
@@ -237,12 +237,6 @@
</patternset>
<mapper type="flatten"/>
</unjar>
- <unjar src="${adobefiles.jar}" dest="${pdfbox.dest.dir}/org/apache/pdfbox/resources">
- <patternset>
- <include name="com/adobe/pdf/pcfi/glyphlist.txt"/>
- </patternset>
- <mapper type="flatten"/>
- </unjar>
</target>
<target name="find.testfiles">
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/Encoding.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/Encoding.java?rev=1504209&r1=1504208&r2=1504209&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/Encoding.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/Encoding.java Wed Jul 17 17:30:38 2013
@@ -16,15 +16,14 @@
*/
package org.apache.pdfbox.encoding;
-import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
import java.util.Collections;
+import java.util.Enumeration;
import java.util.HashMap;
import java.util.Map;
import java.util.MissingResourceException;
+import java.util.Properties;
import java.util.StringTokenizer;
import org.apache.commons.logging.Log;
@@ -34,9 +33,9 @@ import org.apache.pdfbox.util.ResourceLo
/**
* This is an interface to a text encoder.
- *
+ *
* @author <a href="mailto:ben@benlitchfield.com">Ben Litchfield</a>
- * @version $Revision: 1.15 $
+ *
*/
public abstract class Encoding implements COSObjectable
{
@@ -52,131 +51,94 @@ public abstract class Encoding implement
/**
* This is a mapping from a character code to a character name.
*/
- protected final Map<Integer, String> codeToName =
- new HashMap<Integer, String>();
+ protected final Map<Integer, String> codeToName = new HashMap<Integer, String>();
/**
* This is a mapping from a character name to a character code.
*/
- protected final Map<String, Integer> nameToCode =
- new HashMap<String, Integer>();
+ protected final Map<String, Integer> nameToCode = new HashMap<String, Integer>();
- private static final Map<String, String> NAME_TO_CHARACTER =
- new HashMap<String, String>();
+ private static final Map<String, String> NAME_TO_CHARACTER = new HashMap<String, String>();
- private static final Map<String, String> CHARACTER_TO_NAME =
- new HashMap<String, String>();
+ private static final Map<String, String> CHARACTER_TO_NAME = new HashMap<String, String>();
static
{
- //Loads the official Adobe Glyph List
- loadGlyphList("org/apache/pdfbox/resources/glyphlist.txt");
- //Loads some additional glyph mappings
- loadGlyphList("org/apache/pdfbox/resources/additional_glyphlist.txt");
+ // Loads the official glyph List based on adobes glyph list
+ loadGlyphProperties("org/apache/pdfbox/resources/glyphlist.properties");
+ // Loads some additional glyph mappings
+ loadGlyphProperties("org/apache/pdfbox/resources/additional_glyphlist.properties");
// Load an external glyph list file that user can give as JVM property
String location = System.getProperty("glyphlist_ext");
- if(location != null)
+ if (location != null)
{
File external = new File(location);
- if(external.exists())
+ if (external.exists())
{
- loadGlyphList(location);
+ loadGlyphProperties(location);
}
}
- NAME_TO_CHARACTER.put( NOTDEF, "" );
- NAME_TO_CHARACTER.put( "fi", "fi" );
- NAME_TO_CHARACTER.put( "fl", "fl" );
- NAME_TO_CHARACTER.put( "ffi", "ffi" );
- NAME_TO_CHARACTER.put( "ff", "ff" );
- NAME_TO_CHARACTER.put( "pi", "pi" );
+ NAME_TO_CHARACTER.put(NOTDEF, "");
+ NAME_TO_CHARACTER.put("fi", "fi");
+ NAME_TO_CHARACTER.put("fl", "fl");
+ NAME_TO_CHARACTER.put("ffi", "ffi");
+ NAME_TO_CHARACTER.put("ff", "ff");
+ NAME_TO_CHARACTER.put("pi", "pi");
- for( Map.Entry<String, String> entry : NAME_TO_CHARACTER.entrySet() )
+ for (Map.Entry<String, String> entry : NAME_TO_CHARACTER.entrySet())
{
- CHARACTER_TO_NAME.put( entry.getValue(), entry.getKey() );
+ CHARACTER_TO_NAME.put(entry.getValue(), entry.getKey());
}
}
/**
- * Loads a glyph list from a given location and populates the NAME_TO_CHARACTER hashmap
- * for character lookups.
+ * Loads a glyph list from a given location and populates the NAME_TO_CHARACTER hashmap for character lookups.
+ *
* @param location - The string location of the glyphlist file
*/
- private static void loadGlyphList(String location)
+ private static void loadGlyphProperties(String location)
{
- BufferedReader glyphStream = null;
try
{
- InputStream resource = ResourceLoader.loadResource( location );
- if (resource == null)
+ Properties glyphProperties = ResourceLoader.loadProperties(location, false);
+ if (glyphProperties == null)
{
- throw new MissingResourceException("Glyphlist not found: " + location,
- Encoding.class.getName(), location);
+ throw new MissingResourceException("Glyphlist not found: " + location, Encoding.class.getName(),
+ location);
}
- glyphStream = new BufferedReader( new InputStreamReader( resource ) );
- String line = null;
- while( (line = glyphStream.readLine()) != null )
+ Enumeration<?> names = glyphProperties.propertyNames();
+ for (Object name : Collections.list(names))
{
- line = line.trim();
- //lines starting with # are comments which we can ignore.
- if( !line.startsWith("#" ) )
+ String glyphName = name.toString();
+ String unicodeValue = glyphProperties.getProperty(glyphName);
+ StringTokenizer tokenizer = new StringTokenizer(unicodeValue, " ", false);
+ StringBuilder value = new StringBuilder();
+ while (tokenizer.hasMoreTokens())
{
- int semicolonIndex = line.indexOf( ';' );
- if( semicolonIndex >= 0 )
- {
- String unicodeValue = null;
- try
- {
- String characterName = line.substring( 0, semicolonIndex );
- unicodeValue = line.substring( semicolonIndex+1, line.length() );
- StringTokenizer tokenizer = new StringTokenizer( unicodeValue, " ", false );
- StringBuilder value = new StringBuilder();
- while(tokenizer.hasMoreTokens())
- {
- int characterCode = Integer.parseInt( tokenizer.nextToken(), 16 );
- value.append((char)characterCode);
- }
- if (NAME_TO_CHARACTER.containsKey(characterName))
- {
- LOG.warn("duplicate value for characterName="+characterName+","+value);
- }
- else
- {
- NAME_TO_CHARACTER.put( characterName, value.toString() );
- }
- }
- catch( NumberFormatException nfe )
- {
- LOG.error("malformed unicode value "+ unicodeValue, nfe);
- }
- }
+ int characterCode = Integer.parseInt(tokenizer.nextToken(), 16);
+ value.append((char) characterCode);
}
- }
- }
- catch( IOException io )
- {
- LOG.error("error while reading the glyph list.", io);
- }
- finally
- {
- if( glyphStream != null )
- {
- try
+ if (NAME_TO_CHARACTER.containsKey(glyphName))
{
- glyphStream.close();
+ LOG.warn("duplicate value for characterName=" + glyphName + "," + value);
}
- catch( IOException e )
+ else
{
- LOG.error("error when closing the glyph list.", e);
+ NAME_TO_CHARACTER.put(glyphName, value.toString());
}
-
}
}
+ catch (IOException io)
+ {
+ LOG.error("error while reading the glyph property file.", io);
+ }
}
/**
* Returns an unmodifiable view of the Code2Name mapping.
+ *
* @return the Code2Name map
*/
public Map<Integer, String> getCodeToNameMap()
@@ -186,6 +148,7 @@ public abstract class Encoding implement
/**
* Returns an unmodifiable view of the Name2Code mapping.
+ *
* @return the Name2Code map
*/
public Map<String, Integer> getNameToCodeMap()
@@ -195,124 +158,124 @@ public abstract class Encoding implement
/**
* This will add a character encoding.
- *
+ *
* @param code The character code that matches the character.
* @param name The name of the character.
*/
- public void addCharacterEncoding( int code, String name )
+ public void addCharacterEncoding(int code, String name)
{
- codeToName.put( code, name );
- nameToCode.put( name, code );
+ codeToName.put(code, name);
+ nameToCode.put(name, code);
}
/**
* This will get the character code for the name.
- *
+ *
* @param name The name of the character.
- *
+ *
* @return The code for the character.
- *
+ *
* @throws IOException If there is no character code for the name.
*/
- public int getCode( String name ) throws IOException
+ public int getCode(String name) throws IOException
{
- Integer code = nameToCode.get( name );
- if( code == null )
+ Integer code = nameToCode.get(name);
+ if (code == null)
{
- throw new IOException( "No character code for character name '" + name + "'" );
+ throw new IOException("No character code for character name '" + name + "'");
}
return code;
}
/**
* This will take a character code and get the name from the code.
- *
+ *
* @param code The character code.
- *
+ *
* @return The name of the character.
- *
+ *
* @throws IOException If there is no name for the code.
*/
- public String getName( int code ) throws IOException
+ public String getName(int code) throws IOException
{
- return codeToName.get( code );
+ return codeToName.get(code);
}
/**
* This will take a character code and get the name from the code.
- *
+ *
* @param c The character.
- *
+ *
* @return The name of the character.
- *
+ *
* @throws IOException If there is no name for the character.
*/
- public String getNameFromCharacter( char c ) throws IOException
+ public String getNameFromCharacter(char c) throws IOException
{
- String name = CHARACTER_TO_NAME.get( Character.toString(c) );
- if( name == null )
+ String name = CHARACTER_TO_NAME.get(Character.toString(c));
+ if (name == null)
{
- throw new IOException( "No name for character '" + c + "'" );
+ throw new IOException("No name for character '" + c + "'");
}
return name;
}
/**
* This will get the character from the code.
- *
+ *
* @param code The character code.
- *
+ *
* @return The printable character for the code.
- *
+ *
* @throws IOException If there is not name for the character.
*/
- public String getCharacter( int code ) throws IOException
+ public String getCharacter(int code) throws IOException
{
- String name = getName( code );
+ String name = getName(code);
if (name != null)
{
- return getCharacter( getName( code ) );
+ return getCharacter(getName(code));
}
return null;
}
/**
* This will get the character from the name.
- *
+ *
* @param name The name of the character.
- *
+ *
* @return The printable character for the code.
*/
- public String getCharacter( String name )
+ public String getCharacter(String name)
{
- String character = NAME_TO_CHARACTER.get( name );
- if( character == null )
+ String character = NAME_TO_CHARACTER.get(name);
+ if (character == null)
{
// test if we have a suffix and if so remove it
- if ( name.indexOf('.') > 0 )
+ if (name.indexOf('.') > 0)
{
- character = getCharacter(name.substring( 0, name.indexOf('.') ));
+ character = getCharacter(name.substring(0, name.indexOf('.')));
}
// test for Unicode name
// (uniXXXX - XXXX must be a multiple of four;
// each representing a hexadecimal Unicode code point)
- else if ( name.startsWith( "uni" ) )
+ else if (name.startsWith("uni"))
{
int nameLength = name.length();
StringBuilder uniStr = new StringBuilder();
try
{
- for ( int chPos = 3; chPos + 4 <= nameLength; chPos += 4 )
+ for (int chPos = 3; chPos + 4 <= nameLength; chPos += 4)
{
- int characterCode = Integer.parseInt( name.substring( chPos, chPos + 4), 16 );
+ int characterCode = Integer.parseInt(name.substring(chPos, chPos + 4), 16);
- if ( characterCode > 0xD7FF && characterCode < 0xE000 )
+ if (characterCode > 0xD7FF && characterCode < 0xE000)
{
- LOG.warn( "Unicode character name with not allowed code area: " + name );
+ LOG.warn("Unicode character name with not allowed code area: " + name);
}
else
{
- uniStr.append( (char) characterCode );
+ uniStr.append((char) characterCode);
}
}
character = uniStr.toString();
@@ -320,36 +283,36 @@ public abstract class Encoding implement
}
catch (NumberFormatException nfe)
{
- LOG.warn( "Not a number in Unicode character name: " + name );
+ LOG.warn("Not a number in Unicode character name: " + name);
character = name;
}
}
- // test for an alternate Unicode name representation
- else if ( name.startsWith( "u" ) )
+ // test for an alternate Unicode name representation
+ else if (name.startsWith("u"))
{
try
{
- int characterCode = Integer.parseInt( name.substring( 1 ), 16 );
- if ( characterCode > 0xD7FF && characterCode < 0xE000 )
+ int characterCode = Integer.parseInt(name.substring(1), 16);
+ if (characterCode > 0xD7FF && characterCode < 0xE000)
{
- LOG.warn( "Unicode character name with not allowed code area: " + name );
+ LOG.warn("Unicode character name with not allowed code area: " + name);
}
else
{
- character = String.valueOf((char)characterCode);
+ character = String.valueOf((char) characterCode);
NAME_TO_CHARACTER.put(name, character);
}
}
catch (NumberFormatException nfe)
{
- LOG.warn( "Not a number in Unicode character name: " + name );
+ LOG.warn("Not a number in Unicode character name: " + name);
character = name;
}
}
else if (nameToCode.containsKey(name))
{
int code = nameToCode.get(name);
- character = Character.toString((char)code);
+ character = Character.toString((char) code);
}
else
{
Copied: pdfbox/trunk/pdfbox/src/main/resources/org/apache/pdfbox/resources/additional_glyphlist.properties (from r1498073, pdfbox/trunk/pdfbox/src/main/resources/org/apache/pdfbox/resources/additional_glyphlist.txt)
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/resources/org/apache/pdfbox/resources/additional_glyphlist.properties?p2=pdfbox/trunk/pdfbox/src/main/resources/org/apache/pdfbox/resources/additional_glyphlist.properties&p1=pdfbox/trunk/pdfbox/src/main/resources/org/apache/pdfbox/resources/additional_glyphlist.txt&r1=1498073&r2=1504209&rev=1504209&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/resources/org/apache/pdfbox/resources/additional_glyphlist.txt (original)
+++ pdfbox/trunk/pdfbox/src/main/resources/org/apache/pdfbox/resources/additional_glyphlist.properties Wed Jul 17 17:30:38 2013
@@ -18,143 +18,143 @@
# (1) glyph name
# (2) Unicode scalar value
#
-# These mappings are missing in the original copy of the adobe glyphlist.txt
+# These mappings are missing in glyphlist.properties.
#
-angbracketleft;3008
-angbracketright;3009
-circlecopyrt;00A9
-controlNULL;0000
+angbracketleft=3008
+angbracketright=3009
+circlecopyrt=00A9
+controlNULL=0000
#
# TeX-related mappings using named values
#
-angbracketleftbig;2329
-angbracketleftBig;2329
-angbracketleftbigg;2329
-angbracketleftBigg;2329
-angbracketrightBig;232A
-angbracketrightbig;232A
-angbracketrightBigg;232A
-angbracketrightbigg;232A
-arrowhookleft;21AA
-arrowhookright;21A9
-arrowlefttophalf;21BC
-arrowleftbothalf;21BD
-arrownortheast;2197
-arrownorthwest;2196
-arrowrighttophalf;21C0
-arrowrightbothalf;21C1
-arrowsoutheast;2198
-arrowsouthwest;2199
-backslashbig;2216
-backslashBig;2216
-backslashBigg;2216
-backslashbigg;2216
-bardbl;2016
-bracehtipdownleft;FE37
-bracehtipdownright;FE37
-bracehtipupleft;FE38
-bracehtipupright;FE38
-braceleftBig;007B
-braceleftbig;007B
-braceleftbigg;007B
-braceleftBigg;007B
-bracerightBig;007D
-bracerightbig;007D
-bracerightbigg;007D
-bracerightBigg;007D
-bracketleftbig;005B
-bracketleftBig;005B
-bracketleftbigg;005B
-bracketleftBigg;005B
-bracketrightBig;005D
-bracketrightbig;005D
-bracketrightbigg;005D
-bracketrightBigg;005D
-ceilingleftbig;2308
-ceilingleftBig;2308
-ceilingleftBigg;2308
-ceilingleftbigg;2308
-ceilingrightbig;2309
-ceilingrightBig;2309
-ceilingrightbigg;2309
-ceilingrightBigg;2309
-circledotdisplay;2299
-circledottext;2299
-circlemultiplydisplay;2297
-circlemultiplytext;2297
-circleplusdisplay;2295
-circleplustext;2295
-contintegraldisplay;222E
-contintegraltext;222E
-coproductdisplay;2210
-coproducttext;2210
-floorleftBig;230A
-floorleftbig;230A
-floorleftbigg;230A
-floorleftBigg;230A
-floorrightbig;230B
-floorrightBig;230B
-floorrightBigg;230B
-floorrightbigg;230B
-hatwide;0302
-hatwider;0302
-hatwidest;0302
-intercal;1D40
-integraldisplay;222B
-integraltext;222B
-intersectiondisplay;22C2
-intersectiontext;22C2
-logicalanddisplay;2227
-logicalandtext;2227
-logicalordisplay;2228
-logicalortext;2228
-parenleftBig;0028
-parenleftbig;0028
-parenleftBigg;0028
-parenleftbigg;0028
-parenrightBig;0029
-parenrightbig;0029
-parenrightBigg;0029
-parenrightbigg;0029
-prime;2032
-productdisplay;220F
-producttext;220F
-radicalbig;221A
-radicalBig;221A
-radicalBigg;221A
-radicalbigg;221A
-radicalbt;221A
-radicaltp;221A
-radicalvertex;221A
-slashbig;002F
-slashBig;002F
-slashBigg;002F
-slashbigg;002F
-summationdisplay;2211
-summationtext;2211
-tildewide;02DC
-tildewider;02DC
-tildewidest;02DC
-uniondisplay;22C3
-unionmultidisplay;228E
-unionmultitext;228E
-unionsqdisplay;2294
-unionsqtext;2294
-uniontext;22C3
-vextenddouble;2225
-vextendsingle;2223
+angbracketleftbig=2329
+angbracketleftBig=2329
+angbracketleftbigg=2329
+angbracketleftBigg=2329
+angbracketrightBig=232A
+angbracketrightbig=232A
+angbracketrightBigg=232A
+angbracketrightbigg=232A
+arrowhookleft=21AA
+arrowhookright=21A9
+arrowlefttophalf=21BC
+arrowleftbothalf=21BD
+arrownortheast=2197
+arrownorthwest=2196
+arrowrighttophalf=21C0
+arrowrightbothalf=21C1
+arrowsoutheast=2198
+arrowsouthwest=2199
+backslashbig=2216
+backslashBig=2216
+backslashBigg=2216
+backslashbigg=2216
+bardbl=2016
+bracehtipdownleft=FE37
+bracehtipdownright=FE37
+bracehtipupleft=FE38
+bracehtipupright=FE38
+braceleftBig=007B
+braceleftbig=007B
+braceleftbigg=007B
+braceleftBigg=007B
+bracerightBig=007D
+bracerightbig=007D
+bracerightbigg=007D
+bracerightBigg=007D
+bracketleftbig=005B
+bracketleftBig=005B
+bracketleftbigg=005B
+bracketleftBigg=005B
+bracketrightBig=005D
+bracketrightbig=005D
+bracketrightbigg=005D
+bracketrightBigg=005D
+ceilingleftbig=2308
+ceilingleftBig=2308
+ceilingleftBigg=2308
+ceilingleftbigg=2308
+ceilingrightbig=2309
+ceilingrightBig=2309
+ceilingrightbigg=2309
+ceilingrightBigg=2309
+circledotdisplay=2299
+circledottext=2299
+circlemultiplydisplay=2297
+circlemultiplytext=2297
+circleplusdisplay=2295
+circleplustext=2295
+contintegraldisplay=222E
+contintegraltext=222E
+coproductdisplay=2210
+coproducttext=2210
+floorleftBig=230A
+floorleftbig=230A
+floorleftbigg=230A
+floorleftBigg=230A
+floorrightbig=230B
+floorrightBig=230B
+floorrightBigg=230B
+floorrightbigg=230B
+hatwide=0302
+hatwider=0302
+hatwidest=0302
+intercal=1D40
+integraldisplay=222B
+integraltext=222B
+intersectiondisplay=22C2
+intersectiontext=22C2
+logicalanddisplay=2227
+logicalandtext=2227
+logicalordisplay=2228
+logicalortext=2228
+parenleftBig=0028
+parenleftbig=0028
+parenleftBigg=0028
+parenleftbigg=0028
+parenrightBig=0029
+parenrightbig=0029
+parenrightBigg=0029
+parenrightbigg=0029
+prime=2032
+productdisplay=220F
+producttext=220F
+radicalbig=221A
+radicalBig=221A
+radicalBigg=221A
+radicalbigg=221A
+radicalbt=221A
+radicaltp=221A
+radicalvertex=221A
+slashbig=002F
+slashBig=002F
+slashBigg=002F
+slashbigg=002F
+summationdisplay=2211
+summationtext=2211
+tildewide=02DC
+tildewider=02DC
+tildewidest=02DC
+uniondisplay=22C3
+unionmultidisplay=228E
+unionmultitext=228E
+unionsqdisplay=2294
+unionsqtext=2294
+uniontext=22C3
+vextenddouble=2225
+vextendsingle=2223
#
# TeX-related mappings using hexadecimal or decimal values
#
-x1b;FB00
-x1c;FB01
-x1d;FB02
-x1e;FB03
-x8a;0141
-xff;00DF
-a27;FB00
-a28;FB01
-a29;FB02
-a30;FB03
-a138;0141
-a255;00DF
\ No newline at end of file
+x1b=FB00
+x1c=FB01
+x1d=FB02
+x1e=FB03
+x8a=0141
+xff=00DF
+a27=FB00
+a28=FB01
+a29=FB02
+a30=FB03
+a138=0141
+a255=00DF
\ No newline at end of file