You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2013/08/09 20:25:22 UTC

svn commit: r1512433 - /pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/TextNormalize.java

Author: lehmi
Date: Fri Aug  9 18:25:21 2013
New Revision: 1512433

URL: http://svn.apache.org/r1512433
Log:
PDFBOX-1622: the static DIACHASH HashMap is now populated once in a static initializer (instead of in every constructor call) to support thread safety, as proposed by Florent Guillaume

Modified:
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/TextNormalize.java

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/TextNormalize.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/TextNormalize.java?rev=1512433&r1=1512432&r2=1512433&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/TextNormalize.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/TextNormalize.java Fri Aug  9 18:25:21 2013
@@ -19,52 +19,55 @@ package org.apache.pdfbox.util;
 import java.util.HashMap;
 
 /**
- * This class allows a caller to normalize text in various ways.
- * It will load the ICU4J jar file if it is defined on the classpath.
+ * This class allows a caller to normalize text in various ways. It will load the ICU4J jar file if it is defined on the
+ * classpath.
  * 
  * @author <a href="mailto:carrier@digital-evidence.org">Brian Carrier</a>
- * @version $Revision: 1.0 $
+ * 
  */
-public class TextNormalize 
+public class TextNormalize
 {
     private ICU4JImpl icu4j = null;
-    private static final HashMap DIACHASH = new HashMap();
+    private static final HashMap<Integer, String> DIACHASH = new HashMap<Integer, String>();
     private String outputEncoding;
 
+    static
+    {
+        populateDiacHash();
+    }
+
     /**
      * 
      * @param encoding The Encoding that the text will eventually be written as (or null)
      */
-    public TextNormalize(String encoding) 
+    public TextNormalize(String encoding)
     {
         findICU4J();
-        populateDiacHash();
-        this.outputEncoding = encoding;
+        outputEncoding = encoding;
     }
 
-    private void findICU4J() 
+    private void findICU4J()
     {
         // see if we can load the icu4j classes from the classpath
-        try 
+        try
         {
             this.getClass().getClassLoader().loadClass("com.ibm.icu.text.Bidi");
             this.getClass().getClassLoader().loadClass("com.ibm.icu.text.Normalizer");
             icu4j = new ICU4JImpl();
-        } 
-        catch (ClassNotFoundException e) 
+        }
+        catch (ClassNotFoundException e)
         {
             icu4j = null;
         }
     }
+
     /*
-     * Adds non-decomposing diacritics to the hash with their related
-     * combining character. These are values that the unicode spec claims
-     * are equivalent but are not mapped in the form NFKC normalization method.
-     * Determined by going through the Combining Diacritical Marks section of 
-     * the Unicode spec and identifying which characters are not mapped to by 
-     * the normalization. 
+     * Adds non-decomposing diacritics to the hash with their related combining character. These are values that the
+     * unicode spec claims are equivalent but are not mapped in the form NFKC normalization method. Determined by going
+     * through the Combining Diacritical Marks section of the Unicode spec and identifying which characters are not
+     * mapped to by the normalization.
      */
-    private void populateDiacHash()
+    private static void populateDiacHash()
     {
         DIACHASH.put(new Integer(0x0060), "\u0300");
         DIACHASH.put(new Integer(0x02CB), "\u0300");
@@ -100,74 +103,70 @@ public class TextNormalize 
     }
 
     /**
-     * Takes a line of text in presentation order and converts it to logical order.
-     * For most text other than Arabic and Hebrew, the presentation and logical
-     * orders are the same. However, for Arabic and Hebrew, they are different and
-     * if the text involves both RTL and LTR text then the Unicode BIDI algorithm
-     * must be used to determine how to map between them.  
+     * Takes a line of text in presentation order and converts it to logical order. For most text other than Arabic and
+     * Hebrew, the presentation and logical orders are the same. However, for Arabic and Hebrew, they are different and
+     * if the text involves both RTL and LTR text then the Unicode BIDI algorithm must be used to determine how to map
+     * between them.
      * 
      * @param str Presentation form of line to convert (i.e. left most char is first char)
      * @param isRtlDominant true if the PAGE has a dominant right to left ordering
      * @return Logical form of string (or original string if ICU4J library is not on classpath)
      */
-    public String makeLineLogicalOrder(String str, boolean isRtlDominant) 
+    public String makeLineLogicalOrder(String str, boolean isRtlDominant)
     {
-        if (icu4j != null) 
+        if (icu4j != null)
         {
             return icu4j.makeLineLogicalOrder(str, isRtlDominant);
         }
-        else 
+        else
         {
             return str;
         }
     }
 
     /**
-     * Normalize the presentation forms of characters in the string.
-     * For example, convert the single "fi" ligature to "f" and "i".
+     * Normalize the presentation forms of characters in the string. For example, convert the single "fi" ligature to
+     * "f" and "i".
      * 
      * @param str String to normalize
      * @return Normalized string (or original string if ICU4J library is not on classpath)
      */
-    public String normalizePres(String str) 
+    public String normalizePres(String str)
     {
-        if (icu4j != null) 
+        if (icu4j != null)
         {
             return icu4j.normalizePres(str);
         }
-        else 
+        else
         {
             return str;
         }
     }
-    
+
     /**
-     * Normalize the diacritic, for example, 
-     * convert non-combining diacritic characters to their combining
-     * counterparts. 
+     * Normalize the diacritic, for example, convert non-combining diacritic characters to their combining counterparts.
      * 
-     * @param str String to normalize 
+     * @param str String to normalize
      * @return Normalized string (or original string if ICU4J library is not on classpath)
      */
     public String normalizeDiac(String str)
     {
         /*
-         * Unicode contains special combining forms of the diacritic characters
-         * and we want to use these. 
+         * Unicode contains special combining forms of the diacritic characters and we want to use these.
          */
-        if(outputEncoding != null && outputEncoding.toUpperCase().startsWith("UTF"))
+        if (outputEncoding != null && outputEncoding.toUpperCase().startsWith("UTF"))
         {
             Integer c = new Integer(str.charAt(0));
             // convert the characters not defined in the Unicode spec
-            if(DIACHASH.containsKey(c))
+            if (DIACHASH.containsKey(c))
             {
-                return (String)DIACHASH.get(c);
+                return (String) DIACHASH.get(c);
             }
-            else if (icu4j != null) 
+            else if (icu4j != null)
             {
                 return icu4j.normalizeDiac(str);
             }
-            else 
+            else
             {
                 return str;
             }