You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@xalan.apache.org by zo...@apache.org on 2003/04/06 22:54:07 UTC

cvs commit: xml-xalan/java/src/org/apache/xml/serializer HTMLEntities.properties XMLEntities.properties CharInfo.java output_html.properties output_xml.properties

zongaro     2003/04/06 13:54:06

  Modified:    java/src/org/apache/xml/serializer CharInfo.java
                        output_html.properties output_xml.properties
  Added:       java/src/org/apache/xml/serializer HTMLEntities.properties
                        XMLEntities.properties
  Log:
  Patch from Christine Li (jycli@ca.ibm.com).  Changed CharInfo so that it allows
  descriptions of entities to be specified as a properties file in addition to a
  Xalan-specific resource file format.
  
  The code previously always assumed a Xalan-specific resource file format, for
  both user-supplied descriptions of entities, and for those supplied with
  Xalan-J.  The security restrictions of applets are such that an applet that
  relies on a copy of Xalan-J in the Java run-time would find that Xalan-J would
  be unable to read the default entities file that is packaged with the JRE on
  the applet's behalf - it can only do so with a properties file, via ResourceBundle.
  
  Revision  Changes    Path
  1.2       +142 -115  xml-xalan/java/src/org/apache/xml/serializer/CharInfo.java
  
  Index: CharInfo.java
  ===================================================================
  RCS file: /home/cvs/xml-xalan/java/src/org/apache/xml/serializer/CharInfo.java,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- CharInfo.java	1 Apr 2003 19:24:54 -0000	1.1
  +++ CharInfo.java	6 Apr 2003 20:54:06 -0000	1.2
  @@ -60,15 +60,20 @@
   import java.io.InputStream;
   import java.io.InputStreamReader;
   import java.net.URL;
  +import java.net.MalformedURLException;
   import java.util.BitSet;
   import java.util.Hashtable;
  +import java.util.PropertyResourceBundle;
  +import java.util.ResourceBundle;
  +import java.util.Locale;
  +import java.util.MissingResourceException;
  +import java.util.Enumeration;
   
   import org.apache.xml.res.XMLErrorResources;
   import org.apache.xml.res.XMLMessages;
   import org.apache.xml.utils.CharKey;
   import org.apache.xml.utils.SystemIDResolver;
   import org.apache.xml.utils.SystemIDResolver;
  -
   /**
    * This class provides services that tell if a character should have
    * special treatement, such as entity reference substitution or normalization
  @@ -90,13 +95,13 @@
        * The name of the HTML entities file.
        * If specified, the file will be resource loaded with the default class loader.
        */
  -    public static String HTML_ENTITIES_RESOURCE = "HTMLEntities.res";
  +    public static String HTML_ENTITIES_RESOURCE = "org.apache.xml.serializer.HTMLEntities";
   
       /**
        * The name of the XML entities file.
        * If specified, the file will be resource loaded with the default class loader.
        */
  -    public static String XML_ENTITIES_RESOURCE = "XMLEntities.res";
  +    public static String XML_ENTITIES_RESOURCE = "org.apache.xml.serializer.XMLEntities";
   
       /** The linefeed character, which the parser should always normalize. */
       public static final char S_LINEFEED = 0x0A;
  @@ -108,21 +113,24 @@
        * Constructor that reads in a resource file that describes the mapping of
        * characters to entity references.
        *
  -     * Resource files must be encoded in UTF-8 and have a format like:
  +     * Resource files must be encoded in UTF-8.  They can be properties
  +     * files, with a .properties extension assumed.  Alternatively, they can
  +     * have the following form, with no particular extension assumed:
  +     *
        * <pre>
        * # First char # is a comment
        * Entity numericValue
        * quot 34
        * amp 38
        * </pre>
  -     * (Note: Why don't we just switch to .properties files? Oct-01 -sc)
        *
  -     * @param entitiesResource Name of entities resource file that should
  -     * be loaded, which describes that mapping of characters to entity references.
  +     * @param entitiesResource Name of properties or resource file that should
  +     * be loaded, which describes the mapping of characters to entity
  +     * references.
        */
       public CharInfo(String entitiesResource)
       {
  -
  +        PropertyResourceBundle entities;
           InputStream is = null;
           BufferedReader reader = null;
           int index;
  @@ -131,132 +139,134 @@
           int code;
           String line;
   
  -        try
  -        {
  -            try
  -            {
  -                // Maintenance note: we should evaluate replacing getting the 
  -                //  ClassLoader with javax.xml.transform.FactoryFinder.findClassLoader()
  -                //  or similar code
  -                ClassLoader cl = CharInfo.class.getClassLoader();
  -
  -                if (cl == null)
  -                {
  -                    is =
  -                        ClassLoader.getSystemResourceAsStream(entitiesResource);
  -                }
  -                else
  -                {
  -                    is = cl.getResourceAsStream(entitiesResource);
  -                }
  -            }
  -            catch (Exception e)
  -            {
  +        // Make various attempts to interpret the parameter as a properties
  +        // file or resource file, as follows:
  +        //
  +        //   1) attempt to load .properties file using ResourceBundle
  +        //   2) try using the class loader to find the specified file as a
  +        //      resource file
  +        //   3) try treating the resource as a URI
  +        entities = loadEntitiesResource(entitiesResource);
  +        if (null != entities) {
  +            Enumeration enum = entities.getKeys();
  +            while (enum.hasMoreElements()){
  +                name = (String) enum.nextElement();
  +                value = entities.getString(name);
  +                code = Integer.parseInt(value);
  +                defineEntity(name, (char) code);
               }
  +            m_specialsMap.set(S_LINEFEED);
  +            m_specialsMap.set(S_CARRIAGERETURN);
  +        } else {
  +            // Otherwise load the user-specified resource file through the
  +            // class loader or, failing that, as a URL (needs a valid URI).
  +            try {
  +                try {
  +                    // Maintenance note: we should evaluate replacing getting
  +                    // the ClassLoader with
  +                    // javax.xml.transform.FactoryFinder.findClassLoader()
  +                    // or similar code
  +                    ClassLoader cl = CharInfo.class.getClassLoader();
  +
  +                    if (cl == null) {
  +                        is = ClassLoader.getSystemResourceAsStream(
  +                                                             entitiesResource);
  +                    } else {
  +                        is = cl.getResourceAsStream(entitiesResource);
  +                    }
  +                } catch (Exception e) { }
   
  -            if (is == null)
  -                is = CharInfo.class.getResourceAsStream(entitiesResource);
  +                if (is == null) {
  +                    is = CharInfo.class.getResourceAsStream(entitiesResource);
  +                }
   
  -            if (is == null)
  -            {
  -                URL url = new URL(entitiesResource);
  +                if (is == null) {
  +                    URL url = new URL(entitiesResource);
   
  -                is = url.openStream();
  -            }
  +                    is = url.openStream();
  +                }
   
  -            if (is == null)
  -                throw new RuntimeException(
  -                    XMLMessages.createXMLMessage(
  -                        XMLErrorResources.ER_RESOURCE_COULD_NOT_FIND,
  -                        new Object[] { entitiesResource, entitiesResource }));
  +                if (is == null) {
  +                    throw new RuntimeException(
  +                        XMLMessages.createXMLMessage(
  +                            XMLErrorResources.ER_RESOURCE_COULD_NOT_FIND,
  +                            new Object[] {entitiesResource, entitiesResource}));
  +                }
   
  -            // Fix Bugzilla#4000: force reading in UTF-8
  -            //  This creates the de facto standard that Xalan's resource 
  -            //  files must be encoded in UTF-8. This should work in all JVMs.
  -            //
  -            // %REVIEW% KNOWN ISSUE: IT FAILS IN MICROSOFT VJ++, which
  -            // didn't implement the UTF-8 encoding. Theoretically, we should
  -            // simply let it fail in that case, since the JVM is obviously
  -            // broken if it doesn't support such a basic standard.  But
  -            // since there are still some users attempting to use VJ++ for
  -            // development, we have dropped in a fallback which makes a
  -            // second attempt using the platform's default encoding. In VJ++
  -            // this is apparently ASCII, which is subset of UTF-8... and
  -            // since the strings we'll be reading here are also primarily
  -            // limited to the 7-bit ASCII range (at least, in English
  -            // versions of Xalan), this should work well enough to keep us
  -            // on the air until we're ready to officially decommit from
  -            // VJ++.
  -            try
  -            {
  -                reader = new BufferedReader(new InputStreamReader(is, "UTF-8"));
  -            }
  -            catch (java.io.UnsupportedEncodingException e)
  -            {
  -                reader = new BufferedReader(new InputStreamReader(is));
  -            }
  +                // Fix Bugzilla#4000: force reading in UTF-8
  +                //  This creates the de facto standard that Xalan's resource 
  +                //  files must be encoded in UTF-8. This should work in all
  +                // JVMs.
  +                //
  +                // %REVIEW% KNOWN ISSUE: IT FAILS IN MICROSOFT VJ++, which
  +                // didn't implement the UTF-8 encoding. Theoretically, we should
  +                // simply let it fail in that case, since the JVM is obviously
  +                // broken if it doesn't support such a basic standard.  But
  +                // since there are still some users attempting to use VJ++ for
  +                // development, we have dropped in a fallback which makes a
  +                // second attempt using the platform's default encoding. In VJ++
  +                // this is apparently ASCII, which is subset of UTF-8... and
  +                // since the strings we'll be reading here are also primarily
  +                // limited to the 7-bit ASCII range (at least, in English
  +                // versions of Xalan), this should work well enough to keep us
  +                // on the air until we're ready to officially decommit from
  +                // VJ++.
  +                try {
  +                    reader = new BufferedReader(new InputStreamReader(is,
  +                                                                      "UTF-8"));
  +                } catch (java.io.UnsupportedEncodingException e) {
  +                    reader = new BufferedReader(new InputStreamReader(is));
  +                }
   
  -            line = reader.readLine();
  +                line = reader.readLine();
   
  -            while (line != null)
  -            {
  -                if (line.length() == 0 || line.charAt(0) == '#')
  -                {
  -                    line = reader.readLine();
  +                while (line != null) {
  +                    if (line.length() == 0 || line.charAt(0) == '#') {
  +                        line = reader.readLine();
   
  -                    continue;
  -                }
  +                        continue;
  +                    }
   
  -                index = line.indexOf(' ');
  +                    index = line.indexOf(' ');
   
  -                if (index > 1)
  -                {
  -                    name = line.substring(0, index);
  +                    if (index > 1) {
  +                        name = line.substring(0, index);
   
  -                    ++index;
  +                        ++index;
   
  -                    if (index < line.length())
  -                    {
  -                        value = line.substring(index);
  -                        index = value.indexOf(' ');
  +                        if (index < line.length()) {
  +                            value = line.substring(index);
  +                            index = value.indexOf(' ');
   
  -                        if (index > 0)
  -                            value = value.substring(0, index);
  +                            if (index > 0) {
  +                                value = value.substring(0, index);
  +                            }
   
  -                        code = Integer.parseInt(value);
  +                            code = Integer.parseInt(value);
   
  -                        defineEntity(name, (char) code);
  +                            defineEntity(name, (char) code);
  +                        }
                       }
  -                }
  -
  -                line = reader.readLine();
  -            }
   
  -            is.close();
  -            m_specialsMap.set(S_LINEFEED);
  -            m_specialsMap.set(S_CARRIAGERETURN);
  -        }
  -        catch (Exception except)
  -        {
  -            throw new RuntimeException(
  -                XMLMessages.createXMLMessage(
  -                    XMLErrorResources.ER_RESOURCE_COULD_NOT_LOAD,
  -                    new Object[] {
  -                        entitiesResource,
  -                        except.toString(),
  -                        entitiesResource,
  -                        except.toString()}));
  -        }
  -        finally
  -        {
  -            if (is != null)
  -            {
  -                try
  -                {
  -                    is.close();
  +                    line = reader.readLine();
                   }
  -                catch (Exception except)
  -                {
  +
  +                is.close();
  +                m_specialsMap.set(S_LINEFEED);
  +                m_specialsMap.set(S_CARRIAGERETURN);
  +            } catch (Exception except) {
  +                throw new RuntimeException(
  +                    XMLMessages.createXMLMessage(
  +                        XMLErrorResources.ER_RESOURCE_COULD_NOT_LOAD,
  +                        new Object[] { entitiesResource,
  +                                       except.toString(),
  +                                       entitiesResource,
  +                                       except.toString()}));
  +            } finally {
  +                if (is != null) {
  +                    try {
  +                        is.close();
  +                    } catch (Exception except) { }
                   }
               }
           }
  @@ -394,4 +404,21 @@
           }
           return retobj;
       }
  +    
  +// Load entity properties files using PropertyResourceBundle, because of security restrictions on applets
  +        private PropertyResourceBundle loadEntitiesResource(String baseName)
  +                                throws MissingResourceException
  +        {    
  +                try
  +                {
  +                        Locale locale = Locale.getDefault();
  +                        java.lang.ClassLoader loader = this.getClass().getClassLoader(); 
  +                        return (PropertyResourceBundle)PropertyResourceBundle.getBundle(baseName);
  +                }
  +                catch (MissingResourceException e)
  +                {
  +                        return null;
  +                }
  +        }
  +    
   }
  
  
  
  1.2       +1 -1      xml-xalan/java/src/org/apache/xml/serializer/output_html.properties
  
  Index: output_html.properties
  ===================================================================
  RCS file: /home/cvs/xml-xalan/java/src/org/apache/xml/serializer/output_html.properties,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- output_html.properties	1 Apr 2003 19:24:54 -0000	1.1
  +++ output_html.properties	6 Apr 2003 20:54:06 -0000	1.2
  @@ -20,6 +20,6 @@
   # Note that the colon after the protocol needs to be escaped.
   {http\u003a//xml.apache.org/xalan}indent-amount=0
   {http\u003a//xml.apache.org/xalan}content-handler=org.apache.xml.serializer.ToHTMLStream
  -{http\u003a//xml.apache.org/xalan}entities=org/apache/xml/serializer/HTMLEntities.res
  +{http\u003a//xml.apache.org/xalan}entities=org/apache/xml/serializer/HTMLEntities
   {http\u003a//xml.apache.org/xalan}use-url-escaping=yes
   {http\u003a//xml.apache.org/xalan}omit-meta-tag=no
  
  
  
  1.2       +1 -1      xml-xalan/java/src/org/apache/xml/serializer/output_xml.properties
  
  Index: output_xml.properties
  ===================================================================
  RCS file: /home/cvs/xml-xalan/java/src/org/apache/xml/serializer/output_xml.properties,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- output_xml.properties	1 Apr 2003 19:24:54 -0000	1.1
  +++ output_xml.properties	6 Apr 2003 20:54:06 -0000	1.2
  @@ -23,5 +23,5 @@
   # Note that the colon after the protocol needs to be escaped.
   {http\u003a//xml.apache.org/xalan}indent-amount=0
   {http\u003a//xml.apache.org/xalan}content-handler=org.apache.xml.serializer.ToXMLStream
  -{http\u003a//xml.apache.org/xalan}entities=org/apache/xml/serializer/XMLEntities.res
  +{http\u003a//xml.apache.org/xalan}entities=org/apache/xml/serializer/XMLEntities
   
  
  
  
  1.1                  xml-xalan/java/src/org/apache/xml/serializer/HTMLEntities.properties
  
  Index: HTMLEntities.properties
  ===================================================================
  # $Id: HTMLEntities.properties,v 1.1 2003/04/06 20:54:06 zongaro Exp $
  # in ./java/src/org/apache/xml/serializer
  #
  # @version $Revision: 1.1 $ $Date: 2003/04/06 20:54:06 $
  # @author <a href="mailto:arkin@intalio.com">Assaf Arkin</a>
  # This file must be encoded in UTF-8; see org.apache.xml.serializer.CharInfo
  #
  # Character entity references for markup-significant characters
  #
  quot=34
  amp=38
  lt=60
  gt=62
  nbsp=160
  #
  # Character entity references for ISO 8859-1 characters
  #
  iexcl=161
  cent=162
  pound=163
  curren=164
  yen=165
  brvbar=166
  sect=167
  uml=168
  copy=169
  ordf=170
  laquo=171
  not=172
  shy=173
  reg=174
  macr=175
  deg=176
  plusmn=177
  sup2=178
  sup3=179
  acute=180
  micro=181
  para=182
  middot=183
  cedil=184
  sup1=185
  ordm=186
  raquo=187
  frac14=188
  frac12=189
  frac34=190
  iquest=191
  Agrave=192
  Aacute=193
  Acirc=194
  Atilde=195
  Auml=196
  Aring=197
  AElig=198
  Ccedil=199
  Egrave=200
  Eacute=201
  Ecirc=202
  Euml=203
  Igrave=204
  Iacute=205
  Icirc=206
  Iuml=207
  ETH=208
  Ntilde=209
  Ograve=210
  Oacute=211
  Ocirc=212
  Otilde=213
  Ouml=214
  times=215
  Oslash=216
  Ugrave=217
  Uacute=218
  Ucirc=219
  Uuml=220
  Yacute=221
  THORN=222
  szlig=223
  agrave=224
  aacute=225
  acirc=226
  atilde=227
  auml=228
  aring=229
  aelig=230
  ccedil=231
  egrave=232
  eacute=233
  ecirc=234
  euml=235
  igrave=236
  iacute=237
  icirc=238
  iuml=239
  eth=240
  ntilde=241
  ograve=242
  oacute=243
  ocirc=244
  otilde=245
  ouml=246
  divide=247
  oslash=248
  ugrave=249
  uacute=250
  ucirc=251
  uuml=252
  yacute=253
  thorn=254
  yuml=255
  #
  # Character entity references for symbols, mathematical symbols, and Greek letters
  #
  # Latin Extended -- Netscape can't handle
  # fnof 402
  #
  # Greek - Netscape can't handle these
  # Alpha 913
  # Beta 914
  # Gamma 915
  # Delta 916
  # Epsilon 917
  # Zeta 918
  # Eta 919
  # Theta 920
  # Iota 921
  # Kappa 922
  # Lambda 923
  # Mu 924
  # Nu 925
  # Xi 926
  # Omicron 927
  # Pi 928
  # Rho 929
  # Sigma 931
  # Tau 932
  # Upsilon 933
  # Phi 934
  # Chi 935
  # Psi 936
  # Omega 937
  # alpha 945
  # beta 946
  # gamma 947
  # delta 948
  # epsilon 949
  # zeta 950
  # eta 951
  # theta 952
  # iota 953
  # kappa 954
  # lambda 955
  # mu 956
  # nu 957
  # xi 958
  # omicron 959
  # pi 960
  # rho 961
  # sigmaf 962
  # sigma 963
  # tau 964
  # upsilon 965
  # phi 966
  # chi 967
  # psi 968
  # omega 969
  # thetasym 977
  # upsih 978
  # piv 982
  #
  # General Punctuation
  bull=8226
  hellip=8230
  prime=8242
  Prime=8243
  oline=8254
  frasl=8260
  #
  # Letterlike Symbols
  weierp=8472
  image=8465
  real=8476
  trade=8482
  alefsym=8501
  #
  # Arrows
  larr=8592
  uarr=8593
  rarr=8594
  darr=8595
  harr=8596
  crarr=8629
  lArr=8656
  uArr=8657
  rArr=8658
  dArr=8659
  hArr=8660
  #
  # Mathematical Operators
  forall=8704
  part=8706
  exist=8707
  empty=8709
  nabla=8711
  isin=8712
  notin=8713
  ni=8715
  prod=8719
  sum=8721
  minus=8722
  lowast=8727
  radic=8730
  prop=8733
  infin=8734
  ang=8736
  and=8743
  or=8744
  cap=8745
  cup=8746
  int=8747
  there4=8756
  sim=8764
  cong=8773
  asymp=8776
  ne=8800
  equiv=8801
  le=8804
  ge=8805
  sub=8834
  sup=8835
  nsub=8836
  sube=8838
  supe=8839
  oplus=8853
  otimes=8855
  perp=8869
  sdot=8901
  #
  # Miscellaneous Technical
  lceil=8968
  rceil=8969
  lfloor=8970
  rfloor=8971
  lang=9001
  rang=9002
  #
  # Geometric Shapes
  loz=9674
  #
  # Miscellaneous Symbols
  spades=9824
  clubs=9827
  hearts=9829
  diams=9830
  #
  # Character entity references for internationalization characters
  #
  # Latin Extended-A
  # Netscape can't handle!
  # OElig 338
  # oelig 339
  
  #-- NN 4.7 does not seem to support these, so they probably ought to be commented out.
  # Scaron 352
  # scaron 353
  # Yuml 376
  #
  # Spacing Modifier Letters -- Netscape can't handle
  # circ 710
  # tilde 732
  #
  # General Punctuation
  ensp=8194
  emsp=8195
  thinsp=8201
  zwnj=8204
  zwj=8205
  lrm=8206
  rlm=8207
  ndash=8211
  mdash=8212
  lsquo=8216
  rsquo=8217
  sbquo=8218
  ldquo=8220
  rdquo=8221
  bdquo=8222
  dagger=8224
  Dagger=8225
  permil=8240
  lsaquo=8249
  rsaquo=8250
  euro=8364
  
  
  
  1.1                  xml-xalan/java/src/org/apache/xml/serializer/XMLEntities.properties
  
  Index: XMLEntities.properties
  ===================================================================
  # $Id: XMLEntities.properties,v 1.1 2003/04/06 20:54:06 zongaro Exp $
  #
  # @version $Revision: 1.1 $ $Date: 2003/04/06 20:54:06 $
  # @author <a href="mailto:arkin@intalio.com">Assaf Arkin</a>
  # This file must be encoded in UTF-8; see org.apache.xml.serializer.CharInfo
  #
  # Character entity references for markup-significant characters
  #
  quot=34
  amp=38
  lt=60
  gt=62
  
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: xalan-cvs-unsubscribe@xml.apache.org
For additional commands, e-mail: xalan-cvs-help@xml.apache.org