You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@xalan.apache.org by zo...@apache.org on 2003/04/06 22:54:07 UTC
cvs commit: xml-xalan/java/src/org/apache/xml/serializer HTMLEntities.properties XMLEntities.properties CharInfo.java output_html.properties output_xml.properties
zongaro 2003/04/06 13:54:06
Modified: java/src/org/apache/xml/serializer CharInfo.java
output_html.properties output_xml.properties
Added: java/src/org/apache/xml/serializer HTMLEntities.properties
XMLEntities.properties
Log:
Patch from Christine Li (jycli@ca.ibm.com). Changed CharInfo so that it allows
descriptions of entities to be specified as a properties file, in addition to
the Xalan-specific resource file format.
The code previously always assumed a Xalan-specific resource file format, for
both user-supplied descriptions of entities, and for those supplied with
Xalan-J. The security restrictions of applets are such that an applet that
relies on a copy of Xalan-J in the Java run-time would find that Xalan-J would
be unable to read the default entities file that is packaged with the JRE on
the applet's behalf - it can only do so with a properties file, via ResourceBundle.
Revision Changes Path
1.2 +142 -115 xml-xalan/java/src/org/apache/xml/serializer/CharInfo.java
Index: CharInfo.java
===================================================================
RCS file: /home/cvs/xml-xalan/java/src/org/apache/xml/serializer/CharInfo.java,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- CharInfo.java 1 Apr 2003 19:24:54 -0000 1.1
+++ CharInfo.java 6 Apr 2003 20:54:06 -0000 1.2
@@ -60,15 +60,20 @@
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
+import java.net.MalformedURLException;
import java.util.BitSet;
import java.util.Hashtable;
+import java.util.PropertyResourceBundle;
+import java.util.ResourceBundle;
+import java.util.Locale;
+import java.util.MissingResourceException;
+import java.util.Enumeration;
import org.apache.xml.res.XMLErrorResources;
import org.apache.xml.res.XMLMessages;
import org.apache.xml.utils.CharKey;
import org.apache.xml.utils.SystemIDResolver;
import org.apache.xml.utils.SystemIDResolver;
-
/**
* This class provides services that tell if a character should have
* special treatement, such as entity reference substitution or normalization
@@ -90,13 +95,13 @@
* The name of the HTML entities file.
* If specified, the file will be resource loaded with the default class loader.
*/
- public static String HTML_ENTITIES_RESOURCE = "HTMLEntities.res";
+ public static String HTML_ENTITIES_RESOURCE = "org.apache.xml.serializer.HTMLEntities";
/**
* The name of the XML entities file.
* If specified, the file will be resource loaded with the default class loader.
*/
- public static String XML_ENTITIES_RESOURCE = "XMLEntities.res";
+ public static String XML_ENTITIES_RESOURCE = "org.apache.xml.serializer.XMLEntities";
/** The linefeed character, which the parser should always normalize. */
public static final char S_LINEFEED = 0x0A;
@@ -108,21 +113,24 @@
* Constructor that reads in a resource file that describes the mapping of
* characters to entity references.
*
- * Resource files must be encoded in UTF-8 and have a format like:
+ * Resource files must be encoded in UTF-8 and can either be properties
+ * files with a .properties extension assumed. Alternatively, they can
+ * have the following form, with no particular extension assumed:
+ *
* <pre>
* # First char # is a comment
* Entity numericValue
* quot 34
* amp 38
* </pre>
- * (Note: Why don't we just switch to .properties files? Oct-01 -sc)
*
- * @param entitiesResource Name of entities resource file that should
- * be loaded, which describes that mapping of characters to entity references.
+ * @param entitiesResource Name of properties or resource file that should
+ * be loaded, which describes that mapping of characters to entity
+ * references.
*/
public CharInfo(String entitiesResource)
{
-
+ PropertyResourceBundle entities;
InputStream is = null;
BufferedReader reader = null;
int index;
@@ -131,132 +139,134 @@
int code;
String line;
- try
- {
- try
- {
- // Maintenance note: we should evaluate replacing getting the
- // ClassLoader with javax.xml.transform.FactoryFinder.findClassLoader()
- // or similar code
- ClassLoader cl = CharInfo.class.getClassLoader();
-
- if (cl == null)
- {
- is =
- ClassLoader.getSystemResourceAsStream(entitiesResource);
- }
- else
- {
- is = cl.getResourceAsStream(entitiesResource);
- }
- }
- catch (Exception e)
- {
+ // Make various attempts to interpret the parameter as a properties
+ // file or resource file, as follows:
+ //
+ // 1) attempt to load .properties file using ResourceBundle
+ // 2) try using the class loader to find the specified file a resource
+ // file
+ // 3) try treating the resource a URI
+ entities = loadEntitiesResource(entitiesResource);
+ if (null != entities) {
+ Enumeration enum = entities.getKeys();
+ while (enum.hasMoreElements()){
+ name = (String) enum.nextElement();
+ value = entities.getString(name);
+ code = Integer.parseInt(value);
+ defineEntity(name, (char) code);
}
+ m_specialsMap.set(S_LINEFEED);
+ m_specialsMap.set(S_CARRIAGERETURN);
+ } else {
+ // Load user specified resource file by using URL loading, it
+ // requires a valid URI as parameter;
+ try {
+ try {
+ // Maintenance note: we should evaluate replacing getting
+ // the ClassLoader with
+ // javax.xml.transform.FactoryFinder.findClassLoader()
+ // or similar code
+ ClassLoader cl = CharInfo.class.getClassLoader();
+
+ if (cl == null) {
+ is = ClassLoader.getSystemResourceAsStream(
+ entitiesResource);
+ } else {
+ is = cl.getResourceAsStream(entitiesResource);
+ }
+ } catch (Exception e) { }
- if (is == null)
- is = CharInfo.class.getResourceAsStream(entitiesResource);
+ if (is == null) {
+ is = CharInfo.class.getResourceAsStream(entitiesResource);
+ }
- if (is == null)
- {
- URL url = new URL(entitiesResource);
+ if (is == null) {
+ URL url = new URL(entitiesResource);
- is = url.openStream();
- }
+ is = url.openStream();
+ }
- if (is == null)
- throw new RuntimeException(
- XMLMessages.createXMLMessage(
- XMLErrorResources.ER_RESOURCE_COULD_NOT_FIND,
- new Object[] { entitiesResource, entitiesResource }));
+ if (is == null) {
+ throw new RuntimeException(
+ XMLMessages.createXMLMessage(
+ XMLErrorResources.ER_RESOURCE_COULD_NOT_FIND,
+ new Object[] {entitiesResource, entitiesResource}));
+ }
- // Fix Bugzilla#4000: force reading in UTF-8
- // This creates the de facto standard that Xalan's resource
- // files must be encoded in UTF-8. This should work in all JVMs.
- //
- // %REVIEW% KNOWN ISSUE: IT FAILS IN MICROSOFT VJ++, which
- // didn't implement the UTF-8 encoding. Theoretically, we should
- // simply let it fail in that case, since the JVM is obviously
- // broken if it doesn't support such a basic standard. But
- // since there are still some users attempting to use VJ++ for
- // development, we have dropped in a fallback which makes a
- // second attempt using the platform's default encoding. In VJ++
- // this is apparently ASCII, which is subset of UTF-8... and
- // since the strings we'll be reading here are also primarily
- // limited to the 7-bit ASCII range (at least, in English
- // versions of Xalan), this should work well enough to keep us
- // on the air until we're ready to officially decommit from
- // VJ++.
- try
- {
- reader = new BufferedReader(new InputStreamReader(is, "UTF-8"));
- }
- catch (java.io.UnsupportedEncodingException e)
- {
- reader = new BufferedReader(new InputStreamReader(is));
- }
+ // Fix Bugzilla#4000: force reading in UTF-8
+ // This creates the de facto standard that Xalan's resource
+ // files must be encoded in UTF-8. This should work in all
+ // JVMs.
+ //
+ // %REVIEW% KNOWN ISSUE: IT FAILS IN MICROSOFT VJ++, which
+ // didn't implement the UTF-8 encoding. Theoretically, we should
+ // simply let it fail in that case, since the JVM is obviously
+ // broken if it doesn't support such a basic standard. But
+ // since there are still some users attempting to use VJ++ for
+ // development, we have dropped in a fallback which makes a
+ // second attempt using the platform's default encoding. In VJ++
+ // this is apparently ASCII, which is subset of UTF-8... and
+ // since the strings we'll be reading here are also primarily
+ // limited to the 7-bit ASCII range (at least, in English
+ // versions of Xalan), this should work well enough to keep us
+ // on the air until we're ready to officially decommit from
+ // VJ++.
+ try {
+ reader = new BufferedReader(new InputStreamReader(is,
+ "UTF-8"));
+ } catch (java.io.UnsupportedEncodingException e) {
+ reader = new BufferedReader(new InputStreamReader(is));
+ }
- line = reader.readLine();
+ line = reader.readLine();
- while (line != null)
- {
- if (line.length() == 0 || line.charAt(0) == '#')
- {
- line = reader.readLine();
+ while (line != null) {
+ if (line.length() == 0 || line.charAt(0) == '#') {
+ line = reader.readLine();
- continue;
- }
+ continue;
+ }
- index = line.indexOf(' ');
+ index = line.indexOf(' ');
- if (index > 1)
- {
- name = line.substring(0, index);
+ if (index > 1) {
+ name = line.substring(0, index);
- ++index;
+ ++index;
- if (index < line.length())
- {
- value = line.substring(index);
- index = value.indexOf(' ');
+ if (index < line.length()) {
+ value = line.substring(index);
+ index = value.indexOf(' ');
- if (index > 0)
- value = value.substring(0, index);
+ if (index > 0) {
+ value = value.substring(0, index);
+ }
- code = Integer.parseInt(value);
+ code = Integer.parseInt(value);
- defineEntity(name, (char) code);
+ defineEntity(name, (char) code);
+ }
}
- }
-
- line = reader.readLine();
- }
- is.close();
- m_specialsMap.set(S_LINEFEED);
- m_specialsMap.set(S_CARRIAGERETURN);
- }
- catch (Exception except)
- {
- throw new RuntimeException(
- XMLMessages.createXMLMessage(
- XMLErrorResources.ER_RESOURCE_COULD_NOT_LOAD,
- new Object[] {
- entitiesResource,
- except.toString(),
- entitiesResource,
- except.toString()}));
- }
- finally
- {
- if (is != null)
- {
- try
- {
- is.close();
+ line = reader.readLine();
}
- catch (Exception except)
- {
+
+ is.close();
+ m_specialsMap.set(S_LINEFEED);
+ m_specialsMap.set(S_CARRIAGERETURN);
+ } catch (Exception except) {
+ throw new RuntimeException(
+ XMLMessages.createXMLMessage(
+ XMLErrorResources.ER_RESOURCE_COULD_NOT_LOAD,
+ new Object[] { entitiesResource,
+ except.toString(),
+ entitiesResource,
+ except.toString()}));
+ } finally {
+ if (is != null) {
+ try {
+ is.close();
+ } catch (Exception except) { }
}
}
}
@@ -394,4 +404,21 @@
}
return retobj;
}
+
+//Load entity property files by using PropertyResourceBundle, cause of security issure for applets
+ private PropertyResourceBundle loadEntitiesResource(String baseName)
+ throws MissingResourceException
+ {
+ try
+ {
+ Locale locale = Locale.getDefault();
+ java.lang.ClassLoader loader = this.getClass().getClassLoader();
+ return (PropertyResourceBundle)PropertyResourceBundle.getBundle(baseName);
+ }
+ catch (MissingResourceException e)
+ {
+ return null;
+ }
+ }
+
}
1.2 +1 -1 xml-xalan/java/src/org/apache/xml/serializer/output_html.properties
Index: output_html.properties
===================================================================
RCS file: /home/cvs/xml-xalan/java/src/org/apache/xml/serializer/output_html.properties,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- output_html.properties 1 Apr 2003 19:24:54 -0000 1.1
+++ output_html.properties 6 Apr 2003 20:54:06 -0000 1.2
@@ -20,6 +20,6 @@
# Note that the colon after the protocol needs to be escaped.
{http\u003a//xml.apache.org/xalan}indent-amount=0
{http\u003a//xml.apache.org/xalan}content-handler=org.apache.xml.serializer.ToHTMLStream
-{http\u003a//xml.apache.org/xalan}entities=org/apache/xml/serializer/HTMLEntities.res
+{http\u003a//xml.apache.org/xalan}entities=org/apache/xml/serializer/HTMLEntities
{http\u003a//xml.apache.org/xalan}use-url-escaping=yes
{http\u003a//xml.apache.org/xalan}omit-meta-tag=no
1.2 +1 -1 xml-xalan/java/src/org/apache/xml/serializer/output_xml.properties
Index: output_xml.properties
===================================================================
RCS file: /home/cvs/xml-xalan/java/src/org/apache/xml/serializer/output_xml.properties,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- output_xml.properties 1 Apr 2003 19:24:54 -0000 1.1
+++ output_xml.properties 6 Apr 2003 20:54:06 -0000 1.2
@@ -23,5 +23,5 @@
# Note that the colon after the protocol needs to be escaped.
{http\u003a//xml.apache.org/xalan}indent-amount=0
{http\u003a//xml.apache.org/xalan}content-handler=org.apache.xml.serializer.ToXMLStream
-{http\u003a//xml.apache.org/xalan}entities=org/apache/xml/serializer/XMLEntities.res
+{http\u003a//xml.apache.org/xalan}entities=org/apache/xml/serializer/XMLEntities
1.1 xml-xalan/java/src/org/apache/xml/serializer/HTMLEntities.properties
Index: HTMLEntities.properties
===================================================================
# $Id: HTMLEntities.properties,v 1.1 2003/04/06 20:54:06 zongaro Exp $
# in ./java/src/org/apache/xml/serializer
#
# @version $Revision: 1.1 $ $Date: 2003/04/06 20:54:06 $
# @author <a href="mailto:arkin@intalio.com">Assaf Arkin</a>
# This file must be encoded in UTF-8; see org.apache.xml.serializer.CharInfo
#
# Character entity references for markup-significant
#
quot=34
amp=38
lt=60
gt=62
nbsp=160
#
# Character entity references for ISO 8859-1 characters
#
iexcl=161
cent=162
pound=163
curren=164
yen=165
brvbar=166
sect=167
uml=168
copy=169
ordf=170
laquo=171
not=172
shy=173
reg=174
macr=175
deg=176
plusmn=177
sup2=178
sup3=179
acute=180
micro=181
para=182
middot=183
cedil=184
sup1=185
ordm=186
raquo=187
frac14=188
frac12=189
frac34=190
iquest=191
Agrave=192
Aacute=193
Acirc=194
Atilde=195
Auml=196
Aring=197
AElig=198
Ccedil=199
Egrave=200
Eacute=201
Ecirc=202
Euml=203
Igrave=204
Iacute=205
Icirc=206
Iuml=207
ETH=208
Ntilde=209
Ograve=210
Oacute=211
Ocirc=212
Otilde=213
Ouml=214
times=215
Oslash=216
Ugrave=217
Uacute=218
Ucirc=219
Uuml=220
Yacute=221
THORN=222
szlig=223
agrave=224
aacute=225
acirc=226
atilde=227
auml=228
aring=229
aelig=230
ccedil=231
egrave=232
eacute=233
ecirc=234
euml=235
igrave=236
iacute=237
icirc=238
iuml=239
eth=240
ntilde=241
ograve=242
oacute=243
ocirc=244
otilde=245
ouml=246
divide=247
oslash=248
ugrave=249
uacute=250
ucirc=251
uuml=252
yacute=253
thorn=254
yuml=255
#
# Character entity references for symbols, mathematical symbols, and Greek letters
#
# Latin Extended -- Netscape can't handle
# fnof 402
#
# Greek - Netscape can't handle these
# Alpha 913
# Beta 914
# Gamma 915
# Delta 916
# Epsilon 917
# Zeta 918
# Eta 919
# Theta 920
# Iota 921
# Kappa 922
# Lambda 923
# Mu 924
# Nu 925
# Xi 926
# Omicron 927
# Pi 928
# Rho 929
# Sigma 931
# Tau 932
# Upsilon 933
# Phi 934
# Chi 935
# Psi 936
# Omega 937
# alpha 945
# beta 946
# gamma 947
# delta 948
# epsilon 949
# zeta 950
# eta 951
# theta 952
# iota 953
# kappa 954
# lambda 955
# mu 956
# nu 957
# xi 958
# omicron 959
# pi 960
# rho 961
# sigmaf 962
# sigma 963
# tau 964
# upsilon 965
# phi 966
# chi 967
# psi 968
# omega 969
# thetasym 977
# upsih 978
# piv 982
#
# General Punctuation
bull=8226
hellip=8230
prime=8242
Prime=8243
oline=8254
frasl=8260
#
# Letterlike Symbols
weierp=8472
image=8465
real=8476
trade=8482
alefsym=8501
#
# Arrows
larr=8592
uarr=8593
rarr=8594
darr=8595
harr=8596
crarr=8629
lArr=8656
uArr=8657
rArr=8658
dArr=8659
hArr=8660
#
# Mathematical Operators
forall=8704
part=8706
exist=8707
empty=8709
nabla=8711
isin=8712
notin=8713
ni=8715
prod=8719
sum=8721
minus=8722
lowast=8727
radic=8730
prop=8733
infin=8734
ang=8736
and=8743
or=8744
cap=8745
cup=8746
int=8747
there4=8756
sim=8764
cong=8773
asymp=8776
ne=8800
equiv=8801
le=8804
ge=8805
sub=8834
sup=8835
nsub=8836
sube=8838
supe=8839
oplus=8853
otimes=8855
perp=8869
sdot=8901
#
# Miscellaneous Technical
lceil=8968
rceil=8969
lfloor=8970
rfloor=8971
lang=9001
rang=9002
#
# Geometric Shapes
loz=9674
#
# Miscellaneous Symbols
spades=9824
clubs=9827
hearts=9829
diams=9830
#
# Character entity references for internationalization characters
#
# Latin Extended-A
# Netscape can't handle!
# OElig 338
# oelig 339
#-- NN 4.7 does not seem to support these, so they might ought to be commented.
# Scaron 352
# scaron 353
# Yuml 376
#
# Spacing Modifier Letters -- Netscape can't handle
# circ 710
# tilde 732
#
# General Punctuation
ensp=8194
emsp=8195
thinsp=8201
zwnj=8204
zwj=8205
lrm=8206
rlm=8207
ndash=8211
mdash=8212
lsquo=8216
rsquo=8217
sbquo=8218
ldquo=8220
rdquo=8221
bdquo=8222
dagger=8224
Dagger=8225
permil=8240
lsaquo=8249
rsaquo=8250
euro=8364
1.1 xml-xalan/java/src/org/apache/xml/serializer/XMLEntities.properties
Index: XMLEntities.properties
===================================================================
# $Id: XMLEntities.properties,v 1.1 2003/04/06 20:54:06 zongaro Exp $
#
# @version $Revision: 1.1 $ $Date: 2003/04/06 20:54:06 $
# @author <a href="mailto:arkin@intalio.com">Assaf Arkin</a>
# This file must be encoded in UTF-8; see org.apache.xml.serializer.CharInfo
#
# Character entity references for markup-significant
#
quot=34
amp=38
lt=60
gt=62
---------------------------------------------------------------------
To unsubscribe, e-mail: xalan-cvs-unsubscribe@xml.apache.org
For additional commands, e-mail: xalan-cvs-help@xml.apache.org