You are viewing a plain text version of this content. The canonical link for it is here.
Posted to j-dev@xerces.apache.org by TAMURA Kent <ke...@hauN.org> on 2000/10/30 10:43:58 UTC
[PATCH] Attribute-value normalization
I found the validating parser of Xerces 1.2.1 did not normalize
any attribute values ;-( Attribute-value normalization is a
required process for implementing Canonical XML. The following
patch adds the normalization feature,
http://apache.org/xml/features/validation/normalize-attribute-values.
The default value of this feature is `true', and the normalization
can be disabled by setting the feature to `false'.
--
TAMURA Kent
Index: org/apache/xerces/framework/XMLAttrList.java
===================================================================
RCS file: /home/cvspublic/xml-xerces/java/src/org/apache/xerces/framework/XMLAttrList.java,v
retrieving revision 1.4
diff -u -r1.4 XMLAttrList.java
--- org/apache/xerces/framework/XMLAttrList.java 2000/05/19 02:27:19 1.4
+++ org/apache/xerces/framework/XMLAttrList.java 2000/10/30 08:54:11
@@ -303,6 +303,17 @@
return fAttValue[chunk][index];
}
+ /**
+ * Sets the value of the attribute; does nothing if attrIndex is out of range.
+ */
+ public void setAttValue(int attrIndex, int attrValue) {
+ if (attrIndex < 0 || attrIndex >= fAttrCount)
+ return;
+ int chunk = attrIndex >> CHUNK_SHIFT;
+ int index = attrIndex & CHUNK_MASK;
+ fAttValue[chunk][index] = attrValue;
+ }
+
/** Sets the type of the attribute. */
public void setAttType(int attrIndex, int attTypeIndex) {
if (attrIndex < 0 || attrIndex >= fAttrCount)
Index: org/apache/xerces/framework/XMLParser.java
===================================================================
RCS file: /home/cvspublic/xml-xerces/java/src/org/apache/xerces/framework/XMLParser.java,v
retrieving revision 1.22
diff -u -r1.22 XMLParser.java
--- org/apache/xerces/framework/XMLParser.java 2000/08/11 22:31:52 1.22
+++ org/apache/xerces/framework/XMLParser.java 2000/10/30 08:54:11
@@ -593,6 +593,14 @@
throws SAXNotRecognizedException, SAXNotSupportedException {
return fValidator.getDynamicValidationEnabled();
}
+
+ /**
+ * Enables or disables attribute-value normalization by forwarding the flag to the validator.
+ */
+ protected void setNormalizeAttributeValues(boolean normalize) {
+ fValidator.setNormalizeAttributeValues(normalize);
+ }
+
/**
* Allows the parser to have the choice to load DTD grammar when
* validation is off.
@@ -1147,7 +1155,13 @@
throw new SAXNotSupportedException(featureId);
}
//
- // http://apache.org/xml/features/validation/default-attribute-values
+ // http://apache.org/xml/features/validation/normalize-attribute-values
+ //
+ if (feature.equals("validation/normalize-attribute-values")) {
+ setNormalizeAttributeValues(state);
+ }
+ //
+ // http://apache.org/xml/features/validation/validate-content-models
//
if (feature.equals("validation/validate-content-models")) {
// REVISIT
Index: org/apache/xerces/validators/common/XMLValidator.java
===================================================================
RCS file: /home/cvspublic/xml-xerces/java/src/org/apache/xerces/validators/common/XMLValidator.java,v
retrieving revision 1.89
diff -u -r1.89 XMLValidator.java
--- org/apache/xerces/validators/common/XMLValidator.java 2000/10/27 21:06:12 1.89
+++ org/apache/xerces/validators/common/XMLValidator.java 2000/10/30 08:54:17
@@ -257,6 +257,7 @@
private boolean fDynamicDisabledByValidation = false;
private boolean fWarningOnDuplicateAttDef = false;
private boolean fWarningOnUndeclaredElements = false;
+ private boolean fNormalizeAttributeValues = true;
private boolean fLoadDTDGrammar = true;
// declarations
@@ -500,6 +501,11 @@
return fDynamicValidation;
}
+ /** Sets whether attribute values are normalized during DTD validation (default is true). **/
+ public void setNormalizeAttributeValues(boolean normalize){
+ fNormalizeAttributeValues = normalize;
+ }
+
/** Sets fLoadDTDGrammar when validation is off **/
public void setLoadDTDGrammar(boolean loadDG){
if (fValidating) {
@@ -2779,7 +2785,9 @@
fTempAttDecl.type == XMLAttributeDecl.TYPE_NMTOKEN ||
fTempAttDecl.type == XMLAttributeDecl.TYPE_NOTATION)
) {
- validateDTDattribute(element, attrList.getAttValue(index), fTempAttDecl);
+ int normalizedValue = validateDTDattribute(element, attrList.getAttValue(index), fTempAttDecl);
+ attrList.setAttValue(index, normalizedValue);
+
}
// check to see if this attribute matched an attribute wildcard
@@ -2951,8 +2959,11 @@
} // validateElementAndAttributes(QName,XMLAttrList)
- //validate attributes in DTD fashion
- private void validateDTDattribute(QName element, int attValue,
+ /**
+ * Validate attributes in DTD fashion.
+ * @return normalized attribute value
+ */
+ private int validateDTDattribute(QName element, int attValue,
XMLAttributeDecl attributeDecl) throws Exception{
AttributeValidator av = null;
switch (attributeDecl.type) {
@@ -2996,6 +3007,15 @@
av = fAttValidatorENTITY;
}*/
+ if (fNormalizeAttributeValues) {
+ if (attributeDecl.list) {
+ attValue = normalizeListAttribute(value);
+ } else {
+ if (value != unTrimValue) {
+ attValue = fStringPool.addSymbol(value);
+ }
+ }
+ }
}
break;
case XMLAttributeDecl.TYPE_ENUMERATION:
@@ -3025,6 +3045,10 @@
ex.getMinorCode(),
fStringPool.toString( attributeDecl.name.rawname), value );
}
+
+ if (fNormalizeAttributeValues && value != unTrimValue) {
+ attValue = fStringPool.addSymbol(value);
+ }
}
break;
case XMLAttributeDecl.TYPE_IDREF:
@@ -3058,6 +3082,15 @@
}
}
+ if (fNormalizeAttributeValues) {
+ if (attributeDecl.list) {
+ attValue = normalizeListAttribute(value);
+ } else {
+ if (value != unTrimValue) {
+ attValue = fStringPool.addSymbol(value);
+ }
+ }
+ }
}
break;
case XMLAttributeDecl.TYPE_NOTATION:
@@ -3118,12 +3151,56 @@
fStringPool.toString(attributeDecl.name.rawname), value);//TODO NMTOKENS messge
}
+ if (fNormalizeAttributeValues) {
+ if (attributeDecl.list) {
+ attValue = normalizeListAttribute(value);
+ } else {
+ if (value != unTrimValue) {
+ attValue = fStringPool.addSymbol(value);
+ }
+ }
+ }
}
break;
}
- if ( av != null )
- av.normalize(element, attributeDecl.name, attValue,
- attributeDecl.type, attributeDecl.enumeration);
+ if ( av != null ) {
+ int newValue = av.normalize(element, attributeDecl.name, attValue,
+ attributeDecl.type, attributeDecl.enumeration);
+ if (fNormalizeAttributeValues)
+ attValue = newValue;
+ }
+ return attValue;
+ }
+
+ /**
+ * @param value Already-trimmed list value; each whitespace run is collapsed to its first character.
+ */
+ private int normalizeListAttribute(String value) {
+ int length = value.length();
+ StringBuffer buffer = null;
+ int state = 0; // 0: non-whitespace, 1: first whitespace of a run, 2: later whitespace of a run
+ int copyStart = 0;
+ for (int i = 0; i < length; i++) {
+ int ch = value.charAt(i);
+ if (ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n') {
+ if (state == 0) {
+ state = 1;
+ } else if (state == 1) {
+ state = 2;
+ if (buffer == null)
+ buffer = new StringBuffer(length);
+ buffer.append(value.substring(copyStart, i));
+ }
+ } else {
+ if (state == 2)
+ copyStart = i;
+ state = 0;
+ }
+ }
+ if (buffer == null)
+ return fStringPool.addSymbol(value);
+ buffer.append(value.substring(copyStart));
+ return fStringPool.addSymbol(new String(buffer));
}
/** Character data in content. */