You are viewing a plain text version of this content. The canonical link for it is here.
Posted to j-dev@xerces.apache.org by TAMURA Kent <ke...@hauN.org> on 2000/10/30 10:43:58 UTC

[PATCH] Attribute-value normalization

I found that the validating parser of Xerces 1.2.1 does not normalize
any attribute values ;-( Attribute-value normalization is a
required step for implementing Canonical XML.  The following
patch adds a normalization feature,
http://apache.org/xml/features/validation/normalize-attribute-values.
The default value of this feature is `true', and it can be set to
`false' to disable the normalization.

-- 
TAMURA Kent


Index: org/apache/xerces/framework/XMLAttrList.java
===================================================================
RCS file: /home/cvspublic/xml-xerces/java/src/org/apache/xerces/framework/XMLAttrList.java,v
retrieving revision 1.4
diff -u -r1.4 XMLAttrList.java
--- org/apache/xerces/framework/XMLAttrList.java	2000/05/19 02:27:19	1.4
+++ org/apache/xerces/framework/XMLAttrList.java	2000/10/30 08:54:11
@@ -303,6 +303,17 @@
         return fAttValue[chunk][index];
     }
 
+    /**
+     * Sets the value of the attribute at attrIndex; an out-of-range attrIndex is silently ignored.
+     */
+    public void setAttValue(int attrIndex, int attrValue) {
+        if (attrIndex < 0 || attrIndex >= fAttrCount)
+            return;
+        int chunk = attrIndex >> CHUNK_SHIFT;
+        int index = attrIndex & CHUNK_MASK;
+        fAttValue[chunk][index] = attrValue;
+    }
+
     /** Sets the type of the attribute. */
     public void setAttType(int attrIndex, int attTypeIndex) {
         if (attrIndex < 0 || attrIndex >= fAttrCount)
Index: org/apache/xerces/framework/XMLParser.java
===================================================================
RCS file: /home/cvspublic/xml-xerces/java/src/org/apache/xerces/framework/XMLParser.java,v
retrieving revision 1.22
diff -u -r1.22 XMLParser.java
--- org/apache/xerces/framework/XMLParser.java	2000/08/11 22:31:52	1.22
+++ org/apache/xerces/framework/XMLParser.java	2000/10/30 08:54:11
@@ -593,6 +593,14 @@
         throws SAXNotRecognizedException, SAXNotSupportedException {
         return fValidator.getDynamicValidationEnabled();
     }
+
+    /**
+     * Forwards the attribute-value normalization flag to the validator (fValidator).
+     */
+    protected void setNormalizeAttributeValues(boolean normalize) {
+        fValidator.setNormalizeAttributeValues(normalize);
+    }
+
     /**
      * Allows the parser to have the choice to load DTD grammar when 
      * validation is off.
@@ -1147,7 +1155,13 @@
                 throw new SAXNotSupportedException(featureId);
             }
             //
-            // http://apache.org/xml/features/validation/default-attribute-values
+            // http://apache.org/xml/features/validation/normalize-attribute-values
+            //
+            if (feature.equals("validation/normalize-attribute-values")) {
+                setNormalizeAttributeValues(state);
+            }
+            //
+            // http://apache.org/xml/features/validation/validate-content-models
             //
             if (feature.equals("validation/validate-content-models")) {
                 // REVISIT
Index: org/apache/xerces/validators/common/XMLValidator.java
===================================================================
RCS file: /home/cvspublic/xml-xerces/java/src/org/apache/xerces/validators/common/XMLValidator.java,v
retrieving revision 1.89
diff -u -r1.89 XMLValidator.java
--- org/apache/xerces/validators/common/XMLValidator.java	2000/10/27 21:06:12	1.89
+++ org/apache/xerces/validators/common/XMLValidator.java	2000/10/30 08:54:17
@@ -257,6 +257,7 @@
    private boolean fDynamicDisabledByValidation = false;
    private boolean fWarningOnDuplicateAttDef = false;
    private boolean fWarningOnUndeclaredElements = false;
+   private boolean fNormalizeAttributeValues = true;
    private boolean fLoadDTDGrammar = true;
 
    // declarations
@@ -500,6 +501,11 @@
       return fDynamicValidation;
    }
 
+   /** Sets fNormalizeAttributeValues, the flag that gates attribute-value normalization in validateDTDattribute **/
+   public void setNormalizeAttributeValues(boolean normalize){
+      fNormalizeAttributeValues = normalize;
+   }
+
    /** Sets fLoadDTDGrammar when validation is off **/
    public void setLoadDTDGrammar(boolean loadDG){
       if (fValidating) {
@@ -2779,7 +2785,9 @@
                                    fTempAttDecl.type == XMLAttributeDecl.TYPE_NMTOKEN ||
                                    fTempAttDecl.type == XMLAttributeDecl.TYPE_NOTATION)
                                  ) {
-                                 validateDTDattribute(element, attrList.getAttValue(index), fTempAttDecl);
+                                  int normalizedValue = validateDTDattribute(element, attrList.getAttValue(index), fTempAttDecl);
+                                  attrList.setAttValue(index, normalizedValue);
+                                 
                               }
 
                               // check to see if this attribute matched an attribute wildcard
@@ -2951,8 +2959,11 @@
    } // validateElementAndAttributes(QName,XMLAttrList)
 
 
-   //validate attributes in DTD fashion
-   private void validateDTDattribute(QName element, int attValue, 
+   /**
+    * Validate attributes in DTD fashion.
+    * @return normalized attribute value
+    */
+   private int validateDTDattribute(QName element, int attValue, 
                                      XMLAttributeDecl attributeDecl) throws Exception{
       AttributeValidator av = null;
       switch (attributeDecl.type) {
@@ -2996,6 +3007,15 @@
                 av = fAttValidatorENTITY;
             }*/
 
+            if (fNormalizeAttributeValues) {
+                if (attributeDecl.list) {
+                    attValue = normalizeListAttribute(value);
+                } else {
+                    if (value != unTrimValue) {
+                        attValue = fStringPool.addSymbol(value);
+                    }
+                }
+            }
          }
          break;
       case XMLAttributeDecl.TYPE_ENUMERATION:
@@ -3025,6 +3045,10 @@
                                          ex.getMinorCode(),
                                          fStringPool.toString( attributeDecl.name.rawname), value );
             }
+
+            if (fNormalizeAttributeValues && value != unTrimValue) {
+                attValue = fStringPool.addSymbol(value);
+            }
          }
          break;
       case XMLAttributeDecl.TYPE_IDREF:
@@ -3058,6 +3082,15 @@
                }
             }
 
+            if (fNormalizeAttributeValues) {
+                if (attributeDecl.list) {
+                    attValue = normalizeListAttribute(value);
+                } else {
+                    if (value != unTrimValue) {
+                        attValue = fStringPool.addSymbol(value);
+                    }
+                }
+            }
          }
          break;
       case XMLAttributeDecl.TYPE_NOTATION:
@@ -3118,12 +3151,56 @@
                                          fStringPool.toString(attributeDecl.name.rawname), value);//TODO NMTOKENS messge
             }
 
+            if (fNormalizeAttributeValues) {
+                if (attributeDecl.list) {
+                    attValue = normalizeListAttribute(value);
+                } else {
+                    if (value != unTrimValue) {
+                        attValue = fStringPool.addSymbol(value);
+                    }
+                }
+            }
          }
          break;
       }
-      if ( av != null )
-         av.normalize(element, attributeDecl.name, attValue, 
-                      attributeDecl.type, attributeDecl.enumeration);
+      if ( av != null ) {
+          int newValue = av.normalize(element, attributeDecl.name, attValue, 
+                                  attributeDecl.type, attributeDecl.enumeration);
+          if (fNormalizeAttributeValues)
+              attValue = newValue;
+      }
+      return attValue;
+   }
+
+   /** Collapses every run of whitespace in a list-type attribute value down to the run's first character.
+    * @param value This is already trimmed.
+    */
+   private int normalizeListAttribute(String value) {
+       int length = value.length();
+       StringBuffer buffer = null;   // allocated lazily, only once a run of 2+ whitespace chars is seen
+       int state = 0;           // 0:non-S, 1: 1st S, 2: non-1st S
+       int copyStart = 0;       // start of the segment not yet copied into buffer
+       for (int i = 0;  i < length;  i++) {
+           int ch = value.charAt(i);
+           if (ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n') {
+               if (state == 0) {
+                   state = 1;
+               } else if (state == 1) {
+                   state = 2;
+                   if (buffer == null)
+                       buffer = new StringBuffer(length);
+                   buffer.append(value.substring(copyStart, i));   // includes the run's first whitespace char (at i-1), unchanged
+               }
+           } else {
+               if (state == 2)
+                   copyStart = i;   // next segment begins right after the collapsed run
+               state = 0;
+           }
+       }
+       if (buffer == null)
+           return fStringPool.addSymbol(value);   // no run of 2+ whitespace chars was found
+       buffer.append(value.substring(copyStart));   // NOTE(review): relies on trimmed input; ending inside a run would re-append a stale segment
+       return fStringPool.addSymbol(new String(buffer));
    }
 
    /** Character data in content. */