You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@xerces.apache.org by el...@apache.org on 2002/01/28 01:35:45 UTC

cvs commit: xml-xerces/java/src/org/apache/xerces/parsers AbstractDOMParser.java AbstractSAXParser.java StandardParserConfiguration.java

elena       02/01/27 16:35:45

  Modified:    java/src/org/apache/xerces/impl Constants.java
               java/src/org/apache/xerces/impl/dv XSSimpleType.java
               java/src/org/apache/xerces/impl/dv/xs XSSimpleTypeDecl.java
               java/src/org/apache/xerces/impl/msg
                        XMLSchemaMessages.properties
               java/src/org/apache/xerces/impl/validation
                        ValidationContext.java ValidationState.java
               java/src/org/apache/xerces/impl/xs XMLSchemaValidator.java
               java/src/org/apache/xerces/parsers AbstractDOMParser.java
                        AbstractSAXParser.java
                        StandardParserConfiguration.java
  Log:
  Added new XML Schema features:
  a) validation/schema/normalized-value -- allows exposing XML Schema normalized values via DOM/SAX
  b) validation/schema/element-default  -- sends characters() calls for schema element default values.
  
  By default, both features are set to true. The features are no-ops if validation occurs against a DTD.
  If XML Schema validation is turned on, characters() include 2 values: XMLString always includes original value, Augmentations includes ElementPSVI for which normalizedSchemaValue property is set (if in element content).
  XML Schema default attributes are always included in the pipeline with name, namespace, specified and value properties. Additional information, such as schema_specified, is stored in Augmentations.
  
  For all simpleTypes, but union, normalization occurs for each chunk of data we receive.
  For union types, we can't normalize data until we receive all of it. Thus, for union types we do the following:
  (1) sending of characters back to application is delayed till endElement()
  (2) if characters() include some augmentations, XMLSchemaValidator will emit a characters() call with the augmentations and an empty string as the value. At the SAX/DOM level those calls will be ignored.
  (3) the normalized value for the union datatype is sent before the endElement() call.
  
  To do:
  -- update docs
  -- verify what should be behavior if entityref or cdata section
  
  Revision  Changes    Path
  1.10      +20 -1     xml-xerces/java/src/org/apache/xerces/impl/Constants.java
  
  Index: Constants.java
  ===================================================================
  RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/impl/Constants.java,v
  retrieving revision 1.9
  retrieving revision 1.10
  diff -u -r1.9 -r1.10
  --- Constants.java	17 Jan 2002 23:54:58 -0000	1.9
  +++ Constants.java	28 Jan 2002 00:35:44 -0000	1.10
  @@ -65,7 +65,7 @@
    *
    * @author Andy Clark, IBM
    *
  - * @version $Id: Constants.java,v 1.9 2002/01/17 23:54:58 neilg Exp $
  + * @version $Id: Constants.java,v 1.10 2002/01/28 00:35:44 elena Exp $
    */
   public final class Constants {
   
  @@ -123,6 +123,9 @@
       /** Create cdata nodes feature ("create-cdata-nodes"). */
       public static final String CREATE_CDATA_NODES_FEATURE = "create-cdata-nodes";
   
  +    /** Feature id: load as infoset. */
  +    public static final String LOAD_AS_INFOSET = "load-as-infoset";
  +
       // xerces features
   
       /** Xerces features prefix ("http://apache.org/xml/features/"). */
  @@ -131,6 +134,12 @@
       /** Schema validation feature ("validation/schema"). */
       public static final String SCHEMA_VALIDATION_FEATURE = "validation/schema";
   
  +    /** Expose schema normalized values */
  +    public static final String SCHEMA_NORMALIZED_VALUE = "validation/schema/normalized-value";
  +
  +    /** Send schema default value via characters() */
  +    public static final String SCHEMA_ELEMENT_DEFAULT = "validation/schema/element-default";
  +
       /** Schema full constraint checking ("validation/schema-full-checking"). */
       public static final String SCHEMA_FULL_CHECKING = "validation/schema-full-checking";
   
  @@ -237,6 +246,16 @@
   
       /** Validation manager property ("internal/validation-manager"). */
       public static final String VALIDATION_MANAGER_PROPERTY = "internal/validation-manager";
  +
  +
  +    // general constants
  +    
  +    /** Element PSVI is stored in augmentations using string "ELEMENT_PSVI" */    
  +    public final static String ELEMENT_PSVI = "ELEMENT_PSVI";
  +
  +    /* Attribute PSVI is stored in augmentations using string "ATTRIBUTE_PSVI" */
  +    public final static String ATTRIBUTE_PSVI = "ATTRIBUTE_PSVI";
  +
   
       // private
   
  
  
  
  1.6       +11 -1     xml-xerces/java/src/org/apache/xerces/impl/dv/XSSimpleType.java
  
  Index: XSSimpleType.java
  ===================================================================
  RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/impl/dv/XSSimpleType.java,v
  retrieving revision 1.5
  retrieving revision 1.6
  diff -u -r1.5 -r1.6
  --- XSSimpleType.java	10 Jan 2002 17:10:23 -0000	1.5
  +++ XSSimpleType.java	28 Jan 2002 00:35:44 -0000	1.6
  @@ -69,7 +69,7 @@
    *
    * @author Sandy Gao, IBM
    *
  - * @version $Id: XSSimpleType.java,v 1.5 2002/01/10 17:10:23 sandygao Exp $
  + * @version $Id: XSSimpleType.java,v 1.6 2002/01/28 00:35:44 elena Exp $
    */
   public interface XSSimpleType extends XSTypeDecl {
   
  @@ -274,4 +274,14 @@
        * @return  a constant corresponding to the "cardinality" facet.
        */
       public short getCardinalityFacet();
  +
  +    
  +    /**
  +     * Return the whitespace corresponding to this datatype.
  +     * 
  +     * @return valid values are WS_PRESERVE, WS_REPLACE, WS_COLLAPSE.
  +     * @exception DatatypeException
  +     *                   union datatypes don't have whitespace facet associated with them
  +     */
  +    public short getWhitespace () throws DatatypeException;
   }
  
  
  
  1.2       +24 -3     xml-xerces/java/src/org/apache/xerces/impl/dv/xs/XSSimpleTypeDecl.java
  
  Index: XSSimpleTypeDecl.java
  ===================================================================
  RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/impl/dv/xs/XSSimpleTypeDecl.java,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- XSSimpleTypeDecl.java	24 Jan 2002 16:38:28 -0000	1.1
  +++ XSSimpleTypeDecl.java	28 Jan 2002 00:35:44 -0000	1.2
  @@ -62,6 +62,7 @@
   import org.apache.xerces.impl.dv.XSListSimpleType;
   import org.apache.xerces.impl.dv.XSUnionSimpleType;
   import org.apache.xerces.impl.dv.XSFacets;
  +import org.apache.xerces.impl.dv.DatatypeException;
   import org.apache.xerces.impl.dv.InvalidDatatypeValueException;
   import org.apache.xerces.impl.dv.InvalidDatatypeFacetException;
   import org.apache.xerces.impl.dv.ValidatedInfo;
  @@ -77,7 +78,7 @@
    * @author Sandy Gao, IBM
    * @author Neeraj Bajaj, Sun Microsystems, inc.
    *
  - * @version $Id: XSSimpleTypeDecl.java,v 1.1 2002/01/24 16:38:28 sandygao Exp $
  + * @version $Id: XSSimpleTypeDecl.java,v 1.2 2002/01/28 00:35:44 elena Exp $
    */
   class XSSimpleTypeDecl implements XSAtomicSimpleType, XSListSimpleType, XSUnionSimpleType {
   
  @@ -323,6 +324,13 @@
           return (fValidationDV == DV_ID);
       }
   
  +    public short getWhitespace() throws DatatypeException{
  +        if (fVariety == VARIETY_UNION) {
  +            throw new DatatypeException("dt-whitespace", new Object[]{fTypeName});
  +        }
  +        return fWhiteSpace;
  +    }
  +
       public short getPrimitiveKind() {
           if (fVariety == VARIETY_ATOMIC && fValidationDV != DV_ANYSIMPLETYPE) {
               if (fVariety == DV_ID || fVariety == DV_IDREF || fVariety == DV_ENTITY)
  @@ -1171,7 +1179,12 @@
   
           if (fVariety == VARIETY_ATOMIC) {
   
  -            String nvalue = normalize(content, fWhiteSpace);
  +            String nvalue; 
  +            if (context==null ||context.needToNormalize()) {
  +                nvalue = normalize(content, fWhiteSpace);
  +            } else {
  +                nvalue = content;
  +            }
   
               // validate special kinds of token, in place of old pattern matching
               if (fTokenType != SPECIAL_TOKEN_NONE) {
  @@ -1216,7 +1229,12 @@
   
           } else if (fVariety == VARIETY_LIST) {
   
  -            String nvalue = normalize(content, fWhiteSpace);
  +            String nvalue; 
  +            if (context==null ||context.needToNormalize()) {
  +                nvalue = normalize(content, fWhiteSpace);
  +            } else {
  +                nvalue = content;
  +            }
               StringTokenizer parsedList = new StringTokenizer(nvalue);
               int countOfTokens = parsedList.countTokens() ;
               Object[] avalue = new Object[countOfTokens];
  @@ -1592,6 +1610,9 @@
   
           public boolean needExtraChecking() {
               return fExternal.needExtraChecking();
  +        }
  +        public boolean needToNormalize() {
  +            return fExternal.needToNormalize();
           }
   
           public boolean isEntityDeclared (String name) {
  
  
  
  1.41      +1 -0      xml-xerces/java/src/org/apache/xerces/impl/msg/XMLSchemaMessages.properties
  
  Index: XMLSchemaMessages.properties
  ===================================================================
  RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/impl/msg/XMLSchemaMessages.properties,v
  retrieving revision 1.40
  retrieving revision 1.41
  diff -u -r1.40 -r1.41
  --- XMLSchemaMessages.properties	24 Jan 2002 20:09:41 -0000	1.40
  +++ XMLSchemaMessages.properties	28 Jan 2002 00:35:44 -0000	1.41
  @@ -377,6 +377,7 @@
      dt-restriction-base = Datatypes 4.1.3: error in the simpleType ''{0}'' base attribute or simpleType child constraint
      dt-union-memberType = Datatypes 4.1.3: error in the simpleType ''{0}'' - memberTypes attribute or simpleType children constraint
      dt-enumeration-notation = Datatypes 3.2.19: enumeration facet value required for NOTATION type in element/attribute ''{0}''
  +   dt-whitespace = Whitespace facet value is not available for the union simpleType ''{0}''
   
   
   # Datatypes
  
  
  
  1.4       +3 -0      xml-xerces/java/src/org/apache/xerces/impl/validation/ValidationContext.java
  
  Index: ValidationContext.java
  ===================================================================
  RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/impl/validation/ValidationContext.java,v
  retrieving revision 1.3
  retrieving revision 1.4
  diff -u -r1.3 -r1.4
  --- ValidationContext.java	28 Nov 2001 17:49:01 -0000	1.3
  +++ ValidationContext.java	28 Jan 2002 00:35:44 -0000	1.4
  @@ -70,6 +70,9 @@
       // whether to do extra id/idref/entity checking
       public boolean needExtraChecking();
   
  +    // whether we need to normalize the value that is passed!
  +    public boolean needToNormalize();
  +
       // entity
       public boolean isEntityDeclared (String name);
       public boolean isEntityUnparsed (String name);
  
  
  
  1.7       +10 -1     xml-xerces/java/src/org/apache/xerces/impl/validation/ValidationState.java
  
  Index: ValidationState.java
  ===================================================================
  RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/impl/validation/ValidationState.java,v
  retrieving revision 1.6
  retrieving revision 1.7
  diff -u -r1.6 -r1.7
  --- ValidationState.java	6 Dec 2001 22:43:20 -0000	1.6
  +++ ValidationState.java	28 Jan 2002 00:35:44 -0000	1.7
  @@ -68,7 +68,7 @@
    * environment for simple type validation.
    *
    * @author Elena Litani, IBM
  - * @version $Id: ValidationState.java,v 1.6 2001/12/06 22:43:20 sandygao Exp $
  + * @version $Id: ValidationState.java,v 1.7 2002/01/28 00:35:44 elena Exp $
    */
   public class ValidationState implements ValidationContext {
   
  @@ -77,6 +77,7 @@
       //
       private boolean fExtraChecking              = true;
       private boolean fFacetChecking              = true;
  +    private boolean fNormalize                  = true;
   
       private EntityState fEntityState            = null;
       private NamespaceSupport fNamespaceSupport  = null;
  @@ -98,6 +99,10 @@
           fFacetChecking = newValue;
       }
   
  +    public void setNormalizationRequired (boolean newValue) {
  +          fNormalize = newValue;
  +    }
  +
       public void setEntityState(EntityState state) {
           fEntityState = state;
       }
  @@ -156,6 +161,10 @@
       // whether to validate against facets
       public boolean needFacetChecking() {
           return fFacetChecking;
  +    }
  +
  +    public boolean needToNormalize (){
  +        return fNormalize;
       }
   
       // entity
  
  
  
  1.38      +344 -47   xml-xerces/java/src/org/apache/xerces/impl/xs/XMLSchemaValidator.java
  
  Index: XMLSchemaValidator.java
  ===================================================================
  RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/impl/xs/XMLSchemaValidator.java,v
  retrieving revision 1.37
  retrieving revision 1.38
  diff -u -r1.37 -r1.38
  --- XMLSchemaValidator.java	24 Jan 2002 20:52:21 -0000	1.37
  +++ XMLSchemaValidator.java	28 Jan 2002 00:35:44 -0000	1.38
  @@ -60,6 +60,7 @@
   import org.apache.xerces.impl.dv.XSSimpleType;
   import org.apache.xerces.impl.dv.XSAtomicSimpleType;
   import org.apache.xerces.impl.dv.ValidatedInfo;
  +import org.apache.xerces.impl.dv.DatatypeException;
   import org.apache.xerces.impl.dv.InvalidDatatypeValueException;
   import org.apache.xerces.impl.xs.identity.*;
   import org.apache.xerces.impl.Constants;
  @@ -129,7 +130,7 @@
    * @author Elena Litani IBM
    * @author Andy Clark IBM
    * @author Neeraj Bajaj, Sun Microsystems, inc.
  - * @version $Id: XMLSchemaValidator.java,v 1.37 2002/01/24 20:52:21 neilg Exp $
  + * @version $Id: XMLSchemaValidator.java,v 1.38 2002/01/28 00:35:44 elena Exp $
    */
   public class XMLSchemaValidator
                implements XMLComponent, XMLDocumentFilter, FieldActivator {
  @@ -160,6 +161,16 @@
       protected static final String DYNAMIC_VALIDATION =
       Constants.XERCES_FEATURE_PREFIX + Constants.DYNAMIC_VALIDATION_FEATURE;
   
  +    /** Feature identifier: expose schema normalized value */
  +    protected static final String NORMALIZE_DATA =
  +    Constants.XERCES_FEATURE_PREFIX + Constants.SCHEMA_NORMALIZED_VALUE;
  +
  +
  +    /** Feature identifier: send element default value via characters() */
  +    protected static final String SCHEMA_ELEMENT_DEFAULT =
  +    Constants.XERCES_FEATURE_PREFIX + Constants.SCHEMA_ELEMENT_DEFAULT;
  +
  +
       // property identifiers
   
       /** Property identifier: symbol table. */
  @@ -230,9 +241,6 @@
       /** current PSVI element info */
       protected ElementPSVImpl fCurrentPSVI = null;
   
  -    // REVISIT: define constant here?
  -    protected final static String ELEM_PSVI = "ELEM_PSVI";
  -    protected final static String ATTR_PSVI = "ATTR_PSVI";
   
       // since it is the responsibility of each component to an
       // Augmentations parameter if one is null, to save ourselves from
  @@ -249,6 +257,10 @@
       protected boolean fDynamicValidation = false;
       protected boolean fDoValidation = false;
       protected boolean fFullChecking = false;
  +    protected boolean fNormalizeData = true;
  +    protected boolean fSchemaElementDefault = true;
  +    protected boolean fEntityRef = false;
  +    protected boolean fInCDATA = false;
   
       // properties
   
  @@ -547,26 +559,31 @@
       throws XNIException {
   
           Augmentations modifiedAugs = handleStartElement(element, attributes, augs);
  +
  +        // we need to save PSVI information: because it will be reset in the
  +        // handleEndElement(): type, notation, validation context        
  +        XSTypeDecl type = fCurrentPSVI.fTypeDecl;
  +        XSNotationDecl notation = fCurrentPSVI.fNotation;
  +        String vContext = fCurrentPSVI.fValidationContext;
  +
           // in the case where there is a {value constraint}, and the element
           // doesn't have any text content, change emptyElement call to
           // start + characters + end
           modifiedAugs = handleEndElement(element, modifiedAugs);
  -        XMLString defaultValue = fDefaultValue;
  +        
           // call handlers
           if (fDocumentHandler != null) {
  -
  -            fDocumentHandler.emptyElement(element, attributes, modifiedAugs);
  -
  -            // REVISIT: should we send default element value?
  -            /*if (defaultValue == null) {
  -                fDocumentHandler.emptyElement(element, attributes);
  +            fCurrentPSVI.fTypeDecl = type;
  +            fCurrentPSVI.fNotation = notation;
  +            fCurrentPSVI.fValidationContext = vContext;
  +            if (!fSchemaElementDefault || fDefaultValue == null) {
  +                fDocumentHandler.emptyElement(element, attributes, modifiedAugs);
               } else {
  -                fDocumentHandler.startElement(element, attributes);
  -                fDocumentHandler.characters(defaultValue);
  -                fDocumentHandler.endElement(element);
  -            }
  -            */
  -        }
  +                fDocumentHandler.startElement(element, attributes, modifiedAugs);
  +                fDocumentHandler.characters(fDefaultValue, modifiedAugs);
  +                fDocumentHandler.endElement(element, modifiedAugs);
  +            }            
  +       }
   
       } // emptyElement(QName,XMLAttributes, Augmentations)
   
  @@ -579,11 +596,34 @@
        * @throws XNIException Thrown by handler to signal an error.
        */
       public void characters(XMLString text, Augmentations augs) throws XNIException {
  +        
  +        if (augs == null) {
  +            augs = fAugmentations;
  +            augs.clear();
  +        }
  +        // get PSVI object
  +        fCurrentPSVI = (ElementPSVImpl)augs.getItem(Constants.ELEMENT_PSVI);
  +        if (fCurrentPSVI == null) {
  +            fCurrentPSVI = fElemPSVI;
  +            fCurrentPSVI.reset();
  +            augs.putItem(Constants.ELEMENT_PSVI, fCurrentPSVI);
  +        }
  +
   
           handleCharacters(text);
           // call handlers
           if (fDocumentHandler != null) {
  -            fDocumentHandler.characters(text, augs);
  +            if (fUnionType) {
  +                // for union types we can't normalize data
  +                // thus we only need to send augs information if any;
  +                // the normalized data for union will be send
  +                // after normalization is performed (at the endElement())
  +                if (augs != null) {
  +                    fDocumentHandler.characters(fEmptyXMLStr, augs);
  +                }
  +            } else {            
  +                fDocumentHandler.characters(text, augs);
  +            }
           }
   
       } // characters(XMLString)
  @@ -624,13 +664,21 @@
           // in the case where there is a {value constraint}, and the element
           // doesn't have any text content, add a characters call.
           Augmentations modifiedAugs = handleEndElement(element, augs);
  -        XMLString defaultValue  = fDefaultValue;
           // call handlers
           if (fDocumentHandler != null) {
  -            // REVISIT: should we send default element values??
  -            //if (defaultValue != null)
  -            //    fDocumentHandler.characters(defaultValue);
  -            fDocumentHandler.endElement(element, modifiedAugs);
  +            if (fSchemaElementDefault || fDefaultValue == null) {
  +                   fDocumentHandler.endElement(element, modifiedAugs);
  +            } else {
  +                fDocumentHandler.characters(fDefaultValue, modifiedAugs);
  +                fDocumentHandler.endElement(element, modifiedAugs);
  +            }            
  +        }
  +        // reset normalization values
  +        if (fNormalizeData) {
  +            fTrailing = false;
  +            fUnionType = false;
  +            fWhiteSpace = -1;
  +        
           }
   
       } // endElement(QName, Augmentations)
  @@ -662,6 +710,9 @@
        */
       public void startCDATA(Augmentations augs) throws XNIException {
   
  +
  +        // REVISIT: what should we do here if schema normalization is on?? 
  +        fInCDATA = true;
           // call handlers
           if (fDocumentHandler != null) {
               fDocumentHandler.startCDATA(augs);
  @@ -679,6 +730,7 @@
       public void endCDATA(Augmentations augs) throws XNIException {
   
           // call handlers
  +        fInCDATA = false;
           if (fDocumentHandler != null) {
               fDocumentHandler.endCDATA(augs);
           }
  @@ -727,7 +779,9 @@
                                      XMLResourceIdentifier identifier,
                                      String encoding,
                                      Augmentations augs) throws XNIException {
  -
  +        
  +        // REVISIT: what should happen if normalize_data_ is on?? 
  +        fEntityRef = true;
           // call handlers
           if (fDocumentHandler != null) {
               fDocumentHandler.startGeneralEntity(name, identifier, encoding, augs);
  @@ -826,6 +880,7 @@
       public void endGeneralEntity(String name, Augmentations augs) throws XNIException {
   
           // call handlers
  +        fEntityRef = false;
           if (fDocumentHandler != null) {
               fDocumentHandler.endGeneralEntity(name, augs);
           }
  @@ -850,6 +905,23 @@
       // Data
       //
   
  +
  +    // Schema Normalization
  +    
  +    private static final boolean DEBUG_NORMALIZATION = false;
  +    // temporary empty string buffer.
  +    private final XMLString fEmptyXMLStr = new XMLString(null, 0, -1);
  +    // temporary character buffer, and empty string buffer.
  +    private static final int BUFFER_SIZE = 20;
  +    private char[] fCharBuffer =  new char[BUFFER_SIZE];
  +    private final StringBuffer fNormalizedStr = new StringBuffer();    
  +    private final XMLString fXMLString = new XMLString(fCharBuffer, 0, -1);
  +    private boolean fFirstChunk = true; // got first chunk in characters() (SAX)
  +    private boolean fTrailing = false;  // Previous chunk had a trailing space
  +    private short fWhiteSpace = -1;  //whiteSpace: preserve/replace/collapse
  +    private boolean fUnionType = false;
  +    
  +
       /** Schema grammar resolver. */
       final XSGrammarBucket fGrammarBucket;
       final SubstitutionGroupHandler fSubGroupHandler;
  @@ -1036,13 +1108,9 @@
               fValidation = false;
           }
   
  +        // Xerces features
           try {
  -            // REVISIT: should schema validation depend on validation?
  -            // NOTE: YES! That's the way it's documented and has worked
  -            //       in the past. Therefore, it must keep the same value
  -            //       to retain the same behavior. -Ac
               fValidation = fValidation && componentManager.getFeature(SCHEMA_VALIDATION);
  -            //fValidation =  componentManager.getFeature(SCHEMA_VALIDATION);
           }
           catch (XMLConfigurationException e) {
               fValidation = false;
  @@ -1055,7 +1123,6 @@
               fFullChecking = false;
           }
   
  -        // Xerces features
           try {
               fDynamicValidation = componentManager.getFeature(DYNAMIC_VALIDATION);
           }
  @@ -1063,6 +1130,20 @@
               fDynamicValidation = false;
           }
   
  +        try {
  +           fNormalizeData = componentManager.getFeature(NORMALIZE_DATA);
  +        }
  +        catch (XMLConfigurationException e) {
  +            fNormalizeData = false;
  +        }
  +
  +        try {
  +           fSchemaElementDefault = componentManager.getFeature(SCHEMA_ELEMENT_DEFAULT);
  +        }
  +        catch (XMLConfigurationException e) {
  +            fSchemaElementDefault = false;
  +        }
  +
           // REVISIT: use default entity resolution from ENTITY MANAGER - temporary solution
           fEntityResolver = (XMLEntityResolver)componentManager.getProperty(ENTITY_MANAGER);
   
  @@ -1112,6 +1193,17 @@
           fElementDepth = -1;
           fChildCount = 0;
   
  +        // datatype normalization
  +        fFirstChunk = true;
  +        fTrailing = false;
  +        fNormalizedStr.setLength(0);
  +        fWhiteSpace = -1;
  +        fUnionType = false;
  +        fWhiteSpace = -1;
  +        fAugmentations.clear();
  +        fEntityRef = false;
  +        fInCDATA = false;
  +
           fMatcherStack.clear();
   
           fValueStoreCache = new ValueStoreCache();
  @@ -1251,18 +1343,85 @@
       // handle character contents
       void handleCharacters(XMLString text) {
   
  +        //System.out.println('\n'+"===>Characters("+text.toString()+")");
  +        fCurrentPSVI.fNormalizedValue = null;
           if (fSkipValidationDepth >= 0)
               return;
   
  +        String normalizedStr = null;
           boolean allWhiteSpace = true;
  -        for (int i=text.offset; i< text.offset+text.length; i++) {
  -            if (!XMLChar.isSpace(text.ch[i])) {
  -                allWhiteSpace = false;
  -                break;
  +
  +        // find out if type is union, what is whitespace,
  +        // determine if there is a need to do normalization
  +        if (fNormalizeData && !fEntityRef && !fInCDATA) {
  +            // if whitespace == -1 skip normalization, because it is a complexType
  +            if (fWhiteSpace != -1 && !fUnionType && fWhiteSpace != XSSimpleType.WS_PRESERVE) {
  +                // normalize data
  +                int spaces = normalizeWhitespace(text, fWhiteSpace == XSSimpleType.WS_COLLAPSE);
  +                int length = fNormalizedStr.length();
  +                if (length > 0) {
  +                    //System.out.println("firstchunk="+fFirstChunk+"; trailing="+fTrailing);
  +                    if (!fFirstChunk && (fWhiteSpace==XSSimpleType.WS_COLLAPSE) ) {
  +                        if (fTrailing) { 
  +                            // previous chunk ended on whitespace 
  +                            // insert whitespace
  +                         fNormalizedStr.insert(0, ' ');
  +                        } else if (spaces == 1 || spaces == 3) {
  +                            // previous chunk ended on character,
  +                            // this chunk starts with whitespace
  +                            fNormalizedStr.insert(0, ' ');
  +                        }
  +                    }
  +                }
  +                normalizedStr = fNormalizedStr.toString();
  +                fCurrentPSVI.fNormalizedValue = normalizedStr;
  +                fTrailing = (spaces > 1)?true:false;
               }
           }
   
  -        fBuffer.append(text.toString());
  +        boolean mixed = false;
  +        if (fCurrentType != null && fCurrentType.getXSType() == XSTypeDecl.COMPLEX_TYPE) {
  +              XSComplexTypeDecl ctype = (XSComplexTypeDecl)fCurrentType;
  +              if (ctype.fContentType == XSComplexTypeDecl.CONTENTTYPE_MIXED) {
  +                    mixed = true;
  +              }
  +        }
  +
  +        if (DEBUG) {        
  +         System.out.println("==>characters()"+fCurrentType.getTypeName()+":"+mixed);
  +        }
  +
  +        if (mixed || fWhiteSpace !=-1 || fUnionType) {
  +            // don't check characters: since it is either 
  +            // a) mixed content model - we don't care if there were some characters
  +            // b) simpleType/simpleContent - in which case it is data in ELEMENT content
  +        }
  +        else  {
  +
  +            if (DEBUG) {        
  +             System.out.println("==>check for whitespace");
  +            }
  +            // data outside of element content
  +            for (int i=text.offset; i< text.offset+text.length; i++) {
  +                if (!XMLChar.isSpace(text.ch[i])) {
  +                    allWhiteSpace = false;
  +                    break;
  +                }
  +            }
  +        } 
  +
  +
  +        // we saw first chunk of characters
  +        fFirstChunk = false;
  +        
  +        // we need to save normalized value so that we can perform
  +        // validation at the end element after all data was received
  +        if (normalizedStr != null) { 
  +            fBuffer.append(normalizedStr);
  +        } else {
  +            fBuffer.append(text.toString());
  +        }
  +        
           if (!allWhiteSpace) {
               fSawCharacters = true;
           }
  @@ -1275,6 +1434,61 @@
           }
       } // handleCharacters(XMLString)
   
  +    /**
  +     * Normalize whitespace in an XMLString according to the rules defined
  +     * in XML Schema specifications.     
  +     * @param value    The string to normalize.
  +     * @param collapse replace or collapse
  +     * @returns 0 if no triming is done or if there is neither leading nor
  +     *            trailing whitespace,
  +     *          1 if there is only leading whitespace,
  +     *          2 if there is only trailing whitespace,
  +     *          3 if there is both leading and trailing whitespace.
  +     */
  +    private int normalizeWhitespace( XMLString value, boolean collapse) {
  +        boolean skipSpace = collapse;
  +        boolean sawNonWS = false;
  +        int leading = 0;
  +        int trailing = 0;
  +        int c;
  +        int size = value.offset+value.length;
  +        fNormalizedStr.setLength(0);
  +        for (int i = value.offset; i < size; i++) {
  +            c = value.ch[i];
  +            if (c == 0x20 || c == 0x0D || c == 0x0A || c == 0x09) {
  +                if (!skipSpace) {
  +                    // take the first whitespace as a space and skip the others
  +                    fNormalizedStr.append(' ');
  +                    skipSpace = collapse;
  +                }
  +                if (!sawNonWS) {
  +                    // this is a leading whitespace, record it
  +                    leading = 1;
  +                }
  +            }
  +            else {
  +                fNormalizedStr.append((char)c);
  +                skipSpace = false;
  +                sawNonWS = true;
  +            }
  +        }
  +        if (skipSpace) {
  +            c = fNormalizedStr.length();
  +            if ( c != 0) {
  +                // if we finished on a space trim it but also record it
  +                fNormalizedStr.setLength (--c);
  +                trailing = 2;
  +            }
  +            else if (leading != 0 && !sawNonWS) {
  +                // if all we had was whitespace we skipped record it as
  +                // trailing whitespace as well
  +                trailing = 2;
  +            }
  +        }
  +        return collapse ? leading + trailing : 0;
  +    }
  +
  +
       // handle ignorable whitespace
       void handleIgnorableWhitespace(XMLString text) {
   
  @@ -1304,6 +1518,13 @@
               System.out.println("handleStartElement: " +element);
           }
   
  +        if (fNormalizeData) {
  +            // reset values
  +            fFirstChunk = true;
  +            fUnionType  = false;
  +            fWhiteSpace = -1;
  +        }
  +
           // if we are not skipping this element, and there is a content model,
           // we try to find the corresponding decl object for this element.
           // the reason we move this part of code here is to make sure the
  @@ -1355,10 +1576,10 @@
               parseSchemas(fExternalSchemas, fExternalNoNamespaceSchema);
           }
   
  -        fCurrentPSVI = (ElementPSVImpl)augs.getItem(ELEM_PSVI);
  +        fCurrentPSVI = (ElementPSVImpl)augs.getItem(Constants.ELEMENT_PSVI);
           if (fCurrentPSVI == null) {
               fCurrentPSVI = fElemPSVI;
  -            augs.putItem(ELEM_PSVI, fCurrentPSVI);
  +            augs.putItem(Constants.ELEMENT_PSVI, fCurrentPSVI);
           }
           fCurrentPSVI.reset();
   
  @@ -1494,6 +1715,7 @@
           // PSVI: add element type
           fCurrentPSVI.fTypeDecl = fCurrentType;
   
  +
           // Element Locally Valid (Type)
           // 2 Its {abstract} must be false.
           if (fCurrentType.getXSType() == XSTypeDecl.COMPLEX_TYPE) {
  @@ -1501,8 +1723,37 @@
               if (ctype.isAbstractType()) {
                   reportSchemaError("cvc-type.2", new Object[]{"Element " + element.rawname + " is declared with a type that is abstract.  Use xsi:type to specify a non-abstract type"});
               }
  +            if (fNormalizeData) {        
  +                // find out if the content type is simple and if variety is union
  +                // to be able to do character normalization
  +                if (ctype.fContentType == XSComplexTypeDecl.CONTENTTYPE_SIMPLE) { 
  +                        if (ctype.fXSSimpleType.getVariety() == XSSimpleType.VARIETY_UNION) {
  +                            fUnionType = true;
  +                        } else {
  +                            try {                            
  +                                fWhiteSpace = ctype.fXSSimpleType.getWhitespace();
  +                            } catch (DatatypeException e){
  +                                // do nothing
  +                            }
  +                        }
  +                }
  +            }
           }
  -
  +        // normalization
  +        if (fNormalizeData && fCurrentType.getXSType() == XSTypeDecl.SIMPLE_TYPE) {
  +            // if !union type
  +             XSSimpleType dv = (XSSimpleType)fCurrentType;
  +             if (dv.getVariety() == XSSimpleType.VARIETY_UNION) {
  +                    fUnionType = true;
  +             } else {
  +                 try {
  +                    fWhiteSpace = dv.getWhitespace();
  +                 } catch (DatatypeException e){
  +                     // do nothing
  +                 }
  +             }
  +        }
  +    
           // then try to get the content model
           fCurrentCM = null;
           if (fCurrentType.getXSType() == XSTypeDecl.COMPLEX_TYPE) {
  @@ -1577,10 +1828,10 @@
               augs.clear();
           }
   
  -        fCurrentPSVI = (ElementPSVImpl)augs.getItem(ELEM_PSVI);
  +        fCurrentPSVI = (ElementPSVImpl)augs.getItem(Constants.ELEMENT_PSVI);
           if (fCurrentPSVI == null) {
               fCurrentPSVI = fElemPSVI;
  -            augs.putItem(ELEM_PSVI, fCurrentPSVI);
  +            augs.putItem(Constants.ELEMENT_PSVI, fCurrentPSVI);
           }
           fCurrentPSVI.reset();
   
  @@ -1860,12 +2111,12 @@
           AttributePSVImpl attrPSVI = null;
           for (int k=0;k<attributes.getLength();k++) {
               augs = attributes.getAugmentations(k);
  -            attrPSVI = (AttributePSVImpl) augs.getItem(ATTR_PSVI);
  +            attrPSVI = (AttributePSVImpl) augs.getItem(Constants.ATTRIBUTE_PSVI);
               if (attrPSVI != null) {
                   attrPSVI.reset();
               } else {
                   attrPSVI= new AttributePSVImpl();
  -                augs.putItem(ATTR_PSVI, attrPSVI);
  +                augs.putItem(Constants.ATTRIBUTE_PSVI, attrPSVI);
               }
               // PSVI attribute: validation context
               attrPSVI.fValidationContext = element.rawname;
  @@ -1894,7 +2145,7 @@
               for (int index = 0; index < attCount; index++) {
                   attributes.getName(index, fTempQName);
                   // get attribute PSVI
  -                attrPSVI = (AttributePSVImpl)attributes.getAugmentations(index).getItem(ATTR_PSVI);
  +                attrPSVI = (AttributePSVImpl)attributes.getAugmentations(index).getItem(Constants.ATTRIBUTE_PSVI);
                   // PSVI: validation attempted, validity
                   attrPSVI.fValidationAttempted = AttributePSVI.FULL_VALIDATION;
                   attrPSVI.fValidity = AttributePSVI.VALID_VALIDITY;
  @@ -1938,7 +2189,7 @@
           for (int index = 0; index < attCount; index++) {
   
               // get attribute PSVI
  -            attrPSVI = (AttributePSVImpl)attributes.getAugmentations(index).getItem(ATTR_PSVI);
  +            attrPSVI = (AttributePSVImpl)attributes.getAugmentations(index).getItem(Constants.ATTRIBUTE_PSVI);
               // PSVI: set Attribute valid and attempted validation to full.
               attrPSVI.fValidationAttempted = AttributePSVI.FULL_VALIDATION;
               attrPSVI.fValidity = AttributePSVI.VALID_VALIDITY;
  @@ -2170,7 +2421,7 @@
                   // PSVI: attribute is "schema" specified
                   Augmentations augs = attributes.getAugmentations(attrIndex);
   
  -                AttributePSVImpl attrPSVI = (AttributePSVImpl)augs.getItem(ATTR_PSVI);
  +                AttributePSVImpl attrPSVI = (AttributePSVImpl)augs.getItem(Constants.ATTRIBUTE_PSVI);
   
                   // check if PSVIAttribute was added to Augmentations.
                   // it is possible that we just created new chunck of attributes
  @@ -2179,7 +2430,7 @@
                       attrPSVI.reset();
                   } else {
                       attrPSVI = new AttributePSVImpl();
  -                    augs.putItem(ATTR_PSVI, attrPSVI);
  +                    augs.putItem(Constants.ATTRIBUTE_PSVI, attrPSVI);
                   }
   
                   attrPSVI.fSpecified = false;
  @@ -2204,7 +2455,7 @@
   
               // PSVI: specified
               fCurrentPSVI.fSpecified = false;
  -
  +            
               int bufLen = fCurrentElemDecl.fDefault.normalizedValue.length();
               char [] chars = new char[bufLen];
               fCurrentElemDecl.fDefault.normalizedValue.getChars(0, bufLen, chars, 0);
  @@ -2304,6 +2555,19 @@
           if (fCurrentType == null)
               return null;
   
  +        if (fUnionType) {
  +            // for union types we need to send data because we delayed sending this data
  +            // when we received it in the characters() call.
  +            // XMLString will include non-normalized value, PSVIElement will include 
  +            // normalized value
  +            int bufLen = textContent.length();
  +            if (bufLen >= BUFFER_SIZE) {
  +                fCharBuffer = new char[bufLen*2];
  +            }
  +            textContent.getChars(0, bufLen, fCharBuffer, 0);
  +            fXMLString.setValues(fCharBuffer, 0, bufLen);
  +        }
  +
           Object retValue = null;
           // Element Locally Valid (Type)
           // 3 The appropriate case among the following must be true:
  @@ -2316,14 +2580,32 @@
               if (!fNil) {
                   XSSimpleType dv = (XSSimpleType)fCurrentType;
                   try {
  +                
  +                    if (!fNormalizeData || fUnionType) {
  +                        fValidationState.setNormalizationRequired(true);
  +                    }
                       retValue = dv.validate(textContent, fValidationState, fValidatedInfo);
                       // PSVI: schema normalized value
                       //
                       fCurrentPSVI.fNormalizedValue = fValidatedInfo.normalizedValue;
                       // PSVI: memberType
                       fCurrentPSVI.fMemberType = fValidatedInfo.memberType;
  +                    
  +                    if (fDocumentHandler != null && fUnionType) {
  +                        // send normalized values
  +                        // at this point we should only rely on normalized value
  +                        // available via PSVI
  +                        fAugmentations.putItem(Constants.ELEMENT_PSVI, fCurrentPSVI);
  +                        fDocumentHandler.characters(fXMLString, fAugmentations);
  +                    }
                   }
                   catch (InvalidDatatypeValueException e) {
  +                    if (fDocumentHandler != null && fUnionType) {
  +                        fCurrentPSVI.fNormalizedValue = null;
  +                        fAugmentations.putItem(Constants.ELEMENT_PSVI, fCurrentPSVI);
  +                        fDocumentHandler.characters(fXMLString, fAugmentations);
  +
  +                    }
                       reportSchemaError("cvc-type.3.1.3", new Object[]{element.rawname, textContent});
                   }
               }
  @@ -2356,6 +2638,10 @@
                       reportSchemaError("cvc-complex-type.2.2", new Object[]{element.rawname});
                   XSSimpleType dv = ctype.fXSSimpleType;
                   try {
  +                    
  +                    if (!fNormalizeData || fUnionType) {
  +                        fValidationState.setNormalizationRequired(true);
  +                    }
                       actualValue = dv.validate(textContent, fValidationState, fValidatedInfo);
   
                       // PSVI: schema normalized value
  @@ -2363,8 +2649,19 @@
                       fCurrentPSVI.fNormalizedValue = fValidatedInfo.normalizedValue;
                       // PSVI: memberType
                       fCurrentPSVI.fMemberType = fValidatedInfo.memberType;
  +                    
  +                    if (fDocumentHandler != null && fUnionType) {
  +                        fAugmentations.putItem(Constants.ELEMENT_PSVI, fCurrentPSVI);
  +                        fDocumentHandler.characters(fXMLString, fAugmentations);
  +                    }
                   }
                   catch (InvalidDatatypeValueException e) {
  +                    if (fDocumentHandler != null && fUnionType) {
  +                        fCurrentPSVI.fNormalizedValue = null;
  +                        fAugmentations.putItem(Constants.ELEMENT_PSVI, fCurrentPSVI);
  +                        fDocumentHandler.characters(fXMLString, fAugmentations);
  +
  +                    }
                       reportSchemaError("cvc-complex-type.2.2", new Object[]{element.rawname});
                   }
                   // REVISIT: eventually, this method should return the same actualValue as elementLocallyValidType...
  
  
  
  1.32      +97 -12    xml-xerces/java/src/org/apache/xerces/parsers/AbstractDOMParser.java
  
  Index: AbstractDOMParser.java
  ===================================================================
  RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/parsers/AbstractDOMParser.java,v
  retrieving revision 1.31
  retrieving revision 1.32
  diff -u -r1.31 -r1.32
  --- AbstractDOMParser.java	27 Jan 2002 01:53:27 -0000	1.31
  +++ AbstractDOMParser.java	28 Jan 2002 00:35:44 -0000	1.32
  @@ -78,6 +78,8 @@
   import org.apache.xerces.xni.XMLString;
   import org.apache.xerces.xni.XNIException;
   import org.apache.xerces.xni.parser.XMLParserConfiguration;
  +import org.apache.xerces.xni.psvi.AttributePSVI;
  +import org.apache.xerces.xni.psvi.ElementPSVI;
   
   import org.w3c.dom.Attr;
   import org.w3c.dom.CDATASection;
  @@ -103,7 +105,7 @@
    * @author Andy Clark, IBM
    * @author Elena Litani, IBM
    *
  - * @version $Id: AbstractDOMParser.java,v 1.31 2002/01/27 01:53:27 lehors Exp $
  + * @version $Id: AbstractDOMParser.java,v 1.32 2002/01/28 00:35:44 elena Exp $
    */
   public abstract class AbstractDOMParser
       extends AbstractXMLDocumentParser {
  @@ -135,6 +137,10 @@
       /** Feature id: defer node expansion. */
       protected static final String DEFER_NODE_EXPANSION =
           "http://apache.org/xml/features/dom/defer-node-expansion";
  +    
  +    /** Feature id: expose XML Schema normalized values. */
  +    protected static final String NORMALIZE_DATA = 
  +        Constants.XERCES_FEATURE_PREFIX + Constants.SCHEMA_NORMALIZED_VALUE;
   
       // property ids
   
  @@ -151,10 +157,12 @@
       protected static final String  CURRENT_ELEMENT_NODE=  
           Constants.XERCES_FEATURE_PREFIX + Constants.CURRENT_ELEMENT_NODE_PROPERTY;
   
  +
       // debugging
   
       /** Set to true and recompile to debug entity references. */
       private static final boolean DEBUG_ENTITY_REF = false;
  +    private static final boolean DEBUG_EVENTS = false;
   
   
       //
  @@ -175,6 +183,9 @@
       /** Create cdata nodes. */
       protected boolean fCreateCDATANodes;
   
  +    /** Expose XML Schema schema_normalize_values via DOM*/
  +    protected boolean fNormalizeData = true;
  +
       // dom information
   
       /** The document. */
  @@ -347,6 +358,9 @@
   
           fCreateCDATANodes = fConfiguration.getFeature(CREATE_CDATA_NODES_FEATURE);
   
  +        fNormalizeData = fConfiguration.getFeature(NORMALIZE_DATA);
  +        
  +
           // get property
           setDocumentClassName((String)
                                fConfiguration.getProperty(DOCUMENT_CLASS_NAME));
  @@ -394,11 +408,9 @@
                                      XMLResourceIdentifier identifier,
                                      String encoding, Augmentations augs) 
           throws XNIException {
  -
  -        // REVISIT: investigate fInDTD & fInDocument flags
  -        // this method now only called by DocumentHandler
  -        // comment(), endEntity(), processingInstruction(), textDecl()
  -        // REVISIT: need to set the Entity.actualEncoding somehow
  +        if (DEBUG_EVENTS) {        
  +            System.out.println("==>startGeneralEntity ("+name+")");
  +        }
           if (fCreateEntityRefNodes) {
               if (!fDeferNodeExpansion) {
                   EntityReference er = fDocument.createEntityReference(name);
  @@ -416,7 +428,7 @@
               }
           }
   
  -    } // startEntity(String,String,String,String)
  +    } // startGeneralEntity(String,XMLResourceIdentifier, Augmentations)
   
       /**
        * Notifies of the presence of a TextDecl line in an entity. If present,
  @@ -674,6 +686,9 @@
        */
       public void startElement(QName element, XMLAttributes attributes, Augmentations augs)
           throws XNIException {
  +        if (DEBUG_EVENTS) {
  +            System.out.println("==>startElement ("+element.rawname+")");
  +        }
           if (!fDeferNodeExpansion) {
               Element el;
               if (fNamespaceAware) {
  @@ -701,7 +716,15 @@
                   else {
                       attr = fDocument.createAttribute(fAttrQName.rawname);
                   }
  +
                   String attrValue = attributes.getValue(i);
  +                if (fNormalizeData) {
  +                    AttributePSVI attrPSVI = (AttributePSVI)attributes.getAugmentations(i).getItem(Constants.ATTRIBUTE_PSVI);
  +                    if (attrPSVI != null) {
  +                        attrValue = attrPSVI.schemaNormalizedValue();
  +                    }
  +
  +                }
                   attr.setValue(attrValue);
                   el.setAttributeNode(attr);
                   // NOTE: The specified value MUST be set after you set
  @@ -730,6 +753,13 @@
               int attrCount = attributes.getLength();
               for (int i = 0; i < attrCount; i++) {
   		String attrValue = attributes.getValue(i);
  +                if (fNormalizeData) {
  +                    AttributePSVI attrPSVI = (AttributePSVI)attributes.getAugmentations(i).getItem(Constants.ATTRIBUTE_PSVI);
  +                    if (attrPSVI != null) {
  +                        attrValue = attrPSVI.schemaNormalizedValue();
  +                    }
  +
  +                }
   		fDeferredDocumentImpl.setDeferredAttribute(el,
   						    attributes.getQName(i),
   						    attributes.getURI(i),
  @@ -754,6 +784,10 @@
        * @throws XNIException Thrown by handler to signal an error.
        */
       public void characters(XMLString text, Augmentations augs) throws XNIException {
  +        
  +        if (DEBUG_EVENTS) {
  +            System.out.println("==>characters(): "+text.toString());
  +        }
           if (!fDeferNodeExpansion) {
               if (fInCDATASection && fCreateCDATANodes) {
                   if (fCurrentCDATASection == null) {
  @@ -767,13 +801,31 @@
                   }
               }
               else if (!fInDTD) {
  +                // if type is union (XML Schema) it is possible that we receive
  +                // character call with empty data
  +                if (text.length == 0) {
  +                    return;
  +                }
  +
  +                String value = null;
  +                // normalized value for element is stored in schema_normalize_value property
  +                // of PSVI element.
  +                if (fNormalizeData && augs != null) {
  +                    ElementPSVI elemPSVI = (ElementPSVI)augs.getItem(Constants.ELEMENT_PSVI);
  +                    if (elemPSVI != null) {
  +                        value = elemPSVI.schemaNormalizedValue();
  +                    } 
  +                } 
  +                if (value == null) {
  +                     value = text.toString();
  +                }
                   Node child = fCurrentNode.getLastChild();
                   if (child != null && child.getNodeType() == Node.TEXT_NODE) {
                       Text textNode = (Text)child;
  -                    textNode.appendData(text.toString());
  +                    textNode.appendData(value);
                   }
                   else {
  -                    Text textNode = fDocument.createTextNode(text.toString());
  +                    Text textNode = fDocument.createTextNode(value);
                       fCurrentNode.appendChild(textNode);
                   }
               }
  @@ -796,9 +848,33 @@
                       fDeferredDocumentImpl.appendChild(fCurrentNodeIndex, txt);
                   }
               } else if (!fInDTD) {
  +                if (DEBUG_EVENTS) {                    
  +                   System.out.println("==>currentNode: type="+fDeferredDocumentImpl.getNodeType(fCurrentNodeIndex)+
  +                                      "; name="+fDeferredDocumentImpl.getNodeName(fCurrentNodeIndex));
  +                }
  +                // if type is union (XML Schema) it is possible that we receive
  +                // character call with empty data
  +                if (text.length == 0) {
  +                    return;
  +                }
  +
  +                String value = null;
  +                // normalized value for element is stored in schema_normalize_value property
  +                // of PSVI element.
  +                if (fNormalizeData && augs != null) {
  +                    ElementPSVI elemPSVI = (ElementPSVI)augs.getItem(Constants.ELEMENT_PSVI);
  +                    if (elemPSVI != null) {
  +                        value = elemPSVI.schemaNormalizedValue();
  +                    } 
  +                } 
  +
  +                if (value == null) {
  +                     value = text.toString();
  +                }
                   int txt = fDeferredDocumentImpl.
  -                    createDeferredTextNode(text.toString(), false);
  +                    createDeferredTextNode(value, false);
                   fDeferredDocumentImpl.appendChild(fCurrentNodeIndex, txt);
  +
               }
           }
       } // characters(XMLString)
  @@ -856,6 +932,9 @@
        * @throws XNIException Thrown by handler to signal an error.
        */
       public void endElement(QName element, Augmentations augs) throws XNIException {
  +        if (DEBUG_EVENTS) {
  +            System.out.println("==>endElement ("+element.rawname+")");
  +        }
           if (!fDeferNodeExpansion) {
               fCurrentNode = fCurrentNode.getParentNode();
           }
  @@ -952,7 +1031,9 @@
        *                   Thrown by handler to signal an error.
        */
       public void endGeneralEntity(String name, Augmentations augs) throws XNIException {
  -
  +        if (DEBUG_EVENTS || DEBUG_ENTITY_REF) {
  +            System.out.println("==>endGeneralEntity: ("+name+")");
  +        }
           if (fCreateEntityRefNodes) {
               if (!fDeferNodeExpansion) {
                   if (fDocumentType != null) {
  @@ -1000,13 +1081,17 @@
                           childIndex = fDeferredDocumentImpl.getRealPrevSibling(childIndex, false);
                       }
                   }
  +                if (DEBUG_ENTITY_REF) {
  +                    System.out.println("==>currentNode type="+fDeferredDocumentImpl.getNodeType(fCurrentNodeIndex)+
  +                                       "; name="+fDeferredDocumentImpl.getNodeName(fCurrentNodeIndex));
  +                }
                   fCurrentNodeIndex =
                       fDeferredDocumentImpl.getParentNode(fCurrentNodeIndex,
                                                           false);
               }
           }
   
  -    } // endEntity(String)
  +    } // endGeneralEntity(String, Augmentations)
   
       //
       // XMLDTDHandler methods
  
  
  
  1.17      +65 -6     xml-xerces/java/src/org/apache/xerces/parsers/AbstractSAXParser.java
  
  Index: AbstractSAXParser.java
  ===================================================================
  RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/parsers/AbstractSAXParser.java,v
  retrieving revision 1.16
  retrieving revision 1.17
  diff -u -r1.16 -r1.17
  --- AbstractSAXParser.java	24 Jan 2002 06:42:59 -0000	1.16
  +++ AbstractSAXParser.java	28 Jan 2002 00:35:44 -0000	1.17
  @@ -82,6 +82,8 @@
   import org.apache.xerces.xni.parser.XMLErrorHandler;
   import org.apache.xerces.xni.parser.XMLInputSource;
   import org.apache.xerces.xni.parser.XMLParserConfiguration;
  +import org.apache.xerces.xni.psvi.ElementPSVI;
  +import org.apache.xerces.xni.psvi.AttributePSVI;
   
   import org.xml.sax.AttributeList;
   import org.xml.sax.Attributes;
  @@ -111,7 +113,7 @@
    * @author Arnaud Le Hors, IBM
    * @author Andy Clark, IBM
    *
  - * @version $Id: AbstractSAXParser.java,v 1.16 2002/01/24 06:42:59 neilg Exp $
  + * @version $Id: AbstractSAXParser.java,v 1.17 2002/01/28 00:35:44 elena Exp $
    */
   public abstract class AbstractSAXParser
       extends AbstractXMLDocumentParser
  @@ -132,6 +134,10 @@
       protected static final String NAMESPACE_PREFIXES =
           Constants.SAX_FEATURE_PREFIX + Constants.NAMESPACE_PREFIXES_FEATURE;
   
  +    /** Feature id: expose XML Schema normalized values. */
  +    protected static final String NORMALIZE_DATA = 
  +        Constants.XERCES_FEATURE_PREFIX + Constants.SCHEMA_NORMALIZED_VALUE;
  +
       // NOTE: The symbol table properties is for internal use. -Ac
   
       /** Property identifier: symbol table. */
  @@ -150,6 +156,9 @@
       /** Namespace prefixes. */
       protected boolean fNamespacePrefixes = false;
   
  +    /** Expose XML Schema schema_normalize_values via SAX */
  +    protected boolean fNormalizeData = true;
  +
       // parser handlers
   
       /** Content handler. */
  @@ -187,6 +196,11 @@
       // temp vars
       private final AttributesProxy fAttributesProxy = new AttributesProxy();
   
  +    // temporary buffer for sending normalized values
  +    // REVISIT: what should be the size of the buffer?
  +    private static final int BUFFER_SIZE = 20;
  +    private char[] fCharBuffer =  new char[BUFFER_SIZE];
  +
       //
       // Constructors
       //
  @@ -387,6 +401,8 @@
           try {
               // SAX1
               if (fDocumentHandler != null) {
  +                // REVISIT: should we support schema-normalized-value for SAX1 events
  +                // 
                   fAttributesProxy.setAttributes(attributes);
                   fDocumentHandler.startElement(element.rawname, fAttributesProxy);
               }
  @@ -397,20 +413,30 @@
                   int len = attributes.getLength();
                   for (int i = len - 1; i >= 0; i--) {
                       attributes.getName(i, fQName);
  +                    // change attribute value to normalized value
  +                    if (fNormalizeData) {
  +                        AttributePSVI attrPSVI = (AttributePSVI)attributes.getAugmentations(i).getItem(Constants.ATTRIBUTE_PSVI);
  +                        if (attrPSVI != null) {
  +                            attributes.setValue(i, attrPSVI.schemaNormalizedValue());
  +                        }
  +                    }
  +
                       if (fQName.prefix == fXmlnsSymbol || 
                           fQName.rawname == fXmlnsSymbol) {
                           if (!fNamespacePrefixes) {
                               // remove namespace declaration attributes
                               attributes.removeAttributeAt(i);
                           }
  -                        else if (fNamespaces && fNamespacePrefixes) {
  +                        if (fNamespaces && fNamespacePrefixes) {
                               // localpart should be empty string as per SAX documentation:
                               // http://www.saxproject.org/?selected=namespaces
                               fQName.prefix = fEmptySymbol;
                               fQName.localpart = fEmptySymbol;
                               attributes.setName(i, fQName);
                           }
  -                    }
  +                    } 
  +                    
  +
                     
                   }
                   
  @@ -436,17 +462,49 @@
        * @throws XNIException Thrown by handler to signal an error.
        */
       public void characters(XMLString text, Augmentations augs) throws XNIException {
  +        
  +        // if type is union (XML Schema) it is possible that we receive
  +        // character call with empty data
  +        if (text.length == 0) {
  +            return;
  +        }
   
   
           try {
               // SAX1
               if (fDocumentHandler != null) {
  +                // REVISIT: should we support schema-normalized-value for SAX1 events
  +                // 
                   fDocumentHandler.characters(text.ch, text.offset, text.length);
               }
   
               // SAX2
               if (fContentHandler != null) {
  -                fContentHandler.characters(text.ch, text.offset, text.length);
  +                String value = null;
  +                // normalized value for element is stored in schema_normalize_value property
  +                // of PSVI element.
  +                if (fNormalizeData && augs != null) {
  +                    ElementPSVI elemPSVI = (ElementPSVI)augs.getItem(Constants.ELEMENT_PSVI);
  +                    if (elemPSVI != null) {
  +                        value = elemPSVI.schemaNormalizedValue();
  +                    }
  +                }
  +
  +                int length = 0;
  +                if (value != null) {
  +                     // if normalized value is available copy it into a temp buffer
  +                     length = value.length();
  +                     if (length >= BUFFER_SIZE) {
  +                        fCharBuffer = new char[length*2];
  +                     }
  +                     value.getChars(0, length, fCharBuffer, 0);
  +                }
  +                if (value == null) {                
  +                    fContentHandler.characters(text.ch, text.offset, text.length);
  +                }
  +                else {
  +                    fContentHandler.characters(fCharBuffer, 0, length);
  +                }
               }
           }
           catch (SAXException e) {
  @@ -1854,9 +1912,10 @@
           fInDTD = false;
   
           // features
  -        fNamespaces = fConfiguration.getFeature(NAMESPACES);
  +        fNamespaces = fConfiguration.getFeature(NAMESPACES);           
           fNamespacePrefixes = fConfiguration.getFeature(NAMESPACE_PREFIXES);
  -
  +        fNormalizeData = fConfiguration.getFeature(NORMALIZE_DATA);
  +        
           // save needed symbols
           SymbolTable symbolTable = (SymbolTable)fConfiguration.getProperty(SYMBOL_TABLE);
           if (symbolTable != null) {
  
  
  
  1.16      +15 -2     xml-xerces/java/src/org/apache/xerces/parsers/StandardParserConfiguration.java
  
  Index: StandardParserConfiguration.java
  ===================================================================
  RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/parsers/StandardParserConfiguration.java,v
  retrieving revision 1.15
  retrieving revision 1.16
  diff -u -r1.15 -r1.16
  --- StandardParserConfiguration.java	25 Jan 2002 23:58:38 -0000	1.15
  +++ StandardParserConfiguration.java	28 Jan 2002 00:35:45 -0000	1.16
  @@ -122,7 +122,7 @@
    * @author Arnaud  Le Hors, IBM
    * @author Andy Clark, IBM
    *
  - * @version $Id: StandardParserConfiguration.java,v 1.15 2002/01/25 23:58:38 elena Exp $
  + * @version $Id: StandardParserConfiguration.java,v 1.16 2002/01/28 00:35:45 elena Exp $
    */
   public class StandardParserConfiguration
       extends BasicParserConfiguration 
  @@ -162,6 +162,16 @@
       protected static final String NOTIFY_CHAR_REFS =
           Constants.XERCES_FEATURE_PREFIX + Constants.NOTIFY_CHAR_REFS_FEATURE;
       
  +
  +    /** Feature identifier: expose schema normalized value */
  +    protected static final String NORMALIZE_DATA =
  +    Constants.XERCES_FEATURE_PREFIX + Constants.SCHEMA_NORMALIZED_VALUE;
  +
  +
  +    /** Feature identifier: send element default value via characters() */
  +    protected static final String SCHEMA_ELEMENT_DEFAULT =
  +    Constants.XERCES_FEATURE_PREFIX + Constants.SCHEMA_ELEMENT_DEFAULT;
  +
       // property identifiers
   
       /** Property identifier: error reporter. */
  @@ -316,7 +326,8 @@
               WARN_ON_DUPLICATE_ATTDEF,   WARN_ON_UNDECLARED_ELEMDEF,
               ALLOW_JAVA_ENCODINGS,       CONTINUE_AFTER_FATAL_ERROR,
               LOAD_EXTERNAL_DTD,          NOTIFY_BUILTIN_REFS,
  -            NOTIFY_CHAR_REFS,
  +            NOTIFY_CHAR_REFS,           NORMALIZE_DATA,
  +            SCHEMA_ELEMENT_DEFAULT
           };
           addRecognizedFeatures(recognizedFeatures);
   
  @@ -328,6 +339,8 @@
           setFeature(LOAD_EXTERNAL_DTD, true);
           setFeature(NOTIFY_BUILTIN_REFS, false);
           setFeature(NOTIFY_CHAR_REFS, false);
  +        setFeature(SCHEMA_ELEMENT_DEFAULT, true);
  +        setFeature(NORMALIZE_DATA, true);
   
           // add default recognized properties
           final String[] recognizedProperties = {
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: xerces-cvs-unsubscribe@xml.apache.org
For additional commands, e-mail: xerces-cvs-help@xml.apache.org