You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@xerces.apache.org by pe...@apache.org on 2002/05/29 00:39:39 UTC
cvs commit: xml-xerces/c/src/xercesc/dom/impl DOMWriterImpl.hpp DOMWriterImpl.cpp Makefile.in
peiyongz 02/05/28 15:39:39
Modified: c/src/xercesc/dom/impl Makefile.in
Added: c/src/xercesc/dom/impl DOMWriterImpl.hpp DOMWriterImpl.cpp
Log:
DOM3 Save Interface: DOMWriter/DOMWriterFilter
Revision Changes Path
1.3 +3 -1 xml-xerces/c/src/xercesc/dom/impl/Makefile.in
Index: Makefile.in
===================================================================
RCS file: /home/cvs/xml-xerces/c/src/xercesc/dom/impl/Makefile.in,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- Makefile.in 23 May 2002 15:47:24 -0000 1.2
+++ Makefile.in 28 May 2002 22:39:39 -0000 1.3
@@ -54,7 +54,7 @@
# <http://www.apache.org/>.
#
#
-# $Id: Makefile.in,v 1.2 2002/05/23 15:47:24 knoaman Exp $
+# $Id: Makefile.in,v 1.3 2002/05/28 22:39:39 peiyongz Exp $
#
PLATFORM = @platform@
@@ -113,6 +113,7 @@
DOMStringPool.hpp \
DOMTextImpl.hpp \
DOMTreeWalkerImpl.hpp \
+ DOMWriterImpl.hpp \
XSDElementNSImpl.hpp
DOM_IMPL_C_FILES = \
@@ -151,6 +152,7 @@
DOMStringPool.$(TO) \
DOMTextImpl.$(TO) \
DOMTreeWalkerImpl.$(TO) \
+ DOMWriterImpl.$(TO) \
XSDElementNSImpl.$(TO)
1.1 xml-xerces/c/src/xercesc/dom/impl/DOMWriterImpl.hpp
Index: DOMWriterImpl.hpp
===================================================================
/*
* The Apache Software License, Version 1.1
*
* Copyright (c) 2002 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Xerces" and "Apache Software Foundation" must
* not be used to endorse or promote products derived from this
* software without prior written permission. For written
* permission, please contact apache\@apache.org.
*
* 5. Products derived from this software may not be called "Apache",
* nor may "Apache" appear in their name, without prior written
* permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation, and was
* originally based on software copyright (c) 1999, International
* Business Machines, Inc., http://www.ibm.com . For more information
* on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
/*
* $Id: DOMWriterImpl.hpp,v 1.1 2002/05/28 22:39:39 peiyongz Exp $
* $Log: DOMWriterImpl.hpp,v $
* Revision 1.1 2002/05/28 22:39:39 peiyongz
* DOM3 Save Interface: DOMWriter/DOMWriterFilter
*
*/
/**
* DOMWriterImpl provides an API for serializing (writing) a DOM document out in
* an XML document. The XML data is written to an output stream, the type of
* which depends on the specific language bindings in use. During
* serialization of XML data, namespace fixup is done when possible.
* <p> <code>DOMWriterImpl</code> accepts any node type for serialization. For
* nodes of type <code>Document</code> or <code>Entity</code>, well formed
* XML will be created if possible. The serialized output for these node
* types is either as a Document or an External Entity, respectively, and is
* acceptable input for an XML parser. For all other types of nodes the
* serialized form is not specified, but should be something useful to a
* human for debugging or diagnostic purposes. Note: rigorously designing an
* external (source) form for stand-alone node types that don't already have
* one defined in seems a bit much to take on here.
* <p>Within a Document or Entity being serialized, Nodes are processed as
* follows Documents are written including an XML declaration and a DTD
* subset, if one exists in the DOM. Writing a document node serializes the
* entire document. Entity nodes, when written directly by
* <code>writeNode</code> defined in the <code>DOMWriterImpl</code> interface,
* output the entity expansion but no namespace fixup is done. The resulting
* output will be valid as an external entity. Entity reference nodes are
* serialized as an entity reference of the form
* <code>"&entityName;"</code> in the output. Child nodes (the
* expansion) of the entity reference are ignored. CDATA sections
* containing content characters that can not be represented in the
* specified output encoding are handled according to the
* "split-cdata-sections" feature. If the feature is <code>true</code>, CDATA
* sections are split, and the unrepresentable characters are serialized as
* numeric character references in ordinary content. The exact position and
* number of splits is not specified. If the feature is <code>false</code>,
* unrepresentable characters in a CDATA section are reported as errors. The
* error is not recoverable - there is no mechanism for supplying
* alternative characters and continuing with the serialization. All other
* node types (Element, Text, etc.) are serialized to their corresponding
* XML source form.
* <p> Within the character data of a document (outside of markup), any
* characters that cannot be represented directly are replaced with
* character references. Occurrences of '<' and '&' are replaced by
* the predefined entities &lt; and &amp;. The other predefined
* entities (&gt;, &apos;, etc.) are not used; these characters can be
* included directly. Any character that can not be represented directly in
* the output character encoding is serialized as a numeric character
* reference.
* <p> Attributes not containing quotes are serialized in quotes. Attributes
* containing quotes but no apostrophes are serialized in apostrophes
* (single quotes). Attributes containing both forms of quotes are
* serialized in quotes, with quotes within the value represented by the
* predefined entity &quot;. Any character that can not be represented
* directly in the output character encoding is serialized as a numeric
* character reference.
* <p> Within markup, but outside of attributes, any occurrence of a character
* that cannot be represented in the output character encoding is reported
* as an error. An example would be serializing the element
* <LaCañada/> with the encoding="us-ascii".
* <p> When requested by setting the <code>normalize-characters</code> feature
* on <code>DOMWriterImpl</code>, all data to be serialized, both markup and
* character data, is W3C Text normalized according to the rules defined in
* . The W3C Text normalization process affects only the data as it is being
* written; it does not alter the DOM's view of the document after
* serialization has completed.
* <p>Namespaces are fixed up during serialization, the serialization process
* will verify that namespace declarations, namespace prefixes and the
* namespace URIs associated with Elements and Attributes are consistent. If
* inconsistencies are found, the serialized form of the document will be
* altered to remove them. The algorithm used for doing the namespace fixup
* while serializing a document is a combination of the algorithms used for
* lookupNamespaceURI and lookupNamespacePrefix . previous paragraph to be
* defined closer here.
* <p>Any changes made affect only the namespace prefixes and declarations
* appearing in the serialized data. The DOM's view of the document is not
* altered by the serialization operation, and does not reflect any changes
* made to namespace declarations or prefixes in the serialized output.
* <p> While serializing a document the serializer will write out
* non-specified values (such as attributes whose <code>specified</code> is
* <code>false</code>) if the <code>output-default-values</code> feature is
* set to <code>true</code>. If the <code>output-default-values</code> flag
* is set to <code>false</code> and the <code>use-abstract-schema</code>
* feature is set to <code>true</code> the abstract schema will be used to
* determine if a value is specified or not, if
* <code>use-abstract-schema</code> is not set the <code>specified</code>
* flag on attribute nodes is used to determine if attribute values should
* be written out.
* <p> Ref to Core spec (1.1.9, XML namespaces, 5th paragraph) entity ref
* description about warning about unbound entity refs. Entity refs are
* always serialized as &foo;, also mention this in the load part of
* this spec.
* <p> When serializing a document the DOMWriterImpl checks to see if the document
* element in the document is a DOM Level 1 element or a DOM Level 2 (or
* higher) element (this check is done by looking at the localName of the
* root element). If the root element is a DOM Level 1 element then the
* DOMWriterImpl will issue an error if a DOM Level 2 (or higher) element is
* found while serializing. Likewise if the document element is a DOM Level
* 2 (or higher) element and the DOMWriterImpl sees a DOM Level 1 element an
* error is issued. Mixing DOM Level 1 elements with DOM Level 2 (or higher)
* is not supported.
* <p> <code>DOMWriterImpl</code>s have a number of named features that can be
* queried or set. The name of <code>DOMWriterImpl</code> features must be valid
* XML names. Implementation specific features (extensions) should choose an
* implementation dependent prefix to avoid name collisions.
* <p>Here is a list of properties that must be recognized by all
* implementations.
* <dl>
* <dt><code>"normalize-characters"</code></dt>
* <dd>
* <dl>
* <dt><code>true</code></dt>
* <dd>[
* optional] (default) Perform the W3C Text Normalization of the characters
* in document as they are written out. Only the characters being written
* are (potentially) altered. The DOM document itself is unchanged. </dd>
* <dt>
* <code>false</code></dt>
* <dd>[required] do not perform character normalization. </dd>
* </dl></dd>
* <dt>
* <code>"split-cdata-sections"</code></dt>
* <dd>
* <dl>
* <dt><code>true</code></dt>
* <dd>[required] (default)
* Split CDATA sections containing the CDATA section termination marker
* ']]>' or characters that can not be represented in the output
* encoding, and output the characters using numeric character references.
* If a CDATA section is split a warning is issued. </dd>
* <dt><code>false</code></dt>
* <dd>[
* required] Signal an error if a <code>CDATASection</code> contains an
* unrepresentable character. </dd>
* </dl></dd>
* <dt><code>"validation"</code></dt>
* <dd>
* <dl>
* <dt><code>true</code></dt>
* <dd>[
* optional] Use the abstract schema to validate the document as it is being
* serialized. If validation errors are found the error handler is notified
* about the error. Setting this state will also set the feature
* <code>use-abstract-schema</code> to <code>true</code>. </dd>
* <dt><code>false</code></dt>
* <dd>[
* required] (default) Don't validate the document as it is being
* serialized. </dd>
* </dl></dd>
* <dt><code>"expand-entity-references"</code></dt>
* <dd>
* <dl>
* <dt><code>true</code></dt>
* <dd>[
* optional] Expand <code>EntityReference</code> nodes when serializing. </dd>
* <dt>
* <code>false</code></dt>
* <dd>[required] (default) Serialize all
* <code>EntityReference</code> nodes as XML entity references. </dd>
* </dl></dd>
* <dt>
* <code>"whitespace-in-element-content"</code></dt>
* <dd>
* <dl>
* <dt><code>true</code></dt>
* <dd>[required] (
* default) Output all white spaces in the document. </dd>
* <dt><code>false</code></dt>
* <dd>[
* optional] Only output white space that is not within element content. The
* implementation is expected to use the
* <code>isWhitespaceInElementContent</code> flag on <code>Text</code> nodes
* to determine if a text node should be written out or not. </dd>
* </dl></dd>
* <dt>
* <code>"discard-default-content"</code></dt>
* <dd>
* <dl>
* <dt><code>true</code></dt>
* <dd>[required] (default
* ) Use whatever information available to the implementation (i.e. XML
* schema, DTD, the <code>specified</code> flag on <code>Attr</code> nodes,
* and so on) to decide what attributes and content should be serialized or
* not. Note that the <code>specified</code> flag on <code>Attr</code> nodes
* in itself is not always reliable, it is only reliable when it is set to
* <code>false</code> since the only case where it can be set to
* <code>false</code> is if the attribute was created by a Level 1
* implementation. </dd>
* <dt><code>false</code></dt>
* <dd>[required] Output all attributes and
* all content. </dd>
* </dl></dd>
* <dt><code>"format-canonical"</code></dt>
* <dd>
* <dl>
* <dt><code>true</code></dt>
* <dd>[optional]
* This formatting writes the document according to the rules specified in .
* Setting this feature to true will set the feature "format-pretty-print"
* to false. </dd>
* <dt><code>false</code></dt>
* <dd>[required] (default) Don't canonicalize the
* output. </dd>
* </dl></dd>
* <dt><code>"format-pretty-print"</code></dt>
* <dd>
* <dl>
* <dt><code>true</code></dt>
* <dd>[optional]
* Formatting the output by adding whitespace to produce a pretty-printed,
* indented, human-readable form. The exact form of the transformations is
* not specified by this specification. Setting this feature to true will
* set the feature "format-canonical" to false. </dd>
* <dt><code>false</code></dt>
* <dd>[required]
* (default) Don't pretty-print the result. </dd>
* </dl></dd>
* </dl>
* <p>See also the <a href='http://www.w3.org/TR/2001/WD-DOM-Level-3-ASLS-20011025'>Document Object Model (DOM) Level 3 Abstract Schemas and Load
* and Save Specification</a>.
*/
#ifndef DOMWriterImpl_HEADER_GUARD_
#define DOMWriterImpl_HEADER_GUARD_
#include <xercesc/dom/DOMWriter.hpp>
#include <xercesc/util/RefHashTableOf.hpp>
#include <xercesc/util/KVStringPair.hpp>
/**
 * Concrete <code>DOMWriter</code>: keeps the writer configuration (feature
 * table, encoding, end-of-line sequence, error handler, filter) and
 * serializes DOM nodes through <code>writeNode</code> /
 * <code>writeToString</code>.
 *
 * The copy constructor and assignment operator are declared private and
 * left unimplemented, so instances cannot be copied.
 */
class CDOM_EXPORT DOMWriterImpl:public DOMWriter {
public:
/** @name Constructor and Destructor */
//@{
/**
* Constructor.
*/
DOMWriterImpl();
/**
* Destructor.
*/
~DOMWriterImpl();
//@}
/** @name Implementation of Abstract interface */
//@{
virtual bool canSetFeature(const XMLCh* const featName
, bool state) const;
virtual void setFeature(const XMLCh* const featName
, bool state);
virtual bool getFeature(const XMLCh* const featName) const;
virtual void setEncoding(const XMLCh* const encoding);
virtual const XMLCh* getEncoding() const;
virtual void setNewLine(const XMLCh* const newLine);
virtual const XMLCh* getNewLine() const;
virtual void setErrorHandler(DOMErrorHandler *errorHandler);
virtual DOMErrorHandler* getErrorHandler() const;
virtual void setFilter(DOMWriterFilter *filter);
virtual DOMWriterFilter* getFilter() const;
virtual bool writeNode(XMLFormatTarget* const destination
, const DOMNode &nodeToWrite);
/**
* The caller is responsible for the release of the returned string
*/
virtual XMLCh* writeToString(const DOMNode &nodeToWrite);
//@}
private:
/** unimplemented copy ctor and assignment operator */
DOMWriterImpl(const DOMWriterImpl&);
DOMWriterImpl & operator = (const DOMWriterImpl&);
/** helpers used while serializing **/
// initSession: resets the per-call session variables (encoding and
// end-of-line sequence actually used, error count)
void initSession(const DOMNode* const);
// processNode: streams out a node and, recursively, its children
void processNode(const DOMNode* const);
DOMNodeFilter::FilterAction checkFilter(const DOMNode* const) const;
inline void setURCharRef();
// -----------------------------------------------------------------------
// Private data members
//
// fFeatures
// feature name -> KVStringPair(name, value) table; owned
//
// fEncoding
// encoding requested through setEncoding(); owned copy
//
// fNewLine
// end-of-line sequence requested through setNewLine(); owned copy
//
// fErrorHandler
// not owned
//
// fFilter
// not owned
//
// fEncodingUsed (session var)
// the actual encoding used in writeNode(),
// it does not own any data (memory).
//
// fNewLineUsed (session var)
// the actual "end of line" sequence used in writeNode(),
// it does not own any data (memory).
//
// fFormatter (session var)
// the formatter used in writeNode()
//
// fErrorCount (session var)
// reset by initSession(); writeNode() reports success only
// when it is still zero at the end of the call
//
// -----------------------------------------------------------------------
RefHashTableOf<KVStringPair> *fFeatures;
XMLCh *fEncoding;
XMLCh *fNewLine;
DOMErrorHandler *fErrorHandler;
DOMWriterFilter *fFilter;
//session vars
const XMLCh *fEncodingUsed;
const XMLCh *fNewLineUsed;
XMLFormatter *fFormatter;
int fErrorCount;
};
#endif
1.1 xml-xerces/c/src/xercesc/dom/impl/DOMWriterImpl.cpp
Index: DOMWriterImpl.cpp
===================================================================
/*
* The Apache Software License, Version 1.1
*
* Copyright (c) 2002 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Xerces" and "Apache Software Foundation" must
* not be used to endorse or promote products derived from this
* software without prior written permission. For written
* permission, please contact apache\@apache.org.
*
* 5. Products derived from this software may not be called "Apache",
* nor may "Apache" appear in their name, without prior written
* permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation, and was
* originally based on software copyright (c) 1999, International
* Business Machines, Inc., http://www.ibm.com . For more information
* on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
/*
* $Id: DOMWriterImpl.cpp,v 1.1 2002/05/28 22:39:39 peiyongz Exp $
* $Log: DOMWriterImpl.cpp,v $
* Revision 1.1 2002/05/28 22:39:39 peiyongz
* DOM3 Save Interface: DOMWriter/DOMWriterFilter
*
*/
#include <xercesc/dom/impl/DOMWriterImpl.hpp>
#include <xercesc/dom/DOM.hpp>
#include <xercesc/dom/impl/DOMErrorImpl.hpp>
#include <xercesc/dom/impl/DOMLocatorImpl.hpp>
#include <xercesc/framework/MemBufFormatTarget.hpp>
#include <xercesc/util/XMLUniDefs.hpp>
#include <xercesc/util/TranscodingException.hpp>
#include <xercesc/util/Janitor.hpp>
// ---------------------------------------------------------------------------
//  File-local constant strings
//
//  Every table below is a null-terminated XMLCh string, spelled out one
//  character constant at a time. The comment in front of each table shows
//  the text it encodes.
// ---------------------------------------------------------------------------

// "T" and "F": the values kept in the feature table
static const XMLCh gTrue[]  = { chLatin_T, chNull };
static const XMLCh gFalse[] = { chLatin_F, chNull };

// "\n": default end-of-line sequence
static const XMLCh gEOLSeq[] = { chLF, chNull };

// "UTF-8"
static const XMLCh gUTF8[] = {
    chLatin_U, chLatin_T, chLatin_F, chDash, chDigit_8,
    chNull
};

// "</"
static const XMLCh gEndElement[] = { chOpenAngle, chForwardSlash, chNull };

// "?>"
static const XMLCh gEndPI[] = { chQuestion, chCloseAngle, chNull };

// "<?"
static const XMLCh gStartPI[] = { chOpenAngle, chQuestion, chNull };

// "<?xml version="1.0"  (closing quote comes from gXMLDecl2)
static const XMLCh gXMLDecl1[] = {
    chOpenAngle, chQuestion,
    chLatin_x, chLatin_m, chLatin_l,
    chSpace,
    chLatin_v, chLatin_e, chLatin_r, chLatin_s, chLatin_i, chLatin_o, chLatin_n,
    chEqual, chDoubleQuote,
    chDigit_1, chPeriod, chDigit_0,
    chNull
};

// "" encoding=""  (closes the version value, opens the encoding value)
static const XMLCh gXMLDecl2[] = {
    chDoubleQuote, chSpace,
    chLatin_e, chLatin_n, chLatin_c, chLatin_o, chLatin_d, chLatin_i, chLatin_n, chLatin_g,
    chEqual, chDoubleQuote,
    chNull
};

// ""?>"
static const XMLCh gXMLDecl3[] = {
    chDoubleQuote, chQuestion, chCloseAngle, chNull
};

// "<![CDATA["
static const XMLCh gStartCDATA[] = {
    chOpenAngle, chBang, chOpenSquare,
    chLatin_C, chLatin_D, chLatin_A, chLatin_T, chLatin_A,
    chOpenSquare,
    chNull
};

// "]]>"
static const XMLCh gEndCDATA[] = {
    chCloseSquare, chCloseSquare, chCloseAngle, chNull
};

// "<!--"
static const XMLCh gStartComment[] = {
    chOpenAngle, chBang, chDash, chDash, chNull
};

// "-->"
static const XMLCh gEndComment[] = {
    chDash, chDash, chCloseAngle, chNull
};

// "<!DOCTYPE "
static const XMLCh gStartDoctype[] = {
    chOpenAngle, chBang,
    chLatin_D, chLatin_O, chLatin_C, chLatin_T, chLatin_Y, chLatin_P, chLatin_E,
    chSpace,
    chNull
};

// "PUBLIC ""
static const XMLCh gPublic[] = {
    chLatin_P, chLatin_U, chLatin_B, chLatin_L, chLatin_I, chLatin_C,
    chSpace, chDoubleQuote,
    chNull
};

// "SYSTEM ""
static const XMLCh gSystem[] = {
    chLatin_S, chLatin_Y, chLatin_S, chLatin_T, chLatin_E, chLatin_M,
    chSpace, chDoubleQuote,
    chNull
};

// "<!ENTITY "
static const XMLCh gStartEntity[] = {
    chOpenAngle, chBang,
    chLatin_E, chLatin_N, chLatin_T, chLatin_I, chLatin_T, chLatin_Y,
    chSpace,
    chNull
};

// "NDATA ""
static const XMLCh gNotation[] = {
    chLatin_N, chLatin_D, chLatin_A, chLatin_T, chLatin_A,
    chSpace, chDoubleQuote,
    chNull
};

// "Unrecognized Node Type"
static const XMLCh gUnrecognizedNodeType[] = {
    chLatin_U, chLatin_n, chLatin_r, chLatin_e, chLatin_c, chLatin_o,
    chLatin_g, chLatin_n, chLatin_i, chLatin_z, chLatin_e, chLatin_d,
    chSpace,
    chLatin_N, chLatin_o, chLatin_d, chLatin_e,
    chSpace,
    chLatin_T, chLatin_y, chLatin_p, chLatin_e,
    chNull
};
//
//  Notification of the error through the error handler
//
//  Runs `action` with the formatter switched to fail on unrepresentable
//  characters. If `action` throws a TranscodingException, the exception
//  message is wrapped in a fatal DOMError and handed to the registered
//  error handler.
//
//  The application may instruct the engine to abort serialization
//  by returning "false" from its handler; the exception is then rethrown.
//  It is also rethrown unconditionally when `forceToRethrow` is set.
//  Otherwise the error is counted in fErrorCount and execution continues
//  after the macro.
//
//  No error handler may have been registered (fErrorHandler is 0 until
//  setErrorHandler() is called). In that case nobody can be asked, so the
//  error is treated as "continue": it is still counted, and still rethrown
//  when `forceToRethrow` is set.
//
//  The expansion relies on `nodeToWrite`, `fFormatter`, `fErrorHandler`
//  and `fErrorCount` being in scope where the macro is used.
//
//  REVISIT: update the locator ctor once the line#, col#, uri and offset
//  are available from DOM3 core
//
//  REVISIT: using a thrown exception to abort serialization is an
//  interesting thing here: since the serializer is a recursively called
//  function, we can't use return, obviously. However we may have multiple
//  try/catch along its way back to writeNode(). So far we can't think of a
//  "short-cut" to go "directly" back.
//
#define  TRY_CATCH_THROW(action, forceToRethrow)                     \
fFormatter->setUnRepFlags(XMLFormatter::UnRep_Fail);                 \
try                                                                  \
{                                                                    \
    action;                                                          \
}                                                                    \
catch(TranscodingException const &e)                                 \
{                                                                    \
    bool retVal = true;                                              \
                                                                     \
    if (fErrorHandler)                                               \
    {                                                                \
        DOMLocatorImpl locator(0                                     \
                             , 0                                     \
                             , (DOMNode* const)nodeToWrite           \
                             , 0                                     \
                             , 0);                                   \
        DOMErrorImpl   domError(DOMError::SEVERITY_FATAL_ERROR       \
                              , e.getMessage()                       \
                              , &locator);                           \
        retVal = fErrorHandler->handleError(domError);               \
    }                                                                \
                                                                     \
    if (forceToRethrow || !retVal)                                   \
        throw;                                                       \
    else                                                             \
        fErrorCount++;                                               \
}
//
//  Releases everything the writer owns: the feature table and the copies
//  of the encoding name and end-of-line sequence.
//
DOMWriterImpl::~DOMWriterImpl()
{
    delete fFeatures;

    // fEncoding and fNewLine are character arrays obtained from
    // XMLString::replicate(), so they have to go through the array
    // form of delete.
    delete [] fEncoding;
    delete [] fNewLine;

    // we don't own/adopt error handler and filter
}
// feature true false
// ================================================================================
//canonical-form [optional] Not Supported [required] (default)
//discard-default-content [required] (default) [required]
//entity [required] (default) [optional]
//format-pretty-print [optional] Not Supported [required] (default)
//normalize-characters [optional] Not Supported [required] (default)
//split-cdata-sections [required] (default) [required]
//validation [optional] Not Supported [required] (default)
//whitespace-in-element-content [required] (default) [optional] Not Supported
//
//
//  Builds a writer with no encoding, end-of-line sequence, error handler or
//  filter set, and fills the feature table with the default value of every
//  recognized feature.
//
//  The session variables are given defined values as well, so that the
//  object never carries indeterminate members before the first
//  writeNode() call.
//
DOMWriterImpl::DOMWriterImpl()
:fFeatures(0)
,fEncoding(0)
,fNewLine(0)
,fErrorHandler(0)
,fFilter(0)
,fEncodingUsed(0)
,fNewLineUsed(0)
,fFormatter(0)
,fErrorCount(0)
{
    // second argument: the table adopts (and deletes) the stored pairs
    fFeatures = new RefHashTableOf<KVStringPair>(9, true);

    // features defaulting to false
    fFeatures->put((void*)CanonicalForm,
                   new KVStringPair(CanonicalForm, gFalse));
    fFeatures->put((void*)FormatPrettyPrint,
                   new KVStringPair(FormatPrettyPrint, gFalse));
    fFeatures->put((void*)NormalizeCharacters,
                   new KVStringPair(NormalizeCharacters, gFalse));
    fFeatures->put((void*)Validation,
                   new KVStringPair(Validation, gFalse));

    // features defaulting to true
    fFeatures->put((void*)DiscardDefaultContent,
                   new KVStringPair(DiscardDefaultContent, gTrue));
    fFeatures->put((void*)Entities,
                   new KVStringPair(Entities, gTrue));
    fFeatures->put((void*)SplitCdataSections,
                   new KVStringPair(SplitCdataSections, gTrue));
    fFeatures->put((void*)WhitespaceInElementContent,
                   new KVStringPair(WhitespaceInElementContent, gTrue));
}
//
//  Tells whether featName may be set to state.
//  Refer to the feature table above: a name that is null or not in the
//  feature table is rejected, as is every combination the table marks
//  "Not Supported".
//
bool DOMWriterImpl::canSetFeature(const XMLCh* const featName
                                , bool               state) const
{
    // featName not recognized
    if (!featName)
        return false;

    if (!fFeatures->get(featName))
        return false;

    if (state)
    {
        // features that cannot be switched on
        const bool onNotSupported =
               (XMLString::compareString(featName, CanonicalForm)       == 0)
            || (XMLString::compareString(featName, FormatPrettyPrint)   == 0)
            || (XMLString::compareString(featName, NormalizeCharacters) == 0)
            || (XMLString::compareString(featName, Validation)          == 0);

        return !onNotSupported;
    }

    // the one feature that cannot be switched off
    return (XMLString::compareString(featName, WhitespaceInElementContent) != 0);
}
void DOMWriterImpl::setFeature(const XMLCh* const featName
, bool state)
{
if ((!featName) || (XMLString::stringLen(featName)==0))
throw DOMException(DOMException::NOT_FOUND_ERR, 0);
if (!fFeatures->get(featName))
throw DOMException(DOMException::NOT_FOUND_ERR, featName);
//"Feature "+featName+" not found");
if (!canSetFeature(featName, state))
throw DOMException(DOMException::NOT_SUPPORTED_ERR, featName);
//DOMException.NOT_SUPPORTED_ERR,"Feature "+featName+" cannot be set as "+state);
// REVISIT
// canonical-form and format-pretty-print can not be both set to true
// meaning set canonical-form true will automatically set
// format-pretty-print to false and vise versa.
// right now only false is supported for both of them, we need
// not worry about that, but later if we decide to support true for
// them, we need to add code to ensure they are not true at the same time.
fFeatures->put((void*)featName, new KVStringPair(featName, (state? gTrue : gFalse)));
return;
}
bool DOMWriterImpl::getFeature(const XMLCh* const featName) const
{
if ((!featName) || (XMLString::stringLen(featName)==0))
throw DOMException(DOMException::NOT_FOUND_ERR, 0);
KVStringPair *kvData = fFeatures->get(featName);
if (!kvData)
{
throw DOMException(DOMException::NOT_FOUND_ERR, featName);
//DOMException.NOT_FOUND_ERR,"Feature "+featName+" not found");
}
return ( (XMLString::compareString(kvData->getValue(), gTrue)==0)? true : false);
}
// we don't check the validity of the encoding set
void DOMWriterImpl::setEncoding(const XMLCh* const encoding)
{
delete fEncoding;
fEncoding = XMLString::replicate(encoding);
}
//
//  Returns the encoding name stored by setEncoding(), or 0 when none has
//  been set. The string stays owned by the writer.
//
const XMLCh* DOMWriterImpl::getEncoding() const
{
    return this->fEncoding;
}
void DOMWriterImpl::setNewLine(const XMLCh* const newLine)
{
delete fNewLine;
fNewLine = XMLString::replicate(newLine);
}
//
//  Returns the end-of-line sequence stored by setNewLine(), or 0 when none
//  has been set. The string stays owned by the writer.
//
const XMLCh* DOMWriterImpl::getNewLine() const
{
    return this->fNewLine;
}
//
//  Registers the error handler. The handler is not adopted: the writer
//  only keeps the pointer.
//
void DOMWriterImpl::setErrorHandler(DOMErrorHandler *errorHandler)
{
    this->fErrorHandler = errorHandler;
}
//
//  Returns the error handler registered with setErrorHandler(), or 0.
//
DOMErrorHandler* DOMWriterImpl::getErrorHandler() const
{
    return this->fErrorHandler;
}
//
//  Registers the writer filter. The filter is not adopted: the writer
//  only keeps the pointer.
//
void DOMWriterImpl::setFilter(DOMWriterFilter *filter)
{
    this->fFilter = filter;
}
//
//  Returns the filter registered with setFilter(), or 0.
//
DOMWriterFilter* DOMWriterImpl::getFilter() const
{
    return this->fFilter;
}
//
//  Serializes nodeToWrite to destination.
//
//  A formatter is created for the duration of the call, using the encoding
//  chosen by initSession(). For a document node the XML declaration is
//  written first. The node is then handed to processNode() and a final
//  end-of-line sequence is appended.
//
//  Returns true when no error was counted during the session, false when
//  a TranscodingException or DOMException aborted it or errors were
//  counted. Any other exception is passed on to the caller.
//
//  fFormatter is reset to 0 on every way out, so the member never refers
//  to the formatter once it has been destroyed.
//
bool DOMWriterImpl::writeNode(XMLFormatTarget* const destination
                            , const DOMNode         &nodeToWrite)
{
    //init session vars
    initSession(&nodeToWrite);

    // no formatter exists until the session below has created one
    fFormatter = 0;

    try
    {
        fFormatter = new XMLFormatter(fEncodingUsed
                                    , destination
                                    , XMLFormatter::NoEscapes
                                    , XMLFormatter::UnRep_CharRef);

        // destroys the formatter when this scope is left
        Janitor<XMLFormatter> janName(fFormatter);

        //
        // if this is a document node
        // print out the XML Decl node first
        //
        if (nodeToWrite.getNodeType() == DOMNode::DOCUMENT_NODE)
        {
            setURCharRef();
            *fFormatter << gXMLDecl1 << gXMLDecl2 << fEncodingUsed << gXMLDecl3 << fNewLineUsed;
        }

        processNode(&nodeToWrite);
        *fFormatter << fNewLineUsed; // add linefeed in requested output encoding
    }

    //
    // The serialize engine (processNode) throws an exception to abort
    // serialization if
    //
    //   . A fatal error occurs which renders the output ill-formed, or
    //   . Instructed by the application's error handler
    //
    catch (const TranscodingException&)
    {
        fFormatter = 0;
        return false;
    }
    catch (const DOMException&)
    {
        fFormatter = 0;
        return false;
    }

    //
    // DOMSystemException
    // This exception will be raised in response to any sort of IO or system
    // error that occurs while writing to the destination. It may wrap an
    // underlying system exception.
    //
    //catch (RuntimeException const &)
    catch (...)
    {
        // REVISIT generate a DOMSystemException wrapping the underlying
        //         exception.
        fFormatter = 0;
        throw;
    }

    // the Janitor has already destroyed the formatter at this point
    fFormatter = 0;

    //
    // true if node was successfully serialized and
    // false in case a failure occurred and the
    // failure wasn't canceled by the error handler.
    //
    return (fErrorCount == 0);
}
//
//  Serializes nodeToWrite into a memory buffer and returns the string
//  obtained from that buffer, or 0 when writeNode() reports failure or
//  throws.
//
//  We don't throw DOMSTRING_SIZE_ERR since we are no longer
//  using DOMString.
//
XMLCh* DOMWriterImpl::writeToString(const DOMNode &nodeToWrite)
{
    MemBufFormatTarget memTarget;
    bool               succeeded = false;

    try
    {
        succeeded = writeNode(&memTarget, nodeToWrite);
    }
    catch (...)
    {
        //
        // there is a possibility that a memory allocation
        // exception is thrown in the XMLBuffer class
        //
        return 0;
    }

    if (!succeeded)
        return 0;

    return memTarget.getString();
}
//
//  Prepares the session variables for one writeNode() call: picks the
//  encoding and the end-of-line sequence that will actually be used and
//  clears the error counter. nodeToWrite is not consulted yet (see the
//  REVISIT note below).
//
void DOMWriterImpl::initSession(const DOMNode* const nodeToWrite)
{
    /**
     * The encoding to use when writing is determined as follows:
     *     If the encoding attribute has been set, that value will be used.
     *     If the encoding attribute is null or empty,
     *            but the item to be written, or
     *            the owner document specified encoding (ie. the "actualEncoding"
     *            from the document) that value will be used.
     *     If neither of the above provides an encoding name, a default encoding of
     *            "UTF-8" will be used.
     */
    const bool encodingWasSet = (fEncoding != 0) && (XMLString::stringLen(fEncoding) != 0);

    // REVISIT: DOM3 core may provide getEncoding()
    // fEncodingUsed = nodeToWrite.getEncoding();
    fEncodingUsed = encodingWasSet ? fEncoding : gUTF8;

    /**
     *  The end-of-line sequence of characters to be used in the XML being
     *  written out. The only permitted values are these:
     *     . null
     *
     *  Use a default end-of-line sequence. DOM implementations should choose
     *  the default to match the usual convention for text files in the
     *  environment being used. Implementations must choose a default
     *  sequence that matches one of those allowed by 2.11 "End-of-Line
     *  Handling".
     *
     *    CR    The carriage-return character (#xD)
     *    CR-LF The carriage-return and line-feed characters (#xD #xA)
     *    LF    The line-feed character (#xA)
     *
     *  The default value for this attribute is null
     */
    if (fNewLine && XMLString::stringLen(fNewLine))
        fNewLineUsed = fNewLine;
    else
        fNewLineUsed = gEOLSeq;

    // start the session with a clean error count
    fErrorCount = 0;
}
//
// Characters not representable in output encoding,
//
// 1. CHARACTER DATA (outside of markup) --- no error
// ordinary character -> numeric character reference
// '<' and '&' -> &lt; and &amp;
//
// 2. Within MARKUP, but outside of attributes
// reported as an error --- ERROR
// markup:
// start tag done
// end tag done
// empty element tag done
// entity references done
// character references // REVISIT
// comments done
// CDATA section delimiters done, done
// document type declarations done
// processing instructions (PI) done
//
// 3. Within ATTRIBUTE
// -> numeric character reference
// no quotes -> in quotes
// with quotes, no apostrophe -> in apostrophe
// with quotes and apostrophe -> in quotes and &quot;
//
// 4. CDATA sections
// "split_cdata_section" true --- char ref
// false --- ERROR
//
// ---------------------------------------------------------------------------
// Stream out a DOM node, and, recursively, all of its children. This
// function is the heart of writing a DOM tree out as XML source. Give it
// a document node and it will do the whole thing.
// ---------------------------------------------------------------------------
void DOMWriterImpl::processNode(const DOMNode* const nodeToWrite)
{
// Get the name and value out for convenience
const XMLCh* nodeName = nodeToWrite->getNodeName();
const XMLCh* nodeValue = nodeToWrite->getNodeValue();
unsigned long lent = XMLString::stringLen(nodeValue);
/***
{FILTER_ACCEPT = 1,
FILTER_REJECT = 2,
FILTER_SKIP = 3};
***/
switch (nodeToWrite->getNodeType())
{
case DOMNode::TEXT_NODE:
{
if (checkFilter(nodeToWrite) != DOMNodeFilter::FILTER_ACCEPT)
break;
setURCharRef(); // character data
fFormatter->formatBuf(nodeValue, lent, XMLFormatter::CharEscapes);
break;
}
case DOMNode::PROCESSING_INSTRUCTION_NODE:
{
if (checkFilter(nodeToWrite) != DOMNodeFilter::FILTER_ACCEPT)
break;
TRY_CATCH_THROW
(
*fFormatter << XMLFormatter::NoEscapes << gStartPI << nodeName;
if (lent > 0)
{
*fFormatter << chSpace << nodeValue;
}
*fFormatter << gEndPI;
,true
)
break;
}
case DOMNode::DOCUMENT_NODE: // Not to be shown to Filter
{
DOMNode *child = nodeToWrite->getFirstChild();
while( child != 0)
{
processNode(child);
*fFormatter << fNewLineUsed; // add linefeed in requested output encoding
child = child->getNextSibling();
}
break;
}
case DOMNode::ELEMENT_NODE:
{
DOMNodeFilter::FilterAction filterAction = checkFilter(nodeToWrite);
if ( filterAction == DOMNodeFilter::FILTER_REJECT)
break;
if ( filterAction == DOMNodeFilter::FILTER_ACCEPT)
{
// this element attributes child elements
// accept yes yes yes
// skip no no yes
//
TRY_CATCH_THROW
(
// The name has to be representable without any escapes
*fFormatter << XMLFormatter::NoEscapes
<< chOpenAngle << nodeName;
,true
)
// Output any attributes on this element
setURCharRef();
DOMNamedNodeMap *attributes = nodeToWrite->getAttributes();
int attrCount = attributes->getLength();
bool discard = getFeature(DiscardDefaultContent);
for (int i = 0; i < attrCount; i++)
{
DOMNode *attribute = attributes->item(i);
// Not to be shown to Filter
//
//"discard-default-content"
// true
// [required] (default)
// Use whatever information available to the implementation
// (i.e. XML schema, DTD, the specified flag on Attr nodes,
// and so on) to decide what attributes and content should be
// discarded or not.
// Note that the specified flag on Attr nodes in itself is
// not always reliable, it is only reliable when it is set
// to false since the only case where it can be set to false
// is if the attribute was created by the implementation.
// The default content won't be removed if an implementation
// does not have any information available.
// false
// [required]
// Keep all attributes and all content.
//
if (discard && !((DOMAttr*)attribute )->getSpecified())
continue;
//
// Again the name has to be completely representable. But the
// attribute can have refs and requires the attribute style
// escaping.
//
*fFormatter << XMLFormatter::NoEscapes
<< chSpace << attribute->getNodeName()
<< chEqual << chDoubleQuote
<< XMLFormatter::AttrEscapes
<< attribute->getNodeValue()
<< XMLFormatter::NoEscapes
<< chDoubleQuote;
} // end of for
} // end of FILTER_ACCEPT
// FILTER_SKIP may start from here
//
// Test for the presence of children, which includes both
// text content and nested elements.
//
DOMNode *child = nodeToWrite->getFirstChild();
if (child != 0)
{
// There are children. Close start-tag, and output children.
// No escapes are legal here
if (filterAction == DOMNodeFilter::FILTER_ACCEPT)
*fFormatter << XMLFormatter::NoEscapes << chCloseAngle;
while( child != 0)
{
processNode(child);
child = child->getNextSibling();
}
if (filterAction == DOMNodeFilter::FILTER_ACCEPT)
{
TRY_CATCH_THROW
(
*fFormatter << XMLFormatter::NoEscapes << gEndElement
<< nodeName << chCloseAngle;
,true
)
}
}
else
{
//
// There were no children. Output the short form close of
// the element start tag, making it an empty-element tag.
//
if (filterAction == DOMNodeFilter::FILTER_ACCEPT)
{
TRY_CATCH_THROW
(
*fFormatter << XMLFormatter::NoEscapes << chForwardSlash << chCloseAngle;
, true
)
}
}
break;
}
case DOMNode::ENTITY_REFERENCE_NODE:
{
//"entities"
//true
//[required] (default)
//Keep EntityReference and Entity nodes in the document.
//false
//[optional]
//Remove all EntityReference and Entity nodes from the document,
// putting the entity expansions directly in their place.
// Text nodes are into "normal" form.
//Only EntityReference nodes to non-defined entities are kept in the document.
if (checkFilter(nodeToWrite) != DOMNodeFilter::FILTER_ACCEPT)
break;
if (getFeature(Entities))
{
TRY_CATCH_THROW
(
*fFormatter << XMLFormatter::NoEscapes << chAmpersand
<< nodeName << chSemiColon;
, true
)
}
else
{
DOMNode *child;
for (child = nodeToWrite->getFirstChild();
child != 0;
child = child->getNextSibling())
{
processNode(child);
}
}
break;
}
case DOMNode::CDATA_SECTION_NODE:
{
if (checkFilter(nodeToWrite) != DOMNodeFilter::FILTER_ACCEPT)
break;
TRY_CATCH_THROW
(
*fFormatter << XMLFormatter::NoEscapes << gStartCDATA;
, true
)
if (getFeature(SplitCdataSections))
{
setURCharRef();
*fFormatter << nodeValue;
}
else
{
TRY_CATCH_THROW
(
*fFormatter << nodeValue;
, true
)
}
TRY_CATCH_THROW
(
*fFormatter << gEndCDATA;
, true
)
break;
}
case DOMNode::COMMENT_NODE:
{
if (checkFilter(nodeToWrite) != DOMNodeFilter::FILTER_ACCEPT)
break;
TRY_CATCH_THROW
(
*fFormatter << XMLFormatter::NoEscapes << gStartComment
<< nodeValue << gEndComment;
, true
)
break;
}
case DOMNode::DOCUMENT_TYPE_NODE: // Not to be shown to Filter
{
DOMDocumentType *doctype = (DOMDocumentType *)nodeToWrite;;
fFormatter->setEscapeFlags(XMLFormatter::NoEscapes);
TRY_CATCH_THROW
(
*fFormatter << gStartDoctype << nodeName;
const XMLCh *id = doctype->getPublicId();
if (id && *id)
{
*fFormatter << chSpace << gPublic << id << chDoubleQuote;
id = doctype->getSystemId();
if (id && *id)
{
*fFormatter << chSpace << chDoubleQuote << id << chDoubleQuote;
}
else
{
//
// 4.2.2 External Entities
// [Definition: If the entity is not internal,
// it is an external entity, declared as follows:]
// External Entity Declaration
// [75] ExternalID ::= 'SYSTEM' S SystemLiteral
// | 'PUBLIC' S PubidLiteral S SystemLiteral
//
DOMLocatorImpl locator(0, 0, (DOMNode* const)nodeToWrite, 0, 0);
DOMErrorImpl domError(DOMError::SEVERITY_FATAL_ERROR
, gUnrecognizedNodeType
, &locator);
fErrorHandler->handleError(domError);
throw DOMException(DOMException::NOT_FOUND_ERR, 0);
// systemLiteral not found
}
}
else
{
id = doctype->getSystemId();
if (id && *id)
{
*fFormatter << chSpace << gSystem << id << chDoubleQuote;
}
}
id = doctype->getInternalSubset();
if (id && *id)
{
*fFormatter << chSpace << chOpenSquare << id << chCloseSquare;
}
*fFormatter << chCloseAngle;
, true
) // end of TRY_CATCH_THROW
break;
}
case DOMNode::ENTITY_NODE: // Not to be shown to Filter
{
//
// REVISIT: how does the feature "entities" impact
// entity node?
//
fFormatter->setEscapeFlags(XMLFormatter::NoEscapes);
*fFormatter << gStartEntity << nodeName;
const XMLCh * id = ((DOMEntity*)nodeToWrite)->getPublicId();
if (id)
*fFormatter << gPublic << id << chDoubleQuote;
id = ((DOMEntity*)nodeToWrite)->getSystemId();
if (id)
*fFormatter << gSystem << id << chDoubleQuote;
id = ((DOMEntity*)nodeToWrite)->getNotationName();
if (id)
*fFormatter << gNotation << id << chDoubleQuote;
*fFormatter << chCloseAngle << fNewLineUsed;
break;
}
default:
/***
This is an implementation specific behaviour, we abort serialization
once unrecognized node type encountered.
***/
{
DOMLocatorImpl locator(0, 0, (DOMNode* const)nodeToWrite, 0, 0);
DOMErrorImpl domError(DOMError::SEVERITY_FATAL_ERROR
, gUnrecognizedNodeType
, &locator);
fErrorHandler->handleError(domError);
throw DOMException(DOMException::NOT_FOUND_ERR, 0);
// UnreognizedNodeType;
}
break;
}
}
DOMNodeFilter::FilterAction DOMWriterImpl::checkFilter(const DOMNode* const node) const
{
if (!fFilter || (fFilter->showNode(node) == false))
return DOMNodeFilter::FILTER_ACCEPT;
return (DOMNodeFilter::FilterAction) fFilter->acceptNode(node);
}
inline void DOMWriterImpl::setURCharRef()
{
fFormatter->setUnRepFlags(XMLFormatter::UnRep_CharRef);
}
---------------------------------------------------------------------
To unsubscribe, e-mail: xerces-cvs-unsubscribe@xml.apache.org
For additional commands, e-mail: xerces-cvs-help@xml.apache.org