You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@xalan.apache.org by db...@apache.org on 2001/01/08 19:24:32 UTC
cvs commit: xml-xalan/c/src/XMLSupport FormatterToHTML.cpp FormatterToHTML.hpp FormatterToXML.cpp FormatterToXML.hpp
dbertoni 01/01/08 10:24:32
Modified: c/src/XMLSupport FormatterToHTML.cpp FormatterToHTML.hpp
FormatterToXML.cpp FormatterToXML.hpp
Log:
Cleaned up some unused code and moved common code into a function.
Revision Changes Path
1.43 +166 -74 xml-xalan/c/src/XMLSupport/FormatterToHTML.cpp
Index: FormatterToHTML.cpp
===================================================================
RCS file: /home/cvs/xml-xalan/c/src/XMLSupport/FormatterToHTML.cpp,v
retrieving revision 1.42
retrieving revision 1.43
diff -u -r1.42 -r1.43
--- FormatterToHTML.cpp 2001/01/04 19:22:37 1.42
+++ FormatterToHTML.cpp 2001/01/08 18:24:30 1.43
@@ -79,6 +79,7 @@
#include <PlatformSupport/DOMStringHelper.hpp>
#include <PlatformSupport/Writer.hpp>
#include <PlatformSupport/XalanUnicode.hpp>
+#include <PlatformSupport/XalanXMLChar.hpp>
@@ -226,7 +227,7 @@
int indent,
const XalanDOMString& version,
const XalanDOMString& standalone,
- bool xmlDecl) :
+ bool xmlDecl) :
FormatterToXML(
writer,
version,
@@ -243,7 +244,8 @@
m_inBlockElem(false),
m_isRawStack(),
m_isScriptOrStyleElem(false),
- m_isFirstElem(true)
+ m_escapeURLs(false),
+ m_elementLevel(0)
{
initCharsMap();
}
@@ -326,6 +328,12 @@
void
FormatterToHTML::startDocument()
{
+ // Clear the buffer, just in case...
+ clear(m_stringBuffer);
+
+ // Reset this, just in case...
+ m_elementLevel = 0;
+
m_startNewLine = false;
m_shouldWriteXMLHeader = false;
@@ -375,7 +383,7 @@
void
FormatterToHTML::endDocument()
{
- m_isFirstElem = true;
+ assert(m_elementLevel == 0);
FormatterToXML::endDocument();
}
@@ -404,7 +412,7 @@
m_ispreserve = false;
}
else if(m_doIndent &&
- m_isFirstElem == false &&
+ m_elementLevel > 0 &&
(m_inBlockElem == false || isBlockElement == true))
{
m_startNewLine = true;
@@ -434,8 +442,6 @@
m_isprevtext = false;
- m_isFirstElem = false;
-
if (isHeadElement)
{
writeParentTagEnd();
@@ -448,6 +454,11 @@
accumContent(XalanUnicode::charQuoteMark);
accumContent(XalanUnicode::charGreaterThanSign);
}
+
+ // Increment the level...
+ ++m_elementLevel;
+
+ assert(m_elementLevel > 0);
}
@@ -523,9 +534,12 @@
}
}
- m_isFirstElem = false;
-
m_isprevtext = false;
+
+ // Decrement the level...
+ --m_elementLevel;
+
+ assert(m_elementLevel >= 0);
}
@@ -668,10 +682,10 @@
// If outside of an element, then put in a new line. This whitespace
// is not significant.
-// if (m_elemStack.empty() == true)
-// {
-// outputLineSep();
-// }
+ if (m_elementLevel == 0)
+ {
+ outputLineSep();
+ }
m_startNewLine = true;
}
@@ -811,7 +825,7 @@
{
accumContent(ch); // no escaping in this case, as specified in 15.2
}
- else if (accumDefaultEntity(ch, i, string, strLen, false) == false)
+ else if (accumDefaultEntity(ch, i, string, strLen, true) == false)
{
if (0xd800 <= ch && ch < 0xdc00)
{
@@ -837,7 +851,10 @@
accumContent(XalanUnicode::charAmpersand);
accumContent(XalanUnicode::charNumberSign);
- accumContent(UnsignedLongToDOMString(next));
+
+ accumContent(UnsignedLongToDOMString(next, m_stringBuffer));
+ clear(m_stringBuffer);
+
accumContent(XalanUnicode::charSemicolon);
}
else if(ch >= 160 && ch <= 255)
@@ -933,21 +950,6 @@
void
-FormatterToHTML::copyEntityIntoBuffer(const XalanDOMCharVectorType& s)
-{
- accumContent(XalanUnicode::charAmpersand);
-
- for(XalanDOMCharVectorType::const_iterator i = s.begin(); *i != 0; ++i)
- {
- accumContent(*i);
- }
-
- accumContent(XalanUnicode::charSemicolon);
-}
-
-
-
-void
FormatterToHTML::processAttribute(
const XalanDOMChar* name,
const XalanDOMChar* value,
@@ -1030,91 +1032,181 @@
// causing damage. If the URL is already properly escaped, in theory, this
// function should not change the string value.
- char[] stringArray = string.toCharArray();
- int len = stringArray.length;
-
- accum('"');
+ const unsigned int len = length(string);
- for (int i = 0; i < len; i++)
- {
- char ch = stringArray[i];
+ for (unsigned int i = 0; i < len; ++i)
+ {
+ const XalanDOMChar ch = string[i];
- // if first 8 bytes are 0, no need to append them.
- if ((ch < 9) || (ch > 127)
- || /*(ch == '"') || -sb, as per #PDIK4L9LZY */ (ch == ' '))
+ if (ch < 33 || ch > 126)
{
- if (m_specialEscapeURLs)
+ if (m_escapeURLs == true)
{
+ // For the gory details of encoding these characters as
+ // UTF-8 hex, see:
+ //
+ // Unicode, A Primer, by Tony Graham, p. 92.
+ //
if(ch <= 0x7F)
{
- accum("%");
- accum(Integer.toHexString(ch).toUpperCase());
+ accumHexNumber(ch);
}
else if(ch <= 0x7FF)
{
- int high = (int) ((((int) ch) & 0xFFC0) >> 6) | 0xC0; // Clear high bytes?
- int low = (int) (((int) ch) & 0x3F) | 0x80; // First 6 bits, + high bit
- accum("%");
- accum(Integer.toHexString(high).toUpperCase());
- accum("%");
- accum(Integer.toHexString(low).toUpperCase());
+ const XalanDOMChar highByte = XalanDOMChar((ch >> 6) | 0xC0);
+ const XalanDOMChar lowByte = XalanDOMChar((ch & 0x3F) | 0x80);
+
+ accumHexNumber(highByte);
+
+ accumHexNumber(lowByte);
+ }
+ else if(isUTF16Surrogate(ch) == true) // high surrogate
+ {
+ // I'm sure this can be done in 3 instructions, but I choose
+ // to try and do it exactly like it is done in the book, at least
+ // until we are sure this is totally clean. I don't think performance
+ // is a big issue with this particular function, though I could be
+ // wrong. Also, the stuff below clearly does more masking than
+ // it needs to do.
+
+ // Clear high 6 bits.
+ const XalanDOMChar highSurrogate = XalanDOMChar(ch & 0x03FF);
+
+ // Middle 4 bits (wwww) + 1
+ // "Note that the value of wwww from the high surrogate bit pattern
+ // is incremented to make the uuuuu bit pattern in the scalar value
+ // so the surrogate pair don't address the BMP."
+ const XalanDOMChar wwww = XalanDOMChar((highSurrogate & 0x03C0) >> 6);
+ const XalanDOMChar uuuuu = XalanDOMChar(wwww + 1);
+
+ // next 4 bits
+ const XalanDOMChar zzzz = XalanDOMChar((highSurrogate & 0x003C) >> 2);
+
+ // low 2 bits
+ const XalanDOMChar temp = XalanDOMChar(((highSurrogate & 0x0003) << 4) & 0x30);
+
+ // Get low surrogate character.
+ const XalanDOMChar nextChar = string[++i];
+
+ // Clear high 6 bits.
+ const XalanDOMChar lowSurrogate = XalanDOMChar(nextChar & 0x03FF);
+
+ // put the middle 4 bits into the bottom of yyyyyy (byte 3)
+ const XalanDOMChar yyyyyy = XalanDOMChar(temp | ((lowSurrogate & 0x03C0) >> 6));
+
+ // bottom 6 bits.
+ const XalanDOMChar xxxxxx = XalanDOMChar(lowSurrogate & 0x003F);
+
+ const XalanDOMChar byte1 = XalanDOMChar(0xF0 | (uuuuu >> 2)); // top 3 bits of uuuuu
+ const XalanDOMChar byte2 = XalanDOMChar(0x80 | (((uuuuu & 0x03) << 4) & 0x30) | zzzz);
+ const XalanDOMChar byte3 = XalanDOMChar(0x80 | yyyyyy);
+ const XalanDOMChar byte4 = XalanDOMChar(0x80 | xxxxxx);
+
+ accumHexNumber(byte1);
+
+ accumHexNumber(byte2);
+
+ accumHexNumber(byte3);
+
+ accumHexNumber(byte4);
}
else
{
- int high = (int) ((((int) ch) & 0xF000) >> 12) | 0xE0; // top 4 bits
- int middle = (int) ((((int) ch) & 0x0FC0) >> 6) | 0x80; // middle 6 bits
- int low = (int) (((int) ch) & 0x3F) | 0x80; // First 6 bits, + high bit
- accum("%");
- accum(Integer.toHexString(high).toUpperCase());
- accum("%");
- accum(Integer.toHexString(middle).toUpperCase());
- accum("%");
- accum(Integer.toHexString(low).toUpperCase());
+ const XalanDOMChar highByte = XalanDOMChar((ch >> 12) | 0xE0);
+ const XalanDOMChar middleByte = XalanDOMChar(((ch & 0x0FC0) >> 6) | 0x80);
+ const XalanDOMChar lowByte = XalanDOMChar((ch & 0x3F) | 0x80);
+
+ accumHexNumber(highByte);
+
+ accumHexNumber(middleByte);
+
+ accumHexNumber(lowByte);
}
}
+ else if (ch == XalanUnicode::charSpace)
+ {
+ accumHexNumber(ch);
+ }
else if (ch < m_maxCharacter)
{
- accum(ch);
+ accumContent(ch);
}
else
{
- accum("&#");
- accum(Integer.toString(ch));
- accum(';');
+ accumContent(XalanUnicode::charAmpersand);
+ accumContent(XalanUnicode::charNumberSign);
+
+ accumContent(UnsignedLongToDOMString(ch, m_stringBuffer));
+ clear(m_stringBuffer);
+
+ accumContent(XalanUnicode::charSemicolon);
}
}
- else if('%' == ch)
+ else if(ch == XalanUnicode::charPercentSign)
{
// If the character is a '%' number number, try to avoid double-escaping.
// There is a question if this is legal behavior.
- if(((i+2) < len) && Character.isDigit(stringArray[i+1])
- && Character.isDigit(stringArray[i+2]))
+ if (i + 2 < len &&
+ XalanXMLChar::isDigit(string[i + 1]) == true &&
+ XalanXMLChar::isDigit(string[i + 2]) == true)
{
- accum(ch);
+ accumContent(ch);
}
else
{
- accum("%");
- accum(Integer.toHexString(ch).toUpperCase());
+ if (m_escapeURLs == true)
+ {
+ accumHexNumber(ch);
+ }
+ else
+ {
+ accumContent(ch);
+ }
}
}
// Since http://www.ietf.org/rfc/rfc2396.txt refers to the URI grammar as
// not allowing quotes in the URI proper syntax, nor in the fragment
// identifier, we believe that double quotes should be escaped.
- else if (ch == '"')
+ else if (ch == XalanUnicode::charQuoteMark)
{
- accum('%');
- accum('2');
- accum('2');
+ if (m_escapeURLs == true)
+ {
+ accumContent(XalanUnicode::charPercentSign);
+ accumContent(XalanUnicode::charDigit_2);
+ accumContent(XalanUnicode::charDigit_2);
+ }
+ else
+ {
+ accumDefaultEntity(ch, i, string, len, true);
+ }
}
else
{
- accum(ch);
+ accumContent(ch);
}
}
-
- accum('"');
#endif
+}
+
+
+
+void
+FormatterToHTML::accumHexNumber(const XalanDOMChar theChar)
+{
+ accumContent(XalanUnicode::charPercentSign);
+
+ assert(length(m_stringBuffer) == 0);
+
+ UnsignedLongToHexDOMString(theChar, m_stringBuffer);
+
+ if (length(m_stringBuffer) == 1)
+ {
+ accumContent(XalanUnicode::charDigit_0);
+ }
+
+ accumContent(m_stringBuffer);
+
+ clear(m_stringBuffer);
}
1.20 +19 -7 xml-xalan/c/src/XMLSupport/FormatterToHTML.hpp
Index: FormatterToHTML.hpp
===================================================================
RCS file: /home/cvs/xml-xalan/c/src/XMLSupport/FormatterToHTML.hpp,v
retrieving revision 1.19
retrieving revision 1.20
diff -u -r1.19 -r1.20
--- FormatterToHTML.hpp 2000/12/18 20:05:44 1.19
+++ FormatterToHTML.hpp 2001/01/08 18:24:30 1.20
@@ -58,7 +58,7 @@
#define FORMATTERTOHTML_HEADER_GUARD_1357924680
/**
- * $Id: FormatterToHTML.hpp,v 1.19 2000/12/18 20:05:44 auriemma Exp $
+ * $Id: FormatterToHTML.hpp,v 1.20 2001/01/08 18:24:30 dbertoni Exp $
*
* $State: Exp $
*
@@ -374,9 +374,6 @@
void
copyEntityIntoBuffer(const XalanDOMString& s);
- void
- copyEntityIntoBuffer(const XalanDOMCharVectorType& s);
-
/**
* Get an ElemDesc instance for the specified name.
*
@@ -392,7 +389,7 @@
* @return map of element flags.
*/
static void
- initializeElementFlagsMap(ElementFlagsMapType& );
+ initializeElementFlagsMap(ElementFlagsMapType& theMap);
/**
* Process an attribute.
@@ -410,7 +407,6 @@
* with <CODE>%HH</CODE>, where HH is the hex of the byte value.
*
* @param string String to convert to XML format.
- * @param specials Chracters, should be represeted in chracter referenfces.
* @param encoding CURRENTLY NOT IMPLEMENTED.
*/
void
@@ -418,6 +414,16 @@
const XalanDOMChar* string,
const XalanDOMString encoding);
+ /**
+ * Accumulate the specified character by converting its numeric value to
+ * a hex string, making sure that any string of length 1 are written with
+ * a '0' before the number.
+ *
+ * @param theChar The character to accumulate
+ */
+ void
+ accumHexNumber(const XalanDOMChar theChar);
+
XalanDOMString m_currentElementName;
bool m_inBlockElem;
@@ -425,8 +431,14 @@
BoolStackType m_isRawStack;
bool m_isScriptOrStyleElem;
+
+ bool m_escapeURLs;
+
+ /**
+ * A counter so we can tell if we're inside the document element.
+ */
+ int m_elementLevel;
- bool m_isFirstElem;
};
1.42 +10 -20 xml-xalan/c/src/XMLSupport/FormatterToXML.cpp
Index: FormatterToXML.cpp
===================================================================
RCS file: /home/cvs/xml-xalan/c/src/XMLSupport/FormatterToXML.cpp,v
retrieving revision 1.41
retrieving revision 1.42
diff -u -r1.41 -r1.42
--- FormatterToXML.cpp 2000/12/18 20:06:14 1.41
+++ FormatterToXML.cpp 2001/01/08 18:24:30 1.42
@@ -115,11 +115,11 @@
m_isUTF8(false),
m_doctypeSystem(doctypeSystem),
m_doctypePublic(doctypePublic),
- m_encoding(isEmpty(encoding) == false ? encoding :
- XalanDOMString(&s_defaultMIMEEncoding[0], s_defaultMIMEEncoding.size() - 1)),
+ m_encoding(isEmpty(encoding) == false ? encoding : XalanDOMString(XalanTranscodingServices::s_utf8String)),
m_currentIndent(0),
m_indent(indent),
m_preserves(),
+ m_stringBuffer(),
m_bytesEqualChars(false),
m_shouldFlush(true),
m_spaceBeforeClose(false),
@@ -132,7 +132,6 @@
m_charBuf(),
m_pos(0),
m_byteBuf(),
- m_level(0),
m_elemStack(),
m_accumNameFunction(0),
m_accumContentFunction(0)
@@ -431,22 +430,6 @@
void
-FormatterToXML::accumName(const XalanDOMCharVectorType& theVector)
-{
- accumName(c_wstr(theVector), 0, theVector.size() - 1);
-}
-
-
-
-void
-FormatterToXML::accumContent(const XalanDOMCharVectorType& theVector)
-{
- accumContent(c_wstr(theVector), 0, theVector.size() - 1);
-}
-
-
-
-void
FormatterToXML::throwInvalidUTF16SurrogateException(XalanDOMChar ch)
{
const XalanDOMString theMessage(TranscodeFromLocalCodePage("Invalid UTF-16 surrogate detected: ") +
@@ -666,6 +649,9 @@
void
FormatterToXML::startDocument()
{
+ // Clear the buffer, just in case...
+ clear(m_stringBuffer);
+
if(m_inEntityRef == false)
{
m_needToOutputDocTypeDecl = true;
@@ -719,6 +705,7 @@
}
flush();
+
flushWriter();
}
@@ -1127,7 +1114,10 @@
{
accumContent(XalanUnicode::charAmpersand);
accumContent(XalanUnicode::charNumberSign);
- accumContent(UnsignedLongToDOMString(theNumber));
+
+ accumContent(UnsignedLongToDOMString(theNumber, m_stringBuffer));
+ clear(m_stringBuffer);
+
accumContent(XalanUnicode::charSemicolon);
}
1.28 +12 -26 xml-xalan/c/src/XMLSupport/FormatterToXML.hpp
Index: FormatterToXML.hpp
===================================================================
RCS file: /home/cvs/xml-xalan/c/src/XMLSupport/FormatterToXML.hpp,v
retrieving revision 1.27
retrieving revision 1.28
diff -u -r1.27 -r1.28
--- FormatterToXML.hpp 2000/12/18 20:06:14 1.27
+++ FormatterToXML.hpp 2001/01/08 18:24:31 1.28
@@ -417,22 +417,6 @@
accumContent(const XalanDOMString& str);
/**
- * Append a vector of wide characters to the buffer.
- *
- * @param theVector the vector to append
- */
- void
- accumName(const XalanDOMCharVectorType& theVector);
-
- /**
- * Append a vector of wide characters to the buffer.
- *
- * @param theVector the vector to append
- */
- void
- accumContent(const XalanDOMCharVectorType& theVector);
-
- /**
* Escape and accum a character.
*/
void
@@ -569,6 +553,12 @@
XalanDOMChar ch,
unsigned int next);
+ static bool
+ isUTF16Surrogate(XalanDOMChar ch)
+ {
+ return (ch & 0xFC00) == 0xD800 ? true : false;
+ }
+
enum eDummyTwo { SPECIALSSIZE = 256};
/**
@@ -677,6 +667,11 @@
*/
BoolStackType m_preserves;
+ // A text buffer. We use it mostly for converting
+ // to string values. See uses of UnsignedLongToString()
+ // and UnsignedLongToHexString().
+ XalanDOMString m_stringBuffer;
+
private:
// These are not implemented.
@@ -889,19 +884,10 @@
static const DOMCharBufferType::size_type s_maxBufferSize;
/**
- * Current level of indent.
- */
- int m_level;
-
-protected:
-
- /**
* A stack of Boolean objects that tell if the given element
* has children.
*/
- BoolStackType m_elemStack;
-
-private:
+ BoolStackType m_elemStack;
/**
* A pointer to the member function that will do the accumulating