You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@xerces.apache.org by kn...@apache.org on 2001/05/03 20:43:14 UTC
cvs commit: xml-xerces/c/src/util PlatformUtils.cpp PlatformUtils.hpp XMLUniDefs.hpp
knoaman 01/05/03 11:43:13
Modified: c/src/internal XMLReader.cpp XMLReader.hpp
c/src/util PlatformUtils.cpp PlatformUtils.hpp
XMLUniDefs.hpp
Log:
Added new option to the parsers so that the NEL (0x85) char can be treated as a newline character.
Revision Changes Path
1.25 +66 -21 xml-xerces/c/src/internal/XMLReader.cpp
Index: XMLReader.cpp
===================================================================
RCS file: /home/cvs/xml-xerces/c/src/internal/XMLReader.cpp,v
retrieving revision 1.24
retrieving revision 1.25
diff -u -r1.24 -r1.25
--- XMLReader.cpp 2001/02/14 14:44:11 1.24
+++ XMLReader.cpp 2001/05/03 18:42:46 1.25
@@ -55,7 +55,7 @@
*/
/*
- * $Id: XMLReader.cpp,v 1.24 2001/02/14 14:44:11 knoaman Exp $
+ * $Id: XMLReader.cpp,v 1.25 2001/05/03 18:42:46 knoaman Exp $
*/
// ---------------------------------------------------------------------------
@@ -70,7 +70,6 @@
#include <util/UTFDataFormatException.hpp>
#include <util/XMLEBCDICTranscoder.hpp>
#include <util/XMLString.hpp>
-#include <util/XMLUniDefs.hpp>
#include <util/XMLUni.hpp>
#include <sax/InputSource.hpp>
#include <framework/XMLBuffer.hpp>
@@ -80,7 +79,10 @@
#include <string.h>
-
+// ---------------------------------------------------------------------------
+// XMLReader: static data initialization
+// ---------------------------------------------------------------------------
+bool XMLReader::fNEL = false;
// ---------------------------------------------------------------------------
// XMLReader: Public, static methods
@@ -88,11 +90,17 @@
bool XMLReader::isFirstNameChar(const XMLCh toCheck)
{
static const XMLByte ourMask = gBaseCharMask | gLetterCharMask;
- if ((fgCharCharsTable[toCheck] & ourMask) != 0)
+ XMLCh toCheckCh = toCheck;
+
+ if (fNEL && (toCheckCh == chNEL)) {
+ toCheckCh = chLF;
+ }
+
+ if ((fgCharCharsTable[toCheckCh] & ourMask) != 0)
return true;
// Check the two special case name start chars
- if ((toCheck == chUnderscore) || (toCheck == chColon))
+ if ((toCheckCh == chUnderscore) || (toCheckCh == chColon))
return true;
return false;
@@ -110,6 +118,12 @@
const XMLCh* endPtr = toCheck + count;
while (curCh < endPtr)
{
+ if (fNEL && (fgCharCharsTable[*curCh] == chNEL)) {
+
+ *curCh++;
+ continue;
+ }
+
if (!(fgCharCharsTable[*curCh++] & gWhitespaceCharMask))
return false;
}
@@ -128,6 +142,10 @@
const XMLCh* endPtr = toCheck + count;
while (curCh < endPtr)
{
+ if (fNEL && (fgCharCharsTable[*curCh] == chNEL)) {
+ return true;
+ }
+
if (fgCharCharsTable[*curCh++] & gWhitespaceCharMask)
return true;
}
@@ -147,6 +165,13 @@
}
+void XMLReader::enableNELWS() {
+
+ if (!fNEL) {
+ fNEL = true;
+ }
+}
+
// ---------------------------------------------------------------------------
// XMLReader: Constructors and Destructor
// ---------------------------------------------------------------------------
@@ -596,14 +621,16 @@
//
if (fCharIndex < fCharsAvail)
{
- if (fCharBuf[fCharIndex] == chLF)
+ if (fCharBuf[fCharIndex] == chLF
+ || (fNEL && (fCharBuf[fCharIndex] == chNEL)))
fCharIndex++;
}
else
{
if (refreshCharBuffer())
{
- if (fCharBuf[fCharIndex] == chLF)
+ if (fCharBuf[fCharIndex] == chLF
+ || (fNEL && (fCharBuf[fCharIndex] == chNEL)))
fCharIndex++;
}
}
@@ -616,8 +643,10 @@
fCurCol = 1;
fCurLine++;
}
- else if (chGotten == chLF)
+ else if (chGotten == chLF
+ || (fNEL && (chGotten == chNEL)))
{
+ chGotten = chLF;
fCurLine++;
fCurCol = 1;
}
@@ -682,22 +711,26 @@
{
if (fCharIndex < fCharsAvail)
{
- if (fCharBuf[fCharIndex] == chLF)
+ if (fCharBuf[fCharIndex] == chLF
+ || (fNEL && (fCharBuf[fCharIndex] == chNEL)))
fCharIndex++;
}
else
{
if (refreshCharBuffer())
{
- if (fCharBuf[fCharIndex] == chLF)
+ if (fCharBuf[fCharIndex] == chLF
+ || (fNEL && (fCharBuf[fCharIndex] == chNEL)))
fCharIndex++;
}
}
curCh = chLF;
}
}
- else if (curCh == chLF)
+ else if (curCh == chLF
+ || (fNEL && (curCh == chNEL)))
{
+ curCh = chLF;
fCurCol = 1;
fCurLine++;
}
@@ -765,22 +798,26 @@
{
if (fCharIndex < fCharsAvail)
{
- if (fCharBuf[fCharIndex] == chLF)
+ if (fCharBuf[fCharIndex] == chLF
+ || (fNEL && (fCharBuf[fCharIndex] == chNEL)))
fCharIndex++;
}
else
{
if (refreshCharBuffer())
{
- if (fCharBuf[fCharIndex] == chLF)
+ if (fCharBuf[fCharIndex] == chLF
+ || (fNEL && (fCharBuf[fCharIndex] == chNEL)))
fCharIndex++;
}
}
curCh = chLF;
}
}
- else if (curCh == chLF)
+ else if (curCh == chLF
+ || (fNEL && (curCh == chNEL)))
{
+ curCh = chLF;
fCurCol = 1;
fCurLine++;
}
@@ -839,7 +876,8 @@
// normal char get method in regards to newline normalization, though
// its not as complicated as the actual character getting method's.
//
- if ((fSource == Source_External) && (chGotten == chCR))
+ if ((fSource == Source_External)
+ && (chGotten == chCR || (fNEL && (chGotten == chNEL))))
chGotten = chLF;
return true;
@@ -912,22 +950,26 @@
{
if (fCharIndex < fCharsAvail)
{
- if (fCharBuf[fCharIndex] == chLF)
+ if (fCharBuf[fCharIndex] == chLF
+ || (fNEL && (fCharBuf[fCharIndex] == chNEL)))
fCharIndex++;
}
else
{
if (refreshCharBuffer())
{
- if (fCharBuf[fCharIndex] == chLF)
+ if (fCharBuf[fCharIndex] == chLF
+ || (fNEL && (fCharBuf[fCharIndex] == chNEL)))
fCharIndex++;
}
}
curCh = chLF;
}
}
- else if (curCh == chLF)
+ else if (curCh == chLF
+ || (fNEL && (curCh == chNEL)))
{
+ curCh = chLF;
fCurCol = 1;
fCurLine++;
}
@@ -1015,20 +1057,23 @@
{
if (fCharIndex < fCharsAvail)
{
- if (fCharBuf[fCharIndex] == chLF)
+ if (fCharBuf[fCharIndex] == chLF
+ || (fNEL && (fCharBuf[fCharIndex] == chNEL)))
fCharIndex++;
}
else
{
if (refreshCharBuffer())
{
- if (fCharBuf[fCharIndex] == chLF)
+ if (fCharBuf[fCharIndex] == chLF
+ || (fNEL && (fCharBuf[fCharIndex] == chNEL)))
fCharIndex++;
}
}
}
}
- else if (curCh == chLF)
+ else if (curCh == chLF
+ || (fNEL && (curCh == chNEL)))
{
fCurLine++;
fCurCol = 1;
1.15 +48 -10 xml-xerces/c/src/internal/XMLReader.hpp
Index: XMLReader.hpp
===================================================================
RCS file: /home/cvs/xml-xerces/c/src/internal/XMLReader.hpp,v
retrieving revision 1.14
retrieving revision 1.15
diff -u -r1.14 -r1.15
--- XMLReader.hpp 2001/01/25 19:16:58 1.14
+++ XMLReader.hpp 2001/05/03 18:42:51 1.15
@@ -56,6 +56,9 @@
/*
* $Log: XMLReader.hpp,v $
+ * Revision 1.15 2001/05/03 18:42:51 knoaman
+ * Added new option to the parsers so that the NEL (0x85) char can be treated as a newline character.
+ *
* Revision 1.14 2001/01/25 19:16:58 tng
* const should be used instead of static const. Fixed by Khaled Noaman.
*
@@ -118,6 +121,7 @@
#include <util/XMLUniDefs.hpp>
#include <framework/XMLRecognizer.hpp>
#include <framework/XMLBuffer.hpp>
+#include <util/XMLUniDefs.hpp>
class InputSource;
class BinInputStream;
@@ -138,7 +142,6 @@
const XMLByte gWhitespaceCharMask = 0x80;
-
// ---------------------------------------------------------------------------
// Instances of this class are used to manage the content of entities. The
// scanner maintains a stack of these, one for each entity (this means entity
@@ -151,6 +154,12 @@
// methods.
//
// This is NOT to be derived from.
+//
+// Note: We have added support for handling the 390 NEL character as whitespace.
+// Because the option is turned on at run time, we cannot modify the
+// corresponding entry for NEL (0x85) in the fgCharCharsTable. As a result,
+// every time we use the fgCharCharsTable and the NEL option is turned on,
+// we will use the value of chLF in the fgCharCharsTable instead.
// ---------------------------------------------------------------------------
class XMLPARSER_EXPORT XMLReader
{
@@ -204,6 +213,11 @@
static bool isXMLChar(const XMLCh toCheck);
static bool isWhitespace(const XMLCh toCheck);
+ /**
+ * Return the value of the fNEL flag.
+ */
+ static bool isNELRecognized();
+
// -----------------------------------------------------------------------
// Constructors and Destructor
@@ -320,6 +334,11 @@
);
+ /**
+ * Method to enable NEL char to be treated as white space char.
+ */
+ static void enableNELWS();
+
// -----------------------------------------------------------------------
// Private helper methods
// -----------------------------------------------------------------------
@@ -517,8 +536,15 @@
// The character characteristics table. Bits in each byte, represent
// the characteristics of each character. It is generated via some
// code and then hard coded into the cpp file for speed.
+ //
+ // fNEL
+ // Flag that represents whether NEL whitespace recognition is enabled
+ // or disabled
// -----------------------------------------------------------------------
static const XMLByte fgCharCharsTable[0x10000];
+ static bool fNEL;
+
+ friend class XMLPlatformUtils;
};
@@ -527,17 +553,19 @@
// ---------------------------------------------------------------------------
inline bool XMLReader::isBaseChar(const XMLCh toCheck)
{
- return (fgCharCharsTable[toCheck] & gBaseCharMask) != 0;
+ return ((fgCharCharsTable[toCheck] & gBaseCharMask) != 0);
}
inline bool XMLReader::isNameChar(const XMLCh toCheck)
{
- return (fgCharCharsTable[toCheck] & gNameCharMask) != 0;
+ return ((fgCharCharsTable[toCheck] & gNameCharMask) != 0);
}
inline bool XMLReader::isPlainContentChar(const XMLCh toCheck)
{
- return ((fgCharCharsTable[toCheck] & gPlainContentCharMask) != 0);
+ return (fNEL && (toCheck == chNEL))
+ ? ((fgCharCharsTable[chLF] & gPlainContentCharMask) != 0)
+ : ((fgCharCharsTable[toCheck] & gPlainContentCharMask) != 0);
}
@@ -548,7 +576,9 @@
inline bool XMLReader::isSpecialStartTagChar(const XMLCh toCheck)
{
- return ((fgCharCharsTable[toCheck] & gSpecialStartTagCharMask) != 0);
+ return (fNEL && (toCheck == chNEL))
+ ? ((fgCharCharsTable[chLF] & gSpecialStartTagCharMask) != 0)
+ : ((fgCharCharsTable[toCheck] & gSpecialStartTagCharMask) != 0);
}
inline bool XMLReader::isXMLChar(const XMLCh toCheck)
@@ -564,10 +594,11 @@
inline bool XMLReader::isWhitespace(const XMLCh toCheck)
{
- return ((fgCharCharsTable[toCheck] & gWhitespaceCharMask) != 0);
+ return (fNEL && (toCheck == chNEL))
+ ? ((fgCharCharsTable[chLF] & gWhitespaceCharMask) != 0)
+ : ((fgCharCharsTable[toCheck] & gWhitespaceCharMask) != 0);
}
-
// ---------------------------------------------------------------------------
// XMLReader: Buffer management methods
// ---------------------------------------------------------------------------
@@ -635,7 +666,10 @@
return fType;
}
+inline bool XMLReader::isNELRecognized() {
+ return fNEL;
+}
// ---------------------------------------------------------------------------
// XMLReader: Setter methods
@@ -750,14 +784,16 @@
//
if (fCharIndex < fCharsAvail)
{
- if (fCharBuf[fCharIndex] == chLF)
+ if (fCharBuf[fCharIndex] == chLF
+ || (fNEL && (fCharBuf[fCharIndex] == chNEL)))
fCharIndex++;
}
else
{
if (refreshCharBuffer())
{
- if (fCharBuf[fCharIndex] == chLF)
+ if (fCharBuf[fCharIndex] == chLF
+ || (fNEL && (fCharBuf[fCharIndex] == chNEL)))
fCharIndex++;
}
}
@@ -770,8 +806,10 @@
fCurCol = 1;
fCurLine++;
}
- else if (chGotten == chLF)
+ else if (chGotten == chLF
+ || (fNEL && (chGotten == chNEL)))
{
+ chGotten = chLF;
fCurLine++;
fCurCol = 1;
}
1.12 +37 -0 xml-xerces/c/src/util/PlatformUtils.cpp
Index: PlatformUtils.cpp
===================================================================
RCS file: /home/cvs/xml-xerces/c/src/util/PlatformUtils.cpp,v
retrieving revision 1.11
retrieving revision 1.12
diff -u -r1.11 -r1.12
--- PlatformUtils.cpp 2001/02/07 17:46:34 1.11
+++ PlatformUtils.cpp 2001/05/03 18:43:01 1.12
@@ -56,6 +56,9 @@
/*
* $Log: PlatformUtils.cpp,v $
+ * Revision 1.12 2001/05/03 18:43:01 knoaman
+ * Added new option to the parsers so that the NEL (0x85) char can be treated as a newline character.
+ *
* Revision 1.11 2001/02/07 17:46:34 billsch
* Rearranged statements in Initialize() so that platformInit() is called
* before an XMLMutex is created.
@@ -119,6 +122,8 @@
#include <util/XMLString.hpp>
#include <util/XMLNetAccessor.hpp>
#include <util/XMLUni.hpp>
+#include <internal/XMLReader.hpp>
+#include <util/RuntimeException.hpp>
// ---------------------------------------------------------------------------
@@ -306,3 +311,34 @@
delete gLazyData;
gLazyData = 0;
}
+
+// ---------------------------------------------------------------------------
+// XMLPlatformUtils: NEL Character Handling
+// ---------------------------------------------------------------------------
+void XMLPlatformUtils::recognizeNEL(bool state) {
+
+ //Make sure initialize has been called
+ if (gInitFlag == 0) {
+ return;
+ }
+
+ if (state) {
+
+ if (!XMLReader::isNELRecognized()) {
+ XMLReader::enableNELWS();
+ }
+ }
+ else {
+
+ if (XMLReader::isNELRecognized()) {
+ ThrowXML(RuntimeException, XMLExcepts::NEL_RepeatedCalls);
+ }
+ }
+}
+
+
+bool XMLPlatformUtils::isNELRecognized() {
+
+ return XMLReader::isNELRecognized();
+}
+
1.10 +20 -1 xml-xerces/c/src/util/PlatformUtils.hpp
Index: PlatformUtils.hpp
===================================================================
RCS file: /home/cvs/xml-xerces/c/src/util/PlatformUtils.hpp,v
retrieving revision 1.9
retrieving revision 1.10
diff -u -r1.9 -r1.10
--- PlatformUtils.hpp 2000/05/04 02:43:45 1.9
+++ PlatformUtils.hpp 2001/05/03 18:43:03 1.10
@@ -55,7 +55,7 @@
*/
/*
- * $Id: PlatformUtils.hpp,v 1.9 2000/05/04 02:43:45 aruna1 Exp $
+ * $Id: PlatformUtils.hpp,v 1.10 2001/05/03 18:43:03 knoaman Exp $
*/
@@ -515,6 +515,23 @@
//@}
+ /** @name NEL Character Handling */
+ //@{
+ /**
+ * This function enables the recognition of the NEL char as a whitespace char,
+ * which is disabled by default.
+ * It should only be called once per process. Once the option is enabled, any
+ * subsequent call that tries to disable it will result in an exception being thrown.
+ *
+ * Note: Turning this option on will make the parser non-compliant.
+ */
+ static void recognizeNEL(bool state);
+
+ /**
+ * Return the value of the fNEL flag.
+ */
+ static bool isNELRecognized();
+ //@}
//
// For internal use only.
@@ -576,6 +593,8 @@
* parser or utilities services!
*/
static void platformTerm();
+
+ //@}
};
1.4 +2 -1 xml-xerces/c/src/util/XMLUniDefs.hpp
Index: XMLUniDefs.hpp
===================================================================
RCS file: /home/cvs/xml-xerces/c/src/util/XMLUniDefs.hpp,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -r1.3 -r1.4
--- XMLUniDefs.hpp 2001/03/02 20:52:47 1.3
+++ XMLUniDefs.hpp 2001/05/03 18:43:06 1.4
@@ -55,7 +55,7 @@
*/
/*
- * $Id: XMLUniDefs.hpp,v 1.3 2001/03/02 20:52:47 knoaman Exp $
+ * $Id: XMLUniDefs.hpp,v 1.4 2001/05/03 18:43:06 knoaman Exp $
*/
#if !defined(XMLUNIDEFS_HPP)
#define XMLUNIDEFS_HPP
@@ -94,6 +94,7 @@
const XMLCh chEqual = 0x3D;
const XMLCh chForwardSlash = 0x2F;
const XMLCh chGrave = 0x60;
+const XMLCh chNEL = 0x85;
const XMLCh chOpenAngle = 0x3C;
const XMLCh chOpenCurly = 0x7B;
const XMLCh chOpenParen = 0x28;
---------------------------------------------------------------------
To unsubscribe, e-mail: xerces-cvs-unsubscribe@xml.apache.org
For additional commands, e-mail: xerces-cvs-help@xml.apache.org