You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@xerces.apache.org by kn...@apache.org on 2003/03/18 20:38:29 UTC
cvs commit: xml-xerces/c/src/xercesc/util/regx ParserForXMLSchema.cpp RegxParser.cpp RegxParser.hpp
knoaman 2003/03/18 11:38:28
Modified: c/src/xercesc/util/regx ParserForXMLSchema.cpp
RegxParser.cpp RegxParser.hpp
Log:
Schema Errata E2-18 + misc. regex fixes.
Revision Changes Path
1.5 +22 -23 xml-xerces/c/src/xercesc/util/regx/ParserForXMLSchema.cpp
Index: ParserForXMLSchema.cpp
===================================================================
RCS file: /home/cvs/xml-xerces/c/src/xercesc/util/regx/ParserForXMLSchema.cpp,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -r1.4 -r1.5
--- ParserForXMLSchema.cpp 13 Jan 2003 19:02:23 -0000 1.4
+++ ParserForXMLSchema.cpp 18 Mar 2003 19:38:28 -0000 1.5
@@ -56,6 +56,9 @@
/*
* $Log$
+ * Revision 1.5 2003/03/18 19:38:28 knoaman
+ * Schema Errata E2-18 + misc. regex fixes.
+ *
* Revision 1.4 2003/01/13 19:02:23 knoaman
* [Bug 14390] C++ Indentifier collision with Python.
*
@@ -169,7 +172,7 @@
Token* ParserForXMLSchema::processParen() {
processNext();
- Token* retTok = getTokenFactory()->createParenthesis(parseRegx(), 0);
+ Token* retTok = getTokenFactory()->createParenthesis(parseRegx(true), 0);
if (getState() != REGX_T_RPAREN) {
ThrowXML(ParseException, XMLExcepts::Parser_Factor1);
@@ -283,13 +286,13 @@
if (!end) {
- if (type == REGX_T_CHAR) {
-
- if (ch == chOpenSquare)
- ThrowXML(ParseException,XMLExcepts::Parser_CC6);
-
- if (ch == chCloseSquare)
- ThrowXML(ParseException,XMLExcepts::Parser_CC7);
+ if (type == REGX_T_CHAR
+ && (ch == chOpenSquare
+ || ch == chCloseSquare
+ || ch == chDash)) {
+ // '[', ']', '-' not allowed and should be escaped
+ XMLCh chStr[] = { ch, chNull };
+ ThrowXML2(ParseException,XMLExcepts::Parser_CC6, chStr, chStr);
}
if (getState() != REGX_T_CHAR || getCharData() != chDash) {
@@ -301,36 +304,32 @@
if ((type = getState()) == REGX_T_EOF)
ThrowXML(ParseException,XMLExcepts::Parser_CC2);
- if (type == REGX_T_CHAR && getCharData() == chCloseSquare) {
+ if ((type == REGX_T_CHAR && getCharData() == chCloseSquare)
+ || type == REGX_T_XMLSCHEMA_CC_SUBTRACTION) {
- tok->addRange(ch, ch);
- tok->addRange(chDash, chDash);
- }
- else if (type == REGX_T_XMLSCHEMA_CC_SUBTRACTION) {
- tok->addRange(ch, ch);
- tok->addRange(chDash, chDash);
+ static const XMLCh dashStr[] = { chDash, chNull};
+ ThrowXML2(ParseException, XMLExcepts::Parser_CC6, dashStr, dashStr);
}
else {
XMLInt32 rangeEnd = getCharData();
+ XMLCh rangeEndStr[] = { rangeEnd, chNull };
if (type == REGX_T_CHAR) {
- if (rangeEnd == chOpenSquare)
- ThrowXML(ParseException,XMLExcepts::Parser_CC6);
-
- if (rangeEnd == chCloseSquare)
- ThrowXML(ParseException,XMLExcepts::Parser_CC7);
+ if (rangeEnd == chOpenSquare
+ || rangeEnd == chCloseSquare
+ || rangeEnd == chDash)
+ // '[', ']', '-' not allowed and should be escaped
+ ThrowXML2(ParseException, XMLExcepts::Parser_CC6, rangeEndStr, rangeEndStr);
}
-
- if (type == REGX_T_BACKSOLIDUS) {
+ else if (type == REGX_T_BACKSOLIDUS) {
rangeEnd = decodeEscaped();
}
processNext();
if (ch > rangeEnd) {
- XMLCh rangeEndStr[] = { rangeEnd, chNull };
XMLCh chStr[] = { ch, chNull };
ThrowXML2(ParseException,XMLExcepts::Parser_Ope3, rangeEndStr, chStr);
}
1.6 +64 -74 xml-xerces/c/src/xercesc/util/regx/RegxParser.cpp
Index: RegxParser.cpp
===================================================================
RCS file: /home/cvs/xml-xerces/c/src/xercesc/util/regx/RegxParser.cpp,v
retrieving revision 1.5
retrieving revision 1.6
diff -u -r1.5 -r1.6
--- RegxParser.cpp 4 Mar 2003 16:36:17 -0000 1.5
+++ RegxParser.cpp 18 Mar 2003 19:38:28 -0000 1.6
@@ -56,6 +56,9 @@
/*
* $Log$
+ * Revision 1.6 2003/03/18 19:38:28 knoaman
+ * Schema Errata E2-18 + misc. regex fixes.
+ *
* Revision 1.5 2003/03/04 16:36:17 knoaman
* RegEx: fix for character category escape
*
@@ -420,9 +423,9 @@
}
-Token* RegxParser::parseRegx() {
+Token* RegxParser::parseRegx(const bool matchingRParen) {
- Token* tok = parseTerm();
+ Token* tok = parseTerm(matchingRParen);
Token* parentTok = 0;
while (fState == REGX_T_OR) {
@@ -435,26 +438,28 @@
tok = parentTok;
}
- tok->addChild(parseTerm(), fTokenFactory);
+ tok->addChild(parseTerm(matchingRParen), fTokenFactory);
}
return tok;
}
-Token* RegxParser::parseTerm() {
+Token* RegxParser::parseTerm(const bool matchingRParen) {
unsigned short state = fState;
- if (state == REGX_T_OR || state == REGX_T_RPAREN || state == REGX_T_EOF) {
+ if (state == REGX_T_OR || state == REGX_T_EOF
+ || (state == REGX_T_RPAREN && matchingRParen)) {
return fTokenFactory->createToken(Token::T_EMPTY);
}
else {
- Token* tok = parseFactor();
- Token* concatTok = 0;
+ Token* tok = parseFactor();
+ Token* concatTok = 0;
- while ((state = fState) != REGX_T_OR && state != REGX_T_RPAREN && state != REGX_T_EOF)
+ while ((state = fState) != REGX_T_OR && state != REGX_T_EOF
+ && (state != REGX_T_RPAREN || !matchingRParen))
{
if (concatTok == 0) {
@@ -605,7 +610,7 @@
processNext();
int num = fNoGroups++;
- Token* tok = fTokenFactory->createParenthesis(parseRegx(),num);
+ Token* tok = fTokenFactory->createParenthesis(parseRegx(true),num);
if (fState != REGX_T_RPAREN)
ThrowXML(ParseException,XMLExcepts::Parser_Factor1);
@@ -893,85 +898,74 @@
case REGX_T_QUESTION:
return processQuestion(tok);
case REGX_T_CHAR:
- if (fCharData == chOpenCurly) {
+ if (fCharData == chOpenCurly && fOffset < fStringLen) {
- int offset = fOffset;
int min = 0;
int max = -1;
- bool minExist = false;
-
- if (offset >= fStringLen)
- break;
+ XMLInt32 ch = fString[fOffset++];
- XMLInt32 ch = fString[offset++];
+ if (ch >= chDigit_0 && ch <= chDigit_9) {
- if (ch != chComma && (ch < chDigit_0 || ch > chDigit_9))
- ThrowXML1(ParseException, XMLExcepts::Regex_InvalidQuantifier, fString);
-
- if (ch != chComma) {
- minExist = true;
min = ch - chDigit_0;
- while (offset < fStringLen
- && (ch = fString[offset++]) >= chDigit_0
+ while (fOffset < fStringLen
+ && (ch = fString[fOffset++]) >= chDigit_0
&& ch <= chDigit_9) {
min = min*10 + ch - chDigit_0;
- ch = -1;
}
+
+ if (min < 0)
+ ThrowXML1(ParseException, XMLExcepts::Parser_Quantifier5, fString);
+ }
+ else {
+ ThrowXML1(ParseException, XMLExcepts::Parser_Quantifier1, fString);
}
max = min;
- if (ch != chCloseCurly && ch != chComma) {
- ThrowXML1(ParseException, XMLExcepts::Regex_InvalidQuantifier, fString);
- }
-
if (ch == chComma) {
- if (offset >= fStringLen)
- break;
-
- if (((ch = fString[offset++]) < chDigit_0 || ch > chDigit_9)
- && ch != chCloseCurly)
- ThrowXML1(ParseException, XMLExcepts::Regex_InvalidQuantifier, fString);
-
- if (ch == chCloseCurly) {
- if (minExist)
- max = -1;
- else
- ThrowXML1(ParseException, XMLExcepts::Regex_InvalidQuantifier, fString);
+ if (fOffset >= fStringLen) {
+ ThrowXML1(ParseException, XMLExcepts::Parser_Quantifier3, fString);
}
- else {
+ else if ((ch = fString[fOffset++]) >= chDigit_0 && ch <= chDigit_9) {
+
max = ch - chDigit_0;
- while (offset < fStringLen
- && (ch = fString[offset++]) >= chDigit_0
+ while (fOffset < fStringLen
+ && (ch = fString[fOffset++]) >= chDigit_0
&& ch <= chDigit_9) {
max = max*10 + ch - chDigit_0;
- ch = -1;
}
- if (ch != chCloseCurly) {
- ThrowXML1(ParseException, XMLExcepts::Regex_InvalidQuantifier, fString);
- }
+ if (max < 0)
+ ThrowXML1(ParseException, XMLExcepts::Parser_Quantifier5, fString);
+ else if (min > max)
+ ThrowXML1(ParseException, XMLExcepts::Parser_Quantifier4, fString);
+ }
+ else {
+ max = -1;
}
- } // end if ch = chComma
+ }
+
+ if (ch != chCloseCurly) {
+ ThrowXML1(ParseException, XMLExcepts::Parser_Quantifier2, fString);
+ }
- if (checkQuestion(offset)) {
+ if (checkQuestion(fOffset)) {
tok = fTokenFactory->createClosure(tok, true);
- fOffset = offset + 1;
+ fOffset++;
}
else {
-
tok = fTokenFactory->createClosure(tok);
- fOffset = offset;
}
tok->setMin(min);
tok->setMax(max);
processNext();
}
+ break;
}
return tok;
@@ -1014,27 +1008,6 @@
tok = getTokenForShorthand(fCharData);
processNext();
return tok;
- case chLatin_e:
- case chLatin_f:
- case chLatin_n:
- case chLatin_r:
- case chLatin_t:
- case chLatin_u:
- case chLatin_v:
- case chLatin_x:
- {
- XMLInt32 ch = decodeEscaped();
- if (ch < 0x10000) {
- tok = fTokenFactory->createChar(ch);
- }
- else {
-
- XMLCh* surrogateStr = RegxUtil::decomposeToSurrogates(ch);
- ArrayJanitor<XMLCh> janSurrogate(surrogateStr);
- tok = fTokenFactory->createString(surrogateStr);
- }
- }
- break;
case chLatin_c:
return processBacksolidus_c();
case chLatin_C:
@@ -1069,12 +1042,29 @@
}
break;
default:
- tok = fTokenFactory->createChar(fCharData);
+ {
+ XMLInt32 ch = decodeEscaped();
+ if (ch < 0x10000) {
+ tok = fTokenFactory->createChar(ch);
+ }
+ else {
+
+ XMLCh* surrogateStr = RegxUtil::decomposeToSurrogates(ch);
+ ArrayJanitor<XMLCh> janSurrogate(surrogateStr);
+ tok = fTokenFactory->createString(surrogateStr);
+ }
+ }
+ break;
} // end switch
processNext();
break;
case REGX_T_CHAR:
+ if (fCharData == chOpenCurly
+ || fCharData == chCloseCurly
+ || fCharData == chCloseSquare)
+ ThrowXML(ParseException,XMLExcepts::Parser_Atom4);
+
tok = fTokenFactory->createChar(fCharData);
processNext();
break;
1.4 +3 -3 xml-xerces/c/src/xercesc/util/regx/RegxParser.hpp
Index: RegxParser.hpp
===================================================================
RCS file: /home/cvs/xml-xerces/c/src/xercesc/util/regx/RegxParser.hpp,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -r1.3 -r1.4
--- RegxParser.hpp 13 Jan 2003 19:02:23 -0000 1.3
+++ RegxParser.hpp 18 Mar 2003 19:38:28 -0000 1.4
@@ -158,7 +158,7 @@
// Protected Parsing/Processing methods
// -----------------------------------------------------------------------
void processNext();
- Token* parseRegx();
+ Token* parseRegx(const bool matchingRParen = false);
virtual Token* processCaret();
virtual Token* processDollar();
virtual Token* processLook(const unsigned short tokType);
@@ -199,7 +199,7 @@
// -----------------------------------------------------------------------
// Private parsing/processing methods
// -----------------------------------------------------------------------
- Token* parseTerm();
+ Token* parseTerm(const bool matchingRParen = false);
Token* parseFactor();
Token* parseAtom();
---------------------------------------------------------------------
To unsubscribe, e-mail: xerces-cvs-unsubscribe@xml.apache.org
For additional commands, e-mail: xerces-cvs-help@xml.apache.org