You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@xerces.apache.org by am...@apache.org on 2007/08/24 18:56:31 UTC

svn commit: r569455 - /xerces/c/trunk/src/xercesc/util/regx/RegularExpression.cpp

Author: amassari
Date: Fri Aug 24 09:56:30 2007
New Revision: 569455

URL: http://svn.apache.org/viewvc?rev=569455&view=rev
Log:
Avoid recursion when checking for closures. Patch by Vitaly Prapirny (XERCESC-1242)

Modified:
    xerces/c/trunk/src/xercesc/util/regx/RegularExpression.cpp

Modified: xerces/c/trunk/src/xercesc/util/regx/RegularExpression.cpp
URL: http://svn.apache.org/viewvc/xerces/c/trunk/src/xercesc/util/regx/RegularExpression.cpp?rev=569455&r1=569454&r2=569455&view=diff
==============================================================================
--- xerces/c/trunk/src/xercesc/util/regx/RegularExpression.cpp (original)
+++ xerces/c/trunk/src/xercesc/util/regx/RegularExpression.cpp Fri Aug 24 09:56:30 2007
@@ -37,6 +37,7 @@
 #include <xercesc/util/XMLInitializer.hpp>
 #include <xercesc/util/XMLRegisterCleanup.hpp>
 #include <xercesc/util/XMLUniDefs.hpp>
+#include <xercesc/util/ValueStackOf.hpp>
 
 XERCES_CPP_NAMESPACE_BEGIN
 
@@ -546,7 +547,7 @@
 
 		int matchEnd = match(&context, fOperations, context.fStart, 1);
 
-		if (matchEnd == context.fLimit) {
+		if (matchEnd == (int)context.fLimit) {
 
 			if (context.fMatch != 0) {
 
@@ -983,149 +984,188 @@
 	return ret;
 }
 
+struct RE_RuntimeContext {
+	const Op    *op_;
+	XMLSize_t   offs_;
+
+	RE_RuntimeContext(const Op *op, XMLSize_t offs) : op_(op), offs_(offs) { }
+};
 
 int RegularExpression::match(Context* const context, const Op* const operations
 							 , XMLSize_t offset, const short direction)
 {
+    ValueStackOf<RE_RuntimeContext>	opStack((unsigned int)(context->fLength - offset), fMemoryManager);
 	const Op* tmpOp = operations;
 	bool ignoreCase = isSet(fOptions, IGNORE_CASE);
+	int	doReturn;
 
 	while (tmpOp != 0) {
+        // no one wants to return -5, only -1, 0, and greater
+		doReturn = -5;
 
 		if (offset > context->fLimit || offset < context->fStart)
-			return -1;
-
-		switch(tmpOp->getOpType()) {
-		case Op::O_CHAR:
-			if (!matchChar(context, tmpOp->getData(), offset, direction,
-						   ignoreCase))
-				return -1;
-			tmpOp = tmpOp->getNextOp();
-			break;
-		case Op::O_DOT:
-			if (!matchDot(context, offset, direction))
-				return -1;
-			tmpOp = tmpOp->getNextOp();
-			break;
-		case Op::O_RANGE:
-		case Op::O_NRANGE:
-			if (!matchRange(context, tmpOp, offset, direction, ignoreCase))
-				return -1;
-			tmpOp = tmpOp->getNextOp();
-			break;
-		case Op::O_ANCHOR:
-			if (!matchAnchor(context, tmpOp->getData(), offset))
-				return -1;
-			tmpOp = tmpOp->getNextOp();
-			break;
-		case Op::O_BACKREFERENCE:
-			if (!matchBackReference(context, tmpOp->getData(), offset,
-									direction, ignoreCase))
-				return -1;
-			tmpOp = tmpOp->getNextOp();
-			break;
-		case Op::O_STRING:
-			if (!matchString(context, tmpOp->getLiteral(), offset, direction,
-							 ignoreCase))
-				return -1;
-			tmpOp = tmpOp->getNextOp();
-			break;
-		case Op::O_CLOSURE:
-			{
+			doReturn = -1;
+        else
+        {
+		    switch(tmpOp->getOpType()) {
+		    case Op::O_CHAR:
+			    if (!matchChar(context, tmpOp->getData(), offset, direction,
+						       ignoreCase))
+				    doReturn = -1;
+			    else
+    			    tmpOp = tmpOp->getNextOp();
+			    break;
+		    case Op::O_DOT:
+			    if (!matchDot(context, offset, direction))
+				    doReturn = -1;
+			    else
+    			    tmpOp = tmpOp->getNextOp();
+			    break;
+		    case Op::O_RANGE:
+		    case Op::O_NRANGE:
+			    if (!matchRange(context, tmpOp, offset, direction, ignoreCase))
+				    doReturn = -1;
+			    else
+    			    tmpOp = tmpOp->getNextOp();
+			    break;
+		    case Op::O_ANCHOR:
+			    if (!matchAnchor(context, tmpOp->getData(), offset))
+				    doReturn = -1;
+			    else
+    			    tmpOp = tmpOp->getNextOp();
+			    break;
+		    case Op::O_BACKREFERENCE:
+			    if (!matchBackReference(context, tmpOp->getData(), offset,
+									    direction, ignoreCase))
+				    doReturn = -1;
+			    else
+    			    tmpOp = tmpOp->getNextOp();
+			    break;
+		    case Op::O_STRING:
+			    if (!matchString(context, tmpOp->getLiteral(), offset, direction,
+							     ignoreCase))
+				    doReturn = -1;
+			    else
+    			    tmpOp = tmpOp->getNextOp();
+			    break;
+		    case Op::O_CLOSURE:
+			    {
+				    XMLInt32 id = tmpOp->getData();
+                    // if id is not -1, it's a closure with a child token having a minumum length, 
+                    // where id is the index of the fOffsets array where its status is stored
+			        if (id >= 0) {
+				        int prevOffset = context->fOffsets[id];
+				        if (prevOffset < 0 || prevOffset != (int)offset) {
+					        context->fOffsets[id] = (int)offset;
+				        }
+				        else {
+                            // the status didn't change, we haven't found other copies; move on to the next match
+					        context->fOffsets[id] = -1;
+					        tmpOp = tmpOp->getNextOp();
+					        break;
+				        }
+			        }
+
+				    opStack.push(RE_RuntimeContext(tmpOp, offset));
+				    tmpOp = tmpOp->getChild();
+			    }
+			    break;
+		    case Op::O_QUESTION:
+			    {
+				    opStack.push(RE_RuntimeContext(tmpOp, offset));
+				    tmpOp = tmpOp->getChild();
+			    }
+			    break;
+		    case Op::O_NONGREEDYCLOSURE:
+		    case Op::O_NONGREEDYQUESTION:
+			    {
+				    int ret = match(context,tmpOp->getNextOp(),offset,direction);
+				    if (ret >= 0)
+                        doReturn = ret;
+                    else
+    				    tmpOp = tmpOp->getChild();
+			    }
+			    break;
+		    case Op::O_UNION:
+			    doReturn = matchUnion(context, tmpOp, offset, direction);
+                break;
+		    case Op::O_CAPTURE:
+			    if (context->fMatch != 0 && tmpOp->getData() != 0)
+				    doReturn = matchCapture(context, tmpOp, offset, direction);
+                else
+			        tmpOp = tmpOp->getNextOp();
+			    break;
+		    case Op::O_LOOKAHEAD:
+			    if (0 > match(context, tmpOp->getChild(), offset, 1))
+    				doReturn = -1;
+    			else
+	    		    tmpOp = tmpOp->getNextOp();
+			    break;
+		    case Op::O_NEGATIVELOOKAHEAD:
+			    if (0 <= match(context, tmpOp->getChild(), offset, 1))
+    				doReturn = -1;
+    			else
+    			    tmpOp = tmpOp->getNextOp();
+			    break;
+		    case Op::O_LOOKBEHIND:
+			    if (0 > match(context, tmpOp->getChild(), offset, -1))
+    				doReturn = -1;
+    			else
+    			    tmpOp = tmpOp->getNextOp();
+			    break;
+		    case Op::O_NEGATIVELOOKBEHIND:
+			    if (0 <= match(context, tmpOp->getChild(), offset, -1))
+    				doReturn = -1;
+    			else
+    			    tmpOp = tmpOp->getNextOp();
+			    break;
+		    case Op::O_INDEPENDENT:
+            case Op::O_MODIFIER:
+			    {
+				    int ret = (tmpOp->getOpType() == Op::O_INDEPENDENT)
+					       ? match(context, tmpOp->getChild(), offset, direction)
+                           : matchModifier(context, tmpOp, offset, direction);
+                    if (ret < 0)
+    				    doReturn = ret;
+                    else {
+				        offset = ret;
+				        tmpOp = tmpOp->getNextOp();
+                    }
+			    }
+			    break;
+		    case Op::O_CONDITION:
+			    if (tmpOp->getRefNo() >= fNoGroups)
+    				doReturn = -1;
+    			else
+                {
+                    if (matchCondition(context, tmpOp, offset, direction))
+				        tmpOp = tmpOp->getYesFlow();
+			        else
+				        if (tmpOp->getNoFlow() != 0)
+                            tmpOp = tmpOp->getNoFlow();
+                        else
+                            tmpOp = tmpOp->getNextOp();
+                }
+			    break;
+		    }
+        }
+		if (doReturn != -5) {
+			if (opStack.size() == 0)
+				return doReturn;
+			RE_RuntimeContext	ctx = opStack.pop();
+			tmpOp = ctx.op_;
+			offset = ctx.offs_;
+			if (tmpOp->getOpType() == Op::O_CLOSURE) {
 				XMLInt32 id = tmpOp->getData();
 				if (id >= 0) {
-					int prevOffset = context->fOffsets[id];
-					if (prevOffset < 0 || prevOffset != offset) {
-						context->fOffsets[id] = (int)offset;
-					}
-					else {
-
-						context->fOffsets[id] = -1;
-						tmpOp = tmpOp->getNextOp();
-						break;
-					}
-				}
-
-				int ret = match(context, tmpOp->getChild(), offset, direction);
-				if (id >= 0) {
 					context->fOffsets[id] = -1;
 				}
-
-				if (ret >= 0)
-					return ret;
-
-				tmpOp = tmpOp->getNextOp();
 			}
-			break;
-		case Op::O_QUESTION:
-			{
-				int ret = match(context, tmpOp->getChild(), offset, direction);
-				if (ret >= 0)
-					return ret;
-				tmpOp = tmpOp->getNextOp();
+			if (tmpOp->getOpType() == Op::O_CLOSURE || tmpOp->getOpType() == Op::O_QUESTION) {
+				if (doReturn >= 0)
+					return doReturn;
 			}
-			break;
-		case Op::O_NONGREEDYCLOSURE:
-		case Op::O_NONGREEDYQUESTION:
-			{
-				int ret = match(context,tmpOp->getNextOp(),offset,direction);
-				if (ret >= 0)
-					return ret;
-				tmpOp = tmpOp->getChild();
-			}
-			break;
-		case Op::O_UNION:
-			{
-				return matchUnion(context, tmpOp, offset, direction);
-			}
-		case Op::O_CAPTURE:
-			if (context->fMatch != 0 && tmpOp->getData() != 0)
-				return matchCapture(context, tmpOp, offset, direction);
-			tmpOp = tmpOp->getNextOp();
-			break;
-		case Op::O_LOOKAHEAD:
-			if (0 > match(context, tmpOp->getChild(), offset, 1))
-				return -1;
 			tmpOp = tmpOp->getNextOp();
-			break;
-		case Op::O_NEGATIVELOOKAHEAD:
-			if (0 <= match(context, tmpOp->getChild(), offset, 1))
-				return -1;
-			tmpOp = tmpOp->getNextOp();
-			break;
-		case Op::O_LOOKBEHIND:
-			if (0 > match(context, tmpOp->getChild(), offset, -1))
-				return - 1;
-			tmpOp = tmpOp->getNextOp();
-			break;
-		case Op::O_NEGATIVELOOKBEHIND:
-			if (0 <= match(context, tmpOp->getChild(), offset, -1))
-				return -1;
-			tmpOp = tmpOp->getNextOp();
-			break;
-		case Op::O_INDEPENDENT:
-        case Op::O_MODIFIER:
-			{
-				int ret = (tmpOp->getOpType() == Op::O_INDEPENDENT)
-					   ? match(context, tmpOp->getChild(), offset, direction)
-                       : matchModifier(context, tmpOp, offset, direction);
-                if (ret < 0)
-                    return ret;
-				offset = ret;
-				tmpOp = tmpOp->getNextOp();
-			}
-			break;
-		case Op::O_CONDITION:
-			if (tmpOp->getRefNo() >= fNoGroups)
-				return -1;
-			if (matchCondition(context, tmpOp, offset, direction))
-				tmpOp = tmpOp->getYesFlow();
-			else
-				if (tmpOp->getNoFlow() != 0)
-                    tmpOp = tmpOp->getNoFlow();
-                else
-                    tmpOp = tmpOp->getNextOp();
-			break;
 		}
 	}
 	



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@xerces.apache.org
For additional commands, e-mail: commits-help@xerces.apache.org