You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2019/09/24 16:30:38 UTC
svn commit: r1867469 - in /manifoldcf/branches/release-2.14-branch: ./
connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/
framework/connector-common/src/main/java/org/apache/manifoldcf/connectorcommon/fuzzyml/
Author: kwright
Date: Tue Sep 24 16:30:38 2019
New Revision: 1867469
URL: http://svn.apache.org/viewvc?rev=1867469&view=rev
Log:
Pull up fix for CONNECTORS-1623 from trunk
Modified:
manifoldcf/branches/release-2.14-branch/ (props changed)
manifoldcf/branches/release-2.14-branch/CHANGES.txt
manifoldcf/branches/release-2.14-branch/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ScriptParseState.java
manifoldcf/branches/release-2.14-branch/framework/connector-common/src/main/java/org/apache/manifoldcf/connectorcommon/fuzzyml/TagParseState.java
Propchange: manifoldcf/branches/release-2.14-branch/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Tue Sep 24 16:30:38 2019
@@ -155,4 +155,4 @@
/manifoldcf/branches/CONNECTORS-981:1605049-1605773
/manifoldcf/branches/CONNECTORS-989:1611600-1612101
/manifoldcf/branches/CONNECTORS-990:1610284-1610707
-/manifoldcf/trunk:1867444,1867447
+/manifoldcf/trunk:1867444,1867447,1867468
Modified: manifoldcf/branches/release-2.14-branch/CHANGES.txt
URL: http://svn.apache.org/viewvc/manifoldcf/branches/release-2.14-branch/CHANGES.txt?rev=1867469&r1=1867468&r2=1867469&view=diff
==============================================================================
--- manifoldcf/branches/release-2.14-branch/CHANGES.txt (original)
+++ manifoldcf/branches/release-2.14-branch/CHANGES.txt Tue Sep 24 16:30:38 2019
@@ -3,6 +3,9 @@ $Id$
======================= Release 2.14 =====================
+CONNECTORS-1623: Harden HTML parser against stuff in SCRIPT tags that looks like tags.
+(Julien Massiera, Karl Wright)
+
CONNECTORS-1566: Develop csws connector to replace deprecated LiveLink connector.
(Markus Schuch, Karl Wright)
Modified: manifoldcf/branches/release-2.14-branch/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ScriptParseState.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/release-2.14-branch/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ScriptParseState.java?rev=1867469&r1=1867468&r2=1867469&view=diff
==============================================================================
--- manifoldcf/branches/release-2.14-branch/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ScriptParseState.java (original)
+++ manifoldcf/branches/release-2.14-branch/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ScriptParseState.java Tue Sep 24 16:30:38 2019
@@ -63,6 +63,11 @@ public class ScriptParseState extends HT
}
@Override
+ protected boolean acceptNewTag() {
+ return scriptParseState != SCRIPTPARSESTATE_INSCRIPT;
+ }
+
+ @Override
protected boolean noteTagEnd(String tagName)
throws ManifoldCFException
{
Modified: manifoldcf/branches/release-2.14-branch/framework/connector-common/src/main/java/org/apache/manifoldcf/connectorcommon/fuzzyml/TagParseState.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/release-2.14-branch/framework/connector-common/src/main/java/org/apache/manifoldcf/connectorcommon/fuzzyml/TagParseState.java?rev=1867469&r1=1867468&r2=1867469&view=diff
==============================================================================
--- manifoldcf/branches/release-2.14-branch/framework/connector-common/src/main/java/org/apache/manifoldcf/connectorcommon/fuzzyml/TagParseState.java (original)
+++ manifoldcf/branches/release-2.14-branch/framework/connector-common/src/main/java/org/apache/manifoldcf/connectorcommon/fuzzyml/TagParseState.java Tue Sep 24 16:30:38 2019
@@ -285,7 +285,7 @@ public class TagParseState extends Singl
}
else if (bTagDepth == 0)
{
- if (isWhitespace(thisChar))
+ if (isWhitespace(thisChar) || !acceptNewTag())
{
// Not a tag.
currentState = TAGPARSESTATE_NORMAL;
@@ -937,6 +937,12 @@ public class TagParseState extends Singl
return false;
}
+ /** Allow parsing within tag.
+ */
+ protected boolean acceptNewTag() {
+ return true;
+ }
+
/** Allocate the buffer.
*/
protected StringBuilder newBuffer()