You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2015/06/24 17:17:36 UTC

svn commit: r1687303 - in /manifoldcf/trunk: CHANGES.txt framework/connector-common/src/main/java/org/apache/manifoldcf/connectorcommon/fuzzyml/TagParseState.java

Author: kwright
Date: Wed Jun 24 15:17:36 2015
New Revision: 1687303

URL: http://svn.apache.org/r1687303
Log:
Tentative fix for CONNECTORS-1215.

Modified:
    manifoldcf/trunk/CHANGES.txt
    manifoldcf/trunk/framework/connector-common/src/main/java/org/apache/manifoldcf/connectorcommon/fuzzyml/TagParseState.java

Modified: manifoldcf/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/CHANGES.txt?rev=1687303&r1=1687302&r2=1687303&view=diff
==============================================================================
--- manifoldcf/trunk/CHANGES.txt (original)
+++ manifoldcf/trunk/CHANGES.txt Wed Jun 24 15:17:36 2015
@@ -3,6 +3,11 @@ $Id$
 
 ======================= 2.2-dev =====================
 
+CONNECTORS-1215: Fuzzyml parser needs to treat "<" followed by
+whitespace as ordinary text rather than as the start of a tag, since
+such sequences are technically not valid tags and some sites care.
+(Brad Dennis, Karl Wright)
+
 CONNECTORS-1213: Job notification information did not export/import
 correctly.
 (Cathal McGuinness, Karl Wright)

Modified: manifoldcf/trunk/framework/connector-common/src/main/java/org/apache/manifoldcf/connectorcommon/fuzzyml/TagParseState.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/connector-common/src/main/java/org/apache/manifoldcf/connectorcommon/fuzzyml/TagParseState.java?rev=1687303&r1=1687302&r2=1687303&view=diff
==============================================================================
--- manifoldcf/trunk/framework/connector-common/src/main/java/org/apache/manifoldcf/connectorcommon/fuzzyml/TagParseState.java (original)
+++ manifoldcf/trunk/framework/connector-common/src/main/java/org/apache/manifoldcf/connectorcommon/fuzzyml/TagParseState.java Wed Jun 24 15:17:36 2015
@@ -282,10 +282,21 @@ public class TagParseState extends Singl
       }
       else if (bTagDepth == 0)
       {
-        currentState = TAGPARSESTATE_IN_TAG_NAME;
-        currentTagNameBuffer = newBuffer();
-        if (!isWhitespace(thisChar))
+        if (isWhitespace(thisChar))
+        {
+          // Not a tag.
+          currentState = TAGPARSESTATE_NORMAL;
+          if (noteNormalCharacter('<'))
+            return true;
+          if (noteNormalCharacter(thisChar))
+            return true;
+        }
+        else
+        {
+          currentState = TAGPARSESTATE_IN_TAG_NAME;
+          currentTagNameBuffer = newBuffer();
           currentTagNameBuffer.append(thisChar);
+        }
       }
       else
       {