You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2013/02/06 02:20:56 UTC
svn commit: r1442826 - in
/manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml:
BasicParseState.java TagParseState.java
Author: kwright
Date: Wed Feb 6 01:20:56 2013
New Revision: 1442826
URL: http://svn.apache.org/viewvc?rev=1442826&view=rev
Log:
Rename the tag parser class (BasicParseState -> TagParseState), so I can introduce other kinds of parse state objects easily.
Added:
manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/TagParseState.java
- copied, changed from r1442822, manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/BasicParseState.java
Removed:
manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/BasicParseState.java
Copied: manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/TagParseState.java (from r1442822, manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/BasicParseState.java)
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/TagParseState.java?p2=manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/TagParseState.java&p1=manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/BasicParseState.java&r1=1442822&r2=1442826&rev=1442826&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/BasicParseState.java (original)
+++ manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/TagParseState.java Wed Feb 6 01:20:56 2013
@@ -23,27 +23,27 @@ import org.apache.manifoldcf.core.system
import java.util.*;
/** This class represents the basic, outermost parse state. */
-public class BasicParseState
+public class TagParseState
{
- protected static final int BASICPARSESTATE_NORMAL = 0;
- protected static final int BASICPARSESTATE_SAWLEFTBRACKET = 1;
- protected static final int BASICPARSESTATE_SAWEXCLAMATION = 2;
- protected static final int BASICPARSESTATE_SAWDASH = 3;
- protected static final int BASICPARSESTATE_IN_COMMENT = 4;
- protected static final int BASICPARSESTATE_SAWCOMMENTDASH = 5;
- protected static final int BASICPARSESTATE_SAWSECONDCOMMENTDASH = 6;
- protected static final int BASICPARSESTATE_IN_TAG_NAME = 7;
- protected static final int BASICPARSESTATE_IN_ATTR_NAME = 8;
- protected static final int BASICPARSESTATE_IN_ATTR_VALUE = 9;
- protected static final int BASICPARSESTATE_IN_TAG_SAW_SLASH = 10;
- protected static final int BASICPARSESTATE_IN_END_TAG_NAME = 11;
- protected static final int BASICPARSESTATE_IN_ATTR_LOOKING_FOR_VALUE = 12;
- protected static final int BASICPARSESTATE_IN_SINGLE_QUOTES_ATTR_VALUE = 13;
- protected static final int BASICPARSESTATE_IN_DOUBLE_QUOTES_ATTR_VALUE = 14;
- protected static final int BASICPARSESTATE_IN_UNQUOTED_ATTR_VALUE = 15;
+ protected static final int TAGPARSESTATE_NORMAL = 0;
+ protected static final int TAGPARSESTATE_SAWLEFTBRACKET = 1;
+ protected static final int TAGPARSESTATE_SAWEXCLAMATION = 2;
+ protected static final int TAGPARSESTATE_SAWDASH = 3;
+ protected static final int TAGPARSESTATE_IN_COMMENT = 4;
+ protected static final int TAGPARSESTATE_SAWCOMMENTDASH = 5;
+ protected static final int TAGPARSESTATE_SAWSECONDCOMMENTDASH = 6;
+ protected static final int TAGPARSESTATE_IN_TAG_NAME = 7;
+ protected static final int TAGPARSESTATE_IN_ATTR_NAME = 8;
+ protected static final int TAGPARSESTATE_IN_ATTR_VALUE = 9;
+ protected static final int TAGPARSESTATE_IN_TAG_SAW_SLASH = 10;
+ protected static final int TAGPARSESTATE_IN_END_TAG_NAME = 11;
+ protected static final int TAGPARSESTATE_IN_ATTR_LOOKING_FOR_VALUE = 12;
+ protected static final int TAGPARSESTATE_IN_SINGLE_QUOTES_ATTR_VALUE = 13;
+ protected static final int TAGPARSESTATE_IN_DOUBLE_QUOTES_ATTR_VALUE = 14;
+ protected static final int TAGPARSESTATE_IN_UNQUOTED_ATTR_VALUE = 15;
- protected int currentState = BASICPARSESTATE_NORMAL;
+ protected int currentState = TAGPARSESTATE_NORMAL;
protected StringBuilder currentTagNameBuffer = null;
protected StringBuilder currentAttrNameBuffer = null;
@@ -63,7 +63,7 @@ public class BasicParseState
mapLookup.put("apos","'");
}
- public BasicParseState()
+ public TagParseState()
{
}
@@ -75,58 +75,58 @@ public class BasicParseState
char thisCharLower = Character.toLowerCase(thisChar);
switch (currentState)
{
- case BASICPARSESTATE_NORMAL:
+ case TAGPARSESTATE_NORMAL:
if (thisChar == '<')
- currentState = BASICPARSESTATE_SAWLEFTBRACKET;
+ currentState = TAGPARSESTATE_SAWLEFTBRACKET;
else
noteNormalCharacter(thisChar);
break;
- case BASICPARSESTATE_SAWLEFTBRACKET:
+ case TAGPARSESTATE_SAWLEFTBRACKET:
if (thisChar == '!')
- currentState = BASICPARSESTATE_SAWEXCLAMATION;
+ currentState = TAGPARSESTATE_SAWEXCLAMATION;
else if (thisChar == '/')
{
- currentState = BASICPARSESTATE_IN_END_TAG_NAME;
+ currentState = TAGPARSESTATE_IN_END_TAG_NAME;
currentTagNameBuffer = new StringBuilder();
}
else
{
- currentState = BASICPARSESTATE_IN_TAG_NAME;
+ currentState = TAGPARSESTATE_IN_TAG_NAME;
currentTagNameBuffer = new StringBuilder();
if (!isHTMLWhitespace(thisChar))
currentTagNameBuffer.append(thisCharLower);
}
break;
- case BASICPARSESTATE_SAWEXCLAMATION:
+ case TAGPARSESTATE_SAWEXCLAMATION:
if (thisChar == '-')
- currentState = BASICPARSESTATE_SAWDASH;
+ currentState = TAGPARSESTATE_SAWDASH;
else
- currentState = BASICPARSESTATE_NORMAL;
+ currentState = TAGPARSESTATE_NORMAL;
break;
- case BASICPARSESTATE_SAWDASH:
+ case TAGPARSESTATE_SAWDASH:
if (thisChar == '-')
- currentState = BASICPARSESTATE_IN_COMMENT;
+ currentState = TAGPARSESTATE_IN_COMMENT;
else
- currentState = BASICPARSESTATE_NORMAL;
+ currentState = TAGPARSESTATE_NORMAL;
break;
- case BASICPARSESTATE_IN_COMMENT:
+ case TAGPARSESTATE_IN_COMMENT:
// We're in a comment. All we should look for is the end of the comment.
if (thisChar == '-')
- currentState = BASICPARSESTATE_SAWCOMMENTDASH;
+ currentState = TAGPARSESTATE_SAWCOMMENTDASH;
break;
- case BASICPARSESTATE_SAWCOMMENTDASH:
+ case TAGPARSESTATE_SAWCOMMENTDASH:
if (thisChar == '-')
- currentState = BASICPARSESTATE_SAWSECONDCOMMENTDASH;
+ currentState = TAGPARSESTATE_SAWSECONDCOMMENTDASH;
else
- currentState = BASICPARSESTATE_IN_COMMENT;
+ currentState = TAGPARSESTATE_IN_COMMENT;
break;
- case BASICPARSESTATE_SAWSECONDCOMMENTDASH:
+ case TAGPARSESTATE_SAWSECONDCOMMENTDASH:
if (thisChar == '>')
- currentState = BASICPARSESTATE_NORMAL;
+ currentState = TAGPARSESTATE_NORMAL;
else if (thisChar != '-')
- currentState = BASICPARSESTATE_IN_COMMENT;
+ currentState = TAGPARSESTATE_IN_COMMENT;
break;
- case BASICPARSESTATE_IN_TAG_NAME:
+ case TAGPARSESTATE_IN_TAG_NAME:
if (isHTMLWhitespace(thisChar))
{
if (currentTagNameBuffer.length() > 0)
@@ -135,7 +135,7 @@ public class BasicParseState
currentTagName = currentTagNameBuffer.toString();
currentTagNameBuffer = null;
currentAttrMap = new HashMap<String,String>();
- currentState = BASICPARSESTATE_IN_ATTR_NAME;
+ currentState = TAGPARSESTATE_IN_ATTR_NAME;
currentAttrNameBuffer = new StringBuilder();
}
}
@@ -146,12 +146,12 @@ public class BasicParseState
currentTagName = currentTagNameBuffer.toString();
currentTagNameBuffer = null;
currentAttrMap = new HashMap<String,String>();
- currentState = BASICPARSESTATE_IN_TAG_SAW_SLASH;
+ currentState = TAGPARSESTATE_IN_TAG_SAW_SLASH;
noteTag(currentTagName,currentAttrMap);
}
else
{
- currentState = BASICPARSESTATE_NORMAL;
+ currentState = TAGPARSESTATE_NORMAL;
currentTagNameBuffer = null;
}
}
@@ -167,14 +167,14 @@ public class BasicParseState
{
noteTag(currentTagName,currentAttrMap);
}
- currentState = BASICPARSESTATE_NORMAL;
+ currentState = TAGPARSESTATE_NORMAL;
currentTagName = null;
currentAttrMap = null;
}
else
currentTagNameBuffer.append(thisCharLower);
break;
- case BASICPARSESTATE_IN_ATTR_NAME:
+ case TAGPARSESTATE_IN_ATTR_NAME:
if (isHTMLWhitespace(thisChar))
{
if (currentAttrNameBuffer.length() > 0)
@@ -182,7 +182,7 @@ public class BasicParseState
// Done with attr name!
currentAttrName = currentAttrNameBuffer.toString();
currentAttrNameBuffer = null;
- currentState = BASICPARSESTATE_IN_ATTR_LOOKING_FOR_VALUE;
+ currentState = TAGPARSESTATE_IN_ATTR_LOOKING_FOR_VALUE;
}
}
else if (thisChar == '=')
@@ -191,7 +191,7 @@ public class BasicParseState
{
currentAttrName = currentAttrNameBuffer.toString();
currentAttrNameBuffer = null;
- currentState = BASICPARSESTATE_IN_ATTR_VALUE;
+ currentState = TAGPARSESTATE_IN_ATTR_VALUE;
currentValueBuffer = new StringBuilder();
}
}
@@ -208,7 +208,7 @@ public class BasicParseState
currentAttrName = null;
}
noteTag(currentTagName,currentAttrMap);
- currentState = BASICPARSESTATE_IN_TAG_SAW_SLASH;
+ currentState = TAGPARSESTATE_IN_TAG_SAW_SLASH;
}
else if (thisChar == '>')
{
@@ -222,7 +222,7 @@ public class BasicParseState
currentAttrMap.put(currentAttrName,"");
currentAttrName = null;
}
- currentState = BASICPARSESTATE_NORMAL;
+ currentState = TAGPARSESTATE_NORMAL;
noteTag(currentTagName,currentAttrMap);
currentTagName = null;
currentAttrMap = null;
@@ -230,22 +230,22 @@ public class BasicParseState
else
currentAttrNameBuffer.append(thisCharLower);
break;
- case BASICPARSESTATE_IN_ATTR_LOOKING_FOR_VALUE:
+ case TAGPARSESTATE_IN_ATTR_LOOKING_FOR_VALUE:
if (thisChar == '=')
{
- currentState = BASICPARSESTATE_IN_ATTR_VALUE;
+ currentState = TAGPARSESTATE_IN_ATTR_VALUE;
currentValueBuffer = new StringBuilder();
}
else if (thisChar == '>')
{
- currentState = BASICPARSESTATE_NORMAL;
+ currentState = TAGPARSESTATE_NORMAL;
noteTag(currentTagName,currentAttrMap);
currentTagName = null;
currentAttrMap = null;
}
else if (thisChar == '/')
{
- currentState = BASICPARSESTATE_IN_TAG_SAW_SLASH;
+ currentState = TAGPARSESTATE_IN_TAG_SAW_SLASH;
currentAttrMap.put(currentAttrName,"");
currentAttrName = null;
noteTag(currentTagName,currentAttrMap);
@@ -253,33 +253,33 @@ public class BasicParseState
else if (!isHTMLWhitespace(thisChar))
{
currentAttrMap.put(currentAttrName,"");
- currentState = BASICPARSESTATE_IN_ATTR_NAME;
+ currentState = TAGPARSESTATE_IN_ATTR_NAME;
currentAttrNameBuffer = new StringBuilder();
currentAttrNameBuffer.append(thisCharLower);
currentAttrName = null;
}
break;
- case BASICPARSESTATE_IN_ATTR_VALUE:
+ case TAGPARSESTATE_IN_ATTR_VALUE:
if (thisChar == '\'')
- currentState = BASICPARSESTATE_IN_SINGLE_QUOTES_ATTR_VALUE;
+ currentState = TAGPARSESTATE_IN_SINGLE_QUOTES_ATTR_VALUE;
else if (thisChar == '"')
- currentState = BASICPARSESTATE_IN_DOUBLE_QUOTES_ATTR_VALUE;
+ currentState = TAGPARSESTATE_IN_DOUBLE_QUOTES_ATTR_VALUE;
else if (!isHTMLWhitespace(thisChar))
{
- currentState = BASICPARSESTATE_IN_UNQUOTED_ATTR_VALUE;
+ currentState = TAGPARSESTATE_IN_UNQUOTED_ATTR_VALUE;
currentValueBuffer.append(thisChar);
}
break;
- case BASICPARSESTATE_IN_TAG_SAW_SLASH:
+ case TAGPARSESTATE_IN_TAG_SAW_SLASH:
if (thisChar == '>')
{
noteEndTag(currentTagName);
- currentState = BASICPARSESTATE_NORMAL;
+ currentState = TAGPARSESTATE_NORMAL;
currentTagName = null;
currentAttrMap = null;
}
break;
- case BASICPARSESTATE_IN_END_TAG_NAME:
+ case TAGPARSESTATE_IN_END_TAG_NAME:
if (isHTMLWhitespace(thisChar))
{
if (currentTagNameBuffer != null && currentTagNameBuffer.length() > 0)
@@ -301,56 +301,56 @@ public class BasicParseState
noteEndTag(currentTagName);
}
currentTagName = null;
- currentState = BASICPARSESTATE_NORMAL;
+ currentState = TAGPARSESTATE_NORMAL;
}
else if (currentTagNameBuffer != null)
currentTagNameBuffer.append(thisCharLower);
break;
- case BASICPARSESTATE_IN_SINGLE_QUOTES_ATTR_VALUE:
+ case TAGPARSESTATE_IN_SINGLE_QUOTES_ATTR_VALUE:
if (thisChar == '\'' || thisChar == '\n' || thisChar == '\r')
{
currentAttrMap.put(currentAttrName,attributeDecode(currentValueBuffer.toString()));
currentAttrName = null;
currentValueBuffer = null;
- currentState = BASICPARSESTATE_IN_ATTR_NAME;
+ currentState = TAGPARSESTATE_IN_ATTR_NAME;
currentAttrNameBuffer = new StringBuilder();
}
else
currentValueBuffer.append(thisChar);
break;
- case BASICPARSESTATE_IN_DOUBLE_QUOTES_ATTR_VALUE:
+ case TAGPARSESTATE_IN_DOUBLE_QUOTES_ATTR_VALUE:
if (thisChar == '"' || thisChar == '\n' || thisChar == '\r')
{
currentAttrMap.put(currentAttrName,attributeDecode(currentValueBuffer.toString()));
currentAttrName = null;
currentValueBuffer = null;
- currentState = BASICPARSESTATE_IN_ATTR_NAME;
+ currentState = TAGPARSESTATE_IN_ATTR_NAME;
currentAttrNameBuffer = new StringBuilder();
}
else
currentValueBuffer.append(thisChar);
break;
- case BASICPARSESTATE_IN_UNQUOTED_ATTR_VALUE:
+ case TAGPARSESTATE_IN_UNQUOTED_ATTR_VALUE:
if (isHTMLWhitespace(thisChar))
{
currentAttrMap.put(currentAttrName,attributeDecode(currentValueBuffer.toString()));
currentAttrName = null;
currentValueBuffer = null;
- currentState = BASICPARSESTATE_IN_ATTR_NAME;
+ currentState = TAGPARSESTATE_IN_ATTR_NAME;
currentAttrNameBuffer = new StringBuilder();
}
else if (thisChar == '/')
{
currentAttrMap.put(currentAttrName,attributeDecode(currentValueBuffer.toString()));
noteTag(currentTagName,currentAttrMap);
- currentState = BASICPARSESTATE_IN_TAG_SAW_SLASH;
+ currentState = TAGPARSESTATE_IN_TAG_SAW_SLASH;
}
else if (thisChar == '>')
{
currentAttrMap.put(currentAttrName,attributeDecode(currentValueBuffer.toString()));
currentAttrName = null;
currentValueBuffer = null;
- currentState = BASICPARSESTATE_NORMAL;
+ currentState = TAGPARSESTATE_NORMAL;
noteTag(currentTagName,currentAttrMap);
currentTagName = null;
currentAttrMap = null;