You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2013/02/06 22:48:52 UTC
svn commit: r1443233 - in
/manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml:
PretagParseState.java SingleByteReceiver.java SingleCharacterReceiver.java
TagParseState.java
Author: kwright
Date: Wed Feb 6 21:48:52 2013
New Revision: 1443233
URL: http://svn.apache.org/viewvc?rev=1443233&view=rev
Log:
Code for hooking up passthrough into second level CharacterReceiver
Modified:
manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/PretagParseState.java
manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/SingleByteReceiver.java
manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/SingleCharacterReceiver.java
manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/TagParseState.java
Modified: manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/PretagParseState.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/PretagParseState.java?rev=1443233&r1=1443232&r2=1443233&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/PretagParseState.java (original)
+++ manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/PretagParseState.java Wed Feb 6 21:48:52 2013
@@ -21,6 +21,7 @@ package org.apache.manifoldcf.core.fuzzy
import org.apache.manifoldcf.core.interfaces.*;
import org.apache.manifoldcf.core.system.Logging;
import java.util.*;
+import java.io.*;
/** This class represents the ability to parse <?...?> preamble tags.
*/
@@ -39,10 +40,10 @@ public class PretagParseState extends Si
protected static final int PRETAGPARSESTATE_IN_ATTR_LOOKING_FOR_VALUE = 12;
protected static final int PRETAGPARSESTATE_IN_SINGLE_QUOTES_ATTR_VALUE = 13;
protected static final int PRETAGPARSESTATE_IN_DOUBLE_QUOTES_ATTR_VALUE = 14;
- protected static final int PRETAGPARSESTATE_POST = 15;
protected int currentState = PRETAGPARSESTATE_NORMAL;
-
+ protected boolean passThrough = false;
+
protected StringBuilder currentTagNameBuffer = null;
protected StringBuilder currentAttrNameBuffer = null;
protected StringBuilder currentValueBuffer = null;
@@ -50,6 +51,7 @@ public class PretagParseState extends Si
protected String currentTagName = null;
protected String currentAttrName = null;
protected Map<String,String> currentAttrMap = null;
+ protected final CharacterBuffer charBuffer = new CharacterBuffer();
protected static final Map<String,String> mapLookup = new HashMap<String,String>();
static
@@ -71,13 +73,42 @@ public class PretagParseState extends Si
this.postPreambleReceiver = postPreambleReceiver;
}
- /** Receive a byte.
+ /** Receive a set of characters; process one chunk's worth.
+ *@return true if done.
+ */
+ @Override
+ public boolean dealWithCharacters()
+ throws IOException, ManifoldCFException
+ {
+ if (passThrough)
+ {
+ if (postPreambleReceiver == null)
+ return true;
+ return postPreambleReceiver.dealWithCharacters();
+ }
+ return super.dealWithCharacters();
+ }
+
+ /** Receive a character.
* @return true if done.
*/
@Override
public boolean dealWithCharacter(char c)
- throws ManifoldCFException
+ throws IOException, ManifoldCFException
{
+ c = Character.toLowerCase(c);
+ if (currentState == PRETAGPARSESTATE_NORMAL && isWhitespace(c))
+ return false;
+ if (currentState == PRETAGPARSESTATE_NORMAL && c != '<' ||
+ currentState == PRETAGPARSESTATE_SAWLEFTBRACKET && c != '?' && c != '!')
+ {
+ // Initialize the post preamble receiver with a wrapped reader
+ if (postPreambleReceiver == null)
+ return true;
+ postPreambleReceiver.setReader(new PrefixedReader(charBuffer,reader));
+ passThrough = true;
+ return false;
+ }
// MHL
return true;
}
@@ -88,5 +119,10 @@ public class PretagParseState extends Si
Logging.misc.debug(" Saw pretag '"+tagName+"'");
}
+ /** Is a character markup language whitespace? */
+ protected static boolean isWhitespace(char x)
+ {
+ return x <= ' ';
+ }
}
Modified: manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/SingleByteReceiver.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/SingleByteReceiver.java?rev=1443233&r1=1443232&r2=1443233&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/SingleByteReceiver.java (original)
+++ manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/SingleByteReceiver.java Wed Feb 6 21:48:52 2013
@@ -55,7 +55,7 @@ public abstract class SingleByteReceiver
*@return true to stop further processing.
*/
public abstract boolean dealWithByte(byte b)
- throws ManifoldCFException;
+ throws IOException, ManifoldCFException;
}
Modified: manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/SingleCharacterReceiver.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/SingleCharacterReceiver.java?rev=1443233&r1=1443232&r2=1443233&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/SingleCharacterReceiver.java (original)
+++ manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/SingleCharacterReceiver.java Wed Feb 6 21:48:52 2013
@@ -56,6 +56,6 @@ public abstract class SingleCharacterRec
* @return true if done.
*/
public abstract boolean dealWithCharacter(char c)
- throws ManifoldCFException;
+ throws IOException, ManifoldCFException;
}
Modified: manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/TagParseState.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/TagParseState.java?rev=1443233&r1=1443232&r2=1443233&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/TagParseState.java (original)
+++ manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/TagParseState.java Wed Feb 6 21:48:52 2013
@@ -96,7 +96,7 @@ public class TagParseState extends Singl
{
currentState = TAGPARSESTATE_IN_TAG_NAME;
currentTagNameBuffer = new StringBuilder();
- if (!isHTMLWhitespace(thisChar))
+ if (!isWhitespace(thisChar))
currentTagNameBuffer.append(thisCharLower);
}
break;
@@ -130,7 +130,7 @@ public class TagParseState extends Singl
currentState = TAGPARSESTATE_IN_COMMENT;
break;
case TAGPARSESTATE_IN_TAG_NAME:
- if (isHTMLWhitespace(thisChar))
+ if (isWhitespace(thisChar))
{
if (currentTagNameBuffer.length() > 0)
{
@@ -178,7 +178,7 @@ public class TagParseState extends Singl
currentTagNameBuffer.append(thisCharLower);
break;
case TAGPARSESTATE_IN_ATTR_NAME:
- if (isHTMLWhitespace(thisChar))
+ if (isWhitespace(thisChar))
{
if (currentAttrNameBuffer.length() > 0)
{
@@ -253,7 +253,7 @@ public class TagParseState extends Singl
currentAttrName = null;
noteTag(currentTagName,currentAttrMap);
}
- else if (!isHTMLWhitespace(thisChar))
+ else if (!isWhitespace(thisChar))
{
currentAttrMap.put(currentAttrName,"");
currentState = TAGPARSESTATE_IN_ATTR_NAME;
@@ -267,7 +267,7 @@ public class TagParseState extends Singl
currentState = TAGPARSESTATE_IN_SINGLE_QUOTES_ATTR_VALUE;
else if (thisChar == '"')
currentState = TAGPARSESTATE_IN_DOUBLE_QUOTES_ATTR_VALUE;
- else if (!isHTMLWhitespace(thisChar))
+ else if (!isWhitespace(thisChar))
{
currentState = TAGPARSESTATE_IN_UNQUOTED_ATTR_VALUE;
currentValueBuffer.append(thisChar);
@@ -283,7 +283,7 @@ public class TagParseState extends Singl
}
break;
case TAGPARSESTATE_IN_END_TAG_NAME:
- if (isHTMLWhitespace(thisChar))
+ if (isWhitespace(thisChar))
{
if (currentTagNameBuffer != null && currentTagNameBuffer.length() > 0)
{
@@ -334,7 +334,7 @@ public class TagParseState extends Singl
currentValueBuffer.append(thisChar);
break;
case TAGPARSESTATE_IN_UNQUOTED_ATTR_VALUE:
- if (isHTMLWhitespace(thisChar))
+ if (isWhitespace(thisChar))
{
currentAttrMap.put(currentAttrName,attributeDecode(currentValueBuffer.toString()));
currentAttrName = null;
@@ -440,8 +440,8 @@ public class TagParseState extends Singl
return mapLookup.get(input);
}
- /** Is a character HTML whitespace? */
- protected static boolean isHTMLWhitespace(char x)
+ /** Is a character markup language whitespace? */
+ protected static boolean isWhitespace(char x)
{
return x <= ' ';
}